summaryrefslogtreecommitdiff
path: root/prism/prism.c
diff options
context:
space:
mode:
Diffstat (limited to 'prism/prism.c')
-rw-r--r--prism/prism.c13029
1 files changed, 6413 insertions, 6616 deletions
diff --git a/prism/prism.c b/prism/prism.c
index cac9832ab6..a2e04ed106 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -1,4 +1,90 @@
-#include "prism.h"
+#include "prism/compiler/accel.h"
+#include "prism/compiler/fallthrough.h"
+#include "prism/compiler/unused.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+#include "prism/internal/bit.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/char.h"
+#include "prism/internal/comments.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/isinf.h"
+#include "prism/internal/line_offset_list.h"
+#include "prism/internal/list.h"
+#include "prism/internal/magic_comments.h"
+#include "prism/internal/memchr.h"
+#include "prism/internal/node.h"
+#include "prism/internal/options.h"
+#include "prism/internal/parser.h"
+#include "prism/internal/regexp.h"
+#include "prism/internal/serialize.h"
+#include "prism/internal/source.h"
+#include "prism/internal/static_literals.h"
+#include "prism/internal/stringy.h"
+#include "prism/internal/strncasecmp.h"
+#include "prism/internal/strpbrk.h"
+#include "prism/internal/tokens.h"
+
+#include "prism/excludes.h"
+#include "prism/serialize.h"
+#include "prism/stream.h"
+#include "prism/version.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * When we are parsing using recursive descent, we want to protect against
+ * malicious payloads that could attempt to crash our parser. We do this by
+ * specifying a maximum depth to which we are allowed to recurse.
+ */
+#ifndef PRISM_DEPTH_MAXIMUM
+ #define PRISM_DEPTH_MAXIMUM 10000
+#endif
+
+/**
+ * A simple utility macro to concatenate two tokens together, necessary when one
+ * of the tokens is itself a macro.
+ */
+#define PM_CONCATENATE(left, right) left ## right
+
+/**
+ * We want to be able to use static assertions, but they weren't standardized
+ * until C11. Use the native keyword under C11+ (or when a libc supplies a macro
+ * for it); otherwise polyfill with a typedef whose array size goes negative.
+ */
+#if defined(_Static_assert) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+# define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
+#else
+# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
+#endif
+
+/**
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
+ * branch prediction.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+ /** The compiler should predict that this branch will be taken. */
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
+
+ /** The compiler should predict that this branch will not be taken. */
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+ /** Void because this platform does not support branch prediction hints. */
+ #define PRISM_LIKELY(x) (x)
+
+ /** Void because this platform does not support branch prediction hints. */
+ #define PRISM_UNLIKELY(x) (x)
+#endif
/**
* The prism version and the serialization format.
@@ -19,6 +105,51 @@ pm_version(void) {
#define MAX(a,b) (((a)>(b))?(a):(b))
/******************************************************************************/
+/* Helpful AST-related macros */
+/******************************************************************************/
+
+#define U32(value_) ((uint32_t) (value_))
+
+#define FL PM_NODE_FLAGS
+#define UP PM_NODE_UPCAST
+
+#define PM_LOCATION_START(location_) ((location_)->start)
+#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)
+
+#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
+#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
+#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
+#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)
+
+#define PM_NODE_START(node_) (UP(node_)->location.start)
+#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
+#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
+#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))
+
+#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
+#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
+
+#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
+#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
+#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
+#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
+#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))
+
+#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
+#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
+#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
+#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location
+
+#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
+#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
+#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
+#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))
+
+#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
+#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
+#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
+
+/******************************************************************************/
/* Lex mode manipulations */
/******************************************************************************/
@@ -26,7 +157,7 @@ pm_version(void) {
* Returns the incrementor character that should be used to increment the
* nesting count if one is possible.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
lex_mode_incrementor(const uint8_t start) {
switch (start) {
case '(':
@@ -43,7 +174,7 @@ lex_mode_incrementor(const uint8_t start) {
* Returns the matching character that should be used to terminate a list
* beginning with the given character.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
lex_mode_terminator(const uint8_t start) {
switch (start) {
case '(':
@@ -85,7 +216,7 @@ lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
/**
* Push on a new list lex mode.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
uint8_t incrementor = lex_mode_incrementor(delimiter);
uint8_t terminator = lex_mode_terminator(delimiter);
@@ -103,7 +234,8 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
// These are the places where we need to split up the content of the list.
// We'll use strpbrk to find the first of these characters.
uint8_t *breakpoints = lex_mode.as.list.breakpoints;
- memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
+ memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+ memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1);
size_t index = 7;
// Now we'll add the terminator to the list of breakpoints. If the
@@ -132,7 +264,7 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
* called when we're at the end of the file. We want the parser to be able to
* perform its normal error tolerance.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_list_eof(pm_parser_t *parser) {
return lex_mode_push_list(parser, false, '\0');
}
@@ -140,7 +272,7 @@ lex_mode_push_list_eof(pm_parser_t *parser) {
/**
* Push on a new regexp lex mode.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
pm_lex_mode_t lex_mode = {
.mode = PM_LEX_REGEXP,
@@ -155,7 +287,8 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
// regular expression. We'll use strpbrk to find the first of these
// characters.
uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
- memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+ memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+ memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1);
size_t index = 4;
// First we'll add the terminator.
@@ -175,7 +308,7 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
/**
* Push on a new string lex mode.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
pm_lex_mode_t lex_mode = {
.mode = PM_LEX_STRING,
@@ -191,7 +324,8 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
// These are the places where we need to split up the content of the
// string. We'll use strpbrk to find the first of these characters.
uint8_t *breakpoints = lex_mode.as.string.breakpoints;
- memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
+ memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+ memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1);
size_t index = 3;
// Now add in the terminator. If the terminator is not already a NULL byte,
@@ -221,7 +355,7 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
* called when we're at the end of the file. We want the parser to be able to
* perform its normal error tolerance.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_string_eof(pm_parser_t *parser) {
return lex_mode_push_string(parser, false, false, '\0', '\0');
}
@@ -241,7 +375,7 @@ lex_mode_pop(pm_parser_t *parser) {
} else {
parser->lex_modes.index--;
pm_lex_mode_t *prev = parser->lex_modes.current->prev;
- xfree(parser->lex_modes.current);
+ xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
parser->lex_modes.current = prev;
}
}
@@ -249,7 +383,7 @@ lex_mode_pop(pm_parser_t *parser) {
/**
* This is the equivalent of IS_lex_state is CRuby.
*/
-static inline bool
+static PRISM_INLINE bool
lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
return parser->lex_state & state;
}
@@ -260,7 +394,7 @@ typedef enum {
PM_IGNORED_NEWLINE_PATTERN
} pm_ignored_newline_type_t;
-static inline pm_ignored_newline_type_t
+static PRISM_INLINE pm_ignored_newline_type_t
lex_state_ignored_p(pm_parser_t *parser) {
bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
@@ -273,17 +407,17 @@ lex_state_ignored_p(pm_parser_t *parser) {
}
}
-static inline bool
+static PRISM_INLINE bool
lex_state_beg_p(pm_parser_t *parser) {
return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
}
-static inline bool
+static PRISM_INLINE bool
lex_state_arg_p(pm_parser_t *parser) {
return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
}
-static inline bool
+static PRISM_INLINE bool
lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
if (parser->current.end >= parser->end) {
return false;
@@ -291,7 +425,7 @@ lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
}
-static inline bool
+static PRISM_INLINE bool
lex_state_end_p(pm_parser_t *parser) {
return lex_state_p(parser, PM_LEX_STATE_END_ANY);
}
@@ -299,7 +433,7 @@ lex_state_end_p(pm_parser_t *parser) {
/**
* This is the equivalent of IS_AFTER_OPERATOR in CRuby.
*/
-static inline bool
+static PRISM_INLINE bool
lex_state_operator_p(pm_parser_t *parser) {
return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
}
@@ -308,7 +442,7 @@ lex_state_operator_p(pm_parser_t *parser) {
* Set the state of the lexer. This is defined as a function to be able to put a
* breakpoint in it.
*/
-static inline void
+static PRISM_INLINE void
lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
parser->lex_state = state;
}
@@ -322,7 +456,7 @@ lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
#endif
#if PM_DEBUG_LOGGING
-PRISM_ATTRIBUTE_UNUSED static void
+PRISM_UNUSED static void
debug_state(pm_parser_t *parser) {
fprintf(stderr, "STATE: ");
bool first = true;
@@ -403,140 +537,134 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
/**
* Append an error to the list of errors on the parser.
*/
-static inline void
-pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
- pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
+static PRISM_INLINE void
+pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+ pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id);
}
/**
- * Append an error to the list of errors on the parser using a format string.
+ * Append an error to the list of errors on the parser using the location of the
+ * given token.
*/
-#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
- pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
+static PRISM_INLINE void
+pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
+ pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
+}
/**
* Append an error to the list of errors on the parser using the location of the
* current token.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
- pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
+ pm_parser_err_token(parser, &parser->current, diag_id);
}
/**
- * Append an error to the list of errors on the parser using the given location
- * using a format string.
+ * Append an error to the list of errors on the parser using the location of the
+ * previous token.
*/
-#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
- PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
+static PRISM_INLINE void
+pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+ pm_parser_err_token(parser, &parser->previous, diag_id);
+}
/**
* Append an error to the list of errors on the parser using the location of the
* given node.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
- pm_parser_err(parser, node->location.start, node->location.end, diag_id);
+ pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
}
/**
- * Append an error to the list of errors on the parser using the location of the
- * given node and a format string.
- */
-#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
- PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
-
-/**
- * Append an error to the list of errors on the parser using the location of the
- * given node and a format string, and add on the content of the node.
+ * Append an error to the list of errors on the parser using a format string.
*/
-#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
- PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
+#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
+ pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)
/**
* Append an error to the list of errors on the parser using the location of the
- * previous token.
+ * given node and a format string.
*/
-static inline void
-pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
- pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
-}
+#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
+ PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
/**
* Append an error to the list of errors on the parser using the location of the
- * given token.
+ * given node and a format string, and add on the content of the node.
*/
-static inline void
-pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
- pm_parser_err(parser, token->start, token->end, diag_id);
-}
+#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
+ PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))
/**
* Append an error to the list of errors on the parser using the location of the
* given token and a format string.
*/
-#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
- PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \
+ PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__)
/**
* Append an error to the list of errors on the parser using the location of the
* given token and a format string, and add on the content of the token.
*/
-#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
- PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
+ PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
/**
* Append a warning to the list of warnings on the parser.
*/
-static inline void
-pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
- pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
+static PRISM_INLINE void
+pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+ pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id);
}
/**
* Append a warning to the list of warnings on the parser using the location of
* the given token.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
- pm_parser_warn(parser, token->start, token->end, diag_id);
+ pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
}
/**
* Append a warning to the list of warnings on the parser using the location of
* the given node.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
- pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
+ pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
}
/**
- * Append a warning to the list of warnings on the parser using a format string.
+ * Append a warning to the list of warnings on the parser using a format string
+ * and the given location.
*/
-#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
- pm_diagnostic_list_append_format(&parser->warning_list, start, end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
+ pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)
/**
* Append a warning to the list of warnings on the parser using the location of
* the given token and a format string.
*/
-#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
- PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
+ PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)
/**
* Append a warning to the list of warnings on the parser using the location of
* the given token and a format string, and add on the content of the token.
*/
-#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
- PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
+ PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
/**
* Append a warning to the list of warnings on the parser using the location of
* the given node and a format string.
*/
-#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
- PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
+ PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
/**
* Add an error for an expected heredoc terminator. This is a special function
@@ -547,8 +675,8 @@ static void
pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
PM_PARSER_ERR_FORMAT(
parser,
- ident_start,
- ident_start + ident_length,
+ U32(ident_start - parser->start),
+ U32(ident_length),
PM_ERR_HEREDOC_TERM,
(int) ident_length,
(const char *) ident_start
@@ -708,7 +836,7 @@ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t
/**
* Get the current state of constant shareability.
*/
-static inline pm_shareable_constant_value_t
+static PRISM_INLINE pm_shareable_constant_value_t
pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
return parser->current_scope->shareable_constant;
}
@@ -733,12 +861,12 @@ pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constan
/**
* The point at which the set of locals switches from being a list to a hash.
*/
-#define PM_LOCALS_HASH_THRESHOLD 9
+#define PM_LOCALS_HASH_THRESHOLD 5
static void
pm_locals_free(pm_locals_t *locals) {
if (locals->capacity > 0) {
- xfree(locals->locals);
+ xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
}
}
@@ -810,11 +938,13 @@ pm_locals_resize(pm_locals_t *locals) {
* @return True if the local was added, and false if the local already exists.
*/
static bool
-pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
if (locals->size >= (locals->capacity / 4 * 3)) {
pm_locals_resize(locals);
}
+ locals->bloom |= (1u << (name & 31));
+
if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
for (uint32_t index = 0; index < locals->capacity; index++) {
pm_local_t *local = &locals->locals[index];
@@ -822,7 +952,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
if (local->name == PM_CONSTANT_ID_UNSET) {
*local = (pm_local_t) {
.name = name,
- .location = { .start = start, .end = end },
+ .location = { .start = start, .length = length },
.index = locals->size++,
.reads = reads,
.hash = 0
@@ -843,7 +973,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
if (local->name == PM_CONSTANT_ID_UNSET) {
*local = (pm_local_t) {
.name = name,
- .location = { .start = start, .end = end },
+ .location = { .start = start, .length = length },
.index = locals->size++,
.reads = reads,
.hash = initial_hash
@@ -867,6 +997,8 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
*/
static uint32_t
pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
+ if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX;
+
if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
for (uint32_t index = 0; index < locals->size; index++) {
pm_local_t *local = &locals->locals[index];
@@ -943,8 +1075,8 @@ pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
* written but not read in certain contexts.
*/
static void
-pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
- pm_constant_id_list_init_capacity(list, locals->size);
+pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
+ pm_constant_id_list_init_capacity(parser->arena, list, locals->size);
// If we're still below the threshold for switching to a hash, then we only
// need to loop over the locals until we hit the size because the locals are
@@ -961,14 +1093,14 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
if (local->name != PM_CONSTANT_ID_UNSET) {
pm_constant_id_list_insert(list, (size_t) local->index, local->name);
- if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
+ if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
if (constant->length >= 1 && *constant->start != '_') {
PM_PARSER_WARN_FORMAT(
parser,
local->location.start,
- local->location.end,
+ local->location.length,
PM_WARN_UNUSED_LOCAL_VARIABLE,
(int) constant->length,
(const char *) constant->start
@@ -986,43 +1118,53 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
/**
* Retrieve the constant pool id for the given location.
*/
-static inline pm_constant_id_t
-pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
+static PRISM_INLINE pm_constant_id_t
+pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+ /* Fast path: if this is the same token as the last lookup (same pointer
+ * range), return the cached result. */
+ if (start == parser->constant_cache.start && end == parser->constant_cache.end) {
+ return parser->constant_cache.id;
+ }
+
+ pm_constant_id_t id = pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start));
+
+ parser->constant_cache.start = start;
+ parser->constant_cache.end = end;
+ parser->constant_cache.id = id;
+
+ return id;
}
/**
* Retrieve the constant pool id for the given string.
*/
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
- return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
+ return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length);
}
/**
* Retrieve the constant pool id for the given static literal C string.
*/
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
- return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
+ return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length);
}
/**
* Retrieve the constant pool id for the given token.
*/
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
- return pm_parser_constant_id_location(parser, token->start, token->end);
+ return pm_parser_constant_id_raw(parser, token->start, token->end);
}
/**
- * Retrieve the constant pool id for the given token. If the token is not
- * provided, then return 0.
+ * This macro allows you to define a case statement for all of the nodes that
+ * may result in a void value.
*/
-static inline pm_constant_id_t
-pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
- return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
-}
+#define PM_CASE_VOID_VALUE PM_RETURN_NODE: case PM_BREAK_NODE: case PM_NEXT_NODE: \
+ case PM_REDO_NODE: case PM_RETRY_NODE: case PM_MATCH_REQUIRED_NODE
/**
* Check whether or not the given node is value expression.
@@ -1035,12 +1177,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
while (node != NULL) {
switch (PM_NODE_TYPE(node)) {
- case PM_RETURN_NODE:
- case PM_BREAK_NODE:
- case PM_NEXT_NODE:
- case PM_REDO_NODE:
- case PM_RETRY_NODE:
- case PM_MATCH_REQUIRED_NODE:
+ case PM_CASE_VOID_VALUE:
return void_node != NULL ? void_node : node;
case PM_MATCH_PREDICATE_NODE:
return NULL;
@@ -1049,57 +1186,128 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
if (cast->ensure_clause != NULL) {
if (cast->rescue_clause != NULL) {
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
if (vn != NULL) return vn;
}
if (cast->statements != NULL) {
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
if (vn != NULL) return vn;
}
- node = (pm_node_t *) cast->ensure_clause;
+ node = UP(cast->ensure_clause);
} else if (cast->rescue_clause != NULL) {
- if (cast->statements == NULL) return NULL;
+ // https://bugs.ruby-lang.org/issues/21669
+ if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+ if (cast->statements == NULL) return NULL;
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
- if (vn == NULL) return NULL;
- if (void_node == NULL) void_node = vn;
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+ if (vn == NULL) return NULL;
+ if (void_node == NULL) void_node = vn;
+ }
for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
+
if (vn == NULL) {
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+ return NULL;
+ }
void_node = NULL;
break;
}
- if (void_node == NULL) {
- void_node = vn;
- }
}
if (cast->else_clause != NULL) {
- node = (pm_node_t *) cast->else_clause;
+ node = UP(cast->else_clause);
+
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+ pm_node_t *vn = pm_check_value_expression(parser, node);
+ if (vn != NULL) return vn;
+ }
} else {
return void_node;
}
} else {
- node = (pm_node_t *) cast->statements;
+ node = UP(cast->statements);
}
break;
}
+ case PM_CASE_NODE: {
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+ return NULL;
+ }
+
+ pm_case_node_t *cast = (pm_case_node_t *) node;
+ if (cast->else_clause == NULL) return NULL;
+
+ pm_node_t *condition;
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
+ assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
+
+ pm_when_node_t *cast = (pm_when_node_t *) condition;
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+ if (vn == NULL) return NULL;
+ if (void_node == NULL) void_node = vn;
+ }
+
+ node = UP(cast->else_clause);
+ break;
+ }
+ case PM_CASE_MATCH_NODE: {
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+ return NULL;
+ }
+
+ pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
+ if (cast->else_clause == NULL) return NULL;
+
+ pm_node_t *condition;
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
+ assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
+
+ pm_in_node_t *cast = (pm_in_node_t *) condition;
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+ if (vn == NULL) return NULL;
+ if (void_node == NULL) void_node = vn;
+ }
+
+ node = UP(cast->else_clause);
+ break;
+ }
case PM_ENSURE_NODE: {
pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
- node = (pm_node_t *) cast->statements;
+ node = UP(cast->statements);
break;
}
case PM_PARENTHESES_NODE: {
pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
- node = (pm_node_t *) cast->body;
+ node = UP(cast->body);
break;
}
case PM_STATEMENTS_NODE: {
pm_statements_node_t *cast = (pm_statements_node_t *) node;
+
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+ pm_node_t *body_part;
+ PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
+ switch (PM_NODE_TYPE(body_part)) {
+ case PM_CASE_VOID_VALUE:
+ if (void_node == NULL) {
+ void_node = body_part;
+ }
+ return void_node;
+ default: break;
+ }
+ }
+ }
+
node = cast->body.nodes[cast->body.size - 1];
break;
}
@@ -1108,7 +1316,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
if (cast->statements == NULL || cast->subsequent == NULL) {
return NULL;
}
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
if (vn == NULL) {
return NULL;
}
@@ -1123,19 +1331,19 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
if (cast->statements == NULL || cast->else_clause == NULL) {
return NULL;
}
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
if (vn == NULL) {
return NULL;
}
if (void_node == NULL) {
void_node = vn;
}
- node = (pm_node_t *) cast->else_clause;
+ node = UP(cast->else_clause);
break;
}
case PM_ELSE_NODE: {
pm_else_node_t *cast = (pm_else_node_t *) node;
- node = (pm_node_t *) cast->statements;
+ node = UP(cast->statements);
break;
}
case PM_AND_NODE: {
@@ -1165,7 +1373,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
return NULL;
}
-static inline void
+static PRISM_INLINE void
pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
pm_node_t *void_node = pm_check_value_expression(parser, node);
if (void_node != NULL) {
@@ -1193,7 +1401,7 @@ pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
break;
case PM_CALL_NODE: {
const pm_call_node_t *cast = (const pm_call_node_t *) node;
- if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
+ if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
switch (message->length) {
@@ -1406,10 +1614,10 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
* Add a warning to the parser if the value that is being written inside of a
* predicate to a conditional is a literal.
*/
-static inline void
+static PRISM_INLINE void
pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
if (pm_conditional_predicate_warn_write_literal_p(node)) {
- pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
+ pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
}
}
@@ -1547,26 +1755,6 @@ pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_pr
}
/**
- * In a lot of places in the tree you can have tokens that are not provided but
- * that do not cause an error. For example, this happens in a method call
- * without parentheses. In these cases we set the token to the "not provided" type.
- * For example:
- *
- * pm_token_t token = not_provided(parser);
- */
-static inline pm_token_t
-not_provided(pm_parser_t *parser) {
- return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
-}
-
-#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
-#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
-#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
-#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
-#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
-#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
-
-/**
* This is a special out parameter to the parse_arguments_list function that
* includes opening and closing parentheses in addition to the arguments since
* it's so common. It is handy to use when passing argument information to one
@@ -1592,22 +1780,29 @@ typedef struct {
/**
* Retrieve the end location of a `pm_arguments_t` object.
*/
-static inline const uint8_t *
+static PRISM_INLINE const pm_location_t *
pm_arguments_end(pm_arguments_t *arguments) {
if (arguments->block != NULL) {
- const uint8_t *end = arguments->block->location.end;
- if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
- end = arguments->closing_loc.end;
+ uint32_t end = PM_NODE_END(arguments->block);
+
+ if (arguments->closing_loc.length > 0) {
+ uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
+ if (arguments_end > end) {
+ return &arguments->closing_loc;
+ }
}
- return end;
+ return &arguments->block->location;
}
- if (arguments->closing_loc.start != NULL) {
- return arguments->closing_loc.end;
+ if (arguments->closing_loc.length > 0) {
+ return &arguments->closing_loc;
}
if (arguments->arguments != NULL) {
- return arguments->arguments->base.location.end;
+ return &arguments->arguments->base.location;
+ }
+ if (arguments->opening_loc.length > 0) {
+ return &arguments->opening_loc;
}
- return arguments->closing_loc.end;
+ return NULL;
}
/**
@@ -1618,7 +1813,7 @@ static void
pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
// First, check that we have arguments and that we don't have a closing
// location for them.
- if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
+ if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
return;
}
@@ -1635,7 +1830,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
// If we didn't hit a case before this check, then at this point we need to
// add a syntax error.
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
}
/******************************************************************************/
@@ -1648,7 +1843,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
* reason we have the encoding_changed boolean to check if we need to go through
* the function pointer or can just directly use the UTF-8 functions.
*/
-static inline size_t
+static PRISM_INLINE size_t
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
if (n <= 0) return 0;
@@ -1675,7 +1870,7 @@ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t
* Similar to char_is_identifier but this function assumes that the encoding
* has not been changed.
*/
-static inline size_t
+static PRISM_INLINE size_t
char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
if (n <= 0) {
return 0;
@@ -1687,11 +1882,189 @@ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
}
/**
+ * Scan forward through ASCII identifier characters (a-z, A-Z, 0-9, _) using
+ * wide operations. Returns the number of leading ASCII identifier bytes.
+ * Callers must handle any remaining bytes (short tail or non-ASCII/UTF-8)
+ * with a byte-at-a-time loop.
+ *
+ * One of up to three optimized implementations is selected at compile time,
+ * with a no-op fallback for unsupported platforms:
+ * 1. NEON — processes 16 bytes per iteration on aarch64.
+ * 2. SSSE3 — processes 16 bytes per iteration on x86-64.
+ * 3. SWAR — little-endian fallback, processes 8 bytes per iteration.
+ */
+
+#if defined(PRISM_HAS_NEON)
+#include <arm_neon.h>
+
+/**
+ * NEON implementation of scan_identifier_ascii: classifies 16 bytes per
+ * iteration using two nibble-indexed table lookups.
+ *
+ * @param start The first byte to examine.
+ * @param end One past the last byte that may be read.
+ * @return The number of leading bytes in [a-zA-Z0-9_]. Only whole 16-byte
+ *     chunks are examined; a shorter tail is left for the caller.
+ */
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    const uint8_t *cursor = start;
+
+    // Nibble-based lookup tables for classifying [a-zA-Z0-9_].
+    // Each high nibble is assigned a unique bit; the low nibble table
+    // contains the OR of bits for all high nibbles that have an
+    // identifier character at that low nibble position. A byte is an
+    // identifier character iff (low_lut[lo] & high_lut[hi]) != 0.
+    static const uint8_t low_lut_data[16] = {
+        0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
+        0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E
+    };
+    static const uint8_t high_lut_data[16] = {
+        0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+    const uint8x16_t low_lut = vld1q_u8(low_lut_data);
+    const uint8x16_t high_lut = vld1q_u8(high_lut_data);
+    const uint8x16_t mask_0f = vdupq_n_u8(0x0F);
+
+    // Compare the remaining distance rather than forming cursor + 16: that
+    // pointer would lie beyond one-past-the-end of the buffer (undefined
+    // behavior) whenever fewer than 16 bytes remain.
+    while (end - cursor >= 16) {
+        uint8x16_t v = vld1q_u8(cursor);
+
+        uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
+        uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
+        uint8x16_t ident = vandq_u8(lo_class, hi_class);
+
+        // Fast check: if the per-byte minimum is nonzero, every byte matched.
+        if (vminvq_u8(ident) != 0) {
+            cursor += 16;
+            continue;
+        }
+
+        // Find the first non-identifier byte (zero in ident). is_zero holds
+        // 0xFF in each such byte, so the lowest set bit of a 64-bit half
+        // divided by 8 is the byte index within that half.
+        uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0));
+        uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0);
+
+        if (lo != 0) {
+            cursor += pm_ctzll(lo) / 8;
+        } else {
+            uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1);
+            cursor += 8 + pm_ctzll(hi) / 8;
+        }
+
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+#elif defined(PRISM_HAS_SSSE3)
+#include <tmmintrin.h>
+
+/**
+ * SSSE3 implementation of scan_identifier_ascii: classifies 16 bytes per
+ * iteration using unsigned saturating range checks.
+ *
+ * @param start The first byte to examine.
+ * @param end One past the last byte that may be read.
+ * @return The number of leading bytes in [a-zA-Z0-9_]. Only whole 16-byte
+ *     chunks are examined; a shorter tail is left for the caller.
+ */
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    const uint8_t *cursor = start;
+
+    // Compare the remaining distance rather than forming cursor + 16: that
+    // pointer would lie beyond one-past-the-end of the buffer (undefined
+    // behavior) whenever fewer than 16 bytes remain.
+    while (end - cursor >= 16) {
+        __m128i v = _mm_loadu_si128((const __m128i *) cursor);
+        __m128i zero = _mm_setzero_si128();
+
+        // Unsigned range check via saturating subtraction:
+        // byte >= lo ⟺ saturate(lo - byte) == 0
+        // byte <= hi ⟺ saturate(byte - hi) == 0
+
+        // Fold case: OR with 0x20 maps A-Z to a-z.
+        __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20));
+        __m128i letter = _mm_and_si128(
+            _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero),
+            _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero));
+
+        __m128i digit = _mm_and_si128(
+            _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero),
+            _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero));
+
+        __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F));
+
+        // One mask bit per byte: set when that byte is an identifier character.
+        __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore);
+        int mask = _mm_movemask_epi8(ident);
+
+        if (mask == 0xFFFF) {
+            cursor += 16;
+            continue;
+        }
+
+        // The lowest clear bit of mask is the index of the first
+        // non-identifier byte; the inverted mask is nonzero here.
+        cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF));
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+// The SWAR path uses pm_ctzll to find the first non-matching byte within a
+// word, which only yields the correct byte index on little-endian targets.
+// We gate on a positive little-endian check so that unknown-endianness
+// platforms safely fall through to the no-op fallback.
+#elif defined(PRISM_HAS_SWAR)
+
+/**
+ * Portable SWAR fallback — processes 8 bytes per iteration.
+ *
+ * Every per-byte test below is arranged so that no byte can borrow from its
+ * neighbour: either the high bit of the byte is pre-set before subtracting
+ * ((byte | 0x80) - k with k <= 0x7F never drops below 1), or the minuend is a
+ * constant >= 0x80 and the subtrahend is an ASCII byte (< 0x80). After each
+ * test, bit 7 of a byte is set if and only if that byte passed.
+ *
+ * @param start The first byte to examine.
+ * @param end One past the last byte that may be read.
+ * @return The number of leading bytes in [a-zA-Z0-9_]. Only whole 8-byte
+ *     words are examined, and scanning stops in front of the first word that
+ *     contains a non-ASCII byte; the caller handles whatever remains.
+ */
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    static const uint64_t ones = 0x0101010101010101ULL;
+    static const uint64_t highs = 0x8080808080808080ULL;
+    const uint8_t *cursor = start;
+
+    // Compare the remaining distance rather than forming cursor + 8: that
+    // pointer would lie beyond one-past-the-end of the buffer (undefined
+    // behavior) whenever fewer than 8 bytes remain.
+    while (end - cursor >= 8) {
+        uint64_t word;
+        memcpy(&word, cursor, 8);
+
+        // Bail on any non-ASCII byte.
+        if (word & highs) break;
+
+        uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs;
+
+        // Fold upper- and lowercase together by forcing bit 5 (OR 0x20),
+        // then check the lowercase range once. A-Z maps to a-z; the
+        // only non-letter byte that could alias into [0x61,0x7A] is one
+        // whose original value was in [0x41,0x5A] — which is exactly
+        // the uppercase letters we want to match.
+        uint64_t lowered = word | (ones * 0x20);
+        uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs;
+
+        // Underscore: XOR with 0x5F turns matching bytes into 0x00. The
+        // classic "(x - ones) & ~x & highs" zero-byte idiom is only exact for
+        // the lowest zero byte: the borrow out of a matching byte makes a
+        // following 0x01 byte ('^' before the XOR) look like a match too.
+        // Pre-setting bit 7 keeps every byte >= 0x7F after the subtraction,
+        // so no borrow crosses a byte boundary and bit 7 ends up clear only
+        // for bytes that were exactly zero.
+        uint64_t xor_us = word ^ (ones * 0x5F);
+        uint64_t underscore = ~((xor_us | highs) - ones) & highs;
+
+        uint64_t ident = digit | letter | underscore;
+
+        if (ident == highs) {
+            cursor += 8;
+            continue;
+        }
+
+        // Find the first non-identifier byte. On little-endian the first
+        // byte sits in the least-significant position.
+        uint64_t not_ident = ~ident & highs;
+        cursor += pm_ctzll(not_ident) / 8;
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+#else
+
+// No-op fallback for big-endian or other unsupported platforms.
+// The caller's byte-at-a-time loop handles everything.
+#define scan_identifier_ascii(start, end) ((size_t) 0)
+
+#endif
+
+/**
* Like the above, this function is also used extremely frequently to lex all of
* the identifiers in a source file once the first character has been found. So
* it's important that it be as fast as possible.
*/
-static inline size_t
+static PRISM_INLINE size_t
char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
if (n <= 0) {
return 0;
@@ -1729,7 +2102,7 @@ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
#undef BIT
#undef PUNCT
-static inline bool
+static PRISM_INLINE bool
char_is_global_name_punctuation(const uint8_t b) {
const unsigned int i = (const unsigned int) b;
if (i <= 0x20 || 0x7e < i) return false;
@@ -1737,7 +2110,7 @@ char_is_global_name_punctuation(const uint8_t b) {
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
}
-static inline bool
+static PRISM_INLINE bool
token_is_setter_name(pm_token_t *token) {
return (
(token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
@@ -1825,7 +2198,7 @@ pm_local_is_keyword(const char *source, size_t length) {
/**
* Set the given flag on the given node.
*/
-static inline void
+static PRISM_INLINE void
pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
node->flags |= flag;
}
@@ -1833,7 +2206,7 @@ pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
/**
* Remove the given flag from the given node.
*/
-static inline void
+static PRISM_INLINE void
pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
node->flags &= (pm_node_flags_t) ~flag;
}
@@ -1841,7 +2214,7 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
/**
* Set the repeated parameter flag on the given node.
*/
-static inline void
+static PRISM_INLINE void
pm_node_flag_set_repeated_parameter(pm_node_t *node) {
assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
@@ -1869,7 +2242,7 @@ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
/**
* Parse out the options for a regular expression.
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
pm_node_flags_t flags = 0;
@@ -1895,9 +2268,9 @@ pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closin
size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
if (unknown_flags_length != 0) {
const char *word = unknown_flags_length >= 2 ? "options" : "option";
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
}
- pm_buffer_free(&unknown_flags);
+ pm_buffer_cleanup(&unknown_flags);
}
return flags;
@@ -1915,36 +2288,45 @@ static size_t
pm_statements_node_body_length(pm_statements_node_t *node);
/**
- * This function is here to allow us a place to extend in the future when we
- * implement our own arena allocation.
+ * Move an integer's values array into the arena. If the integer has heap-
+ * allocated values, copy them to the arena and free the original.
*/
-static inline void *
-pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
- void *memory = xcalloc(1, size);
- if (memory == NULL) {
- fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
- abort();
+static PRISM_INLINE void
+pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
+ if (integer->values != NULL) {
+ size_t byte_size = integer->length * sizeof(uint32_t);
+ uint32_t *old_values = integer->values;
+ integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
+ xfree(old_values);
}
- return memory;
}
-#define PM_NODE_ALLOC(parser, type) (type *) pm_node_alloc(parser, sizeof(type))
-#define PM_NODE_IDENTIFY(parser) (++parser->node_id)
-
/**
- * Allocate a new MissingNode node.
+ * Allocate a new ErrorRecoveryNode node with no unexpected child.
*/
-static pm_missing_node_t *
-pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
-
- *node = (pm_missing_node_t) {{
- .type = PM_MISSING_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = start, .end = end }
- }};
+static pm_error_recovery_node_t *
+pm_error_recovery_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
+ return pm_error_recovery_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = length }),
+ NULL
+ );
+}
- return node;
+/**
+ * Allocate a new ErrorRecoveryNode node wrapping an unexpected child node.
+ */
+static pm_error_recovery_node_t *
+pm_error_recovery_node_create_unexpected(pm_parser_t *parser, pm_node_t *unexpected) {
+ return pm_error_recovery_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ unexpected->location,
+ unexpected
+ );
}
/**
@@ -1953,23 +2335,16 @@ pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t
static pm_alias_global_variable_node_t *
pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
- pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
-
- *node = (pm_alias_global_variable_node_t) {
- {
- .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = old_name->location.end
- },
- },
- .new_name = new_name,
- .old_name = old_name,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
- };
- return node;
+ return pm_alias_global_variable_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
+ new_name,
+ old_name,
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -1978,23 +2353,16 @@ pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyw
static pm_alias_method_node_t *
pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
- pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
- *node = (pm_alias_method_node_t) {
- {
- .type = PM_ALIAS_METHOD_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = old_name->location.end
- },
- },
- .new_name = new_name,
- .old_name = old_name,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
- };
-
- return node;
+ return pm_alias_method_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
+ new_name,
+ old_name,
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -2002,23 +2370,15 @@ pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_n
*/
static pm_alternation_pattern_node_t *
pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
- pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
-
- *node = (pm_alternation_pattern_node_t) {
- {
- .type = PM_ALTERNATION_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = left->location.start,
- .end = right->location.end
- },
- },
- .left = left,
- .right = right,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_alternation_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(left, right),
+ left,
+ right,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2028,23 +2388,15 @@ static pm_and_node_t *
pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
pm_assert_value_expression(parser, left);
- pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
-
- *node = (pm_and_node_t) {
- {
- .type = PM_AND_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = left->location.start,
- .end = right->location.end
- },
- },
- .left = left,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .right = right
- };
-
- return node;
+ return pm_and_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(left, right),
+ left,
+ right,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2052,18 +2404,13 @@ pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *opera
*/
static pm_arguments_node_t *
pm_arguments_node_create(pm_parser_t *parser) {
- pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
-
- *node = (pm_arguments_node_t) {
- {
- .type = PM_ARGUMENTS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .arguments = { 0 }
- };
-
- return node;
+ return pm_arguments_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 })
+ );
}
/**
@@ -2078,19 +2425,22 @@ pm_arguments_node_size(pm_arguments_node_t *node) {
* Append an argument to an arguments node.
*/
static void
-pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
+pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
if (pm_arguments_node_size(node) == 0) {
- node->base.location.start = argument->location.start;
+ PM_NODE_START_SET_NODE(node, argument);
}
- node->base.location.end = argument->location.end;
- pm_node_list_append(&node->arguments, argument);
+ if (PM_NODE_END(node) < PM_NODE_END(argument)) {
+ PM_NODE_LENGTH_SET_NODE(node, argument);
+ }
+
+ pm_node_list_append(arena, &node->arguments, argument);
if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
- pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
+ pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
} else {
- pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
+ pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
}
}
}
@@ -2100,43 +2450,49 @@ pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argumen
*/
static pm_array_node_t *
pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
- pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
-
- *node = (pm_array_node_t) {
- {
- .type = PM_ARRAY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(opening)
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .elements = { 0 }
- };
-
- return node;
+ if (opening == NULL) {
+ return pm_array_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
+ } else {
+ return pm_array_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, opening)
+ );
+ }
}
/**
* Append an argument to an array node.
*/
-static inline void
-pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
- if (!node->elements.size && !node->opening_loc.start) {
- node->base.location.start = element->location.start;
+static PRISM_INLINE void
+pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
+ if (!node->elements.size && !node->opening_loc.length) {
+ PM_NODE_START_SET_NODE(node, element);
}
- pm_node_list_append(&node->elements, element);
- node->base.location.end = element->location.end;
+ pm_node_list_append(arena, &node->elements, element);
+ PM_NODE_LENGTH_SET_NODE(node, element);
// If the element is not a static literal, then the array is not a static
// literal. Turn that flag off.
if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
- pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
}
if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
- pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
+ pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
}
}
@@ -2144,10 +2500,10 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
* Set the closing token and end location of an array node.
*/
static void
-pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
- assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
- node->base.location.end = closing->end;
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
+ assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+ node->closing_loc = TOK2LOC(parser, closing);
}
/**
@@ -2156,24 +2512,18 @@ pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
- pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
- *node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = nodes->nodes[0]->location.start,
- .end = nodes->nodes[nodes->size - 1]->location.end
- },
- },
- .constant = NULL,
- .rest = NULL,
- .requireds = { 0 },
- .posts = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
+ pm_array_pattern_node_t *node = pm_array_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
// For now we're going to just copy over each pointer manually. This could be
// much more efficient, as we could instead resize the node list.
@@ -2185,9 +2535,9 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
node->rest = child;
found_rest = true;
} else if (found_rest) {
- pm_node_list_append(&node->posts, child);
+ pm_node_list_append(parser->arena, &node->posts, child);
} else {
- pm_node_list_append(&node->requireds, child);
+ pm_node_list_append(parser->arena, &node->requireds, child);
}
}
@@ -2199,23 +2549,18 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
- pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
- *node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = rest->location,
- },
- .constant = NULL,
- .rest = rest,
- .requireds = { 0 },
- .posts = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ return pm_array_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE(rest),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ rest,
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -2224,26 +2569,18 @@ pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
- pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
- *node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = constant->location.start,
- .end = closing->end
- },
- },
- .constant = constant,
- .rest = NULL,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .requireds = { 0 },
- .posts = { 0 }
- };
-
- return node;
+ return pm_array_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
+ constant,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -2252,31 +2589,23 @@ pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant,
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
- *node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
- .constant = NULL,
- .rest = NULL,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .requireds = { 0 },
- .posts = { 0 }
- };
-
- return node;
+ return pm_array_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
-static inline void
-pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
- pm_node_list_append(&node->requireds, inner);
+static PRISM_INLINE void
+pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
+ pm_node_list_append(arena, &node->requireds, inner);
}
/**
@@ -2284,15 +2613,14 @@ pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t
*/
static pm_assoc_node_t *
pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
- pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
- const uint8_t *end;
+ uint32_t end;
- if (value != NULL && value->location.end > key->location.end) {
- end = value->location.end;
- } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
- end = operator->end;
+ if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
+ end = PM_NODE_END(value);
+ } else if (operator != NULL) {
+ end = PM_TOKEN_END(parser, operator);
} else {
- end = key->location.end;
+ end = PM_NODE_END(key);
}
// Hash string keys will be frozen, so we can mark them as frozen here so
@@ -2312,22 +2640,15 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
}
- *node = (pm_assoc_node_t) {
- {
- .type = PM_ASSOC_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = key->location.start,
- .end = end
- },
- },
- .key = key,
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_assoc_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
+ key,
+ value,
+ NTOK2LOC(parser, operator)
+ );
}
/**
@@ -2336,22 +2657,15 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
static pm_assoc_splat_node_t *
pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
assert(operator->type == PM_TOKEN_USTAR_STAR);
- pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
-
- *node = (pm_assoc_splat_node_t) {
- {
- .type = PM_ASSOC_SPLAT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = value == NULL ? operator->end : value->location.end
- },
- },
- .value = value,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
- return node;
+ return pm_assoc_splat_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2360,18 +2674,14 @@ pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token
static pm_back_reference_read_node_t *
pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
assert(name->type == PM_TOKEN_BACK_REFERENCE);
- pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
- *node = (pm_back_reference_read_node_t) {
- {
- .type = PM_BACK_REFERENCE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_back_reference_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -2379,23 +2689,21 @@ pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name)
*/
static pm_begin_node_t *
pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
- pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
-
- *node = (pm_begin_node_t) {
- {
- .type = PM_BEGIN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = begin_keyword->start,
- .end = statements == NULL ? begin_keyword->end : statements->base.location.end
- },
- },
- .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
- .statements = statements,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
+ uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
+
+ return pm_begin_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, begin_keyword),
+ statements,
+ NULL,
+ NULL,
+ NULL,
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -2403,11 +2711,10 @@ pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_st
*/
static void
pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
- // If the begin keyword doesn't exist, we set the start on the begin_node
- if (!node->begin_keyword_loc.start) {
- node->base.location.start = rescue_clause->base.location.start;
+ if (node->begin_keyword_loc.length == 0) {
+ PM_NODE_START_SET_NODE(node, rescue_clause);
}
- node->base.location.end = rescue_clause->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
node->rescue_clause = rescue_clause;
}
@@ -2416,7 +2723,10 @@ pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_
*/
static void
pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
- node->base.location.end = else_clause->base.location.end;
+ if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
+ PM_NODE_START_SET_NODE(node, else_clause);
+ }
+ PM_NODE_LENGTH_SET_NODE(node, else_clause);
node->else_clause = else_clause;
}
@@ -2425,7 +2735,10 @@ pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause
*/
static void
pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
- node->base.location.end = ensure_clause->base.location.end;
+ if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
+ PM_NODE_START_SET_NODE(node, ensure_clause);
+ }
+ PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
node->ensure_clause = ensure_clause;
}
@@ -2433,11 +2746,10 @@ pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_
* Set the end keyword and end location of a begin node.
*/
static void
-pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
- assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
-
- node->base.location.end = end_keyword->end;
- node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
+pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
+ assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+ node->end_keyword_loc = TOK2LOC(parser, end_keyword);
}
/**
@@ -2445,22 +2757,16 @@ pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keywo
*/
static pm_block_argument_node_t *
pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
- pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
-
- *node = (pm_block_argument_node_t) {
- {
- .type = PM_BLOCK_ARGUMENT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = expression == NULL ? operator->end : expression->location.end
- },
- },
- .expression = expression,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ assert(operator->type == PM_TOKEN_UAMPERSAND);
+
+ return pm_block_argument_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
+ expression,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2468,22 +2774,17 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
*/
static pm_block_node_t *
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
- pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
-
- *node = (pm_block_node_t) {
- {
- .type = PM_BLOCK_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = opening->start, .end = closing->end },
- },
- .locals = *locals,
- .parameters = parameters,
- .body = body,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_block_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ *locals,
+ parameters,
+ body,
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -2491,24 +2792,17 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
*/
static pm_block_parameter_node_t *
pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
- assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
- pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
-
- *node = (pm_block_parameter_node_t) {
- {
- .type = PM_BLOCK_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- },
- },
- .name = pm_parser_optional_constant_id_token(parser, name),
- .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
+
+ return pm_block_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+ name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+ NTOK2LOC(parser, name),
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2516,53 +2810,44 @@ pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, cons
*/
static pm_block_parameters_node_t *
pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
- pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
-
- const uint8_t *start;
- if (opening->type != PM_TOKEN_NOT_PROVIDED) {
- start = opening->start;
+ uint32_t start;
+ if (opening != NULL) {
+ start = PM_TOKEN_START(parser, opening);
} else if (parameters != NULL) {
- start = parameters->base.location.start;
+ start = PM_NODE_START(parameters);
} else {
- start = NULL;
+ start = 0;
}
- const uint8_t *end;
+ uint32_t end;
if (parameters != NULL) {
- end = parameters->base.location.end;
- } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
- end = opening->end;
+ end = PM_NODE_END(parameters);
+ } else if (opening != NULL) {
+ end = PM_TOKEN_END(parser, opening);
} else {
- end = NULL;
- }
-
- *node = (pm_block_parameters_node_t) {
- {
- .type = PM_BLOCK_PARAMETERS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = start,
- .end = end
- }
- },
- .parameters = parameters,
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .locals = { 0 }
- };
-
- return node;
+ end = 0;
+ }
+
+ return pm_block_parameters_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ parameters,
+ ((pm_node_list_t) { 0 }),
+ NTOK2LOC(parser, opening),
+ ((pm_location_t) { 0 })
+ );
}
/**
* Set the closing location of a BlockParametersNode node.
*/
static void
-pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
- assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
-
- node->base.location.end = closing->end;
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
+ assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+ node->closing_loc = TOK2LOC(parser, closing);
}
/**
@@ -2570,29 +2855,27 @@ pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_
*/
static pm_block_local_variable_node_t *
pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
-
- *node = (pm_block_local_variable_node_t) {
- {
- .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_block_local_variable_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
* Append a new block-local variable to a BlockParametersNode node.
*/
static void
-pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
- pm_node_list_append(&node->locals, (pm_node_t *) local);
+pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
+ pm_node_list_append(arena, &node->locals, UP(local));
+
+ if (PM_NODE_LENGTH(node) == 0) {
+ PM_NODE_START_SET_NODE(node, local);
+ }
- if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
- node->base.location.end = local->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, local);
}
/**
@@ -2601,66 +2884,55 @@ pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm
static pm_break_node_t *
pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
- pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
- *node = (pm_break_node_t) {
- {
- .type = PM_BREAK_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- },
- },
- .arguments = arguments,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
- };
-
- return node;
+ return pm_break_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+ arguments,
+ TOK2LOC(parser, keyword)
+ );
}
// There are certain flags that we want to use internally but don't want to
// expose because they are not relevant beyond parsing. Therefore we'll define
// them here and not define them in config.yml/a header file.
-static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
+static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
+
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
/**
- * Allocate and initialize a new CallNode node. This sets everything to NULL or
- * PM_TOKEN_NOT_PROVIDED as appropriate such that its values can be overridden
- * in the various specializations of this function.
+ * Allocate and initialize a new CallNode node. This sets everything to NULL
+ * such that its values can be overridden in the various specializations of this
+ * function.
*/
static pm_call_node_t *
pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
- pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
-
- *node = (pm_call_node_t) {
- {
- .type = PM_CALL_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser),
- },
- .receiver = NULL,
- .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .arguments = NULL,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .block = NULL,
- .name = 0
- };
-
- return node;
+ return pm_call_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_UNSET,
+ NULL,
+ ((pm_location_t) { 0 }),
+ 0,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ NULL,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ NULL
+ );
}
/**
* Returns the value that the ignore visibility flag should be set to for the
* given receiver.
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
}
@@ -2680,12 +2952,15 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
pm_call_node_t *node = pm_call_node_create(parser, flags);
- node->base.location.start = receiver->location.start;
- node->base.location.end = pm_arguments_end(arguments);
+ PM_NODE_START_SET_NODE(node, receiver);
+
+ const pm_location_t *end = pm_arguments_end(arguments);
+ assert(end != NULL && "unreachable");
+ PM_NODE_LENGTH_SET_LOCATION(node, end);
node->receiver = receiver;
node->message_loc.start = arguments->opening_loc.start;
- node->message_loc.end = arguments->closing_loc.end;
+ node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
@@ -2706,20 +2981,22 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
- node->base.location.start = MIN(receiver->location.start, argument->location.start);
- node->base.location.end = MAX(receiver->location.end, argument->location.end);
+ PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
+ PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
node->receiver = receiver;
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+ node->message_loc = TOK2LOC(parser, operator);
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
- pm_arguments_node_arguments_append(arguments, argument);
+ pm_arguments_node_arguments_append(parser->arena, arguments, argument);
node->arguments = arguments;
node->name = pm_parser_constant_id_token(parser, operator);
return node;
}
+static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
+
/**
* Allocate and initialize a new CallNode node from a call expression.
*/
@@ -2729,26 +3006,31 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
- node->base.location.start = receiver->location.start;
- const uint8_t *end = pm_arguments_end(arguments);
+ PM_NODE_START_SET_NODE(node, receiver);
+ const pm_location_t *end = pm_arguments_end(arguments);
if (end == NULL) {
- end = message->end;
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
+ } else {
+ PM_NODE_LENGTH_SET_LOCATION(node, end);
}
- node->base.location.end = end;
node->receiver = receiver;
- node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+ node->call_operator_loc = TOK2LOC(parser, operator);
+ node->message_loc = TOK2LOC(parser, message);
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
node->closing_loc = arguments->closing_loc;
node->block = arguments->block;
if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
- pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+ pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
}
- node->name = pm_parser_constant_id_token(parser, message);
+ /**
+ * If the final character is `@` as is the case for `foo.~@`,
+ * we should ignore the @ in the same way we do for symbols.
+ */
+ node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
return node;
}
@@ -2758,12 +3040,9 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
static pm_call_node_t *
pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
pm_call_node_t *node = pm_call_node_create(parser, 0);
- node->base.location.start = parser->start;
- node->base.location.end = parser->end;
+ node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
node->receiver = receiver;
- node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
- node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
node->arguments = arguments;
node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
@@ -2778,10 +3057,12 @@ static pm_call_node_t *
pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
- node->base.location.start = message->start;
- node->base.location.end = pm_arguments_end(arguments);
+ PM_NODE_START_SET_TOKEN(parser, node, message);
+ const pm_location_t *end = pm_arguments_end(arguments);
+ assert(end != NULL && "unreachable");
+ PM_NODE_LENGTH_SET_LOCATION(node, end);
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+ node->message_loc = TOK2LOC(parser, message);
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
node->closing_loc = arguments->closing_loc;
@@ -2799,7 +3080,7 @@ static pm_call_node_t *
pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
- node->base.location = PM_LOCATION_NULL_VALUE(parser);
+ node->base.location = (pm_location_t) { 0 };
node->arguments = arguments;
node->name = name;
@@ -2816,16 +3097,16 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
- node->base.location.start = message->start;
- if (arguments->closing_loc.start != NULL) {
- node->base.location.end = arguments->closing_loc.end;
+ PM_NODE_START_SET_TOKEN(parser, node, message);
+ if (arguments->closing_loc.length > 0) {
+ PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
} else {
assert(receiver != NULL);
- node->base.location.end = receiver->location.end;
+ PM_NODE_LENGTH_SET_NODE(node, receiver);
}
node->receiver = receiver;
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+ node->message_loc = TOK2LOC(parser, message);
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
node->closing_loc = arguments->closing_loc;
@@ -2843,18 +3124,20 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
- node->base.location.start = receiver->location.start;
- node->base.location.end = pm_arguments_end(arguments);
+ PM_NODE_START_SET_NODE(node, receiver);
+ const pm_location_t *end = pm_arguments_end(arguments);
+ assert(end != NULL && "unreachable");
+ PM_NODE_LENGTH_SET_LOCATION(node, end);
node->receiver = receiver;
- node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+ node->call_operator_loc = TOK2LOC(parser, operator);
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
node->closing_loc = arguments->closing_loc;
node->block = arguments->block;
if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
- pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+ pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
}
node->name = pm_parser_constant_id_constant(parser, "call", 4);
@@ -2870,11 +3153,11 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
- node->base.location.start = operator->start;
- node->base.location.end = receiver->location.end;
+ PM_NODE_START_SET_TOKEN(parser, node, operator);
+ PM_NODE_LENGTH_SET_NODE(node, receiver);
node->receiver = receiver;
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+ node->message_loc = TOK2LOC(parser, operator);
node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
return node;
@@ -2888,8 +3171,8 @@ static pm_call_node_t *
pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
- node->base.location = PM_LOCATION_TOKEN_VALUE(message);
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+ node->base.location = TOK2LOC(parser, message);
+ node->message_loc = TOK2LOC(parser, message);
node->name = pm_parser_constant_id_token(parser, message);
return node;
@@ -2899,14 +3182,14 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
* Returns whether or not this call can be used on the left-hand side of an
* operator assignment.
*/
-static inline bool
+static PRISM_INLINE bool
pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
return (
- (node->message_loc.start != NULL) &&
- (node->message_loc.end[-1] != '!') &&
- (node->message_loc.end[-1] != '?') &&
- char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
- (node->opening_loc.start == NULL) &&
+ (node->message_loc.length > 0) &&
+ (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
+ (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
+ char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
+ (node->opening_loc.length == 0) &&
(node->arguments == NULL) &&
(node->block == NULL)
);
@@ -2922,10 +3205,10 @@ pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, p
if (write_constant->length > 0) {
size_t length = write_constant->length - 1;
- void *memory = xmalloc(length);
+ uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
memcpy(memory, write_constant->start, length);
- *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
+ *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length);
} else {
// We can get here if the message was missing because of a syntax error.
*read_name = pm_parser_constant_id_constant(parser, "", 0);
@@ -2939,33 +3222,25 @@ static pm_call_and_write_node_t *
pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
- *node = (pm_call_and_write_node_t) {
- {
- .type = PM_CALL_AND_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .message_loc = target->message_loc,
- .read_name = 0,
- .write_name = target->name,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
+ pm_call_and_write_node_t *node = pm_call_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->message_loc,
+ 0,
+ target->name,
+ TOK2LOC(parser, operator),
+ value
+ );
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -2976,7 +3251,7 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
*/
static void
pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
- if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
pm_node_t *node;
PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
@@ -2999,35 +3274,28 @@ pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *argumen
static pm_index_and_write_node_t *
pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
pm_index_arguments_check(parser, target->arguments, target->block);
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
- *node = (pm_index_and_write_node_t) {
- {
- .type = PM_INDEX_AND_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .opening_loc = target->opening_loc,
- .arguments = target->arguments,
- .closing_loc = target->closing_loc,
- .block = (pm_block_argument_node_t *) target->block,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ pm_index_and_write_node_t *node = pm_index_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->opening_loc,
+ target->arguments,
+ target->closing_loc,
+ (pm_block_argument_node_t *) target->block,
+ TOK2LOC(parser, operator),
+ value
+ );
+
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3038,34 +3306,26 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
static pm_call_operator_write_node_t *
pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
- pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
- *node = (pm_call_operator_write_node_t) {
- {
- .type = PM_CALL_OPERATOR_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .message_loc = target->message_loc,
- .read_name = 0,
- .write_name = target->name,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
+ pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->message_loc,
+ 0,
+ target->name,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+ TOK2LOC(parser, operator),
+ value
+ );
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3075,36 +3335,28 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
*/
static pm_index_operator_write_node_t *
pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
-
pm_index_arguments_check(parser, target->arguments, target->block);
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
- *node = (pm_index_operator_write_node_t) {
- {
- .type = PM_INDEX_OPERATOR_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .opening_loc = target->opening_loc,
- .arguments = target->arguments,
- .closing_loc = target->closing_loc,
- .block = (pm_block_argument_node_t *) target->block,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->opening_loc,
+ target->arguments,
+ target->closing_loc,
+ (pm_block_argument_node_t *) target->block,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+ TOK2LOC(parser, operator),
+ value
+ );
+
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3116,33 +3368,25 @@ static pm_call_or_write_node_t *
pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
- *node = (pm_call_or_write_node_t) {
- {
- .type = PM_CALL_OR_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .message_loc = target->message_loc,
- .read_name = 0,
- .write_name = target->name,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
+ pm_call_or_write_node_t *node = pm_call_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->message_loc,
+ 0,
+ target->name,
+ TOK2LOC(parser, operator),
+ value
+ );
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3153,35 +3397,28 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
static pm_index_or_write_node_t *
pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
pm_index_arguments_check(parser, target->arguments, target->block);
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
- *node = (pm_index_or_write_node_t) {
- {
- .type = PM_INDEX_OR_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .opening_loc = target->opening_loc,
- .arguments = target->arguments,
- .closing_loc = target->closing_loc,
- .block = (pm_block_argument_node_t *) target->block,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ pm_index_or_write_node_t *node = pm_index_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->opening_loc,
+ target->arguments,
+ target->closing_loc,
+ (pm_block_argument_node_t *) target->block,
+ TOK2LOC(parser, operator),
+ value
+ );
+
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3192,25 +3429,27 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
*/
static pm_call_target_node_t *
pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
- pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
+ pm_call_target_node_t *node = pm_call_target_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODE(target),
+ target->receiver,
+ target->call_operator_loc,
+ target->name,
+ target->message_loc
+ );
- *node = (pm_call_target_node_t) {
- {
- .type = PM_CALL_TARGET_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = target->base.location
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .name = target->name,
- .message_loc = target->message_loc
- };
+ /* It is possible to get here where we have parsed an invalid syntax tree
+ * where the call operator was not present. In that case we will have a
+ * problem because it is a required location. In this case we need to fill
+ * it in with a fake location so that the syntax tree remains valid. */
+ if (node->call_operator_loc.length == 0) {
+ node->call_operator_loc = target->base.location;
+ }
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3221,30 +3460,23 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
*/
static pm_index_target_node_t *
pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
- pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
- pm_node_flags_t flags = target->base.flags;
-
pm_index_arguments_check(parser, target->arguments, target->block);
-
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
- *node = (pm_index_target_node_t) {
- {
- .type = PM_INDEX_TARGET_NODE,
- .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = target->base.location
- },
- .receiver = target->receiver,
- .opening_loc = target->opening_loc,
- .arguments = target->arguments,
- .closing_loc = target->closing_loc,
- .block = (pm_block_argument_node_t *) target->block,
- };
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ pm_index_target_node_t *node = pm_index_target_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
+ PM_LOCATION_INIT_NODE(target),
+ target->receiver,
+ target->opening_loc,
+ target->arguments,
+ target->closing_loc,
+ (pm_block_argument_node_t *) target->block
+ );
+
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3254,23 +3486,15 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
*/
static pm_capture_pattern_node_t *
pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
- pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
-
- *node = (pm_capture_pattern_node_t) {
- {
- .type = PM_CAPTURE_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = value->location.start,
- .end = target->base.location.end
- },
- },
- .value = value,
- .target = target,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_capture_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(value, target),
+ value,
+ target,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -3278,36 +3502,28 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_v
*/
static pm_case_node_t *
pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
- pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
-
- *node = (pm_case_node_t) {
- {
- .type = PM_CASE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = case_keyword->start,
- .end = end_keyword->end
- },
- },
- .predicate = predicate,
- .else_clause = NULL,
- .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
- .conditions = { 0 }
- };
-
- return node;
+ return pm_case_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
+ predicate,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ TOK2LOC(parser, case_keyword),
+ NTOK2LOC(parser, end_keyword)
+ );
}
/**
* Append a new condition to a CaseNode node.
*/
static void
-pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
+pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
- pm_node_list_append(&node->conditions, condition);
- node->base.location.end = condition->location.end;
+ pm_node_list_append(arena, &node->conditions, condition);
+ PM_NODE_LENGTH_SET_NODE(node, condition);
}
/**
@@ -3316,53 +3532,45 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
static void
pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
node->else_clause = else_clause;
- node->base.location.end = else_clause->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, else_clause);
}
/**
* Set the end location for a CaseNode node.
*/
static void
-pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
- node->base.location.end = end_keyword->end;
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+ node->end_keyword_loc = TOK2LOC(parser, end_keyword);
}
/**
* Allocate and initialize a new CaseMatchNode node.
*/
static pm_case_match_node_t *
-pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
- pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
-
- *node = (pm_case_match_node_t) {
- {
- .type = PM_CASE_MATCH_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = case_keyword->start,
- .end = end_keyword->end
- },
- },
- .predicate = predicate,
- .else_clause = NULL,
- .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
- .conditions = { 0 }
- };
-
- return node;
+pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
+ return pm_case_match_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, case_keyword),
+ predicate,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ TOK2LOC(parser, case_keyword),
+ ((pm_location_t) { 0 })
+ );
}
/**
* Append a new condition to a CaseMatchNode node.
*/
static void
-pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
+pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
- pm_node_list_append(&node->conditions, condition);
- node->base.location.end = condition->location.end;
+ pm_node_list_append(arena, &node->conditions, condition);
+ PM_NODE_LENGTH_SET_NODE(node, condition);
}
/**
@@ -3371,16 +3579,16 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi
static void
pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
node->else_clause = else_clause;
- node->base.location.end = else_clause->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, else_clause);
}
/**
* Set the end location for a CaseMatchNode node.
*/
static void
-pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
- node->base.location.end = end_keyword->end;
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+ node->end_keyword_loc = TOK2LOC(parser, end_keyword);
}
/**
@@ -3388,25 +3596,20 @@ pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_toke
*/
static pm_class_node_t *
pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
-
- *node = (pm_class_node_t) {
- {
- .type = PM_CLASS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = class_keyword->start, .end = end_keyword->end },
- },
- .locals = *locals,
- .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
- .constant_path = constant_path,
- .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
- .superclass = superclass,
- .body = body,
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_class_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
+ *locals,
+ TOK2LOC(parser, class_keyword),
+ constant_path,
+ NTOK2LOC(parser, inheritance_operator),
+ superclass,
+ body,
+ TOK2LOC(parser, end_keyword),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -3415,24 +3618,17 @@ pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
static pm_class_variable_and_write_node_t *
pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
- *node = (pm_class_variable_and_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_class_variable_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3440,25 +3636,17 @@ pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_r
*/
static pm_class_variable_operator_write_node_t *
pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
-
- *node = (pm_class_variable_operator_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_class_variable_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -3467,24 +3655,17 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
static pm_class_variable_or_write_node_t *
pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
-
- *node = (pm_class_variable_or_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_class_variable_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3493,18 +3674,14 @@ pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_re
static pm_class_variable_read_node_t *
pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_CLASS_VARIABLE);
- pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
-
- *node = (pm_class_variable_read_node_t) {
- {
- .type = PM_CLASS_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .name = pm_parser_constant_id_token(parser, token)
- };
- return node;
+ return pm_class_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ pm_parser_constant_id_token(parser, token)
+ );
}
/**
@@ -3513,9 +3690,9 @@ pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token)
* a = *b
* a = 1, 2, 3
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
- if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
+ if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
return flags;
}
return 0;
@@ -3526,25 +3703,16 @@ pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
*/
static pm_class_variable_write_node_t *
pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
- pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
-
- *node = (pm_class_variable_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = read_node->base.location.start,
- .end = value->location.end
- },
- },
- .name = read_node->name,
- .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_class_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(read_node, value),
+ read_node->name,
+ read_node->base.location,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -3553,23 +3721,16 @@ pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_
static pm_constant_path_and_write_node_t *
pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
-
- *node = (pm_constant_path_and_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .target = target,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_constant_path_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3577,24 +3738,16 @@ pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_nod
*/
static pm_constant_path_operator_write_node_t *
pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
-
- *node = (pm_constant_path_operator_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .target = target,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_constant_path_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -3603,23 +3756,16 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
static pm_constant_path_or_write_node_t *
pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
- *node = (pm_constant_path_or_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .target = target,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_constant_path_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3628,29 +3774,22 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
static pm_constant_path_node_t *
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
pm_assert_value_expression(parser, parent);
- pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
if (name_token->type == PM_TOKEN_CONSTANT) {
name = pm_parser_constant_id_token(parser, name_token);
}
- *node = (pm_constant_path_node_t) {
- {
- .type = PM_CONSTANT_PATH_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = parent == NULL ? delimiter->start : parent->location.start,
- .end = name_token->end
- },
- },
- .parent = parent,
- .name = name,
- .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
- };
-
- return node;
+ return pm_constant_path_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
+ parent,
+ name,
+ TOK2LOC(parser, delimiter),
+ TOK2LOC(parser, name_token)
+ );
}
/**
@@ -3658,24 +3797,15 @@ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_to
*/
static pm_constant_path_write_node_t *
pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
-
- *node = (pm_constant_path_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- },
- },
- .target = target,
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_constant_path_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(target, value),
+ target,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3684,24 +3814,17 @@ pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t
static pm_constant_and_write_node_t *
pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
- *node = (pm_constant_and_write_node_t) {
- {
- .type = PM_CONSTANT_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_constant_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3709,25 +3832,17 @@ pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *
*/
static pm_constant_operator_write_node_t *
pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
-
- *node = (pm_constant_operator_write_node_t) {
- {
- .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_constant_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -3736,24 +3851,17 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
static pm_constant_or_write_node_t *
pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
-
- *node = (pm_constant_or_write_node_t) {
- {
- .type = PM_CONSTANT_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_constant_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3761,19 +3869,15 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t
*/
static pm_constant_read_node_t *
pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
- assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
- pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
-
- *node = (pm_constant_read_node_t) {
- {
- .type = PM_CONSTANT_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name)
- },
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
+
+ return pm_constant_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -3781,25 +3885,16 @@ pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
*/
static pm_constant_write_node_t *
pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
-
- *node = (pm_constant_write_node_t) {
- {
- .type = PM_CONSTANT_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_constant_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -3810,7 +3905,7 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_BEGIN_NODE: {
const pm_begin_node_t *cast = (pm_begin_node_t *) node;
- if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
+ if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
break;
}
case PM_PARENTHESES_NODE: {
@@ -3865,65 +3960,45 @@ pm_def_node_create(
const pm_token_t *equal,
const pm_token_t *end_keyword
) {
- pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
- const uint8_t *end;
-
- if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
- end = body->location.end;
- } else {
- end = end_keyword->end;
- }
-
- if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
+ if (receiver != NULL) {
pm_def_node_receiver_check(parser, receiver);
}
- *node = (pm_def_node_t) {
- {
- .type = PM_DEF_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = def_keyword->start, .end = end },
- },
- .name = name,
- .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
- .receiver = receiver,
- .parameters = parameters,
- .body = body,
- .locals = *locals,
- .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
- .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
- .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
- .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_def_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
+ name,
+ TOK2LOC(parser, name_loc),
+ receiver,
+ parameters,
+ body,
+ *locals,
+ TOK2LOC(parser, def_keyword),
+ NTOK2LOC(parser, operator),
+ NTOK2LOC(parser, lparen),
+ NTOK2LOC(parser, rparen),
+ NTOK2LOC(parser, equal),
+ NTOK2LOC(parser, end_keyword)
+ );
}
/**
* Allocate a new DefinedNode node.
*/
static pm_defined_node_t *
-pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
- pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
-
- *node = (pm_defined_node_t) {
- {
- .type = PM_DEFINED_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword_loc->start,
- .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
- },
- },
- .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
- .value = value,
- .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
- .keyword_loc = *keyword_loc
- };
-
- return node;
+pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
+ return pm_defined_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
+ NTOK2LOC(parser, lparen),
+ value,
+ NTOK2LOC(parser, rparen),
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -3931,29 +4006,15 @@ pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t
*/
static pm_else_node_t *
pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
- pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
- const uint8_t *end = NULL;
- if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
- end = statements->base.location.end;
- } else {
- end = end_keyword->end;
- }
-
- *node = (pm_else_node_t) {
- {
- .type = PM_ELSE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = else_keyword->start,
- .end = end,
- },
- },
- .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
- .statements = statements,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_else_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
+ TOK2LOC(parser, else_keyword),
+ statements,
+ NTOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -3961,23 +4022,15 @@ pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_stat
*/
static pm_embedded_statements_node_t *
pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
-
- *node = (pm_embedded_statements_node_t) {
- {
- .type = PM_EMBEDDED_STATEMENTS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .statements = statements,
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_embedded_statements_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ statements,
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -3985,22 +4038,14 @@ pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *openin
*/
static pm_embedded_variable_node_t *
pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
- pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
-
- *node = (pm_embedded_variable_node_t) {
- {
- .type = PM_EMBEDDED_VARIABLE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = variable->location.end
- }
- },
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .variable = variable
- };
-
- return node;
+ return pm_embedded_variable_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
+ TOK2LOC(parser, operator),
+ variable
+ );
}
/**
@@ -4008,23 +4053,15 @@ pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator
*/
static pm_ensure_node_t *
pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
- pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
-
- *node = (pm_ensure_node_t) {
- {
- .type = PM_ENSURE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = ensure_keyword->start,
- .end = end_keyword->end
- },
- },
- .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
- .statements = statements,
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_ensure_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
+ TOK2LOC(parser, ensure_keyword),
+ statements,
+ TOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -4033,16 +4070,13 @@ pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_
static pm_false_node_t *
pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_FALSE);
- pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
- *node = (pm_false_node_t) {{
- .type = PM_FALSE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
-
- return node;
+ return pm_false_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -4051,50 +4085,31 @@ pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_find_pattern_node_t *
pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
- pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
-
+ assert(nodes->size >= 2);
pm_node_t *left = nodes->nodes[0];
- assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
- pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
-
- pm_node_t *right;
+ pm_node_t *right = nodes->nodes[nodes->size - 1];
- if (nodes->size == 1) {
- right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
- } else {
- right = nodes->nodes[nodes->size - 1];
- assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
- }
-
-#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
- // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
- // The resulting AST will anyway be ignored, but this file still needs to compile.
- pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
-#else
- pm_node_t *right_splat_node = right;
-#endif
- *node = (pm_find_pattern_node_t) {
- {
- .type = PM_FIND_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = left->location.start,
- .end = right->location.end,
- },
- },
- .constant = NULL,
- .left = left_splat_node,
- .right = right_splat_node,
- .requireds = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
+ assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
+ assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
+
+ pm_find_pattern_node_t *node = pm_find_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(left, right),
+ NULL,
+ (pm_splat_node_t *) left,
+ ((pm_node_list_t) { 0 }),
+ (pm_splat_node_t *) right,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
// For now we're going to just copy over each pointer manually. This could be
// much more efficient, as we could instead resize the node list to only point
// to 1...-1.
for (size_t index = 1; index < nodes->size - 1; index++) {
- pm_node_list_append(&node->requireds, nodes->nodes[index]);
+ pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
}
return node;
@@ -4111,7 +4126,8 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
// First, get a buffer of the content.
size_t length = (size_t) diff;
- char *buffer = xmalloc(sizeof(char) * (length + 1));
+ const size_t buffer_size = sizeof(char) * (length + 1);
+ char *buffer = xmalloc(buffer_size);
memcpy((void *) buffer, token->start, length);
// Next, determine if we need to replace the decimal point because of
@@ -4145,8 +4161,8 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
// This should never happen, because we've already checked that the token
// is in a valid format. However it's good to be safe.
if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
- xfree((void *) buffer);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
+ xfree_sized(buffer, buffer_size);
return 0.0;
}
@@ -4164,12 +4180,12 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
ellipsis = "";
}
- pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
+ pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
}
// Finally we can free the buffer and return the value.
- xfree((void *) buffer);
+ xfree_sized(buffer, buffer_size);
return value;
}
@@ -4179,19 +4195,14 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
static pm_float_node_t *
pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT);
- pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
- *node = (pm_float_node_t) {
- {
- .type = PM_FLOAT_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .value = pm_double_parse(parser, token)
- };
-
- return node;
+ return pm_float_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ pm_double_parse(parser, token)
+ );
}
/**
@@ -4201,22 +4212,17 @@ static pm_imaginary_node_t *
pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
- pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
- *node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
+ return pm_imaginary_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ UP(pm_float_node_create(parser, &((pm_token_t) {
.type = PM_TOKEN_FLOAT,
.start = token->start,
.end = token->end - 1
- }))
- };
-
- return node;
+ })))
+ );
}
/**
@@ -4226,17 +4232,14 @@ static pm_rational_node_t *
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
- pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
- *node = (pm_rational_node_t) {
- {
- .type = PM_RATIONAL_NODE,
- .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numerator = { 0 },
- .denominator = { 0 }
- };
+ pm_rational_node_t *node = pm_rational_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ ((pm_integer_t) { 0 }),
+ ((pm_integer_t) { 0 })
+ );
const uint8_t *start = token->start;
const uint8_t *end = token->end - 1; // r
@@ -4253,7 +4256,7 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
const uint8_t *point = memchr(start, '.', length);
assert(point && "should have a decimal point");
- uint8_t *digits = malloc(length);
+ uint8_t *digits = xmalloc(length);
if (digits == NULL) {
fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
abort();
@@ -4263,12 +4266,18 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
+ size_t fract_length = 0;
+ for (const uint8_t *fract = point; fract < end; ++fract) {
+ if (*fract != '_') ++fract_length;
+ }
digits[0] = '1';
- if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
- pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
- free(digits);
+ if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
+ pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
+ xfree_sized(digits, length);
pm_integers_reduce(&node->numerator, &node->denominator);
+ pm_integer_arena_move(parser->arena, &node->numerator);
+ pm_integer_arena_move(parser->arena, &node->denominator);
return node;
}
@@ -4280,22 +4289,17 @@ static pm_imaginary_node_t *
pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
- pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
- *node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
+ return pm_imaginary_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ UP(pm_float_node_rational_create(parser, &((pm_token_t) {
.type = PM_TOKEN_FLOAT_RATIONAL,
.start = token->start,
.end = token->end - 1
- }))
- };
-
- return node;
+ })))
+ );
}
/**
@@ -4312,27 +4316,19 @@ pm_for_node_create(
const pm_token_t *do_keyword,
const pm_token_t *end_keyword
) {
- pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
-
- *node = (pm_for_node_t) {
- {
- .type = PM_FOR_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = for_keyword->start,
- .end = end_keyword->end
- },
- },
- .index = index,
- .collection = collection,
- .statements = statements,
- .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
- .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_for_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
+ index,
+ collection,
+ statements,
+ TOK2LOC(parser, for_keyword),
+ TOK2LOC(parser, in_keyword),
+ NTOK2LOC(parser, do_keyword),
+ TOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -4341,15 +4337,13 @@ pm_for_node_create(
static pm_forwarding_arguments_node_t *
pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
- pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
-
- *node = (pm_forwarding_arguments_node_t) {{
- .type = PM_FORWARDING_ARGUMENTS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_forwarding_arguments_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -4358,15 +4352,13 @@ pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token
static pm_forwarding_parameter_node_t *
pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
- pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
-
- *node = (pm_forwarding_parameter_node_t) {{
- .type = PM_FORWARDING_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_forwarding_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -4376,26 +4368,20 @@ static pm_forwarding_super_node_t *
pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
assert(token->type == PM_TOKEN_KEYWORD_SUPER);
- pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
pm_block_node_t *block = NULL;
if (arguments->block != NULL) {
block = (pm_block_node_t *) arguments->block;
}
- *node = (pm_forwarding_super_node_t) {
- {
- .type = PM_FORWARDING_SUPER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = token->start,
- .end = block != NULL ? block->base.location.end : token->end
- },
- },
- .block = block
- };
-
- return node;
+ return pm_forwarding_super_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ block
+ );
}
/**
@@ -4404,25 +4390,17 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm
*/
static pm_hash_pattern_node_t *
pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
-
- *node = (pm_hash_pattern_node_t) {
- {
- .type = PM_HASH_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
- .constant = NULL,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .elements = { 0 },
- .rest = NULL
- };
-
- return node;
+ return pm_hash_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -4430,46 +4408,36 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening
*/
static pm_hash_pattern_node_t *
pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
- pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
-
- const uint8_t *start;
- const uint8_t *end;
+ uint32_t start;
+ uint32_t end;
if (elements->size > 0) {
if (rest) {
- start = elements->nodes[0]->location.start;
- end = rest->location.end;
+ start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
+ end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
} else {
- start = elements->nodes[0]->location.start;
- end = elements->nodes[elements->size - 1]->location.end;
+ start = PM_NODE_START(elements->nodes[0]);
+ end = PM_NODE_END(elements->nodes[elements->size - 1]);
}
} else {
assert(rest != NULL);
- start = rest->location.start;
- end = rest->location.end;
- }
-
- *node = (pm_hash_pattern_node_t) {
- {
- .type = PM_HASH_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = start,
- .end = end
- },
- },
- .constant = NULL,
- .elements = { 0 },
- .rest = rest,
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- pm_node_t *element;
- PM_NODE_LIST_FOREACH(elements, index, element) {
- pm_node_list_append(&node->elements, element);
- }
+ start = PM_NODE_START(rest);
+ end = PM_NODE_END(rest);
+ }
+
+ pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ rest,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
+ pm_node_list_concat(parser->arena, &node->elements, elements);
return node;
}
@@ -4486,7 +4454,7 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
case PM_NUMBERED_REFERENCE_READ_NODE:
// This will only ever happen in the event of a syntax error, but we
// still need to provide something for the node.
- return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+ return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
default:
assert(false && "unreachable");
return (pm_constant_id_t) -1;
@@ -4499,24 +4467,17 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
static pm_global_variable_and_write_node_t *
pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
-
- *node = (pm_global_variable_and_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name = pm_global_variable_write_name(parser, target),
- .name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_global_variable_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ pm_global_variable_write_name(parser, target),
+ target->location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -4524,25 +4485,17 @@ pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
static pm_global_variable_operator_write_node_t *
pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
-
- *node = (pm_global_variable_operator_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name = pm_global_variable_write_name(parser, target),
- .name_loc = target->location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_global_variable_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ pm_global_variable_write_name(parser, target),
+ target->location,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -4551,24 +4504,17 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
// Creates the node for a `||=` assignment to a global variable; the operator
// token type is asserted to be PM_TOKEN_PIPE_PIPE_EQUAL.
static pm_global_variable_or_write_node_t *
pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
- *node = (pm_global_variable_or_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name = pm_global_variable_write_name(parser, target),
- .name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
// Same data as the removed initializer, now passed positionally: location
// (target through value), name, name_loc, operator_loc, value. Flags are 0.
+ return pm_global_variable_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ pm_global_variable_write_name(parser, target),
+ target->location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -4576,18 +4522,13 @@ pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
// Creates a global variable read node from its name token.
static pm_global_variable_read_node_t *
pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
-
- *node = (pm_global_variable_read_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
// The node's location comes from the name token and its name is the constant
// id computed from that same token, exactly as in the removed initializer.
+ return pm_global_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -4595,18 +4536,13 @@ pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name)
*/
// Creates a global variable read node from an already-known constant id rather
// than from a token, so there is no source token to take a location from.
static pm_global_variable_read_node_t *
pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
- pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
-
- *node = (pm_global_variable_read_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .name = name
- };
-
- return node;
// PM_LOCATION_INIT_UNSET takes the place of PM_LOCATION_NULL_VALUE(parser) as
// the node location.
+ return pm_global_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ name
+ );
}
/**
@@ -4614,25 +4550,16 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
*/
// Creates a global variable write node. Its flags come from
// pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY).
static pm_global_variable_write_node_t *
pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
-
- *node = (pm_global_variable_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- },
- },
- .name = pm_global_variable_write_name(parser, target),
- .name_loc = PM_LOCATION_NODE_VALUE(target),
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
// Unlike the sibling *_and/_or/_operator constructors, the value is passed
// before the operator location here.
// NOTE(review): the removed code used PM_OPTIONAL_LOCATION_TOKEN_VALUE for the
// operator; the new call uses TOK2LOC, while NTOK2LOC is used for optional
// tokens in pm_if_node_create. Confirm that operator is always present here.
+ return pm_global_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(target, value),
+ pm_global_variable_write_name(parser, target),
+ target->location,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -4640,21 +4567,16 @@ pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, con
*/
// Creates a global variable write node from a constant id and a value, with no
// tokens behind it.
static pm_global_variable_write_node_t *
pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
- pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
-
- *node = (pm_global_variable_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .name = name,
- .name_loc = PM_LOCATION_NULL_VALUE(parser),
- .operator_loc = PM_LOCATION_NULL_VALUE(parser),
- .value = value
- };
-
- return node;
// The three locations that were PM_LOCATION_NULL_VALUE(parser) become
// PM_LOCATION_INIT_UNSET for the node itself and zero-initialized
// pm_location_t values for the name and operator.
+ return pm_global_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ name,
+ ((pm_location_t) { 0 }),
+ value,
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -4663,29 +4585,24 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
// Creates an empty hash node from its opening token, which must not be NULL.
static pm_hash_node_t *
pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
assert(opening != NULL);
- pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
-
- *node = (pm_hash_node_t) {
- {
- .type = PM_HASH_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(opening)
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_NULL_VALUE(parser),
- .elements = { 0 }
- };
- return node;
// The node starts out flagged PM_NODE_FLAG_STATIC_LITERAL with an empty
// element list and a zeroed closing location; pm_hash_node_elements_append and
// pm_hash_node_closing_loc_set update those later.
+ return pm_hash_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, opening),
+ TOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
}
/**
* Append a new element to a hash node.
*/
// The signature gains a pm_arena_t *arena parameter, which is forwarded to
// pm_node_list_append; `inline` becomes PRISM_INLINE.
-static inline void
-pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
- pm_node_list_append(&hash->elements, element);
+static PRISM_INLINE void
+pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
+ pm_node_list_append(arena, &hash->elements, element);
bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
if (static_literal) {
@@ -4696,14 +4613,14 @@ pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
}
// The body of the check above is elided by the hunk boundary. When the element
// does not qualify, the hash loses its PM_NODE_FLAG_STATIC_LITERAL flag; the
// explicit (pm_node_t *) cast is replaced by UP(hash).
if (!static_literal) {
- pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
+ pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
}
}
// Records the closing token of a hash node. The new version takes the parser as
// an extra first parameter; it extends the node via PM_NODE_LENGTH_SET_TOKEN
// where the old code assigned location.end directly, then stores the token's
// location as closing_loc.
-static inline void
-pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
- hash->base.location.end = token->end;
- hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
+static PRISM_INLINE void
+pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
+ hash->closing_loc = TOK2LOC(parser, token);
}
/**
@@ -4719,38 +4636,32 @@ pm_if_node_create(pm_parser_t *parser,
const pm_token_t *end_keyword
) {
// Only the tail of the parameter list is visible in this hunk. The body uses
// if_keyword, predicate, then_keyword, statements, subsequent and end_keyword.
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
- const uint8_t *end;
- if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
- end = end_keyword->end;
// The end of the node is now a uint32_t rather than a byte pointer. It is
// taken from the first of: the end keyword, the subsequent node, a non-empty
// statements node, or the predicate. A missing end keyword is now signalled by
// NULL instead of a PM_TOKEN_NOT_PROVIDED token.
+ uint32_t start = PM_TOKEN_START(parser, if_keyword);
+ uint32_t end;
+
+ if (end_keyword != NULL) {
+ end = PM_TOKEN_END(parser, end_keyword);
} else if (subsequent != NULL) {
- end = subsequent->location.end;
+ end = PM_NODE_END(subsequent);
} else if (pm_statements_node_body_length(statements) != 0) {
- end = statements->base.location.end;
+ end = PM_NODE_END(statements);
} else {
- end = predicate->location.end;
- }
-
- *node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = if_keyword->start,
- .end = end
- },
- },
- .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
- .predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
- .statements = statements,
- .subsequent = subsequent,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ end = PM_NODE_END(predicate);
+ }
+
// The location is built as a start plus a length (end - start). The optional
// then/end keyword locations go through NTOK2LOC, which replaces
// PM_OPTIONAL_LOCATION_TOKEN_VALUE.
+ return pm_if_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ TOK2LOC(parser, if_keyword),
+ predicate,
+ NTOK2LOC(parser, then_keyword),
+ statements,
+ subsequent,
+ NTOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -4759,30 +4670,22 @@ pm_if_node_create(pm_parser_t *parser,
// Creates an if node for the modifier form: the single statement is wrapped in
// a fresh statements node and there is no subsequent branch.
static pm_if_node_t *
pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
pm_statements_node_t *statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, statements, statement, true);
- *node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statement->location.start,
- .end = predicate->location.end
- },
- },
- .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
- .predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .statements = statements,
- .subsequent = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
// The node spans from the statement through the predicate. The then and end
// keyword locations, formerly PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, are now
// zero-initialized pm_location_t values.
+ return pm_if_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ PM_LOCATION_INIT_NODES(statement, predicate),
+ TOK2LOC(parser, if_keyword),
+ predicate,
+ ((pm_location_t) { 0 }),
+ statements,
+ NULL,
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -4799,43 +4702,31 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to
pm_statements_node_t *else_statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, else_statements, false_expression, true);
- pm_token_t end_keyword = not_provided(parser);
- pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
-
- pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
-
- *node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = predicate->location.start,
- .end = false_expression->location.end,
- },
- },
- .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .predicate = predicate,
- .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
- .statements = if_statements,
- .subsequent = (pm_node_t *) else_node,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
-
// The else branch is created with a NULL end keyword instead of a synthesized
// not_provided token. The resulting if node spans from the predicate through
// the false expression; it has no if keyword or end keyword location, and the
// `?` token's location is stored as then_keyword_loc.
+ pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
+ return pm_if_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ PM_LOCATION_INIT_NODES(predicate, false_expression),
+ ((pm_location_t) { 0 }),
+ predicate,
+ TOK2LOC(parser, qmark),
+ if_statements,
+ UP(else_node),
+ ((pm_location_t) { 0 })
+ );
}
// Records the end keyword of an if node: the node is extended to the keyword
// and the keyword's location is stored as end_keyword_loc. The new version
// takes the parser as an extra first parameter.
-static inline void
-pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
- node->base.location.end = keyword->end;
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
+static PRISM_INLINE void
+pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
+ node->end_keyword_loc = TOK2LOC(parser, keyword);
}
// Records the end keyword of an else node, mirroring
// pm_if_node_end_keyword_loc_set: extend the node to the keyword, then store
// the keyword's location. The parser is a new first parameter.
-static inline void
-pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
- node->base.location.end = keyword->end;
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
+static PRISM_INLINE void
+pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
+ node->end_keyword_loc = TOK2LOC(parser, keyword);
}
/**
@@ -4843,18 +4734,13 @@ pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword
*/
// Creates an implicit node wrapping the given value node.
static pm_implicit_node_t *
pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
- pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
-
- *node = (pm_implicit_node_t) {
- {
- .type = PM_IMPLICIT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = value->location
- },
- .value = value
- };
-
- return node;
// The wrapper takes its location from the value node, as before
// (value->location in the removed code, PM_LOCATION_INIT_NODE(value) now).
+ return pm_implicit_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE(value),
+ value
+ );
}
/**
@@ -4864,17 +4750,12 @@ static pm_implicit_rest_node_t *
pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
// The token is asserted to be a comma; the node carries nothing but that
// token's location.
assert(token->type == PM_TOKEN_COMMA);
- pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
-
- *node = (pm_implicit_rest_node_t) {
- {
- .type = PM_IMPLICIT_REST_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }
- };
-
- return node;
+ return pm_implicit_rest_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -4883,28 +4764,33 @@ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
// Creates an integer node from a PM_TOKEN_INTEGER token. The base flags are
// combined with PM_NODE_FLAG_STATIC_LITERAL and the value starts out zeroed.
static pm_integer_node_t *
pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER);
- pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
- *node = (pm_integer_node_t) {
- {
- .type = PM_INTEGER_NODE,
- .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .value = { 0 }
- };
+ pm_integer_node_t *node = pm_integer_node_new(
+ parser->arena,
+ ++parser->node_id,
+ base | PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ ((pm_integer_t) { 0 })
+ );
- pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
- switch (base) {
- case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
- case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
- case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
- case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
- default: assert(false && "unreachable"); break;
// New fast path: when parser->integer.lexed is set, the stored
// parser->integer.value is copied into the node and the flag is cleared, so
// the token text is not parsed again. Otherwise the base flag is mapped to a
// pm_integer_base_t, the token is parsed as before, and the parsed value is
// then passed to pm_integer_arena_move with the parser's arena.
+ if (parser->integer.lexed) {
+ // The value was already computed during lexing.
+ node->value.value = parser->integer.value;
+ parser->integer.lexed = false;
+ } else {
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
+ switch (base) {
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+ default: assert(false && "unreachable"); break;
+ }
+
+ pm_integer_parse(&node->value, integer_base, token->start, token->end);
+ pm_integer_arena_move(parser->arena, &node->value);
}
- pm_integer_parse(&node->value, integer_base, token->start, token->end);
return node;
}
@@ -4916,22 +4802,17 @@ static pm_imaginary_node_t *
pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
- pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
- *node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
// The imaginary node covers the whole token. Its numeric child is an integer
// node built from a temporary PM_TOKEN_INTEGER token that ends one byte
// earlier (token->end - 1), presumably dropping the imaginary suffix — confirm
// against the lexer.
+ return pm_imaginary_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ UP(pm_integer_node_create(parser, base, &((pm_token_t) {
.type = PM_TOKEN_INTEGER,
.start = token->start,
.end = token->end - 1
- }))
- };
-
- return node;
+ })))
+ );
}
/**
@@ -4942,17 +4823,14 @@ static pm_rational_node_t *
pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
- pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
- *node = (pm_rational_node_t) {
- {
- .type = PM_RATIONAL_NODE,
- .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numerator = { 0 },
- .denominator = { .value = 1, 0 }
- };
// The rational node starts with a zeroed numerator and a denominator whose
// value is 1.
+ pm_rational_node_t *node = pm_rational_node_new(
+ parser->arena,
+ ++parser->node_id,
+ base | PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ ((pm_integer_t) { 0 }),
+ ((pm_integer_t) { .value = 1 })
+ );
pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
switch (base) {
@@ -4964,6 +4842,7 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
}
// The numerator is parsed from the token's bytes excluding the last one
// (token->end - 1), then passed to pm_integer_arena_move with the parser's
// arena; that call is the only line this hunk adds.
pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
+ pm_integer_arena_move(parser->arena, &node->numerator);
return node;
}
@@ -4976,22 +4855,17 @@ static pm_imaginary_node_t *
pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
- pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
- *node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
// Same shape as pm_integer_node_imaginary_create, except that the numeric
// child is a rational node built from a temporary PM_TOKEN_INTEGER_RATIONAL
// token ending one byte before the original token's end.
+ return pm_imaginary_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
.type = PM_TOKEN_INTEGER_RATIONAL,
.start = token->start,
.end = token->end - 1
- }))
- };
-
- return node;
+ })))
+ );
}
/**
@@ -4999,33 +4873,27 @@ pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t b
*/
// Creates an in node. It starts at the in keyword and ends at the first
// available of: the statements, the then keyword, or the pattern.
static pm_in_node_t *
pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
- pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
+ uint32_t start = PM_TOKEN_START(parser, in_keyword);
+ uint32_t end;
- const uint8_t *end;
// A missing then keyword is now signalled by NULL instead of a
// PM_TOKEN_NOT_PROVIDED token.
if (statements != NULL) {
- end = statements->base.location.end;
- } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
- end = then_keyword->end;
+ end = PM_NODE_END(statements);
+ } else if (then_keyword != NULL) {
+ end = PM_TOKEN_END(parser, then_keyword);
} else {
- end = pattern->location.end;
- }
-
- *node = (pm_in_node_t) {
- {
- .type = PM_IN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = in_keyword->start,
- .end = end
- },
- },
- .pattern = pattern,
- .statements = statements,
- .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
- .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
- };
-
- return node;
+ end = PM_NODE_END(pattern);
+ }
+
// The location is stored as a start plus a length (end - start). The optional
// then keyword location goes through NTOK2LOC.
+ return pm_in_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ pattern,
+ statements,
+ TOK2LOC(parser, in_keyword),
+ NTOK2LOC(parser, then_keyword)
+ );
}
/**
@@ -5034,24 +4902,17 @@ pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t
// Creates the node for an `&&=` assignment to an instance variable; the
// operator token type is asserted to be PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL.
static pm_instance_variable_and_write_node_t *
pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
- *node = (pm_instance_variable_and_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
// The name and name location are copied from the read node being assigned to;
// the node spans from that target through the value.
+ return pm_instance_variable_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -5059,25 +4920,17 @@ pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_vari
*/
// Creates the node for an operator assignment to an instance variable.
static pm_instance_variable_operator_write_node_t *
pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
-
- *node = (pm_instance_variable_operator_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
// Positional arguments follow the removed initializer: location (target
// through value), name, name_loc, binary_operator_loc, value, binary_operator.
// The binary operator id is computed from the operator token's bytes excluding
// the last one; pm_parser_constant_id_raw replaces
// pm_parser_constant_id_location for that lookup.
+ return pm_instance_variable_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -5086,24 +4939,17 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
// Creates the node for a `||=` assignment to an instance variable; the
// operator token type is asserted to be PM_TOKEN_PIPE_PIPE_EQUAL.
static pm_instance_variable_or_write_node_t *
pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
-
- *node = (pm_instance_variable_or_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
// Same fields as the removed initializer, passed positionally: location
// (target through value), name, name_loc, operator_loc, value.
+ return pm_instance_variable_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -5112,18 +4958,14 @@ pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_varia
// Creates an instance variable read node from a PM_TOKEN_INSTANCE_VARIABLE
// token (asserted).
static pm_instance_variable_read_node_t *
pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
- pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
-
- *node = (pm_instance_variable_read_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .name = pm_parser_constant_id_token(parser, token)
- };
- return node;
// Both the node location and the name's constant id are derived from the
// token.
+ return pm_instance_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ pm_parser_constant_id_token(parser, token)
+ );
}
/**
@@ -5132,24 +4974,16 @@ pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *tok
*/
// Creates an instance variable write node from an existing read node. Flags
// come from pm_implicit_array_write_flags(value,
// PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY).
static pm_instance_variable_write_node_t *
pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
- pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
- *node = (pm_instance_variable_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = read_node->base.location.start,
- .end = value->location.end
- }
- },
- .name = read_node->name,
- .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
// As in pm_global_variable_write_node_create, the value is passed before the
// operator location.
// NOTE(review): the removed code used PM_OPTIONAL_LOCATION_TOKEN_VALUE for the
// operator; the new call uses TOK2LOC rather than NTOK2LOC. Confirm that
// operator can never be NULL here.
+ return pm_instance_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(read_node, value),
+ read_node->name,
+ read_node->base.location,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -5158,7 +4992,7 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
* literals.
*/
// Appends a part to an interpolated node's part list, adjusting flags on the
// part or on the owning node depending on the part's type. The signature gains
// an arena parameter, which is forwarded to pm_node_list_append.
static void
-pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
+pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
switch (PM_NODE_TYPE(part)) {
case PM_STRING_NODE:
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
@@ -5186,14 +5020,14 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p
break;
}
// The cases between the two hunks are elided. An embedded variable part clears
// PM_NODE_FLAG_STATIC_LITERAL on the owning node; any other unlisted type
// trips the assertion.
case PM_EMBEDDED_VARIABLE_NODE:
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
break;
default:
assert(false && "unexpected node type");
break;
}
- pm_node_list_append(parts, part);
+ pm_node_list_append(arena, parts, part);
}
/**
@@ -5201,43 +5035,34 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p
*/
// Creates an empty interpolated regular expression node from its opening
// token, initially flagged PM_NODE_FLAG_STATIC_LITERAL.
static pm_interpolated_regular_expression_node_t *
pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
- pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
-
- *node = (pm_interpolated_regular_expression_node_t) {
- {
- .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = NULL,
- },
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .parts = { 0 }
- };
-
- return node;
// The removed code left the node's end as NULL; the new code initializes the
// whole location from the opening token. closing_loc is still set to the
// opening token's location here and is overwritten later by
// pm_interpolated_regular_expression_node_closing_set.
+ return pm_interpolated_regular_expression_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, opening),
+ TOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, opening)
+ );
}
// Appends a part to an interpolated regular expression, first widening the
// node's location so it covers the part: the start moves back if the part
// begins earlier, and the end moves forward if the part finishes later. The
// signature gains an arena parameter that is passed on to
// pm_interpolated_node_append.
-static inline void
-pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
- if (node->base.location.start > part->location.start) {
- node->base.location.start = part->location.start;
+static PRISM_INLINE void
+pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
+ if (PM_NODE_START(node) > PM_NODE_START(part)) {
+ PM_NODE_START_SET_NODE(node, part);
}
- if (node->base.location.end < part->location.end) {
- node->base.location.end = part->location.end;
+ if (PM_NODE_END(node) < PM_NODE_END(part)) {
+ PM_NODE_LENGTH_SET_NODE(node, part);
}
- pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
+ pm_interpolated_node_append(arena, UP(node), &node->parts, part);
}
// Records the closing token of an interpolated regular expression: stores its
// location, extends the node to the token, and ORs in the flags returned by
// pm_regular_expression_flags_create for that token. Only the qualifier
// (inline -> PRISM_INLINE) and the location helpers change.
-static inline void
+static PRISM_INLINE void
pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
- node->base.location.end = closing->end;
- pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
+ node->closing_loc = TOK2LOC(parser, closing);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+ pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
}
/**
@@ -5249,7 +5074,7 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte
* PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
* single static literal string that can be pushed onto the stack on its own.
* Note that this doesn't necessarily mean that the string will be frozen or
- * not; the instructions in CRuby will be either putobject or putstring,
+ * not; the instructions in CRuby will be either putobject, dupstring or dupchilledstring,
* depending on the combination of `--enable-frozen-string-literal`,
* `# frozen_string_literal: true`, and whether or not there is interpolation.
*
@@ -5263,22 +5088,31 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte
* is necessary to indicate that the string should be left up to the runtime,
* which could potentially use a chilled string otherwise.
*/
// The signature now takes the parser as its first parameter. The arena used
// for list appends is read from parser->arena, and the parser itself is needed
// by the new error-recovery branch further down.
-static inline void
-pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
+static PRISM_INLINE void
+pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
+ pm_arena_t *arena = parser->arena;
#define CLEAR_FLAGS(node) \
- node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
+ node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
#define MUTABLE_FLAGS(node) \
- node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
+ node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
// "No opening delimiter" used to be detected by a NULL start pointer; it is
// now detected by a zero-length opening location. In that case the first part
// supplies the node's start.
- if (node->parts.size == 0 && node->opening_loc.start == NULL) {
- node->base.location.start = part->location.start;
+ if (node->parts.size == 0 && node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(node, part);
}
// The MAX() over end pointers becomes an explicit comparison: the node is
// extended only when the part ends later than the node currently does.
- node->base.location.end = MAX(node->base.location.end, part->location.end);
+ if (PM_NODE_END(part) > PM_NODE_END(node)) {
+ PM_NODE_LENGTH_SET_NODE(node, part);
+ }
switch (PM_NODE_TYPE(part)) {
case PM_STRING_NODE:
+ // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
+ // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
+ // as long as this interpolation only consists of other string literals.
+ if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
+ }
part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
break;
case PM_INTERPOLATED_STRING_NODE:
@@ -5328,12 +5162,24 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
// should clear the mutability flags.
CLEAR_FLAGS(node);
break;
// New branches: x-string and symbol parts (plain or interpolated) are not
// appended as-is. The node's flags are cleared and the part is replaced in the
// list by the result of pm_error_recovery_node_create_unexpected, after which
// the function returns early. That early return skips the #undef lines below,
// which is harmless because they are preprocessor directives. An error
// recovery part only clears the flags and is then appended normally.
+ case PM_X_STRING_NODE:
+ case PM_INTERPOLATED_X_STRING_NODE:
+ case PM_SYMBOL_NODE:
+ case PM_INTERPOLATED_SYMBOL_NODE:
+ // These will only happen in error cases. But we want to handle it
+ // here so that we don't fail the assertion.
+ CLEAR_FLAGS(node);
+ pm_node_list_append(arena, &node->parts, UP(pm_error_recovery_node_create_unexpected(parser, part)));
+ return;
+ case PM_ERROR_RECOVERY_NODE:
+ CLEAR_FLAGS(node);
+ break;
default:
assert(false && "unexpected node type");
break;
}
- pm_node_list_append(&node->parts, part);
+ pm_node_list_append(arena, &node->parts, part);
#undef CLEAR_FLAGS
#undef MUTABLE_FLAGS
@@ -5344,7 +5190,6 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
*/
// Creates an interpolated string node and appends each of the given parts (if
// any) through pm_interpolated_string_node_append. The initial flags depend on
// parser->frozen_string_literal; the switch cases and the function's tail are
// elided by the hunk boundaries.
static pm_interpolated_string_node_t *
pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
- pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
switch (parser->frozen_string_literal) {
@@ -5356,25 +5201,23 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
break;
}
- *node = (pm_interpolated_string_node_t) {
- {
- .type = PM_INTERPOLATED_STRING_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end,
- },
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .parts = { 0 }
- };
// opening and closing may now be NULL; a missing delimiter contributes 0 as
// the start or end offset.
// NOTE(review): if opening is present but closing is NULL, `end - start` is
// evaluated on uint32_t values with end == 0. Confirm that U32 and the later
// appends tolerate the wrapped length, or that callers never pass this
// combination.
+ uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
+ uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
+
+ pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ NTOK2LOC(parser, closing)
+ );
if (parts != NULL) {
pm_node_t *part;
PM_NODE_LIST_FOREACH(parts, index, part) {
- pm_interpolated_string_node_append(node, part);
+ pm_interpolated_string_node_append(parser, node, part);
}
}
@@ -5385,25 +5228,28 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
* Set the closing token of the given InterpolatedStringNode node.
*/
static void
-pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
- node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
- node->base.location.end = closing->end;
+pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
+ node->closing_loc = TOK2LOC(parser, closing);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
}
static void
-pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
- if (node->parts.size == 0 && node->opening_loc.start == NULL) {
- node->base.location.start = part->location.start;
+pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
+ if (node->parts.size == 0 && node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(node, part);
}
- pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
- node->base.location.end = MAX(node->base.location.end, part->location.end);
+ pm_interpolated_node_append(arena, UP(node), &node->parts, part);
+
+ if (PM_NODE_END(part) > PM_NODE_END(node)) {
+ PM_NODE_LENGTH_SET_NODE(node, part);
+ }
}
static void
-pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
- node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
- node->base.location.end = closing->end;
+pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
+ node->closing_loc = TOK2LOC(parser, closing);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
}
/**
@@ -5411,27 +5257,23 @@ pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node,
*/
static pm_interpolated_symbol_node_t *
pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
- pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
-
- *node = (pm_interpolated_symbol_node_t) {
- {
- .type = PM_INTERPOLATED_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end,
- },
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .parts = { 0 }
- };
+ uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
+ uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
+
+ pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ NTOK2LOC(parser, closing)
+ );
if (parts != NULL) {
pm_node_t *part;
PM_NODE_LIST_FOREACH(parts, index, part) {
- pm_interpolated_symbol_node_append(node, part);
+ pm_interpolated_symbol_node_append(parser->arena, node, part);
}
}
@@ -5443,35 +5285,27 @@ pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *openin
*/
static pm_interpolated_x_string_node_t *
pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
-
- *node = (pm_interpolated_x_string_node_t) {
- {
- .type = PM_INTERPOLATED_X_STRING_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .parts = { 0 }
- };
-
- return node;
+ return pm_interpolated_x_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, closing)
+ );
}
-static inline void
-pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
- pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
- node->base.location.end = part->location.end;
+static PRISM_INLINE void
+pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
+ pm_interpolated_node_append(arena, UP(node), &node->parts, part);
+ PM_NODE_LENGTH_SET_NODE(node, part);
}
-static inline void
-pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
- node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
- node->base.location.end = closing->end;
+static PRISM_INLINE void
+pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
+ node->closing_loc = TOK2LOC(parser, closing);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
}
/**
@@ -5479,17 +5313,12 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
*/
static pm_it_local_variable_read_node_t *
pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
-
- *node = (pm_it_local_variable_read_node_t) {
- {
- .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name)
- }
- };
-
- return node;
+ return pm_it_local_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name)
+ );
}
/**
@@ -5497,20 +5326,12 @@ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *nam
*/
static pm_it_parameters_node_t *
pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
-
- *node = (pm_it_parameters_node_t) {
- {
- .type = PM_IT_PARAMETERS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- }
- };
-
- return node;
+ return pm_it_parameters_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing)
+ );
}
/**
@@ -5518,37 +5339,31 @@ pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, con
*/
static pm_keyword_hash_node_t *
pm_keyword_hash_node_create(pm_parser_t *parser) {
- pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
-
- *node = (pm_keyword_hash_node_t) {
- .base = {
- .type = PM_KEYWORD_HASH_NODE,
- .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- },
- .elements = { 0 }
- };
-
- return node;
+ return pm_keyword_hash_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 })
+ );
}
/**
* Append an element to a KeywordHashNode node.
*/
static void
-pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
+pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
// If the element being added is not an AssocNode or does not have a symbol
// key, then we want to turn the SYMBOL_KEYS flag off.
if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
- pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
+ pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
}
- pm_node_list_append(&hash->elements, element);
- if (hash->base.location.start == NULL) {
- hash->base.location.start = element->location.start;
+ pm_node_list_append(arena, &hash->elements, element);
+ if (PM_NODE_LENGTH(hash) == 0) {
+ PM_NODE_START_SET_NODE(hash, element);
}
- hash->base.location.end = element->location.end;
+ PM_NODE_LENGTH_SET_NODE(hash, element);
}
/**
@@ -5556,22 +5371,14 @@ pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *el
*/
static pm_required_keyword_parameter_node_t *
pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
-
- *node = (pm_required_keyword_parameter_node_t) {
- {
- .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = name->start,
- .end = name->end
- },
- },
- .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
- };
-
- return node;
+ return pm_required_keyword_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_raw(parser, name->start, name->end - 1),
+ TOK2LOC(parser, name)
+ );
}
/**
@@ -5579,23 +5386,15 @@ pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t
*/
static pm_optional_keyword_parameter_node_t *
pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
- pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
-
- *node = (pm_optional_keyword_parameter_node_t) {
- {
- .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = name->start,
- .end = value->location.end
- },
- },
- .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
- .value = value
- };
-
- return node;
+ return pm_optional_keyword_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
+ pm_parser_constant_id_raw(parser, name->start, name->end - 1),
+ TOK2LOC(parser, name),
+ value
+ );
}
/**
@@ -5603,23 +5402,15 @@ pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t
*/
static pm_keyword_rest_parameter_node_t *
pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
- pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
-
- *node = (pm_keyword_rest_parameter_node_t) {
- {
- .type = PM_KEYWORD_REST_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- },
- },
- .name = pm_parser_optional_constant_id_token(parser, name),
- .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_keyword_rest_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+ name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+ NTOK2LOC(parser, name),
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -5635,26 +5426,18 @@ pm_lambda_node_create(
pm_node_t *parameters,
pm_node_t *body
) {
- pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
-
- *node = (pm_lambda_node_t) {
- {
- .type = PM_LAMBDA_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = closing->end
- },
- },
- .locals = *locals,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .parameters = parameters,
- .body = body
- };
-
- return node;
+ return pm_lambda_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, operator, closing),
+ *locals,
+ TOK2LOC(parser, operator),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing),
+ parameters,
+ body
+ );
}
/**
@@ -5664,25 +5447,18 @@ static pm_local_variable_and_write_node_t *
pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
-
- *node = (pm_local_variable_and_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .name = name,
- .depth = depth
- };
- return node;
+ return pm_local_variable_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->location,
+ TOK2LOC(parser, operator),
+ value,
+ name,
+ depth
+ );
}
/**
@@ -5690,26 +5466,18 @@ pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
static pm_local_variable_operator_write_node_t *
pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
- pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
-
- *node = (pm_local_variable_operator_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name_loc = target->location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .name = name,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .depth = depth
- };
-
- return node;
+ return pm_local_variable_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->location,
+ TOK2LOC(parser, operator),
+ value,
+ name,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+ depth
+ );
}
/**
@@ -5719,25 +5487,18 @@ static pm_local_variable_or_write_node_t *
pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
-
- *node = (pm_local_variable_or_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .name = name,
- .depth = depth
- };
- return node;
+ return pm_local_variable_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->location,
+ TOK2LOC(parser, operator),
+ value,
+ name,
+ depth
+ );
}
/**
@@ -5747,19 +5508,14 @@ static pm_local_variable_read_node_t *
pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
- pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
-
- *node = (pm_local_variable_read_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name)
- },
- .name = name_id,
- .depth = depth
- };
-
- return node;
+ return pm_local_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ name_id,
+ depth
+ );
}
/**
@@ -5786,32 +5542,23 @@ pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t
*/
static pm_local_variable_write_node_t *
pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
- pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
-
- *node = (pm_local_variable_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = name_loc->start,
- .end = value->location.end
- }
- },
- .name = name,
- .depth = depth,
- .value = value,
- .name_loc = *name_loc,
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_local_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
+ name,
+ depth,
+ *name_loc,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
* Returns true if the given bounds comprise `it`.
*/
-static inline bool
+static PRISM_INLINE bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
}
@@ -5820,19 +5567,24 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
* Returns true if the given bounds comprise a numbered parameter (i.e., they
* are of the form /^_\d$/).
*/
-static inline bool
-pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
- return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
+static PRISM_INLINE bool
+pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
+ return (
+ (length == 2) &&
+ (parser->start[start] == '_') &&
+ (parser->start[start + 1] != '0') &&
+ pm_char_is_decimal_digit(parser->start[start + 1])
+ );
}
/**
* Ensure the given bounds do not comprise a numbered parameter. If they do, add
* an appropriate error message to the parser.
*/
-static inline void
-pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- if (pm_token_is_numbered_parameter(start, end)) {
- PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
+static PRISM_INLINE void
+pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
+ if (pm_token_is_numbered_parameter(parser, start, length)) {
+ PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
}
}
@@ -5842,20 +5594,16 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui
*/
static pm_local_variable_target_node_t *
pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
- pm_refute_numbered_parameter(parser, location->start, location->end);
- pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
-
- *node = (pm_local_variable_target_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_TARGET_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = *location
- },
- .name = name,
- .depth = depth
- };
-
- return node;
+ pm_refute_numbered_parameter(parser, location->start, location->length);
+
+ return pm_local_variable_target_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = location->start, .length = location->length }),
+ name,
+ depth
+ );
}
/**
@@ -5865,23 +5613,15 @@ static pm_match_predicate_node_t *
pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
pm_assert_value_expression(parser, value);
- pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
-
- *node = (pm_match_predicate_node_t) {
- {
- .type = PM_MATCH_PREDICATE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = value->location.start,
- .end = pattern->location.end
- }
- },
- .value = value,
- .pattern = pattern,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_match_predicate_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(value, pattern),
+ value,
+ pattern,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -5891,23 +5631,15 @@ static pm_match_required_node_t *
pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
pm_assert_value_expression(parser, value);
- pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
-
- *node = (pm_match_required_node_t) {
- {
- .type = PM_MATCH_REQUIRED_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = value->location.start,
- .end = pattern->location.end
- }
- },
- .value = value,
- .pattern = pattern,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_match_required_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(value, pattern),
+ value,
+ pattern,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -5915,19 +5647,14 @@ pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *
*/
static pm_match_write_node_t *
pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
- pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
-
- *node = (pm_match_write_node_t) {
- {
- .type = PM_MATCH_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = call->base.location
- },
- .call = call,
- .targets = { 0 }
- };
-
- return node;
+ return pm_match_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE(call),
+ call,
+ ((pm_node_list_t) { 0 })
+ );
}
/**
@@ -5935,26 +5662,18 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
*/
static pm_module_node_t *
pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
-
- *node = (pm_module_node_t) {
- {
- .type = PM_MODULE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = module_keyword->start,
- .end = end_keyword->end
- }
- },
- .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
- .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
- .constant_path = constant_path,
- .body = body,
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_module_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
+ (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
+ TOK2LOC(parser, module_keyword),
+ constant_path,
+ body,
+ TOK2LOC(parser, end_keyword),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -5962,22 +5681,17 @@ pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const
*/
static pm_multi_target_node_t *
pm_multi_target_node_create(pm_parser_t *parser) {
- pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
-
- *node = (pm_multi_target_node_t) {
- {
- .type = PM_MULTI_TARGET_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = NULL, .end = NULL }
- },
- .lefts = { 0 },
- .rest = NULL,
- .rights = { 0 },
- .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ return pm_multi_target_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -5990,27 +5704,27 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t
node->rest = target;
} else {
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
- pm_node_list_append(&node->rights, target);
+ pm_node_list_append(parser->arena, &node->rights, target);
}
} else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
if (node->rest == NULL) {
node->rest = target;
} else {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
- pm_node_list_append(&node->rights, target);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
+ pm_node_list_append(parser->arena, &node->rights, target);
}
} else if (node->rest == NULL) {
- pm_node_list_append(&node->lefts, target);
+ pm_node_list_append(parser->arena, &node->lefts, target);
} else {
- pm_node_list_append(&node->rights, target);
+ pm_node_list_append(parser->arena, &node->rights, target);
}
- if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
- node->base.location.start = target->location.start;
+ if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
+ PM_NODE_START_SET_NODE(node, target);
}
- if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
- node->base.location.end = target->location.end;
+ if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
+ PM_NODE_LENGTH_SET_NODE(node, target);
}
}
@@ -6018,18 +5732,19 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t
* Set the opening of a MultiTargetNode node.
*/
static void
-pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
- node->base.location.start = lparen->start;
- node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
+pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
+ PM_NODE_START_SET_TOKEN(parser, node, lparen);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
+ node->lparen_loc = TOK2LOC(parser, lparen);
}
/**
* Set the closing of a MultiTargetNode node.
*/
static void
-pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
- node->base.location.end = rparen->end;
- node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
+pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
+ node->rparen_loc = TOK2LOC(parser, rparen);
}
/**
@@ -6037,32 +5752,21 @@ pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t
*/
static pm_multi_write_node_t *
pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
-
- *node = (pm_multi_write_node_t) {
- {
- .type = PM_MULTI_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .lefts = target->lefts,
- .rest = target->rest,
- .rights = target->rights,
- .lparen_loc = target->lparen_loc,
- .rparen_loc = target->rparen_loc,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- // Explicitly do not call pm_node_destroy here because we want to keep
- // around all of the information within the MultiWriteNode node.
- xfree(target);
-
- return node;
+ /* The target is no longer necessary because we have reused its children. It
+ * is arena-allocated so no explicit free is needed. */
+ return pm_multi_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->lefts,
+ target->rest,
+ target->rights,
+ target->lparen_loc,
+ target->rparen_loc,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -6071,22 +5775,15 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target,
static pm_next_node_t *
pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
- pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
- *node = (pm_next_node_t) {
- {
- .type = PM_NEXT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- }
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .arguments = arguments
- };
-
- return node;
+ return pm_next_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+ arguments,
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -6095,16 +5792,31 @@ pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments
static pm_nil_node_t *
pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_NIL);
- pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
- *node = (pm_nil_node_t) {{
- .type = PM_NIL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ return pm_nil_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
+}
- return node;
+/**
+ * Allocate and initialize a new NoBlockParameterNode node.
+ */
+static pm_no_block_parameter_node_t *
+pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
+ assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
+ assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
+
+ return pm_no_block_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
+ TOK2LOC(parser, operator),
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -6114,41 +5826,29 @@ static pm_no_keywords_parameter_node_t *
pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
- pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
- *node = (pm_no_keywords_parameter_node_t) {
- {
- .type = PM_NO_KEYWORDS_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = keyword->end
- }
- },
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
- };
-
- return node;
+ return pm_no_keywords_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
+ TOK2LOC(parser, operator),
+ TOK2LOC(parser, keyword)
+ );
}
/**
* Allocate and initialize a new NumberedParametersNode node.
*/
static pm_numbered_parameters_node_t *
-pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
- pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
-
- *node = (pm_numbered_parameters_node_t) {
- {
- .type = PM_NUMBERED_PARAMETERS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = *location
- },
- .maximum = maximum
- };
-
- return node;
+pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
+ return pm_numbered_parameters_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ maximum
+ );
}
/**
@@ -6184,14 +5884,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
unsigned long value = strtoul(digits, &endptr, 10);
if ((digits == endptr) || (*endptr != '\0')) {
- pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
+ pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
value = 0;
}
- xfree(digits);
+ xfree_sized(digits, sizeof(char) * (length + 1));
if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
- PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
+ PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
value = 0;
}
@@ -6206,18 +5906,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
static pm_numbered_reference_read_node_t *
pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
- pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
-
- *node = (pm_numbered_reference_read_node_t) {
- {
- .type = PM_NUMBERED_REFERENCE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .number = pm_numbered_reference_read_node_number(parser, name)
- };
- return node;
+ return pm_numbered_reference_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_numbered_reference_read_node_number(parser, name)
+ );
}
/**
@@ -6225,24 +5921,16 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na
*/
static pm_optional_parameter_node_t *
pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
- pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
-
- *node = (pm_optional_parameter_node_t) {
- {
- .type = PM_OPTIONAL_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = name->start,
- .end = value->location.end
- }
- },
- .name = pm_parser_constant_id_token(parser, name),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_optional_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
+ pm_parser_constant_id_token(parser, name),
+ TOK2LOC(parser, name),
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -6252,23 +5940,15 @@ static pm_or_node_t *
pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
pm_assert_value_expression(parser, left);
- pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
-
- *node = (pm_or_node_t) {
- {
- .type = PM_OR_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = left->location.start,
- .end = right->location.end
- }
- },
- .left = left,
- .right = right,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_or_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(left, right),
+ left,
+ right,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -6276,24 +5956,19 @@ pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operat
*/
static pm_parameters_node_t *
pm_parameters_node_create(pm_parser_t *parser) {
- pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
-
- *node = (pm_parameters_node_t) {
- {
- .type = PM_PARAMETERS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
- },
- .rest = NULL,
- .keyword_rest = NULL,
- .block = NULL,
- .requireds = { 0 },
- .optionals = { 0 },
- .posts = { 0 },
- .keywords = { 0 }
- };
-
- return node;
+ return pm_parameters_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 }),
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ NULL
+ );
}
/**
@@ -6301,16 +5976,12 @@ pm_parameters_node_create(pm_parser_t *parser) {
*/
static void
pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
- if (params->base.location.start == NULL) {
- params->base.location.start = param->location.start;
- } else {
- params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
+ if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
+ PM_NODE_START_SET_NODE(params, param);
}
- if (params->base.location.end == NULL) {
- params->base.location.end = param->location.end;
- } else {
- params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
+ if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
+ PM_NODE_LENGTH_SET_NODE(params, param);
}
}
@@ -6318,27 +5989,27 @@ pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param)
* Append a required parameter to a ParametersNode node.
*/
static void
-pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
pm_parameters_node_location_set(params, param);
- pm_node_list_append(&params->requireds, param);
+ pm_node_list_append(arena, &params->requireds, param);
}
/**
* Append an optional parameter to a ParametersNode node.
*/
static void
-pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
- pm_parameters_node_location_set(params, (pm_node_t *) param);
- pm_node_list_append(&params->optionals, (pm_node_t *) param);
+pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
+ pm_parameters_node_location_set(params, UP(param));
+ pm_node_list_append(arena, &params->optionals, UP(param));
}
/**
* Append a post optional arguments parameter to a ParametersNode node.
*/
static void
-pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
pm_parameters_node_location_set(params, param);
- pm_node_list_append(&params->posts, param);
+ pm_node_list_append(arena, &params->posts, param);
}
/**
@@ -6354,9 +6025,9 @@ pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
* Append a keyword parameter to a ParametersNode node.
*/
static void
-pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
pm_parameters_node_location_set(params, param);
- pm_node_list_append(&params->keywords, param);
+ pm_node_list_append(arena, &params->keywords, param);
}
/**
@@ -6373,9 +6044,9 @@ pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *par
* Set the block parameter on a ParametersNode node.
*/
static void
-pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
+pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
assert(params->block == NULL);
- pm_parameters_node_location_set(params, (pm_node_t *) param);
+ pm_parameters_node_location_set(params, param);
params->block = param;
}
@@ -6384,46 +6055,30 @@ pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_no
*/
static pm_program_node_t *
pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
- pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
-
- *node = (pm_program_node_t) {
- {
- .type = PM_PROGRAM_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statements == NULL ? parser->start : statements->base.location.start,
- .end = statements == NULL ? parser->end : statements->base.location.end
- }
- },
- .locals = *locals,
- .statements = statements
- };
-
- return node;
+ return pm_program_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE(statements),
+ *locals,
+ statements
+ );
}
/**
* Allocate and initialize new ParenthesesNode node.
*/
static pm_parentheses_node_t *
-pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
- pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
-
- *node = (pm_parentheses_node_t) {
- {
- .type = PM_PARENTHESES_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
- .body = body,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
+ return pm_parentheses_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ body,
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -6431,24 +6086,16 @@ pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_no
*/
static pm_pinned_expression_node_t *
pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
- pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
-
- *node = (pm_pinned_expression_node_t) {
- {
- .type = PM_PINNED_EXPRESSION_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = rparen->end
- }
- },
- .expression = expression,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
- .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
- };
-
- return node;
+ return pm_pinned_expression_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
+ expression,
+ TOK2LOC(parser, operator),
+ TOK2LOC(parser, lparen),
+ TOK2LOC(parser, rparen)
+ );
}
/**
@@ -6456,22 +6103,14 @@ pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, con
*/
static pm_pinned_variable_node_t *
pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
- pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
-
- *node = (pm_pinned_variable_node_t) {
- {
- .type = PM_PINNED_VARIABLE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = variable->location.end
- }
- },
- .variable = variable,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_pinned_variable_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
+ variable,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -6479,24 +6118,16 @@ pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator,
*/
static pm_post_execution_node_t *
pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
-
- *node = (pm_post_execution_node_t) {
- {
- .type = PM_POST_EXECUTION_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = closing->end
- }
- },
- .statements = statements,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_post_execution_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+ statements,
+ TOK2LOC(parser, keyword),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -6504,24 +6135,16 @@ pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, co
*/
static pm_pre_execution_node_t *
pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
-
- *node = (pm_pre_execution_node_t) {
- {
- .type = PM_PRE_EXECUTION_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = closing->end
- }
- },
- .statements = statements,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_pre_execution_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+ statements,
+ TOK2LOC(parser, keyword),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -6531,8 +6154,6 @@ static pm_range_node_t *
pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
pm_assert_value_expression(parser, left);
pm_assert_value_expression(parser, right);
-
- pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
pm_node_flags_t flags = 0;
// Indicate that this node is an exclusive range if the operator is `...`.
@@ -6550,22 +6171,18 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
flags |= PM_NODE_FLAG_STATIC_LITERAL;
}
- *node = (pm_range_node_t) {
- {
- .type = PM_RANGE_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = (left == NULL ? operator->start : left->location.start),
- .end = (right == NULL ? operator->end : right->location.end)
- }
- },
- .left = left,
- .right = right,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
+ uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
+ uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
- return node;
+ return pm_range_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ left,
+ right,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -6574,15 +6191,13 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
static pm_redo_node_t *
pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_REDO);
- pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
- *node = (pm_redo_node_t) {{
- .type = PM_REDO_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
-
- return node;
+ return pm_redo_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6591,31 +6206,22 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_regular_expression_node_t *
pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
- pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
-
- *node = (pm_regular_expression_node_t) {
- {
- .type = PM_REGULAR_EXPRESSION_NODE,
- .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = MIN(opening->start, closing->start),
- .end = MAX(opening->end, closing->end)
- }
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .content_loc = PM_LOCATION_TOKEN_VALUE(content),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .unescaped = *unescaped
- };
-
- return node;
+ return pm_regular_expression_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, content),
+ TOK2LOC(parser, closing),
+ *unescaped
+ );
}
/**
* Allocate a new initialize a new RegularExpressionNode node.
*/
-static inline pm_regular_expression_node_t *
+static PRISM_INLINE pm_regular_expression_node_t *
pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
}
@@ -6625,18 +6231,13 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
*/
static pm_required_parameter_node_t *
pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
- pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
-
- *node = (pm_required_parameter_node_t) {
- {
- .type = PM_REQUIRED_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .name = pm_parser_constant_id_token(parser, token)
- };
-
- return node;
+ return pm_required_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ pm_parser_constant_id_token(parser, token)
+ );
}
/**
@@ -6644,23 +6245,15 @@ pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token)
*/
static pm_rescue_modifier_node_t *
pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
- pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
-
- *node = (pm_rescue_modifier_node_t) {
- {
- .type = PM_RESCUE_MODIFIER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = expression->location.start,
- .end = rescue_expression->location.end
- }
- },
- .expression = expression,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .rescue_expression = rescue_expression
- };
-
- return node;
+ return pm_rescue_modifier_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(expression, rescue_expression),
+ expression,
+ TOK2LOC(parser, keyword),
+ rescue_expression
+ );
}
/**
@@ -6668,28 +6261,24 @@ pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const
*/
static pm_rescue_node_t *
pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
- pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
-
- *node = (pm_rescue_node_t) {
- {
- .type = PM_RESCUE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(keyword)
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .reference = NULL,
- .statements = NULL,
- .subsequent = NULL,
- .exceptions = { 0 }
- };
-
- return node;
+ return pm_rescue_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, keyword),
+ TOK2LOC(parser, keyword),
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ NULL,
+ ((pm_location_t) { 0 }),
+ NULL,
+ NULL
+ );
}
-static inline void
-pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
- node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+static PRISM_INLINE void
+pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
+ node->operator_loc = TOK2LOC(parser, operator);
}
/**
@@ -6698,7 +6287,7 @@ pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator)
static void
pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
node->reference = reference;
- node->base.location.end = reference->location.end;
+ PM_NODE_LENGTH_SET_NODE(node, reference);
}
/**
@@ -6708,7 +6297,7 @@ static void
pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
node->statements = statements;
if (pm_statements_node_body_length(statements) > 0) {
- node->base.location.end = statements->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, statements);
}
}
@@ -6718,16 +6307,16 @@ pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *stat
static void
pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
node->subsequent = subsequent;
- node->base.location.end = subsequent->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, subsequent);
}
/**
* Append an exception node to a rescue node, and update the location.
*/
static void
-pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
- pm_node_list_append(&node->exceptions, exception);
- node->base.location.end = exception->location.end;
+pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
+ pm_node_list_append(arena, &node->exceptions, exception);
+ PM_NODE_LENGTH_SET_NODE(node, exception);
}
/**
@@ -6735,23 +6324,15 @@ pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
*/
static pm_rest_parameter_node_t *
pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
- pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
-
- *node = (pm_rest_parameter_node_t) {
- {
- .type = PM_REST_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- }
- },
- .name = pm_parser_optional_constant_id_token(parser, name),
- .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_rest_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+ name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+ NTOK2LOC(parser, name),
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -6760,15 +6341,13 @@ pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, c
static pm_retry_node_t *
pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_RETRY);
- pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
-
- *node = (pm_retry_node_t) {{
- .type = PM_RETRY_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_retry_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6776,22 +6355,14 @@ pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_return_node_t *
pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
- pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
-
- *node = (pm_return_node_t) {
- {
- .type = PM_RETURN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- }
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .arguments = arguments
- };
-
- return node;
+ return pm_return_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+ TOK2LOC(parser, keyword),
+ arguments
+ );
}
/**
@@ -6800,15 +6371,13 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
static pm_self_node_t *
pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_SELF);
- pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
-
- *node = (pm_self_node_t) {{
- .type = PM_SELF_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_self_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6816,19 +6385,13 @@ pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_shareable_constant_node_t *
pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
- pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
-
- *node = (pm_shareable_constant_node_t) {
- {
- .type = PM_SHAREABLE_CONSTANT_NODE,
- .flags = (pm_node_flags_t) value,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NODE_VALUE(write)
- },
- .write = write
- };
-
- return node;
+ return pm_shareable_constant_node_new(
+ parser->arena,
+ ++parser->node_id,
+ (pm_node_flags_t) value,
+ PM_LOCATION_INIT_NODE(write),
+ write
+ );
}
/**
@@ -6836,26 +6399,18 @@ pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shar
*/
static pm_singleton_class_node_t *
pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
-
- *node = (pm_singleton_class_node_t) {
- {
- .type = PM_SINGLETON_CLASS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = class_keyword->start,
- .end = end_keyword->end
- }
- },
- .locals = *locals,
- .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .expression = expression,
- .body = body,
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_singleton_class_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
+ *locals,
+ TOK2LOC(parser, class_keyword),
+ TOK2LOC(parser, operator),
+ expression,
+ body,
+ TOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -6864,16 +6419,13 @@ pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *local
static pm_source_encoding_node_t *
pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
- pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
-
- *node = (pm_source_encoding_node_t) {{
- .type = PM_SOURCE_ENCODING_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_source_encoding_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6881,7 +6433,6 @@ pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_source_file_node_t*
pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
- pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
pm_node_flags_t flags = 0;
@@ -6895,17 +6446,13 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
break;
}
- *node = (pm_source_file_node_t) {
- {
- .type = PM_SOURCE_FILE_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
- },
- .filepath = parser->filepath
- };
-
- return node;
+ return pm_source_file_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_TOKEN(parser, file_keyword),
+ parser->filepath
+ );
}
/**
@@ -6914,16 +6461,13 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
static pm_source_line_node_t *
pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD___LINE__);
- pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
-
- *node = (pm_source_line_node_t) {{
- .type = PM_SOURCE_LINE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_source_line_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6931,22 +6475,14 @@ pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_splat_node_t *
pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
- pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
-
- *node = (pm_splat_node_t) {
- {
- .type = PM_SPLAT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = (expression == NULL ? operator->end : expression->location.end)
- }
- },
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .expression = expression
- };
-
- return node;
+ return pm_splat_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
+ TOK2LOC(parser, operator),
+ expression
+ );
}
/**
@@ -6954,18 +6490,13 @@ pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t
*/
static pm_statements_node_t *
pm_statements_node_create(pm_parser_t *parser) {
- pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
-
- *node = (pm_statements_node_t) {
- {
- .type = PM_STATEMENTS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .body = { 0 }
- };
-
- return node;
+ return pm_statements_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 })
+ );
}
/**
@@ -6977,25 +6508,17 @@ pm_statements_node_body_length(pm_statements_node_t *node) {
}
/**
- * Set the location of the given StatementsNode.
- */
-static void
-pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
- node->base.location = (pm_location_t) { .start = start, .end = end };
-}
-
-/**
* Update the location of the statements node based on the statement that is
* being added to the list.
*/
-static inline void
+static PRISM_INLINE void
pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
- if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
- node->base.location.start = statement->location.start;
+ if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
+ PM_NODE_START_SET_NODE(node, statement);
}
- if (statement->location.end > node->base.location.end) {
- node->base.location.end = statement->location.end;
+ if (PM_NODE_END(statement) > PM_NODE_END(node)) {
+ PM_NODE_LENGTH_SET_NODE(node, statement);
}
}
@@ -7022,7 +6545,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
}
}
- pm_node_list_append(&node->body, statement);
+ pm_node_list_append(parser->arena, &node->body, statement);
if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
}
@@ -7030,18 +6553,17 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
* Prepend a new node to the given StatementsNode node's body.
*/
static void
-pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
+pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
pm_statements_node_body_update(node, statement);
- pm_node_list_prepend(&node->body, statement);
+ pm_node_list_prepend(arena, &node->body, statement);
pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
}
/**
* Allocate a new StringNode node with the current string on the parser.
*/
-static inline pm_string_node_t *
+static PRISM_INLINE pm_string_node_t *
pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
- pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
pm_node_flags_t flags = 0;
switch (parser->frozen_string_literal) {
@@ -7053,23 +6575,19 @@ pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
break;
}
- *node = (pm_string_node_t) {
- {
- .type = PM_STRING_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
- .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
- }
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .content_loc = PM_LOCATION_TOKEN_VALUE(content),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .unescaped = *string
- };
+ uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
+ uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
- return node;
+ return pm_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, opening),
+ TOK2LOC(parser, content),
+ NTOK2LOC(parser, closing),
+ *string
+ );
}
/**
@@ -7097,30 +6615,21 @@ pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *open
static pm_super_node_t *
pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
- pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
- const uint8_t *end = pm_arguments_end(arguments);
- if (end == NULL) {
- assert(false && "unreachable");
- }
-
- *node = (pm_super_node_t) {
- {
- .type = PM_SUPER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = end,
- }
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .lparen_loc = arguments->opening_loc,
- .arguments = arguments->arguments,
- .rparen_loc = arguments->closing_loc,
- .block = arguments->block
- };
-
- return node;
+ const pm_location_t *end = pm_arguments_end(arguments);
+ assert(end != NULL && "unreachable");
+
+ return pm_super_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
+ TOK2LOC(parser, keyword),
+ arguments->opening_loc,
+ arguments->arguments,
+ arguments->closing_loc,
+ arguments->block
+ );
}
/**
@@ -7148,7 +6657,7 @@ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *locat
size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
if (width == 0) {
- pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+ pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
break;
}
@@ -7168,7 +6677,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca
size_t width = encoding->char_width(cursor, end - cursor);
if (width == 0) {
- pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+ pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
break;
}
@@ -7185,7 +6694,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca
* If the validate flag is set, then it will check the contents of the symbol
* to ensure that all characters are valid in the encoding.
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
if (parser->explicit_encoding != NULL) {
// A Symbol may optionally have its encoding explicitly set. This will
@@ -7210,160 +6719,31 @@ parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_
return 0;
}
-static pm_node_flags_t
-parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
- assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
- (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
- (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
- (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
-
- // There's special validation logic used if a string does not contain any character escape sequences.
- if (parser->explicit_encoding == NULL) {
- // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
- // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
- // the US-ASCII encoding.
- if (ascii_only) {
- return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
- }
-
- if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
- if (!ascii_only) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
- }
- } else if (parser->encoding != modifier_encoding) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
-
- if (modifier == 'n' && !ascii_only) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
- }
- }
-
- return flags;
- }
-
- // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
- bool mixed_encoding = false;
-
- if (mixed_encoding) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
- } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
- // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
- bool valid_string_in_modifier_encoding = true;
-
- if (!valid_string_in_modifier_encoding) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
- }
- } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
- // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
- if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
- }
- }
-
- // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
- return flags;
-}
-
-/**
- * Ruby "downgrades" the encoding of Regexps to US-ASCII if the associated encoding is ASCII-compatible and
- * the unescaped representation of a Regexp source consists only of US-ASCII code points. This is true even
- * when the Regexp is explicitly given an ASCII-8BIT encoding via the (/n) modifier. Otherwise, the encoding
- * may be explicitly set with an escape sequence.
- */
-static pm_node_flags_t
-parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
- // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
- bool valid_unicode_range = true;
- if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
- return flags;
- }
-
- // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
- // to multi-byte characters are allowed.
- if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
- // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
- // following error message appearing twice. We do the same for compatibility.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
- }
-
- /**
- * Start checking modifier flags. We need to process these before considering any explicit encodings that may have
- * been set by character literals. The order in which the encoding modifiers is checked does not matter. In the
- * event that both an encoding modifier and an explicit encoding would result in the same encoding we do not set
- * the corresponding "forced_<encoding>" flag. Instead, the caller should check the encoding modifier flag and
- * determine the encoding that way.
- */
-
- if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
- return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
- }
-
- if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
- return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
- }
-
- if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
- return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
- }
-
- if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
- return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
- }
-
- // At this point no encoding modifiers will be present on the regular expression as they would have already
- // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
- // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
- if (ascii_only) {
- return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
- }
-
- // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
- // or by specifying a modifier.
- //
- // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
- if (parser->explicit_encoding != NULL) {
- if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
- return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
- } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
- return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
- }
- }
-
- return 0;
-}
-
/**
* Allocate and initialize a new SymbolNode node with the given unescaped
* string.
*/
static pm_symbol_node_t *
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
- pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
-
- *node = (pm_symbol_node_t) {
- {
- .type = PM_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
- .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
- }
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .value_loc = PM_LOCATION_TOKEN_VALUE(value),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .unescaped = *unescaped
- };
-
- return node;
+ uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
+ uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
+
+ return pm_symbol_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL | flags,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, opening),
+ NTOK2LOC(parser, value),
+ NTOK2LOC(parser, closing),
+ *unescaped
+ );
}
/**
* Allocate and initialize a new SymbolNode node.
*/
-static inline pm_symbol_node_t *
+static PRISM_INLINE pm_symbol_node_t *
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
}
@@ -7383,35 +6763,15 @@ pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *open
*/
static pm_symbol_node_t *
pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
- pm_symbol_node_t *node;
+ assert(token->type == PM_TOKEN_LABEL);
- switch (token->type) {
- case PM_TOKEN_LABEL: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
-
- pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
- node = pm_symbol_node_create(parser, &opening, &label, &closing);
-
- assert((label.end - label.start) >= 0);
- pm_string_shared_init(&node->unescaped, label.start, label.end);
- pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
+ pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
+ pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
+ pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
- break;
- }
- case PM_TOKEN_MISSING: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
- node = pm_symbol_node_create(parser, &opening, &label, &closing);
- break;
- }
- default:
- assert(false && "unreachable");
- node = NULL;
- break;
- }
+ assert((label.end - label.start) >= 0);
+ pm_string_shared_init(&node->unescaped, label.start, label.end);
+ pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
return node;
}
@@ -7421,18 +6781,16 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_symbol_node_t *
pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
- pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
-
- *node = (pm_symbol_node_t) {
- {
- .type = PM_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .value_loc = PM_LOCATION_NULL_VALUE(parser),
- .unescaped = { 0 }
- };
+ pm_symbol_node_t *node = pm_symbol_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_string_t) { 0 })
+ );
pm_string_constant_init(&node->unescaped, content, strlen(content));
return node;
@@ -7442,21 +6800,29 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
* Check if the given node is a label in a hash.
*/
static bool
-pm_symbol_node_label_p(pm_node_t *node) {
- const uint8_t *end = NULL;
+pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
+ const pm_location_t *location = NULL;
switch (PM_NODE_TYPE(node)) {
- case PM_SYMBOL_NODE:
- end = ((pm_symbol_node_t *) node)->closing_loc.end;
+ case PM_SYMBOL_NODE: {
+ const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
+ if (cast->closing_loc.length > 0) {
+ location = &cast->closing_loc;
+ }
break;
- case PM_INTERPOLATED_SYMBOL_NODE:
- end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
+ }
+ case PM_INTERPOLATED_SYMBOL_NODE: {
+ const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
+ if (cast->closing_loc.length > 0) {
+ location = &cast->closing_loc;
+ }
break;
+ }
default:
return false;
}
- return (end != NULL) && (end[-1] == ':');
+ return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
}
/**
@@ -7464,32 +6830,26 @@ pm_symbol_node_label_p(pm_node_t *node) {
*/
static pm_symbol_node_t *
pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
- pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
+ pm_symbol_node_t *new_node = pm_symbol_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ node->content_loc,
+ TOK2LOC(parser, closing),
+ node->unescaped
+ );
- *new_node = (pm_symbol_node_t) {
- {
- .type = PM_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .value_loc = node->content_loc,
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .unescaped = node->unescaped
+ pm_token_t content = {
+ .type = PM_TOKEN_IDENTIFIER,
+ .start = parser->start + node->content_loc.start,
+ .end = parser->start + node->content_loc.start + node->content_loc.length
};
- pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
- pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
-
- // We are explicitly _not_ using pm_node_destroy here because we don't want
- // to trash the unescaped string. We could instead copy the string if we
- // know that it is owned, but we're taking the fast path for now.
- xfree(node);
+ pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
+ /* The old node is arena-allocated so no explicit free is needed. */
return new_node;
}
@@ -7498,7 +6858,6 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
*/
static pm_string_node_t *
pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
- pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
pm_node_flags_t flags = 0;
switch (parser->frozen_string_literal) {
@@ -7510,24 +6869,18 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
break;
}
- *new_node = (pm_string_node_t) {
- {
- .type = PM_STRING_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = node->base.location
- },
- .opening_loc = node->opening_loc,
- .content_loc = node->value_loc,
- .closing_loc = node->closing_loc,
- .unescaped = node->unescaped
- };
-
- // We are explicitly _not_ using pm_node_destroy here because we don't want
- // to trash the unescaped string. We could instead copy the string if we
- // know that it is owned, but we're taking the fast path for now.
- xfree(node);
+ pm_string_node_t *new_node = pm_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_NODE(node),
+ node->opening_loc,
+ node->value_loc,
+ node->closing_loc,
+ node->unescaped
+ );
+ /* The old node is arena-allocated so no explicit free is needed. */
return new_node;
}
@@ -7537,16 +6890,13 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
static pm_true_node_t *
pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_TRUE);
- pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
- *node = (pm_true_node_t) {{
- .type = PM_TRUE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
-
- return node;
+ return pm_true_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -7554,16 +6904,12 @@ pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_true_node_t *
pm_true_node_synthesized_create(pm_parser_t *parser) {
- pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
-
- *node = (pm_true_node_t) {{
- .type = PM_TRUE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = parser->start, .end = parser->end }
- }};
-
- return node;
+ return pm_true_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_UNSET
+ );
}
/**
@@ -7572,28 +6918,24 @@ pm_true_node_synthesized_create(pm_parser_t *parser) {
static pm_undef_node_t *
pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
- pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
-
- *node = (pm_undef_node_t) {
- {
- .type = PM_UNDEF_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token),
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
- .names = { 0 }
- };
- return node;
+ return pm_undef_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, token)
+ );
}
/**
* Append a name to an undef node.
*/
static void
-pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
- node->base.location.end = name->location.end;
- pm_node_list_append(&node->names, name);
+pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
+ PM_NODE_LENGTH_SET_NODE(node, name);
+ pm_node_list_append(arena, &node->names, name);
}
/**
@@ -7602,34 +6944,20 @@ pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
static pm_unless_node_t *
pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
-
- const uint8_t *end;
- if (statements != NULL) {
- end = statements->base.location.end;
- } else {
- end = predicate->location.end;
- }
-
- *node = (pm_unless_node_t) {
- {
- .type = PM_UNLESS_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
- .statements = statements,
- .else_clause = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ pm_node_t *end = statements == NULL ? predicate : UP(statements);
+
+ return pm_unless_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
+ TOK2LOC(parser, keyword),
+ predicate,
+ NTOK2LOC(parser, then_keyword),
+ statements,
+ NULL,
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -7638,36 +6966,28 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t
static pm_unless_node_t *
pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
pm_statements_node_t *statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, statements, statement, true);
- *node = (pm_unless_node_t) {
- {
- .type = PM_UNLESS_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statement->location.start,
- .end = predicate->location.end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
- .predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .statements = statements,
- .else_clause = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ return pm_unless_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ PM_LOCATION_INIT_NODES(statement, predicate),
+ TOK2LOC(parser, unless_keyword),
+ predicate,
+ ((pm_location_t) { 0 }),
+ statements,
+ NULL,
+ ((pm_location_t) { 0 })
+ );
}
-static inline void
-pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
- node->base.location.end = end_keyword->end;
+static PRISM_INLINE void
+pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
+ node->end_keyword_loc = TOK2LOC(parser, end_keyword);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
}
/**
@@ -7682,7 +7002,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
// All of the block exits that we want to remove should be within the
// statements, and since we are modifying the statements, we shouldn't have
// to check the end location.
- const uint8_t *start = statements->base.location.start;
+ uint32_t start = statements->base.location.start;
for (size_t index = parser->current_block_exits->size; index > 0; index--) {
pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
@@ -7698,27 +7018,19 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
*/
static pm_until_node_t *
pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- *node = (pm_until_node_t) {
- {
- .type = PM_UNTIL_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = closing->end,
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_until_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+ TOK2LOC(parser, keyword),
+ NTOK2LOC(parser, do_keyword),
+ TOK2LOC(parser, closing),
+ predicate,
+ statements
+ );
}
/**
@@ -7726,28 +7038,20 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
*/
static pm_until_node_t *
pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
pm_loop_modifier_block_exits(parser, statements);
- *node = (pm_until_node_t) {
- {
- .type = PM_UNTIL_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statements->base.location.start,
- .end = predicate->location.end,
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_until_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_NODES(statements, predicate),
+ TOK2LOC(parser, keyword),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ predicate,
+ statements
+ );
}
/**
@@ -7755,42 +7059,34 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
*/
static pm_when_node_t *
pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
- pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
-
- *node = (pm_when_node_t) {
- {
- .type = PM_WHEN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = NULL
- }
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .statements = NULL,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .conditions = { 0 }
- };
-
- return node;
+ return pm_when_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, keyword),
+ TOK2LOC(parser, keyword),
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ NULL
+ );
}
/**
* Append a new condition to a when node.
*/
static void
-pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
- node->base.location.end = condition->location.end;
- pm_node_list_append(&node->conditions, condition);
+pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
+ PM_NODE_LENGTH_SET_NODE(node, condition);
+ pm_node_list_append(arena, &node->conditions, condition);
}
/**
* Set the location of the then keyword of a when node.
*/
-static inline void
-pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
- node->base.location.end = then_keyword->end;
- node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
+static PRISM_INLINE void
+pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
+ node->then_keyword_loc = TOK2LOC(parser, then_keyword);
}
/**
@@ -7798,8 +7094,8 @@ pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_k
*/
static void
pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
- if (statements->base.location.end > node->base.location.end) {
- node->base.location.end = statements->base.location.end;
+ if (PM_NODE_END(statements) > PM_NODE_END(node)) {
+ PM_NODE_LENGTH_SET_NODE(node, statements);
}
node->statements = statements;
@@ -7810,27 +7106,19 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
*/
static pm_while_node_t *
pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- *node = (pm_while_node_t) {
- {
- .type = PM_WHILE_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = closing->end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_while_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+ TOK2LOC(parser, keyword),
+ NTOK2LOC(parser, do_keyword),
+ TOK2LOC(parser, closing),
+ predicate,
+ statements
+ );
}
/**
@@ -7838,28 +7126,20 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
*/
static pm_while_node_t *
pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
pm_loop_modifier_block_exits(parser, statements);
- *node = (pm_while_node_t) {
- {
- .type = PM_WHILE_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statements->base.location.start,
- .end = predicate->location.end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_while_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_NODES(statements, predicate),
+ TOK2LOC(parser, keyword),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ predicate,
+ statements
+ );
}
/**
@@ -7867,22 +7147,17 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
*/
static pm_while_node_t *
pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
- pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
-
- *node = (pm_while_node_t) {
- {
- .type = PM_WHILE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
- .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
- .closing_loc = PM_LOCATION_NULL_VALUE(parser),
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_while_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ predicate,
+ statements
+ );
}
/**
@@ -7891,31 +7166,22 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
*/
static pm_x_string_node_t *
pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
- pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
-
- *node = (pm_x_string_node_t) {
- {
- .type = PM_X_STRING_NODE,
- .flags = PM_STRING_FLAGS_FROZEN,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .content_loc = PM_LOCATION_TOKEN_VALUE(content),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .unescaped = *unescaped
- };
-
- return node;
+ return pm_x_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_STRING_FLAGS_FROZEN,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, content),
+ TOK2LOC(parser, closing),
+ *unescaped
+ );
}
/**
* Allocate and initialize a new XStringNode node.
*/
-static inline pm_x_string_node_t *
+static PRISM_INLINE pm_x_string_node_t *
pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
}
@@ -7925,40 +7191,31 @@ pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_
*/
static pm_yield_node_t *
pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
- pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
+ uint32_t start = PM_TOKEN_START(parser, keyword);
+ uint32_t end;
- const uint8_t *end;
- if (rparen_loc->start != NULL) {
- end = rparen_loc->end;
+ if (rparen_loc->length > 0) {
+ end = PM_LOCATION_END(rparen_loc);
} else if (arguments != NULL) {
- end = arguments->base.location.end;
- } else if (lparen_loc->start != NULL) {
- end = lparen_loc->end;
+ end = PM_NODE_END(arguments);
+ } else if (lparen_loc->length > 0) {
+ end = PM_LOCATION_END(lparen_loc);
} else {
- end = keyword->end;
- }
-
- *node = (pm_yield_node_t) {
- {
- .type = PM_YIELD_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .lparen_loc = *lparen_loc,
- .arguments = arguments,
- .rparen_loc = *rparen_loc
- };
-
- return node;
+ end = PM_TOKEN_END(parser, keyword);
+ }
+
+ return pm_yield_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ TOK2LOC(parser, keyword),
+ *lparen_loc,
+ arguments,
+ *rparen_loc
+ );
}
-#undef PM_NODE_ALLOC
-#undef PM_NODE_IDENTIFY
-
/**
* Check if any of the currently visible scopes contain a local variable
* described by the given constant id.
@@ -7984,7 +7241,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
* described by the given token. This function implicitly inserts a constant
* into the constant pool.
*/
-static inline int
+static PRISM_INLINE int
pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
}
@@ -7992,27 +7249,35 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
/**
* Add a constant id to the local table of the current scope.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
- pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
+ pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
}
/**
* Add a local variable from a location to the current scope.
*/
static pm_constant_id_t
-pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
+pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+ pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
return constant_id;
}
/**
+ * Add a local variable from a location to the current scope.
+ */
+static PRISM_INLINE pm_constant_id_t
+pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
+ return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
+}
+
+/**
* Add a local variable from a token to the current scope.
*/
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
- return pm_parser_local_add_location(parser, token->start, token->end, reads);
+ return pm_parser_local_add_raw(parser, token->start, token->end, reads);
}
/**
@@ -8046,7 +7311,7 @@ static bool
pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
// We want to check whether the parameter name is a numbered parameter or
// not.
- pm_refute_numbered_parameter(parser, name->start, name->end);
+ pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
// Otherwise we'll fetch the constant id for the parameter name and check
// whether it's already in the current scope.
@@ -8070,8 +7335,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
pm_scope_t *scope = parser->current_scope;
parser->current_scope = scope->previous;
pm_locals_free(&scope->locals);
- pm_node_list_free(&scope->implicit_parameters);
- xfree(scope);
+ xfree_sized(scope, sizeof(pm_scope_t));
}
/******************************************************************************/
@@ -8081,7 +7345,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
/**
* Pushes a value onto the stack.
*/
-static inline void
+static PRISM_INLINE void
pm_state_stack_push(pm_state_stack_t *stack, bool value) {
*stack = (*stack << 1) | (value & 1);
}
@@ -8089,7 +7353,7 @@ pm_state_stack_push(pm_state_stack_t *stack, bool value) {
/**
* Pops a value off the stack.
*/
-static inline void
+static PRISM_INLINE void
pm_state_stack_pop(pm_state_stack_t *stack) {
*stack >>= 1;
}
@@ -8097,38 +7361,38 @@ pm_state_stack_pop(pm_state_stack_t *stack) {
/**
* Returns the value at the top of the stack.
*/
-static inline bool
+static PRISM_INLINE bool
pm_state_stack_p(const pm_state_stack_t *stack) {
return *stack & 1;
}
-static inline void
+static PRISM_INLINE void
pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
// Use the negation of the value to prevent stack overflow.
pm_state_stack_push(&parser->accepts_block_stack, !value);
}
-static inline void
+static PRISM_INLINE void
pm_accepts_block_stack_pop(pm_parser_t *parser) {
pm_state_stack_pop(&parser->accepts_block_stack);
}
-static inline bool
+static PRISM_INLINE bool
pm_accepts_block_stack_p(pm_parser_t *parser) {
return !pm_state_stack_p(&parser->accepts_block_stack);
}
-static inline void
+static PRISM_INLINE void
pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
pm_state_stack_push(&parser->do_loop_stack, value);
}
-static inline void
+static PRISM_INLINE void
pm_do_loop_stack_pop(pm_parser_t *parser) {
pm_state_stack_pop(&parser->do_loop_stack);
}
-static inline bool
+static PRISM_INLINE bool
pm_do_loop_stack_p(pm_parser_t *parser) {
return pm_state_stack_p(&parser->do_loop_stack);
}
@@ -8141,7 +7405,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
* Get the next character in the source starting from +cursor+. If that position
* is beyond the end of the source then return '\0'.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
if (cursor < parser->end) {
return *cursor;
@@ -8155,7 +7419,7 @@ peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
* adding the given offset. If that position is beyond the end of the source
* then return '\0'.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
return peek_at(parser, parser->current.end + offset);
}
@@ -8164,7 +7428,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
* Get the next character in the source starting from parser->current.end. If
* that position is beyond the end of the source then return '\0'.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
peek(const pm_parser_t *parser) {
return peek_at(parser, parser->current.end);
}
@@ -8173,7 +7437,7 @@ peek(const pm_parser_t *parser) {
* If the character to be read matches the given value, then returns true and
* advances the current pointer.
*/
-static inline bool
+static PRISM_INLINE bool
match(pm_parser_t *parser, uint8_t value) {
if (peek(parser) == value) {
parser->current.end++;
@@ -8186,7 +7450,7 @@ match(pm_parser_t *parser, uint8_t value) {
* Return the length of the line ending string starting at +cursor+, or 0 if it
* is not a line ending. This function is intended to be CRLF/LF agnostic.
*/
-static inline size_t
+static PRISM_INLINE size_t
match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
if (peek_at(parser, cursor) == '\n') {
return 1;
@@ -8202,7 +7466,7 @@ match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
* `parser->current.end + offset`, or 0 if it is not a line ending. This
* function is intended to be CRLF/LF agnostic.
*/
-static inline size_t
+static PRISM_INLINE size_t
match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
return match_eol_at(parser, parser->current.end + offset);
}
@@ -8212,7 +7476,7 @@ match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
* or 0 if it is not a line ending. This function is intended to be CRLF/LF
* agnostic.
*/
-static inline size_t
+static PRISM_INLINE size_t
match_eol(pm_parser_t *parser) {
return match_eol_at(parser, parser->current.end);
}
@@ -8220,7 +7484,7 @@ match_eol(pm_parser_t *parser) {
/**
* Skip to the next newline character or NUL byte.
*/
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
assert(length >= 0);
@@ -8233,7 +7497,7 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
/**
* This is equivalent to the predicate of warn_balanced in CRuby.
*/
-static inline bool
+static PRISM_INLINE bool
ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
}
@@ -8311,7 +7575,7 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
// issue because we didn't understand the encoding that the user was
// trying to use. In this case we'll keep using the default encoding but
// add an error to the parser to indicate an unsuccessful parse.
- pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
+ pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
}
}
@@ -8336,7 +7600,7 @@ parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t valu
}
}
-static inline bool
+static PRISM_INLINE bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
return b == '\'' || b == '"' || b == ':' || b == ';';
}
@@ -8346,13 +7610,15 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
* found, it returns a pointer to the start of the marker. Otherwise it returns
* NULL.
*/
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
- while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
- if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
- return cursor;
+ // Scan for '*' as the middle character, since it is rarer than '-' in
+ // typical comments and avoids repeated memchr calls for '-' that hit
+ // dashes in words like "foo-bar".
+ while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) {
+ if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') {
+ return cursor - 1;
}
- cursor++;
}
return NULL;
}
@@ -8367,7 +7633,7 @@ parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor
* It returns true if it consumes the entire comment. Otherwise it returns
* false.
*/
-static inline bool
+static PRISM_INLINE bool
parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
bool result = true;
@@ -8389,11 +7655,24 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// have a magic comment.
return false;
}
+ } else {
+ // Non-emacs magic comments must contain a colon for `key: value`.
+ // Reject early if there is no colon to avoid scanning the entire
+ // comment character-by-character.
+ if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) {
+ return false;
+ }
+
+ // Advance start past leading whitespace so the main loop begins
+ // directly at the key, avoiding a redundant whitespace scan.
+ start += pm_strspn_whitespace(start, end - start);
}
cursor = start;
while (cursor < end) {
- while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
+ if (indicator) {
+ while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
+ }
const uint8_t *key_start = cursor;
while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
@@ -8421,7 +7700,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
}
value_end = cursor;
- if (*cursor == '"') cursor++;
+ if (cursor < end && *cursor == '"') cursor++;
} else {
value_start = cursor;
while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
@@ -8479,7 +7758,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
PM_PARSER_WARN_TOKEN_FORMAT(
parser,
- parser->current,
+ &parser->current,
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
(int) key_length,
(const char *) key_source,
@@ -8506,7 +7785,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
PM_PARSER_WARN_TOKEN_FORMAT(
parser,
- parser->current,
+ &parser->current,
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
(int) key_length,
(const char *) key_source,
@@ -8541,7 +7820,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
} else {
PM_PARSER_WARN_TOKEN_FORMAT(
parser,
- parser->current,
+ &parser->current,
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
(int) key_length,
(const char *) key_source,
@@ -8554,17 +7833,14 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// When we're done, we want to free the string in case we had to
// allocate memory for it.
- pm_string_free(&key);
+ pm_string_cleanup(&key);
// Allocate a new magic comment node to append to the parser's list.
- pm_magic_comment_t *magic_comment;
- if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
- magic_comment->key_start = key_start;
- magic_comment->value_start = value_start;
- magic_comment->key_length = (uint32_t) key_length;
- magic_comment->value_length = value_length;
- pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
- }
+ pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
+ magic_comment->node.next = NULL;
+ magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
+ magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
+ pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
}
return result;
@@ -8574,85 +7850,67 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
/* Context manipulations */
/******************************************************************************/
-static bool
-context_terminator(pm_context_t context, pm_token_t *token) {
- switch (context) {
- case PM_CONTEXT_MAIN:
- case PM_CONTEXT_DEF_PARAMS:
- case PM_CONTEXT_DEFINED:
- case PM_CONTEXT_MULTI_TARGET:
- case PM_CONTEXT_TERNARY:
- case PM_CONTEXT_RESCUE_MODIFIER:
- return token->type == PM_TOKEN_EOF;
- case PM_CONTEXT_DEFAULT_PARAMS:
- return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
- case PM_CONTEXT_PREEXE:
- case PM_CONTEXT_POSTEXE:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_MODULE:
- case PM_CONTEXT_CLASS:
- case PM_CONTEXT_SCLASS:
- case PM_CONTEXT_LAMBDA_DO_END:
- case PM_CONTEXT_DEF:
- case PM_CONTEXT_BLOCK_KEYWORDS:
- return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
- case PM_CONTEXT_WHILE:
- case PM_CONTEXT_UNTIL:
- case PM_CONTEXT_ELSE:
- case PM_CONTEXT_FOR:
- case PM_CONTEXT_BEGIN_ENSURE:
- case PM_CONTEXT_BLOCK_ENSURE:
- case PM_CONTEXT_CLASS_ENSURE:
- case PM_CONTEXT_DEF_ENSURE:
- case PM_CONTEXT_LAMBDA_ENSURE:
- case PM_CONTEXT_MODULE_ENSURE:
- case PM_CONTEXT_SCLASS_ENSURE:
- return token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_LOOP_PREDICATE:
- return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
- case PM_CONTEXT_FOR_INDEX:
- return token->type == PM_TOKEN_KEYWORD_IN;
- case PM_CONTEXT_CASE_WHEN:
- return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
- case PM_CONTEXT_CASE_IN:
- return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
- case PM_CONTEXT_IF:
- case PM_CONTEXT_ELSIF:
- return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_UNLESS:
- return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_EMBEXPR:
- return token->type == PM_TOKEN_EMBEXPR_END;
- case PM_CONTEXT_BLOCK_BRACES:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_PARENS:
- return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
- case PM_CONTEXT_BEGIN:
- case PM_CONTEXT_BEGIN_RESCUE:
- case PM_CONTEXT_BLOCK_RESCUE:
- case PM_CONTEXT_CLASS_RESCUE:
- case PM_CONTEXT_DEF_RESCUE:
- case PM_CONTEXT_LAMBDA_RESCUE:
- case PM_CONTEXT_MODULE_RESCUE:
- case PM_CONTEXT_SCLASS_RESCUE:
- return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_BEGIN_ELSE:
- case PM_CONTEXT_BLOCK_ELSE:
- case PM_CONTEXT_CLASS_ELSE:
- case PM_CONTEXT_DEF_ELSE:
- case PM_CONTEXT_LAMBDA_ELSE:
- case PM_CONTEXT_MODULE_ELSE:
- case PM_CONTEXT_SCLASS_ELSE:
- return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_LAMBDA_BRACES:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_PREDICATE:
- return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
- case PM_CONTEXT_NONE:
- return false;
- }
+static const uint32_t context_terminators[] = {
+ [PM_CONTEXT_NONE] = 0,
+ [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
+ [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+ [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+ [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+ [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
+ [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
+ [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
+ [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+ [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
+ [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
+};
- return false;
+static PRISM_INLINE bool
+context_terminator(pm_context_t context, pm_token_t *token) {
+ return token->type < 32 && (context_terminators[context] & (1U << token->type));
}
/**
@@ -8691,7 +7949,7 @@ context_push(pm_parser_t *parser, pm_context_t context) {
static void
context_pop(pm_parser_t *parser) {
pm_context_node_t *prev = parser->current_context->prev;
- xfree(parser->current_context);
+ xfree_sized(parser->current_context, sizeof(pm_context_node_t));
parser->current_context = prev;
}
@@ -8753,6 +8011,7 @@ context_human(pm_context_t context) {
case PM_CONTEXT_BEGIN: return "begin statement";
case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
+ case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
case PM_CONTEXT_CASE_WHEN: return "'when' clause";
case PM_CONTEXT_CASE_IN: return "'in' clause";
case PM_CONTEXT_CLASS: return "class definition";
@@ -8813,11 +8072,11 @@ context_human(pm_context_t context) {
/* Specific token lexers */
/******************************************************************************/
-static inline void
+static PRISM_INLINE void
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
if (invalid != NULL) {
pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
- pm_parser_err(parser, invalid, invalid + 1, diag_id);
+ pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
}
}
@@ -8928,7 +8187,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
}
- parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY;
break;
// 0o1111 is an octal number
@@ -8942,7 +8201,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
}
- parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
break;
// 01111 is an octal number
@@ -8956,7 +8215,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
case '6':
case '7':
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
- parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
break;
// 0x1111 is a hexadecimal number
@@ -8970,7 +8229,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
}
- parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
break;
// 0.xxx is a float
@@ -8988,11 +8247,62 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
}
} else {
// If it didn't start with a 0, then we'll lex as far as we can into a
- // decimal number.
- parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+ // decimal number. We compute the integer value inline to avoid
+ // re-scanning the digits later in pm_integer_parse.
+ {
+ const uint8_t *cursor = parser->current.end;
+ const uint8_t *end = parser->end;
+ uint64_t value = (uint64_t) (cursor[-1] - '0');
+
+ bool has_underscore = false;
+ bool prev_underscore = false;
+ const uint8_t *invalid = NULL;
+
+ while (cursor < end) {
+ uint8_t c = *cursor;
+ if (c >= '0' && c <= '9') {
+ if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0');
+ prev_underscore = false;
+ cursor++;
+ } else if (c == '_') {
+ has_underscore = true;
+ if (prev_underscore && invalid == NULL) invalid = cursor;
+ prev_underscore = true;
+ cursor++;
+ } else {
+ break;
+ }
+ }
+
+ if (has_underscore) {
+ if (prev_underscore && invalid == NULL) invalid = cursor - 1;
+ pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid);
+ }
+
+ if (value <= UINT32_MAX) {
+ parser->integer.value = (uint32_t) value;
+ parser->integer.lexed = true;
+ }
+
+ parser->current.end = cursor;
+ }
// Afterward, we'll lex as far as we can into an optional float suffix.
- type = lex_optional_float_suffix(parser, seen_e);
+ // Guard the function call: the vast majority of decimal numbers are
+ // plain integers, so avoid the call when the next byte cannot start a
+ // float suffix.
+ {
+ uint8_t next = peek(parser);
+ if (next == '.' || next == 'e' || next == 'E') {
+ type = lex_optional_float_suffix(parser, seen_e);
+
+ // If it turned out to be a float, the cached integer value is
+ // invalid.
+ if (type != PM_TOKEN_INTEGER) {
+ parser->integer.lexed = false;
+ }
+ }
+ }
}
// At this point we have a completed number, but we want to provide the user
@@ -9002,7 +8312,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
const uint8_t *fraction_start = parser->current.end;
const uint8_t *fraction_end = parser->current.end + 2;
fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
- pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
+ pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
}
return type;
@@ -9011,7 +8321,8 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
static pm_token_type_t
lex_numeric(pm_parser_t *parser) {
pm_token_type_t type = PM_TOKEN_INTEGER;
- parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+ parser->integer.lexed = false;
if (parser->current.end < parser->end) {
bool seen_e = false;
@@ -9101,8 +8412,8 @@ lex_global_variable(pm_parser_t *parser) {
} while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
// $0 isn't allowed to be followed by anything.
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
+ pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
}
return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9138,9 +8449,9 @@ lex_global_variable(pm_parser_t *parser) {
} else {
// If we get here, then we have a $ followed by something that
// isn't recognized as a global variable.
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
- const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
+ pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
}
return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9160,7 +8471,7 @@ lex_global_variable(pm_parser_t *parser) {
* * `type` - the expected token type
* * `modifier_type` - the expected modifier token type
*/
-static inline pm_token_type_t
+static PRISM_INLINE pm_token_type_t
lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
if (memcmp(current_start, value, vlen) == 0) {
pm_lex_state_t last_state = parser->lex_state;
@@ -9199,6 +8510,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
current_end += width;
}
} else {
+ // Fast path: scan ASCII identifier bytes using wide operations.
+ current_end += scan_identifier_ascii(current_end, end);
+
+ // Byte-at-a-time fallback for the tail and any UTF-8 sequences.
while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
current_end += width;
}
@@ -9258,9 +8573,15 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
switch (width) {
case 2:
if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
+ if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
+ return PM_TOKEN_KEYWORD_DO;
+ }
if (pm_do_loop_stack_p(parser)) {
return PM_TOKEN_KEYWORD_DO_LOOP;
}
+ if (!pm_accepts_block_stack_p(parser)) {
+ return PM_TOKEN_KEYWORD_DO_BLOCK;
+ }
return PM_TOKEN_KEYWORD_DO;
}
@@ -9339,8 +8660,8 @@ current_token_starts_line(pm_parser_t *parser) {
* handle interpolation. This function performs that check. It returns a token
* type representing what it found. Those cases are:
*
- * * PM_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The
- * caller should keep lexing.
+ * * 0 - No interpolation was found at this point. The caller should keep
+ * lexing.
* * PM_TOKEN_STRING_CONTENT - No interpolation was found at this point. The
* caller should return this token type.
* * PM_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller
@@ -9357,9 +8678,9 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
return PM_TOKEN_STRING_CONTENT;
}
- // Now we'll check against the character that follows the #. If it constitutes
- // valid interplation, we'll handle that, otherwise we'll return
- // PM_TOKEN_NOT_PROVIDED.
+ // Now we'll check against the character that follows the #. If it
+ // constitutes valid interpolation, we'll handle that, otherwise we'll return
+ // 0.
switch (pound[1]) {
case '@': {
// In this case we may have hit an embedded instance or class variable.
@@ -9393,7 +8714,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
// string content. This is like if we get "#@-". In this case the caller
// should keep lexing.
parser->current.end = pound + 1;
- return PM_TOKEN_NOT_PROVIDED;
+ return 0;
}
case '$':
// In this case we may have hit an embedded global variable. If there's
@@ -9443,7 +8764,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
// In this case we've hit a #$ that does not indicate a global variable.
// In this case we'll continue lexing past it.
parser->current.end = pound + 1;
- return PM_TOKEN_NOT_PROVIDED;
+ return 0;
case '{':
// In this case it's the start of an embedded expression. If we have
// already consumed content, then we need to return that content as string
@@ -9467,7 +8788,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
// mark that by returning 0. This tells the
// consumer to keep lexing forward.
parser->current.end = pound + 1;
- return PM_TOKEN_NOT_PROVIDED;
+ return 0;
}
}
@@ -9491,7 +8812,7 @@ static const bool ascii_printable_chars[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
-static inline bool
+static PRISM_INLINE bool
char_is_ascii_printable(const uint8_t b) {
return (b < 0x80) && ascii_printable_chars[b];
}
@@ -9500,7 +8821,7 @@ char_is_ascii_printable(const uint8_t b) {
* Return the value that a hexadecimal digit character represents. For example,
* transform 'a' into 10, 'b' into 11, etc.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
escape_hexadecimal_digit(const uint8_t value) {
return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
}
@@ -9510,8 +8831,8 @@ escape_hexadecimal_digit(const uint8_t value) {
* digits scanned. This function assumes that the characters have already been
* validated.
*/
-static inline uint32_t
-escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
+static PRISM_INLINE uint32_t
+escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) {
uint32_t value = 0;
for (size_t index = 0; index < length; index++) {
if (index != 0) value <<= 4;
@@ -9521,7 +8842,14 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
// Here we're going to verify that the value is actually a valid Unicode
// codepoint and not a surrogate pair.
if (value >= 0xD800 && value <= 0xDFFF) {
- pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // In regexp context, defer the error to regexp encoding
+ // validation where we can produce a regexp-specific message.
+ } else if (error_location != NULL) {
+ pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
+ } else {
+ pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
+ }
return 0xFFFD;
}
@@ -9531,7 +8859,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
/**
* Escape a single character value based on the given flags.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
escape_byte(uint8_t value, const uint8_t flags) {
if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
@@ -9541,21 +8869,32 @@ escape_byte(uint8_t value, const uint8_t flags) {
/**
* Write a unicode codepoint to the given buffer.
*/
-static inline void
+static PRISM_INLINE void
escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
// \u escape sequences in string-like structures implicitly change the
// encoding to UTF-8 if they are >= 0x80 or if they are used in a character
// literal.
if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
- PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // In regexp context, suppress this error — the regexp encoding
+ // validation will produce a more specific error message.
+ } else {
+ PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
+ }
}
parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
}
if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
- pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // In regexp context, defer the error to the regexp encoding
+ // validation which produces a regexp-specific message.
+ } else {
+ pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
+ }
+
pm_buffer_append_byte(buffer, 0xEF);
pm_buffer_append_byte(buffer, 0xBF);
pm_buffer_append_byte(buffer, 0xBD);
@@ -9566,11 +8905,16 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla
* When you're writing a byte to the unescape buffer, if the byte is non-ASCII
* (i.e., the top bit is set) then it locks in the encoding.
*/
-static inline void
-escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
+static PRISM_INLINE void
+escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) {
if (byte >= 0x80) {
if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // In regexp context, suppress this error — the regexp encoding
+ // validation will produce a more specific error message.
+ } else {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+ }
}
parser->explicit_encoding = parser->encoding;
@@ -9594,19 +8938,19 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
* Note that in this case there is a literal \ byte in the regular expression
* source so that the regular expression engine will perform its own unescaping.
*/
-static inline void
+static PRISM_INLINE void
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
}
- escape_write_byte_encoded(parser, buffer, byte);
+ escape_write_byte_encoded(parser, buffer, flags, byte);
}
/**
* Write each byte of the given escaped character into the buffer.
*/
-static inline void
+static PRISM_INLINE void
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
size_t width;
if (parser->encoding_changed) {
@@ -9616,6 +8960,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_
}
if (width == 1) {
+ if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
} else if (width > 1) {
// Valid multibyte character. Just ignore escape.
@@ -9641,7 +8986,7 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *t
PM_PARSER_WARN_TOKEN_FORMAT(
parser,
- parser->current,
+ &parser->current,
PM_WARN_INVALID_CHARACTER,
FLAG(flags),
FLAG(flag),
@@ -9756,7 +9101,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
}
}
- escape_write_byte_encoded(parser, buffer, value);
+ escape_write_byte_encoded(parser, buffer, flags, value);
} else {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
}
@@ -9769,7 +9114,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
if (parser->current.end == parser->end) {
const uint8_t *start = parser->current.end - 2;
- PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+ PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
} else if (peek(parser) == '{') {
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
parser->current.end++;
@@ -9798,18 +9143,19 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
if (hexadecimal_length > 6) {
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
- pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
+ pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
} else if (hexadecimal_length == 0) {
// there are no hexadecimal characters
if (flags & PM_ESCAPE_FLAG_REGEXP) {
// If this is a regular expression, we are going to
// let the regular expression engine handle this
- // error instead of us.
+ // error instead of us because we don't know at this
+ // point if we're inside a comment in /x mode.
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
} else {
- pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
- pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+ pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
+ pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
}
return;
@@ -9821,7 +9167,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
extra_codepoints_start = unicode_start;
}
- uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL, flags);
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9830,21 +9176,22 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
// ?\u{nnnn} character literal should contain only one codepoint
// and cannot be like ?\u{nnnn mmmm}.
if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
- pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
+ pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
}
if (parser->current.end == parser->end) {
- PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
+ PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
} else if (peek(parser) == '}') {
parser->current.end++;
} else {
if (flags & PM_ESCAPE_FLAG_REGEXP) {
// If this is a regular expression, we are going to let
// the regular expression engine handle this error
- // instead of us.
+ // instead of us because we don't know at this point if
+ // we're inside a comment in /x mode.
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
} else {
- pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+ pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
}
}
@@ -9859,10 +9206,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
} else {
const uint8_t *start = parser->current.end - 2;
- PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+ PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
}
} else if (length == 4) {
- uint32_t value = escape_unicode(parser, parser->current.end, 4);
+ uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL, flags);
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
@@ -9908,7 +9255,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
parser->current.end++;
if (match(parser, 'u') || match(parser, 'U')) {
- pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}
@@ -9930,6 +9277,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
}
+ if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
parser->current.end++;
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
@@ -9944,7 +9292,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
if (peek(parser) != '-') {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}
@@ -9965,7 +9313,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
parser->current.end++;
if (match(parser, 'u') || match(parser, 'U')) {
- pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}
@@ -9984,10 +9332,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
default: {
if (!char_is_ascii_printable(peeked)) {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}
+ if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
parser->current.end++;
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
@@ -10002,7 +9351,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
if (peek(parser) != '-') {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
return;
}
@@ -10018,7 +9367,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
parser->current.end++;
if (match(parser, 'u') || match(parser, 'U')) {
- pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}
@@ -10037,10 +9386,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
default:
if (!char_is_ascii_printable(peeked)) {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
return;
}
+ if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
parser->current.end++;
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
return;
@@ -10048,8 +9398,9 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
}
case '\r': {
if (peek_offset(parser, 1) == '\n') {
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
parser->current.end += 2;
- escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
+ escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
return;
}
PRISM_FALLTHROUGH
@@ -10057,7 +9408,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
default: {
if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
return;
}
if (parser->current.end < parser->end) {
@@ -10119,10 +9470,14 @@ lex_question_mark(pm_parser_t *parser) {
lex_state_set(parser, PM_LEX_STATE_END);
pm_buffer_t buffer;
- pm_buffer_init_capacity(&buffer, 3);
+ pm_buffer_init(&buffer, 3);
escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
- pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
+
+ // Copy buffer data into the arena and free the heap buffer.
+ void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t));
+ pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length);
+ pm_buffer_cleanup(&buffer);
return PM_TOKEN_CHARACTER_LITERAL;
} else {
@@ -10165,12 +9520,12 @@ lex_at_variable(pm_parser_t *parser) {
}
} else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
+ if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
}
size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
} else {
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
pm_parser_err_token(parser, &parser->current, diag_id);
@@ -10188,24 +9543,23 @@ lex_at_variable(pm_parser_t *parser) {
/**
* Optionally call out to the lex callback if one is provided.
*/
-static inline void
+static PRISM_INLINE void
parser_lex_callback(pm_parser_t *parser) {
- if (parser->lex_callback) {
- parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
+ if (parser->lex_callback.callback) {
+ parser->lex_callback.callback(parser, &parser->current, parser->lex_callback.data);
}
}
/**
* Return a new comment node of the specified type.
*/
-static inline pm_comment_t *
+static PRISM_INLINE pm_comment_t *
parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
- pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
- if (comment == NULL) return NULL;
+ pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
*comment = (pm_comment_t) {
.type = type,
- .location = { parser->current.start, parser->current.end }
+ .location = TOK2LOC(parser, &parser->current)
};
return comment;
@@ -10224,7 +9578,7 @@ lex_embdoc(pm_parser_t *parser) {
if (newline == NULL) {
parser->current.end = parser->end;
} else {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
parser->current.end = newline + 1;
}
@@ -10232,8 +9586,8 @@ lex_embdoc(pm_parser_t *parser) {
parser_lex_callback(parser);
// Now, create a comment that is going to be attached to the parser.
+ const uint8_t *comment_start = parser->current.start;
pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
- if (comment == NULL) return PM_TOKEN_EOF;
// Now, loop until we find the end of the embedded documentation or the end
// of the file.
@@ -10257,14 +9611,14 @@ lex_embdoc(pm_parser_t *parser) {
if (newline == NULL) {
parser->current.end = parser->end;
} else {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
parser->current.end = newline + 1;
}
parser->current.type = PM_TOKEN_EMBDOC_END;
parser_lex_callback(parser);
- comment->location.end = parser->current.end;
+ comment->location.length = (uint32_t) (parser->current.end - comment_start);
pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
return PM_TOKEN_EMBDOC_END;
@@ -10277,7 +9631,7 @@ lex_embdoc(pm_parser_t *parser) {
if (newline == NULL) {
parser->current.end = parser->end;
} else {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
parser->current.end = newline + 1;
}
@@ -10287,7 +9641,7 @@ lex_embdoc(pm_parser_t *parser) {
pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
- comment->location.end = parser->current.end;
+ comment->location.length = (uint32_t) (parser->current.end - comment_start);
pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
return PM_TOKEN_EOF;
@@ -10298,7 +9652,7 @@ lex_embdoc(pm_parser_t *parser) {
* This happens in a couple places depending on whether or not we have already
* lexed a comment.
*/
-static inline void
+static PRISM_INLINE void
parser_lex_ignored_newline(pm_parser_t *parser) {
parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
parser_lex_callback(parser);
@@ -10313,7 +9667,7 @@ parser_lex_ignored_newline(pm_parser_t *parser) {
* If it is set, then we need to skip past the heredoc body and then clear the
* heredoc_end field.
*/
-static inline void
+static PRISM_INLINE void
parser_flush_heredoc_end(pm_parser_t *parser) {
assert(parser->heredoc_end <= parser->end);
parser->next_start = parser->heredoc_end;
@@ -10389,12 +9743,12 @@ typedef struct {
/**
* Push the given byte into the token buffer.
*/
-static inline void
+static PRISM_INLINE void
pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
pm_buffer_append_byte(&token_buffer->buffer, byte);
}
-static inline void
+static PRISM_INLINE void
pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
}
@@ -10402,7 +9756,7 @@ pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t
/**
* Return the width of the character at the end of the current token.
*/
-static inline size_t
+static PRISM_INLINE size_t
parser_char_width(const pm_parser_t *parser) {
size_t width;
if (parser->encoding_changed) {
@@ -10429,36 +9783,31 @@ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parse
static void
pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
size_t width = parser_char_width(parser);
- pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
- pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
+ const uint8_t *start = parser->current.end;
+ pm_buffer_append_bytes(&token_buffer->base.buffer, start, width);
+ pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, width);
parser->current.end += width;
}
-static bool
-pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
- for (size_t index = 0; index < length; index++) {
- if (value[index] & 0x80) return false;
- }
-
- return true;
-}
-
/**
* When we're about to return from lexing the current token and we know for sure
* that we have found an escape sequence, this function is called to copy the
* contents of the token buffer into the current string on the parser so that it
* can be attached to the correct node.
*/
-static inline void
+static PRISM_INLINE void
pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
- pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
+ // Copy buffer data into the arena and free the heap buffer.
+ size_t len = pm_buffer_length(&token_buffer->buffer);
+ void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t));
+ pm_string_constant_init(&parser->current_string, (const char *) arena_data, len);
+ pm_buffer_cleanup(&token_buffer->buffer);
}
-static inline void
+static PRISM_INLINE void
pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
- pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
- parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
- pm_buffer_free(&token_buffer->regexp_buffer);
+ pm_token_buffer_copy(parser, &token_buffer->base);
+ pm_buffer_cleanup(&token_buffer->regexp_buffer);
}
/**
@@ -10484,10 +9833,11 @@ static void
pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
if (token_buffer->base.cursor == NULL) {
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
- parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
} else {
- pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
- pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
+ const uint8_t *cursor = token_buffer->base.cursor;
+ size_t length = (size_t) (parser->current.end - cursor);
+ pm_buffer_append_bytes(&token_buffer->base.buffer, cursor, length);
+ pm_buffer_append_bytes(&token_buffer->regexp_buffer, cursor, length);
pm_regexp_token_buffer_copy(parser, token_buffer);
}
}
@@ -10506,7 +9856,7 @@ static void
pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
const uint8_t *start;
if (token_buffer->cursor == NULL) {
- pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+ pm_buffer_init(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
start = parser->current.start;
} else {
start = token_buffer->cursor;
@@ -10523,8 +9873,8 @@ static void
pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
const uint8_t *start;
if (token_buffer->base.cursor == NULL) {
- pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
- pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+ pm_buffer_init(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+ pm_buffer_init(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
start = parser->current.start;
} else {
start = token_buffer->base.cursor;
@@ -10543,7 +9893,7 @@ pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *tok
* Effectively the same thing as pm_strspn_inline_whitespace, but in the case of
* a tilde heredoc expands out tab characters to the nearest tab boundaries.
*/
-static inline size_t
+static PRISM_INLINE size_t
pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
size_t whitespace = 0;
@@ -10591,7 +9941,7 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
parser_flush_heredoc_end(parser);
} else {
// Otherwise, we'll add the newline to the list of newlines.
- pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
}
uint8_t delimiter = *parser->current.end;
@@ -10639,6 +9989,12 @@ parser_lex(pm_parser_t *parser) {
unsigned int semantic_token_seen = parser->semantic_token_seen;
parser->semantic_token_seen = true;
+ // We'll jump to this label when we are about to encounter an EOF.
+ // If we still have lex_modes on the stack, we pop them so that cleanup
+ // can happen. For example, we should still continue parsing after a heredoc
+ // identifier, even if the heredoc body was syntactically invalid.
+ switch_lex_modes:
+
switch (parser->lex_modes.current->mode) {
case PM_LEX_DEFAULT:
case PM_LEX_EMBEXPR:
@@ -10661,22 +10017,29 @@ parser_lex(pm_parser_t *parser) {
bool space_seen = false;
// First, we're going to skip past any whitespace at the front of the next
- // token.
+ // token. Skip runs of inline whitespace in bulk to avoid per-character
+ // stores back to parser->current.end.
bool chomping = true;
while (parser->current.end < parser->end && chomping) {
- switch (*parser->current.end) {
- case ' ':
- case '\t':
- case '\f':
- case '\v':
- parser->current.end++;
+ {
+ static const uint8_t inline_whitespace[256] = {
+ [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
+ };
+ const uint8_t *scan = parser->current.end;
+ while (scan < parser->end && inline_whitespace[*scan]) scan++;
+ if (scan > parser->current.end) {
+ parser->current.end = scan;
space_seen = true;
- break;
+ continue;
+ }
+ }
+
+ switch (*parser->current.end) {
case '\r':
if (match_eol_offset(parser, 1)) {
chomping = false;
} else {
- pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
+ pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
parser->current.end++;
space_seen = true;
}
@@ -10689,7 +10052,7 @@ parser_lex(pm_parser_t *parser) {
parser->heredoc_end = NULL;
} else {
parser->current.end += eol_length + 1;
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
space_seen = true;
}
} else if (pm_char_is_inline_whitespace(*parser->current.end)) {
@@ -10712,6 +10075,14 @@ parser_lex(pm_parser_t *parser) {
// We'll check if we're at the end of the file. If we are, then we
// need to return the EOF token.
if (parser->current.end >= parser->end) {
+ // We may be missing closing tokens. We should pop modes one by one
+ // to do the appropriate cleanup like moving next_start for heredocs.
+ // Only when no mode is remaining will we actually emit the EOF token.
+ if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
+ lex_mode_pop(parser);
+ goto switch_lex_modes;
+ }
+
// If we hit EOF, but the EOF came immediately after a newline,
// set the start of the token to the newline. This way any EOF
// errors will be reported as happening on that line rather than
@@ -10783,7 +10154,7 @@ parser_lex(pm_parser_t *parser) {
}
if (parser->heredoc_end == NULL) {
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
}
}
@@ -10841,14 +10212,50 @@ parser_lex(pm_parser_t *parser) {
following = next_newline(following, parser->end - following);
}
- // If the lex state was ignored, or we hit a '.' or a '&.',
- // we will lex the ignored newline
+ // If the lex state was ignored, we will lex the
+ // ignored newline.
+ if (lex_state_ignored_p(parser)) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lexed_comment = false;
+ goto lex_next_token;
+ }
+
+ // If we hit a '.' or a '&.' we will lex the ignored
+ // newline.
+ if (following && (
+ (peek_at(parser, following) == '.') ||
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
+ )) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lexed_comment = false;
+ goto lex_next_token;
+ }
+
+
+ // On CRuby 4.0+, if the first non-comment line (at `following`)
+ // leads with '&&', '||', 'and', or 'or' (and not an identifier
+ // such as `and!`/`or?`), we will lex the ignored newline.
if (
- lex_state_ignored_p(parser) ||
- (following && (
- (peek_at(parser, following) == '.') ||
- (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
- ))
+ (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) &&
+ following && (
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
+ (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
+ (
+ peek_at(parser, following) == 'a' &&
+ peek_at(parser, following + 1) == 'n' &&
+ peek_at(parser, following + 2) == 'd' &&
+ peek_at(parser, following + 3) != '!' &&
+ peek_at(parser, following + 3) != '?' &&
+ !char_is_identifier(parser, following + 3, parser->end - (following + 3))
+ ) ||
+ (
+ peek_at(parser, following) == 'o' &&
+ peek_at(parser, following + 1) == 'r' &&
+ peek_at(parser, following + 2) != '!' &&
+ peek_at(parser, following + 2) != '?' &&
+ !char_is_identifier(parser, following + 2, parser->end - (following + 2))
+ )
+ )
) {
if (!lexed_comment) parser_lex_ignored_newline(parser);
lexed_comment = false;
@@ -10888,6 +10295,67 @@ parser_lex(pm_parser_t *parser) {
parser->next_start = NULL;
LEX(PM_TOKEN_AMPERSAND_DOT);
}
+
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+ // If we hit an && then we are in a logical chain
+ // and we need to return the logical operator.
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
+ }
+
+ // If we hit a || then we are in a logical chain and
+ // we need to return the logical operator.
+ if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ LEX(PM_TOKEN_PIPE_PIPE);
+ }
+
+ // If we hit an 'and' then we are in a logical chain
+ // and we need to return the logical operator.
+ if (
+ peek_at(parser, next_content) == 'a' &&
+ peek_at(parser, next_content + 1) == 'n' &&
+ peek_at(parser, next_content + 2) == 'd' &&
+ peek_at(parser, next_content + 3) != '!' &&
+ peek_at(parser, next_content + 3) != '?' &&
+ !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
+ ) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 3;
+ parser->next_start = NULL;
+ parser->command_start = true;
+ LEX(PM_TOKEN_KEYWORD_AND);
+ }
+
+ // If we hit an 'or' then we are in a logical chain
+ // and we need to return the logical operator.
+ if (
+ peek_at(parser, next_content) == 'o' &&
+ peek_at(parser, next_content + 1) == 'r' &&
+ peek_at(parser, next_content + 2) != '!' &&
+ peek_at(parser, next_content + 2) != '?' &&
+ !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
+ ) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ parser->command_start = true;
+ LEX(PM_TOKEN_KEYWORD_OR);
+ }
+ }
}
// At this point we know this is a regular newline, and we can set the
@@ -10902,7 +10370,7 @@ parser_lex(pm_parser_t *parser) {
// ,
case ',':
if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
}
lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
@@ -11028,7 +10496,7 @@ parser_lex(pm_parser_t *parser) {
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_USTAR_STAR;
} else if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -11053,7 +10521,7 @@ parser_lex(pm_parser_t *parser) {
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_USTAR;
} else if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -11179,7 +10647,7 @@ parser_lex(pm_parser_t *parser) {
bool ident_error = false;
if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
- pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
+ pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
ident_error = true;
}
@@ -11212,7 +10680,7 @@ parser_lex(pm_parser_t *parser) {
} else {
// Otherwise, we want to indicate that the body of the
// heredoc starts on the character after the next newline.
- pm_newline_list_append(&parser->newline_list, body_start);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1));
body_start++;
}
@@ -11231,7 +10699,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
}
if (lex_state_operator_p(parser)) {
@@ -11357,7 +10825,7 @@ parser_lex(pm_parser_t *parser) {
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_UAMPERSAND;
} else if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -11433,7 +10901,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
}
lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -11474,7 +10942,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
}
lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -11573,7 +11041,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
}
if (lex_state_operator_p(parser)) {
@@ -11758,7 +11226,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
}
lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
@@ -11794,40 +11262,40 @@ parser_lex(pm_parser_t *parser) {
// token after adding an appropriate error message.
if (!width) {
if (*parser->current.start >= 0x80) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
} else if (*parser->current.start == '\\') {
switch (peek_at(parser, parser->current.start + 1)) {
case ' ':
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
break;
case '\f':
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
break;
case '\t':
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
break;
case '\v':
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
break;
case '\r':
if (peek_at(parser, parser->current.start + 2) != '\n') {
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
break;
}
PRISM_FALLTHROUGH
default:
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
break;
}
} else if (char_is_ascii_printable(*parser->current.start)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
} else {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
}
goto lex_next_token;
@@ -11853,15 +11321,15 @@ parser_lex(pm_parser_t *parser) {
// correct column information for it.
const uint8_t *cursor = parser->current.end;
while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
- pm_newline_list_append(&parser->newline_list, cursor++);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start));
}
parser->current.end = parser->end;
parser->current.type = PM_TOKEN___END__;
parser_lex_callback(parser);
- parser->data_loc.start = parser->current.start;
- parser->data_loc.end = parser->current.end;
+ parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
+ parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
LEX(PM_TOKEN_EOF);
}
@@ -11886,7 +11354,7 @@ parser_lex(pm_parser_t *parser) {
!(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
(type == PM_TOKEN_IDENTIFIER) &&
((pm_parser_local_depth(parser, &parser->current) != -1) ||
- pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
+ pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
) {
lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
}
@@ -11914,7 +11382,7 @@ parser_lex(pm_parser_t *parser) {
whitespace += 1;
}
} else {
- whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
+ whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
}
if (whitespace > 0) {
@@ -12029,7 +11497,7 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
- pm_newline_list_append(&parser->newline_list, parser->current.end);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
}
parser->current.end++;
@@ -12057,7 +11525,7 @@ parser_lex(pm_parser_t *parser) {
if (*breakpoint == '#') {
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_NOT_PROVIDED) {
+ if (!type) {
// If we haven't returned at this point then we had something
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
@@ -12162,7 +11630,13 @@ parser_lex(pm_parser_t *parser) {
size_t eol_length = match_eol_at(parser, breakpoint);
if (eol_length) {
parser->current.end = breakpoint + eol_length;
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+
+ // Track the newline if we're not in a heredoc that
+ // would already have added the newline to the
+ // list.
+ if (parser->heredoc_end == NULL) {
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+ }
} else {
parser->current.end = breakpoint + 1;
}
@@ -12208,7 +11682,7 @@ parser_lex(pm_parser_t *parser) {
// If we've hit a newline, then we need to track that in
// the list of newlines.
if (parser->heredoc_end == NULL) {
- pm_newline_list_append(&parser->newline_list, breakpoint);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
break;
@@ -12256,7 +11730,7 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
- pm_newline_list_append(&parser->newline_list, parser->current.end);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
}
parser->current.end++;
@@ -12303,7 +11777,7 @@ parser_lex(pm_parser_t *parser) {
// interpolation.
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_NOT_PROVIDED) {
+ if (!type) {
// If we haven't returned at this point then we had
// something that looked like an interpolated class or
// instance variable like "#@" but wasn't actually. In
@@ -12416,7 +11890,13 @@ parser_lex(pm_parser_t *parser) {
size_t eol_length = match_eol_at(parser, breakpoint);
if (eol_length) {
parser->current.end = breakpoint + eol_length;
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+
+ // Track the newline if we're not in a heredoc that
+ // would already have added the newline to the
+ // list.
+ if (parser->heredoc_end == NULL) {
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+ }
} else {
parser->current.end = breakpoint + 1;
}
@@ -12428,6 +11908,13 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_LABEL_END);
}
+ // When the delimiter itself is a newline, we won't
+ // get a chance to flush heredocs in the usual places since
+ // the newline is already consumed.
+ if (term == '\n' && parser->heredoc_end) {
+ parser_flush_heredoc_end(parser);
+ }
+
lex_state_set(parser, PM_LEX_STATE_END);
lex_mode_pop(parser);
LEX(PM_TOKEN_STRING_END);
@@ -12460,7 +11947,7 @@ parser_lex(pm_parser_t *parser) {
// for the terminator in case the terminator is a
// newline character.
if (parser->heredoc_end == NULL) {
- pm_newline_list_append(&parser->newline_list, breakpoint);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
@@ -12514,7 +12001,7 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
- pm_newline_list_append(&parser->newline_list, parser->current.end);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
}
parser->current.end++;
@@ -12543,7 +12030,7 @@ parser_lex(pm_parser_t *parser) {
case '#': {
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_NOT_PROVIDED) {
+ if (!type) {
// If we haven't returned at this point then we had something that
// looked like an interpolated class or instance variable like "#@"
// but wasn't actually. In this case we'll just skip to the next
@@ -12643,7 +12130,7 @@ parser_lex(pm_parser_t *parser) {
(memcmp(terminator_start, ident_start, ident_length) == 0)
) {
if (newline != NULL) {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
}
parser->current.end = terminator_end;
@@ -12674,7 +12161,7 @@ parser_lex(pm_parser_t *parser) {
// Otherwise we'll be parsing string content. These are the places
// where we need to split up the content of the heredoc. We'll use
// strpbrk to find the first of these characters.
- uint8_t breakpoints[] = "\r\n\\#";
+ uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#";
pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
@@ -12715,7 +12202,7 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_CONTENT);
}
- pm_newline_list_append(&parser->newline_list, breakpoint);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
// If we have a - or ~ heredoc, then we can match after
// some leading whitespace.
@@ -12833,7 +12320,10 @@ parser_lex(pm_parser_t *parser) {
// string content.
if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
const uint8_t *end = parser->current.end;
- pm_newline_list_append(&parser->newline_list, end);
+
+ if (parser->heredoc_end == NULL) {
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1));
+ }
// Here we want the buffer to only
// include up to the backslash.
@@ -12864,7 +12354,7 @@ parser_lex(pm_parser_t *parser) {
case '#': {
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_NOT_PROVIDED) {
+ if (!type) {
// If we haven't returned at this point then we had
// something that looked like an interpolated class
// or instance variable like "#@" but wasn't
@@ -13089,7 +12579,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
/**
* Returns true if the current token is of the given type.
*/
-static inline bool
+static PRISM_INLINE bool
match1(const pm_parser_t *parser, pm_token_type_t type) {
return parser->current.type == type;
}
@@ -13097,7 +12587,7 @@ match1(const pm_parser_t *parser, pm_token_type_t type) {
/**
* Returns true if the current token is of either of the given types.
*/
-static inline bool
+static PRISM_INLINE bool
match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
return match1(parser, type1) || match1(parser, type2);
}
@@ -13105,7 +12595,7 @@ match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2)
/**
* Returns true if the current token is any of the three given types.
*/
-static inline bool
+static PRISM_INLINE bool
match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
}
@@ -13113,15 +12603,23 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
/**
* Returns true if the current token is any of the four given types.
*/
-static inline bool
+static PRISM_INLINE bool
match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
}
/**
+ * Returns true if the current token is any of the six given types.
+ */
+static PRISM_INLINE bool
+match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
+}
+
+/**
* Returns true if the current token is any of the seven given types.
*/
-static inline bool
+static PRISM_INLINE bool
match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
}
@@ -13129,20 +12627,12 @@ match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
/**
* Returns true if the current token is any of the eight given types.
*/
-static inline bool
+static PRISM_INLINE bool
match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
}
/**
- * Returns true if the current token is any of the nine given types.
- */
-static inline bool
-match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
-}
-
-/**
* If the current token is of the specified type, lex forward by one token and
* return true. Otherwise, return false. For example:
*
@@ -13161,7 +12651,7 @@ accept1(pm_parser_t *parser, pm_token_type_t type) {
* If the current token is either of the two given types, lex forward by one
* token and return true. Otherwise return false.
*/
-static inline bool
+static PRISM_INLINE bool
accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
if (match2(parser, type1, type2)) {
parser_lex(parser);
@@ -13186,10 +12676,10 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
if (accept1(parser, type)) return;
const uint8_t *location = parser->previous.end;
- pm_parser_err(parser, location, location, diag_id);
+ pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
parser->previous.start = location;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
/**
@@ -13201,10 +12691,10 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
if (accept2(parser, type1, type2)) return;
const uint8_t *location = parser->previous.end;
- pm_parser_err(parser, location, location, diag_id);
+ pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
parser->previous.start = location;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
/**
@@ -13218,20 +12708,43 @@ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ide
} else {
pm_parser_err_heredoc_term(parser, ident_start, ident_length);
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
}
+/**
+ * A special expect1 that attaches the error to the opening token location
+ * rather than the current position. This is useful for errors about missing
+ * closing tokens, where we want to point to the line with the opening token
+ * (e.g., `def`, `class`, `if`, `{`) rather than the end of the file.
+ *
+ * If accept1 succeeds for the given type, this function returns immediately
+ * and records nothing. Otherwise the diagnostic identified by diag_id is
+ * recorded over the span of the opening token, expressed as an offset from
+ * parser->start plus a length. The previous token is then collapsed to zero
+ * width at its own end and its type is set to 0.
+ *
+ * NOTE(review): the meaning of token type 0 is not visible here; presumably
+ * it marks a missing token. Confirm against the token type enum.
+ */
+static void
+expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
+ if (accept1(parser, type)) return;
+
+ const uint8_t *start = opening->start;
+ pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id);
+
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = 0;
+}
+
+/**
+ * Flags for controlling expression parsing behavior. Each flag occupies its
+ * own bit of a uint8_t, so they can be combined with | and tested or masked
+ * with &. The combined value is what the uint8_t `flags` parameter of
+ * parse_expression and parse_value_expression carries.
+ */
+#define PM_PARSE_ACCEPTS_COMMAND_CALL ((uint8_t) 0x1)
+#define PM_PARSE_ACCEPTS_LABEL ((uint8_t) 0x2)
+#define PM_PARSE_ACCEPTS_DO_BLOCK ((uint8_t) 0x4)
+#define PM_PARSE_IN_ENDLESS_DEF ((uint8_t) 0x8)
+
static pm_node_t *
-parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
/**
* This is a wrapper of parse_expression, which also checks whether the
* resulting node is a value expression.
*
* Every argument is forwarded unchanged to parse_expression. The parsed node
* is then handed to pm_assert_value_expression, whose result is not inspected
* here, and the node is returned to the caller either way.
*/
static pm_node_t *
-parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
- pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
+parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ pm_node_t *node = parse_expression(parser, binding_power, flags, diag_id, depth);
pm_assert_value_expression(parser, node);
return node;
}
@@ -13254,7 +12767,7 @@ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bo
* work in all cases, it may need to be refactored later. But it appears to work
* for now.
*/
-static inline bool
+static PRISM_INLINE bool
token_begins_expression_p(pm_token_type_t type) {
switch (type) {
case PM_TOKEN_EQUAL_GREATER:
@@ -13270,6 +12783,7 @@ token_begins_expression_p(pm_token_type_t type) {
case PM_TOKEN_EOF:
case PM_TOKEN_LAMBDA_BEGIN:
case PM_TOKEN_KEYWORD_DO:
+ case PM_TOKEN_KEYWORD_DO_BLOCK:
case PM_TOKEN_KEYWORD_DO_LOOP:
case PM_TOKEN_KEYWORD_END:
case PM_TOKEN_KEYWORD_ELSE:
@@ -13315,14 +12829,89 @@ token_begins_expression_p(pm_token_type_t type) {
* prefixed by the * operator.
*/
static pm_node_t *
-parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
if (accept1(parser, PM_TOKEN_USTAR)) {
pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ pm_node_t *expression = parse_value_expression(parser, binding_power, (uint8_t) (flags & PM_PARSE_ACCEPTS_DO_BLOCK), PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ return UP(pm_splat_node_create(parser, &operator, expression));
}
- return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
+ return parse_value_expression(parser, binding_power, flags, diag_id, depth);
+}
+
+static bool
+pm_node_unreference_each(const pm_node_t *node, void *data) { /* callback given to pm_visit_node; data is the pm_parser_t */
+ switch (PM_NODE_TYPE(node)) {
+ /* When we are about to destroy a set of nodes that could potentially
+ * contain block exits for the current scope, we need to check if they
+ * are contained in the list of block exits and remove them if they are.
+ *
+ * The list is searched linearly by pointer identity. On a match, the
+ * entries after it are shifted down by one slot with memmove (the
+ * source and destination ranges overlap) and the size is decremented.
+ */
+ case PM_BREAK_NODE:
+ case PM_NEXT_NODE:
+ case PM_REDO_NODE: {
+ pm_parser_t *parser = (pm_parser_t *) data;
+ size_t index = 0;
+
+ while (index < parser->current_block_exits->size) {
+ pm_node_t *block_exit = parser->current_block_exits->nodes[index];
+
+ if (block_exit == node) {
+ if (index + 1 < parser->current_block_exits->size) { /* nothing to shift when removing the last entry */
+ memmove(
+ &parser->current_block_exits->nodes[index],
+ &parser->current_block_exits->nodes[index + 1],
+ (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
+ );
+ }
+ parser->current_block_exits->size--;
+
+ /* Note returning true here because these nodes could have
+ * arguments that are themselves block exits. */
+ return true;
+ }
+
+ index++;
+ }
+
+ return true; /* node was not in the block exits list */
+ }
+ /* When an implicit local variable is written to or targeted, it becomes
+ * a regular, named local variable. This branch removes it from the list
+ * of implicit parameters when that happens. */
+ case PM_LOCAL_VARIABLE_READ_NODE:
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_parser_t *parser = (pm_parser_t *) data;
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
+
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
+ if (implicit_parameters->nodes[index] == node) {
+ /* If the node is not the last one in the list, we need to
+ * shift the remaining nodes down to fill the gap. This is
+ * extremely unlikely to happen. */
+ if (index != implicit_parameters->size - 1) {
+ memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
+ }
+
+ implicit_parameters->size--;
+ break; /* only the first matching entry is removed */
+ }
+ }
+
+ return false; /* NOTE(review): presumably false tells pm_visit_node not to descend further; confirm */
+ }
+ default:
+ return true; /* no parser list is touched for other node types */
+ }
+}
+
+/**
+ * When we are about to destroy a set of nodes that could potentially be
+ * referenced by one or more lists on the parser, then remove them from those
+ * lists so we don't get a use-after-free.
+ *
+ * This hands `node` to pm_visit_node with pm_node_unreference_each as the
+ * callback and the parser as the callback data. That callback removes entries
+ * from the parser's current block exits list and from the current scope's
+ * implicit parameters list.
+ */
+static void
+pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
+ pm_visit_node(node, pm_node_unreference_each, parser);
}
/**
@@ -13337,16 +12926,12 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
// append an =.
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
size_t length = constant->length;
- uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
- if (name == NULL) return;
+ uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1);
memcpy(name, constant->start, length);
name[length] = '=';
- // Now switch the name to the new string.
- // This silences clang analyzer warning about leak of memory pointed by `name`.
- // NOLINTNEXTLINE(clang-analyzer-*)
- *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
+ *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1);
}
/**
@@ -13368,35 +12953,10 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
default: break;
}
- pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+ pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
- pm_node_destroy(parser, target);
- return (pm_node_t *) result;
-}
-
-/**
- * When an implicit local variable is written to or targeted, it becomes a
- * regular, named local variable. This function removes it from the list of
- * implicit parameters when that happens.
- */
-static void
-parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
- pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
-
- for (size_t index = 0; index < implicit_parameters->size; index++) {
- if (implicit_parameters->nodes[index] == node) {
- // If the node is not the last one in the list, we need to shift the
- // remaining nodes down to fill the gap. This is extremely unlikely
- // to happen.
- if (index != implicit_parameters->size - 1) {
- memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
- }
-
- implicit_parameters->size--;
- break;
- }
- }
+ return UP(result);
}
/**
@@ -13410,7 +12970,7 @@ parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
static pm_node_t *
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
switch (PM_NODE_TYPE(target)) {
- case PM_MISSING_NODE:
+ case PM_ERROR_RECOVERY_NODE:
return target;
case PM_SOURCE_ENCODING_NODE:
case PM_FALSE_NODE:
@@ -13448,15 +13008,15 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
case PM_BACK_REFERENCE_READ_NODE:
case PM_NUMBERED_REFERENCE_READ_NODE:
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
- return target;
+ return UP(pm_error_recovery_node_create_unexpected(parser, target));
case PM_GLOBAL_VARIABLE_READ_NODE:
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
return target;
case PM_LOCAL_VARIABLE_READ_NODE: {
- if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
- PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
- parse_target_implicit_parameter(parser, target);
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
+ PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
+ pm_node_unreference(parser, target);
}
const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
@@ -13471,10 +13031,9 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
- pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
+ pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
- parse_target_implicit_parameter(parser, target);
- pm_node_destroy(parser, target);
+ pm_node_unreference(parser, target);
return node;
}
@@ -13497,7 +13056,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
splat->expression = parse_target(parser, splat->expression, multiple, true);
}
- return (pm_node_t *) splat;
+ return UP(splat);
}
case PM_CALL_NODE: {
pm_call_node_t *call = (pm_call_node_t *) target;
@@ -13506,10 +13065,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
// target then this is either a method call or a local variable
// write.
if (
- (call->message_loc.start != NULL) &&
- (call->message_loc.end[-1] != '!') &&
- (call->message_loc.end[-1] != '?') &&
- (call->opening_loc.start == NULL) &&
+ (call->message_loc.length > 0) &&
+ (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
+ (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
+ (call->opening_loc.length == 0) &&
(call->arguments == NULL) &&
(call->block == NULL)
) {
@@ -13523,21 +13082,19 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
// When it was parsed in the prefix position, foo was seen as a
// method call with no receiver and no arguments. Now we have an
// =, so we know it's a local variable write.
- const pm_location_t message_loc = call->message_loc;
-
- pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
- pm_node_destroy(parser, target);
+ pm_location_t message_loc = call->message_loc;
+ pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
- return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
+ return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
}
- if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
+ if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
}
parse_write_name(parser, &call->name);
- return (pm_node_t *) pm_call_target_node_create(parser, call);
+ return UP(pm_call_target_node_create(parser, call));
}
}
@@ -13545,7 +13102,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
// an aref expression, and we can transform it into an aset
// expression.
if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
- return (pm_node_t *) pm_index_target_node_create(parser, call);
+ return UP(pm_index_target_node_create(parser, call));
}
}
PRISM_FALLTHROUGH
@@ -13588,7 +13145,7 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
- return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
+ return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
}
return write;
@@ -13600,16 +13157,14 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
static pm_node_t *
parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
switch (PM_NODE_TYPE(target)) {
- case PM_MISSING_NODE:
- pm_node_destroy(parser, value);
+ case PM_ERROR_RECOVERY_NODE:
return target;
case PM_CLASS_VARIABLE_READ_NODE: {
pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
- pm_node_destroy(parser, target);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_CONSTANT_PATH_NODE: {
- pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
+ pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
if (context_def_p(parser)) {
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
@@ -13618,13 +13173,12 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
return parse_shareable_constant_write(parser, node);
}
case PM_CONSTANT_READ_NODE: {
- pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
+ pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
if (context_def_p(parser)) {
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
}
- pm_node_destroy(parser, target);
return parse_shareable_constant_write(parser, node);
}
case PM_BACK_REFERENCE_READ_NODE:
@@ -13633,45 +13187,40 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
PRISM_FALLTHROUGH
case PM_GLOBAL_VARIABLE_READ_NODE: {
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
- pm_node_destroy(parser, target);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_LOCAL_VARIABLE_READ_NODE: {
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
+ pm_location_t location = target->location;
pm_constant_id_t name = local_read->name;
- pm_location_t name_loc = target->location;
-
uint32_t depth = local_read->depth;
pm_scope_t *scope = pm_parser_scope_find(parser, depth);
- if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
- PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
- parse_target_implicit_parameter(parser, target);
+ PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
+ pm_node_unreference(parser, target);
}
pm_locals_unread(&scope->locals, name);
- pm_node_destroy(parser, target);
- return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
+ return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
- pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
+ pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
- parse_target_implicit_parameter(parser, target);
- pm_node_destroy(parser, target);
+ pm_node_unreference(parser, target);
return node;
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
- pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
- pm_node_destroy(parser, target);
+ pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
return write_node;
}
case PM_MULTI_TARGET_NODE:
- return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
+ return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
case PM_SPLAT_NODE: {
pm_splat_node_t *splat = (pm_splat_node_t *) target;
@@ -13680,9 +13229,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
}
pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
+ pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
- return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
+ return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
}
case PM_CALL_NODE: {
pm_call_node_t *call = (pm_call_node_t *) target;
@@ -13691,10 +13240,10 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
// target then this is either a method call or a local variable
// write.
if (
- (call->message_loc.start != NULL) &&
- (call->message_loc.end[-1] != '!') &&
- (call->message_loc.end[-1] != '?') &&
- (call->opening_loc.start == NULL) &&
+ (call->message_loc.length > 0) &&
+ (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
+ (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
+ (call->opening_loc.length == 0) &&
(call->arguments == NULL) &&
(call->block == NULL)
) {
@@ -13708,19 +13257,18 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
// When it was parsed in the prefix position, foo was seen as a
// method call with no receiver and no arguments. Now we have an
// =, so we know it's a local variable write.
- const pm_location_t message = call->message_loc;
+ pm_location_t message_loc = call->message_loc;
- pm_parser_local_add_location(parser, message.start, message.end, 0);
- pm_node_destroy(parser, target);
+ pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
+ pm_parser_local_add_location(parser, &message_loc, 0);
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
- target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
+ pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
+ target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
- pm_refute_numbered_parameter(parser, message.start, message.end);
return target;
}
- if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
+ if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
// When we get here, we have a method call, because it was
// previously marked as a method call but now we have an =. This
// looks like:
@@ -13734,13 +13282,14 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
call->arguments = arguments;
- pm_arguments_node_arguments_append(arguments, value);
- call->base.location.end = arguments->base.location.end;
+ pm_arguments_node_arguments_append(parser->arena, arguments, value);
+ PM_NODE_LENGTH_SET_NODE(call, arguments);
+ call->equal_loc = TOK2LOC(parser, operator);
parse_write_name(parser, &call->name);
- pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+ pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
- return (pm_node_t *) call;
+ return UP(call);
}
}
@@ -13752,25 +13301,31 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
call->arguments = pm_arguments_node_create(parser);
}
- pm_arguments_node_arguments_append(call->arguments, value);
- target->location.end = value->location.end;
+ pm_arguments_node_arguments_append(parser->arena, call->arguments, value);
+ PM_NODE_LENGTH_SET_NODE(target, value);
// Replace the name with "[]=".
call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
+ call->equal_loc = TOK2LOC(parser, operator);
// Ensure that the arguments for []= don't contain keywords
pm_index_arguments_check(parser, call->arguments, call->block);
- pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+ pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
return target;
}
- // If there are arguments on the call node, then it can't be a method
- // call ending with = or a local variable write, so it must be a
- // syntax error. In this case we'll fall through to our default
+ // If there are arguments on the call node, then it can't be a
+ // method call ending with = or a local variable write, so it must
+ // be a syntax error. In this case we'll fall through to our default
// handling. We need to free the value that we parsed because there
// is no way for us to attach it to the tree at this point.
- pm_node_destroy(parser, value);
+ //
+ // Since it is possible for the value to contain an implicit
+ // parameter somewhere in its subtree, we need to walk it and remove
+ // any implicit parameters from the list of implicit parameters for
+ // the current scope.
+ pm_node_unreference(parser, value);
}
PRISM_FALLTHROUGH
default:
@@ -13801,11 +13356,10 @@ parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t
default: break;
}
- pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
+ pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
- pm_node_destroy(parser, target);
- return (pm_node_t *) result;
+ return UP(result);
}
/**
@@ -13838,35 +13392,35 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ name = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
name = parse_target(parser, name, true, true);
}
- pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
+ pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
pm_multi_target_node_targets_append(parser, result, splat);
has_rest = true;
} else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
context_push(parser, PM_CONTEXT_MULTI_TARGET);
- pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+ pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
target = parse_target(parser, target, true, false);
pm_multi_target_node_targets_append(parser, result, target);
context_pop(parser);
} else if (token_begins_expression_p(parser->current.type)) {
- pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+ pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
target = parse_target(parser, target, true, false);
pm_multi_target_node_targets_append(parser, result, target);
} else if (!match1(parser, PM_TOKEN_EOF)) {
// If we get here, then we have a trailing , in a multi target node.
// We'll add an implicit rest node to represent this.
- pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+ pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
pm_multi_target_node_targets_append(parser, result, rest);
break;
}
}
- return (pm_node_t *) result;
+ return UP(result);
}
/**
@@ -13876,7 +13430,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
static pm_node_t *
parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
- accept1(parser, PM_TOKEN_NEWLINE);
+
+ // If we're inside parentheses, then we allow a newline before the
+ // closing parenthesis or equals sign. Outside of parentheses, a newline
+ // is not allowed (e.g., `a, b\n= 1, 2` is not valid).
+ if (context_p(parser, PM_CONTEXT_PARENS) || context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+ accept1(parser, PM_TOKEN_NEWLINE);
+ }
// Ensure that we have either an = or a ) after the targets.
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
@@ -13905,7 +13465,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
context_push(parser, context);
while (true) {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
pm_statements_node_body_append(parser, statements, node, true);
// If we're recovering from a syntax error, then we need to stop parsing
@@ -13945,7 +13505,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
// we were unable to parse an expression, then we will skip past this
// token and continue parsing the statements list. Otherwise we'll add
// an error and continue parsing the statements list.
- if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
+ if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) {
parser_lex(parser);
// If we are at the end of the file, then we need to stop parsing
@@ -13963,13 +13523,14 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
// This is an inlined version of accept1 because the error that we
// want to add has varargs. If this happens again, we should
// probably extract a helper function.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
}
context_pop(parser);
+
bool last_value = true;
switch (context) {
case PM_CONTEXT_BEGIN_ENSURE:
@@ -13990,23 +13551,24 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
*/
static void
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
- const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
if (duplicated != NULL) {
pm_buffer_t buffer = { 0 };
- pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
+ pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
pm_diagnostic_list_append_format(
+ &parser->metadata_arena,
&parser->warning_list,
duplicated->location.start,
- duplicated->location.end,
+ duplicated->location.length,
PM_WARN_DUPLICATED_HASH_KEY,
(int) pm_buffer_length(&buffer),
pm_buffer_value(&buffer),
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
+ pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
);
- pm_buffer_free(&buffer);
+ pm_buffer_cleanup(&buffer);
}
}
@@ -14018,14 +13580,15 @@ static void
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
pm_node_t *previous;
- if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
+ if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
pm_diagnostic_list_append_format(
+ &parser->metadata_arena,
&parser->warning_list,
- node->location.start,
- node->location.end,
+ PM_NODE_START(node),
+ PM_NODE_LENGTH(node),
PM_WARN_DUPLICATED_WHEN_CLAUSE,
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
- pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
+ pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
+ pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
);
}
}
@@ -14053,14 +13616,14 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
// inner hash to share the static literals with the outer
// hash.
parser->current_hash_keys = literals;
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
} else if (token_begins_expression_p(parser->current.type)) {
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
} else {
pm_parser_scope_forwarding_keywords_check(parser, &operator);
}
- element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
+ element = UP(pm_assoc_splat_node_create(parser, value, &operator));
contains_keyword_splat = true;
break;
}
@@ -14068,44 +13631,43 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
pm_token_t label = parser->current;
parser_lex(parser);
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
+ pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
pm_hash_key_static_literals_add(parser, literals, key);
- pm_token_t operator = not_provided(parser);
pm_node_t *value = NULL;
if (token_begins_expression_p(parser->current.type)) {
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
} else {
if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
- value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
+ value = UP(pm_constant_read_node_create(parser, &constant));
} else {
int depth = -1;
pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
} else {
depth = pm_parser_local_depth(parser, &identifier);
}
if (depth == -1) {
- value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
+ value = UP(pm_call_node_variable_call_create(parser, &identifier));
} else {
- value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
+ value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
}
}
- value->location.end++;
- value = (pm_node_t *) pm_implicit_node_create(parser, value);
+ value->location.length++;
+ value = UP(pm_implicit_node_create(parser, value));
}
- element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ element = UP(pm_assoc_node_create(parser, key, NULL, value));
break;
}
default: {
- pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
+ pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
// Hash keys that are strings are automatically frozen. We will
// mark that here.
@@ -14115,24 +13677,22 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
pm_hash_key_static_literals_add(parser, literals, key);
- pm_token_t operator;
- if (pm_symbol_node_label_p(key)) {
- operator = not_provided(parser);
- } else {
+ pm_token_t operator = { 0 };
+ if (!pm_symbol_node_label_p(parser, key)) {
expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
operator = parser->previous;
}
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
- element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
break;
}
}
if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
- pm_hash_node_elements_append((pm_hash_node_t *) node, element);
+ pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element);
} else {
- pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
+ pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element);
}
// If there's no comma after the element, then we're done.
@@ -14153,23 +13713,47 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
return contains_keyword_splat;
}
+static PRISM_INLINE bool
+argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
+ if (pm_symbol_node_label_p(parser, argument)) {
+ return true;
+ }
+
+ switch (PM_NODE_TYPE(argument)) {
+ case PM_CALL_NODE: {
+ pm_call_node_t *cast = (pm_call_node_t *) argument;
+ if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
+ if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
+ return false;
+ }
+ if (cast->block != NULL) {
+ return false;
+ }
+ }
+ break;
+ }
+ default: break;
+ }
+ return accept1(parser, PM_TOKEN_EQUAL_GREATER);
+}
+
/**
* Append an argument to a list of arguments.
*/
-static inline void
+static PRISM_INLINE void
parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
if (arguments->arguments == NULL) {
arguments->arguments = pm_arguments_node_create(parser);
}
- pm_arguments_node_arguments_append(arguments->arguments, argument);
+ pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument);
}
/**
* Parse a list of arguments.
*/
static void
-parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
+parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint8_t flags, uint16_t depth) {
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
// First we need to check if the next token is one that could be the start
@@ -14202,16 +13786,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
}
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
- argument = (pm_node_t *) hash;
+ argument = UP(hash);
pm_static_literals_t hash_keys = { 0 };
- bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
+ bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
parse_arguments_append(parser, arguments, argument);
- pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
- if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
- pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
+ pm_node_flags_t node_flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+ if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+ pm_node_flag_set(UP(arguments->arguments), node_flags);
pm_static_literals_free(&hash_keys);
parsed_bare_hash = true;
@@ -14224,12 +13808,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
pm_node_t *expression = NULL;
if (token_begins_expression_p(parser->current.type)) {
- expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
} else {
pm_parser_scope_forwarding_block_check(parser, &operator);
}
- argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
+ argument = UP(pm_block_argument_node_create(parser, &operator, expression));
if (parsed_block_argument) {
parse_arguments_append(parser, arguments, argument);
} else {
@@ -14249,18 +13833,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
pm_parser_scope_forwarding_positionals_check(parser, &operator);
- argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
+ argument = UP(pm_splat_node_create(parser, &operator, NULL));
if (parsed_bare_hash) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
}
} else {
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
if (parsed_bare_hash) {
- pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
}
- argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ argument = UP(pm_splat_node_create(parser, &operator, expression));
}
parse_arguments_append(parser, arguments, argument);
@@ -14275,26 +13859,26 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
// not actually argument forwarding but was instead a
// range.
pm_token_t operator = parser->previous;
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
// If we parse a range, we need to validate that we
// didn't accidentally violate the nonassoc rules of the
// ... operator.
if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
pm_range_node_t *range = (pm_range_node_t *) right;
- pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
+ pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
}
- argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ argument = UP(pm_range_node_create(parser, NULL, &operator, right));
} else {
pm_parser_scope_forwarding_all_check(parser, &parser->previous);
if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
}
- argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
+ argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
parse_arguments_append(parser, arguments, argument);
- pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
+ pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
arguments->has_forwarding = true;
parsed_forwarding_arguments = true;
break;
@@ -14304,22 +13888,20 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
PRISM_FALLTHROUGH
default: {
if (argument == NULL) {
- argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+ argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (!parsed_first_argument ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0u) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
}
bool contains_keywords = false;
bool contains_keyword_splat = false;
- if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+ if (argument_allowed_for_bare_hash(parser, argument)) {
if (parsed_bare_hash) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
}
- pm_token_t operator;
+ pm_token_t operator = { 0 };
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
operator = parser->previous;
- } else {
- operator = not_provided(parser);
}
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
@@ -14330,18 +13912,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
pm_hash_key_static_literals_add(parser, &hash_keys, argument);
// Finish parsing the one we are part way through.
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
- argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
- pm_keyword_hash_node_elements_append(bare_hash, argument);
- argument = (pm_node_t *) bare_hash;
+ pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument);
+ argument = UP(bare_hash);
// Then parse more if we have a comma
if (accept1(parser, PM_TOKEN_COMMA) && (
token_begins_expression_p(parser->current.type) ||
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
)) {
- contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
+ contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
}
pm_static_literals_free(&hash_keys);
@@ -14350,10 +13932,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
parse_arguments_append(parser, arguments, argument);
- pm_node_flags_t flags = 0;
- if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
- if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
- pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
+ pm_node_flags_t node_flags = 0;
+ if (contains_keywords) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+ if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+ pm_node_flag_set(UP(arguments->arguments), node_flags);
break;
}
@@ -14362,7 +13944,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
parsed_first_argument = true;
// If parsing the argument failed, we need to stop parsing arguments.
- if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
+ if (PM_NODE_TYPE_P(argument, PM_ERROR_RECOVERY_NODE) || parser->recovering) break;
// If the terminator of these arguments is not EOF, then we have a
// specific token we're looking for. In that case we can accept a
@@ -14382,6 +13964,17 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
if (accepted_newline) {
pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
}
+
+ // If this is a command call and an argument takes a block,
+ // there can be no further arguments. For example,
+ // `foo(bar 1 do end, 2)` should be rejected.
+ if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
+ pm_call_node_t *call = (pm_call_node_t *) argument;
+ if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+ break;
+ }
+ }
} else {
// If there is no comma at the end of the argument list then we're
// done parsing arguments and can break out of this loop.
@@ -14409,7 +14002,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
- pm_multi_target_node_opening_set(node, &parser->previous);
+ pm_multi_target_node_opening_set(parser, node, &parser->previous);
do {
pm_node_t *param;
@@ -14419,33 +14012,33 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
// commas, so here we'll assume this is a mistake of the user not
// knowing it's not allowed here.
if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+ param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
pm_multi_target_node_targets_append(parser, node, param);
pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
break;
}
if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
- param = (pm_node_t *) parse_required_destructured_parameter(parser);
+ param = UP(parse_required_destructured_parameter(parser));
} else if (accept1(parser, PM_TOKEN_USTAR)) {
pm_token_t star = parser->previous;
pm_node_t *value = NULL;
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
pm_token_t name = parser->previous;
- value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+ value = UP(pm_required_parameter_node_create(parser, &name));
if (pm_parser_parameter_name_check(parser, &name)) {
pm_node_flag_set_repeated_parameter(value);
}
pm_parser_local_add_token(parser, &name, 1);
}
- param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
+ param = UP(pm_splat_node_create(parser, &star, value));
} else {
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
pm_token_t name = parser->previous;
- param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+ param = UP(pm_required_parameter_node_create(parser, &name));
if (pm_parser_parameter_name_check(parser, &name)) {
pm_node_flag_set_repeated_parameter(param);
}
@@ -14457,7 +14050,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
- pm_multi_target_node_closing_set(node, &parser->previous);
+ pm_multi_target_node_closing_set(parser, node, &parser->previous);
return node;
}
@@ -14533,6 +14126,43 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord
return true;
}
+static PRISM_INLINE void
+parse_parameters_handle_trailing_comma(
+ pm_parser_t *parser,
+ pm_parameters_node_t *params,
+ pm_parameters_order_t order,
+ bool in_block,
+ bool allows_trailing_comma
+) {
+ if (!allows_trailing_comma) {
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+ return;
+ }
+
+ if (in_block) {
+ if (order >= PM_PARAMETERS_ORDER_NAMED) {
+ // foo do |bar,|; end
+ pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+
+ if (params->rest == NULL) {
+ pm_parameters_node_rest_set(params, param);
+ } else {
+ pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
+ pm_parameters_node_posts_append(parser->arena, params, UP(param));
+ }
+ } else {
+ // foo do |*bar,|; end
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+ }
+ } else {
+ // https://bugs.ruby-lang.org/issues/19107
+ // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`
+ if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+ }
+ }
+}
+
/**
* Parse a list of parameters on a method definition.
*/
@@ -14545,6 +14175,7 @@ parse_parameters(
bool allows_forwarding_parameters,
bool accepts_blocks_in_defaults,
bool in_block,
+ pm_diagnostic_id_t diag_id_forwarding,
uint16_t depth
) {
pm_do_loop_stack_push(parser, false);
@@ -14558,12 +14189,12 @@ parse_parameters(
switch (parser->current.type) {
case PM_TOKEN_PARENTHESIS_LEFT: {
update_parameter_state(parser, &parser->current, &order);
- pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
+ pm_node_t *param = UP(parse_required_destructured_parameter(parser));
if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
- pm_parameters_node_requireds_append(params, param);
+ pm_parameters_node_requireds_append(parser->arena, params, param);
} else {
- pm_parameters_node_posts_append(params, param);
+ pm_parameters_node_posts_append(parser->arena, params, param);
}
break;
}
@@ -14573,34 +14204,40 @@ parse_parameters(
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_token_t name;
+ pm_node_t *param;
- bool repeated = false;
- if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
- name = parser->previous;
- repeated = pm_parser_parameter_name_check(parser, &name);
- pm_parser_local_add_token(parser, &name, 1);
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
+ param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
} else {
- name = not_provided(parser);
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
- }
+ pm_token_t name = {0};
- pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
- if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ bool repeated = false;
+ if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
+ name = parser->previous;
+ repeated = pm_parser_parameter_name_check(parser, &name);
+ pm_parser_local_add_token(parser, &name, 1);
+ } else {
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
+ }
+
+ param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
+ if (repeated) {
+ pm_node_flag_set_repeated_parameter(param);
+ }
}
+
if (params->block == NULL) {
pm_parameters_node_block_set(params, param);
} else {
- pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
+ pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
+ pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
}
break;
}
case PM_TOKEN_UDOT_DOT_DOT: {
if (!allows_forwarding_parameters) {
- pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+ pm_parser_err_current(parser, diag_id_forwarding);
}
bool succeeded = update_parameter_state(parser, &parser->current, &order);
@@ -14613,12 +14250,12 @@ parse_parameters(
// If we already have a keyword rest parameter, then we replace it with the
// forwarding parameter and move the keyword rest parameter to the posts list.
pm_node_t *keyword_rest = params->keyword_rest;
- pm_parameters_node_posts_append(params, keyword_rest);
+ pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, keyword_rest)));
if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
params->keyword_rest = NULL;
}
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
+ pm_parameters_node_keyword_rest_set(params, UP(param));
break;
}
case PM_TOKEN_CLASS_VARIABLE:
@@ -14663,24 +14300,24 @@ parse_parameters(
parser_lex(parser);
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
- uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
+ uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
- pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *) param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
- pm_parameters_node_optionals_append(params, param);
+ pm_parameters_node_optionals_append(parser->arena, params, param);
// If the value of the parameter increased the number of
// reads of that parameter, then we need to warn that we
// have a circular definition.
- if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
+ if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
}
context_pop(parser);
@@ -14695,15 +14332,15 @@ parse_parameters(
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
- pm_parameters_node_requireds_append(params, (pm_node_t *) param);
+ pm_parameters_node_requireds_append(parser->arena, params, UP(param));
} else {
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
+ pm_parameters_node_posts_append(parser->arena, params, UP(param));
}
break;
@@ -14720,9 +14357,9 @@ parse_parameters(
local.end -= 1;
if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
- pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
} else if (local.end[-1] == '!' || local.end[-1] == '?') {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
}
bool repeated = pm_parser_parameter_name_check(parser, &local);
@@ -14734,12 +14371,12 @@ parse_parameters(
case PM_TOKEN_PIPE: {
context_pop(parser);
- pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
- pm_parameters_node_keywords_append(params, param);
+ pm_parameters_node_keywords_append(parser->arena, params, param);
break;
}
case PM_TOKEN_SEMICOLON:
@@ -14751,12 +14388,12 @@ parse_parameters(
break;
}
- pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
- pm_parameters_node_keywords_append(params, param);
+ pm_parameters_node_keywords_append(parser->arena, params, param);
break;
}
default: {
@@ -14764,20 +14401,20 @@ parse_parameters(
if (token_begins_expression_p(parser->current.type)) {
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
- uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
+ uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
- pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
+ if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
}
- param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
+ param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
}
else {
- param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ param = UP(pm_required_keyword_parameter_node_create(parser, &name));
}
if (repeated) {
@@ -14785,7 +14422,7 @@ parse_parameters(
}
context_pop(parser);
- pm_parameters_node_keywords_append(params, param);
+ pm_parameters_node_keywords_append(parser->arena, params, param);
// If parsing the value of the parameter resulted in error recovery,
// then we can put a missing node in its place and stop parsing the
@@ -14806,7 +14443,7 @@ parse_parameters(
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_token_t name;
+ pm_token_t name = { 0 };
bool repeated = false;
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -14814,11 +14451,10 @@ parse_parameters(
repeated = pm_parser_parameter_name_check(parser, &name);
pm_parser_local_add_token(parser, &name, 1);
} else {
- name = not_provided(parser);
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
}
- pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
+ pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
@@ -14827,7 +14463,7 @@ parse_parameters(
pm_parameters_node_rest_set(params, param);
} else {
pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
- pm_parameters_node_posts_append(params, param);
+ pm_parameters_node_posts_append(parser->arena, params, param);
}
break;
@@ -14846,9 +14482,9 @@ parse_parameters(
pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
}
- param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
+ param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
} else {
- pm_token_t name;
+ pm_token_t name = { 0 };
bool repeated = false;
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -14856,11 +14492,10 @@ parse_parameters(
repeated = pm_parser_parameter_name_check(parser, &name);
pm_parser_local_add_token(parser, &name, 1);
} else {
- name = not_provided(parser);
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
}
- param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
+ param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
@@ -14870,27 +14505,14 @@ parse_parameters(
pm_parameters_node_keyword_rest_set(params, param);
} else {
pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
- pm_parameters_node_posts_append(params, param);
+ pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
}
break;
}
default:
if (parser->previous.type == PM_TOKEN_COMMA) {
- if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
- // If we get here, then we have a trailing comma in a
- // block parameter list.
- pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
-
- if (params->rest == NULL) {
- pm_parameters_node_rest_set(params, param);
- } else {
- pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
- }
- } else {
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
- }
+ parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
}
parsing = false;
@@ -14922,8 +14544,7 @@ parse_parameters(
pm_do_loop_stack_pop(parser);
// If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
- if (params->base.location.start == params->base.location.end) {
- pm_node_destroy(parser, (pm_node_t *) params);
+ if (PM_NODE_START(params) == PM_NODE_END(params)) {
return NULL;
}
@@ -14940,13 +14561,13 @@ token_newline_index(const pm_parser_t *parser) {
// This is the common case. In this case we can look at the previously
// recorded newline in the newline list and subtract from the current
// offset.
- return parser->newline_list.size - 1;
+ return parser->line_offsets.size - 1;
} else {
// This is unlikely. This is the case that we have already parsed the
// start of a heredoc, so we cannot rely on looking at the previous
// offset of the newline list, and instead must go through the whole
// process of a binary search for the line number.
- return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
+ return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
}
}
@@ -14956,7 +14577,7 @@ token_newline_index(const pm_parser_t *parser) {
*/
static int64_t
token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
- const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
+ const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
const uint8_t *end = token->start;
// Skip over the BOM if it is present.
@@ -15020,8 +14641,8 @@ parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_ind
// Otherwise, add a warning.
PM_PARSER_WARN_FORMAT(
parser,
- closing_token->start,
- closing_token->end,
+ PM_TOKEN_START(parser, closing_token),
+ PM_TOKEN_LENGTH(closing_token),
PM_WARN_INDENTATION_MISMATCH,
(int) (closing_token->end - closing_token->start),
(const char *) closing_token->start,
@@ -15045,7 +14666,7 @@ typedef enum {
* Parse any number of rescue clauses. This will form a linked list of if
* nodes pointing to each other from the top.
*/
-static inline void
+static PRISM_INLINE void
parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
pm_rescue_node_t *current = NULL;
@@ -15061,9 +14682,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
// we're going to have an empty list of exceptions to rescue (which
// implies StandardError).
parser_lex(parser);
- pm_rescue_node_operator_set(rescue, &parser->previous);
+ pm_rescue_node_operator_set(parser, rescue, &parser->previous);
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
reference = parse_target(parser, reference, false, false);
pm_rescue_node_reference_set(rescue, reference);
@@ -15072,8 +14693,8 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
case PM_TOKEN_NEWLINE:
case PM_TOKEN_SEMICOLON:
case PM_TOKEN_KEYWORD_THEN:
- // Here we have a terminator for the rescue keyword, in which case we're
- // going to just continue on.
+ // Here we have a terminator for the rescue keyword, in which
+ // case we're going to just continue on.
break;
default: {
if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
@@ -15082,7 +14703,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
do {
pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
- pm_rescue_node_exceptions_append(rescue, expression);
+ pm_rescue_node_exceptions_append(parser->arena, rescue, expression);
// If we hit a newline, then this is the end of the rescue expression. We
// can continue on to parse the statements.
@@ -15091,9 +14712,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
// If we hit a `=>` then we're going to parse the exception variable. Once
// we've done that, we'll break out of the loop and parse the statements.
if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
- pm_rescue_node_operator_set(rescue, &parser->previous);
+ pm_rescue_node_operator_set(parser, rescue, &parser->previous);
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
reference = parse_target(parser, reference, false, false);
pm_rescue_node_reference_set(rescue, reference);
@@ -15105,9 +14726,12 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
}
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- accept1(parser, PM_TOKEN_KEYWORD_THEN);
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+ rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
+ }
} else {
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
+ rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
}
if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
@@ -15145,11 +14769,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
// since we won't know the end until we've found all subsequent
// clauses. This sets the end location on all rescues once we know it.
if (current != NULL) {
- const uint8_t *end_to_set = current->base.location.end;
pm_rescue_node_t *clause = parent_node->rescue_clause;
while (clause != NULL) {
- clause->base.location.end = end_to_set;
+ PM_NODE_LENGTH_SET_NODE(clause, current);
clause = clause->subsequent;
}
}
@@ -15192,7 +14815,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
// If we don't have a `current` rescue node, then this is a dangling
// else, and it's an error.
- if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
+ if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
}
if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
@@ -15230,10 +14853,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
if (match1(parser, PM_TOKEN_KEYWORD_END)) {
if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
- pm_begin_node_end_keyword_set(parent_node, &parser->current);
+ pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
} else {
- pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- pm_begin_node_end_keyword_set(parent_node, &end_keyword);
+ pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
+ pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
}
}
@@ -15243,11 +14866,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
*/
static pm_begin_node_t *
parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
- pm_token_t begin_keyword = not_provided(parser);
- pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
-
+ pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
- node->base.location.start = start;
+
+ node->base.location.start = U32(start - parser->start);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current);
return node;
}
@@ -15266,6 +14889,9 @@ parse_block_parameters(
) {
pm_parameters_node_t *parameters = NULL;
if (!match1(parser, PM_TOKEN_SEMICOLON)) {
+ if (!is_lambda_literal) {
+ context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
+ }
parameters = parse_parameters(
parser,
is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
@@ -15274,12 +14900,16 @@ parse_block_parameters(
false,
accepts_blocks_in_defaults,
true,
+ is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
(uint16_t) (depth + 1)
);
+ if (!is_lambda_literal) {
+ context_pop(parser);
+ }
}
pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
- if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
+ if (opening != NULL) {
accept1(parser, PM_TOKEN_NEWLINE);
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
@@ -15310,9 +14940,9 @@ parse_block_parameters(
pm_parser_local_add_token(parser, &parser->previous, 1);
pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
- if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
+ if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
- pm_block_parameters_node_append_local(block_parameters, local);
+ pm_block_parameters_node_append_local(parser->arena, block_parameters, local);
} while (accept1(parser, PM_TOKEN_COMMA));
}
}
@@ -15392,8 +15022,8 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
} else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
- } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
- numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
+ } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+ numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
} else {
assert(false && "unreachable");
}
@@ -15412,13 +15042,11 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
}
-
- const pm_location_t location = { .start = opening->start, .end = closing->end };
- return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
+ return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
}
if (it_parameter) {
- return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
+ return UP(pm_it_parameters_node_create(parser, opening, closing));
}
return NULL;
@@ -15450,7 +15078,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
}
- pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
+ pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
}
accept1(parser, PM_TOKEN_NEWLINE);
@@ -15458,30 +15086,30 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
if (opening.type == PM_TOKEN_BRACE_LEFT) {
if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
+ statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
}
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
} else {
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
+ statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
pm_accepts_block_stack_pop(parser);
}
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
+ statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
}
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
}
pm_constant_id_list_t locals;
pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
- pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
+ pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
pm_parser_scope_pop(parser);
pm_accepts_block_stack_pop(parser);
@@ -15495,42 +15123,54 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
* arguments, or blocks).
*/
static bool
-parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
+parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) {
+ /* Fast path: if the current token can't begin an expression and isn't
+ * a parenthesis, block opener, or splat/block-pass operator, there are
+ * no arguments to parse. */
+ if (
+ !token_begins_expression_p(parser->current.type) &&
+ !match6(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)
+ ) {
+ return false;
+ }
+
bool found = false;
+ bool parsed_command_args = false;
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
found |= true;
- arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments->opening_loc = TOK2LOC(parser, &parser->previous);
if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments->closing_loc = TOK2LOC(parser, &parser->previous);
} else {
pm_accepts_block_stack_push(parser, true);
- parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_str(parser->current.type));
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
pm_accepts_block_stack_pop(parser);
- arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments->closing_loc = TOK2LOC(parser, &parser->previous);
}
- } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
+ } else if ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
found |= true;
+ parsed_command_args = true;
pm_accepts_block_stack_push(parser, false);
// If we get here, then the subsequent token cannot be used as an infix
// operator. In this case we assume the subsequent token is part of an
// argument to this method call.
- parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
// If we have done with the arguments and still not consumed the comma,
// then we have a trailing comma where we need to check whether it is
// allowed or not.
if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_str(parser->current.type));
}
pm_accepts_block_stack_pop(parser);
@@ -15549,21 +15189,24 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
} else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
found |= true;
block = parse_block(parser, (uint16_t) (depth + 1));
+ } else if (parsed_command_args && pm_accepts_block_stack_p(parser) && (flags & PM_PARSE_ACCEPTS_DO_BLOCK) && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) {
+ found |= true;
+ block = parse_block(parser, (uint16_t) (depth + 1));
}
if (block != NULL) {
if (arguments->block == NULL && !arguments->has_forwarding) {
- arguments->block = (pm_node_t *) block;
+ arguments->block = UP(block);
} else {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
if (arguments->block != NULL) {
if (arguments->arguments == NULL) {
arguments->arguments = pm_arguments_node_create(parser);
}
- pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
+ pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block);
}
- arguments->block = (pm_node_t *) block;
+ arguments->block = UP(block);
}
}
}
@@ -15631,6 +15274,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
case PM_CONTEXT_BLOCK_ENSURE:
case PM_CONTEXT_BLOCK_KEYWORDS:
case PM_CONTEXT_BLOCK_RESCUE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
case PM_CONTEXT_DEF_ELSE:
case PM_CONTEXT_DEF_ENSURE:
case PM_CONTEXT_DEF_PARAMS:
@@ -15650,7 +15294,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
break;
}
}
- if (in_sclass) {
+ if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
}
}
@@ -15667,6 +15311,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
case PM_CONTEXT_BLOCK_KEYWORDS:
case PM_CONTEXT_BLOCK_ELSE:
case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
case PM_CONTEXT_BLOCK_RESCUE:
case PM_CONTEXT_DEFINED:
case PM_CONTEXT_FOR:
@@ -15676,12 +15321,19 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
case PM_CONTEXT_LAMBDA_ENSURE:
case PM_CONTEXT_LAMBDA_RESCUE:
case PM_CONTEXT_LOOP_PREDICATE:
- case PM_CONTEXT_POSTEXE:
case PM_CONTEXT_UNTIL:
case PM_CONTEXT_WHILE:
// These are the good cases. We're allowed to have a block exit
// in these contexts.
return;
+ case PM_CONTEXT_POSTEXE:
+ // https://bugs.ruby-lang.org/issues/20409
+ if (context_node->context == PM_CONTEXT_POSTEXE) {
+ if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+ return;
+ }
+ }
+ PRISM_FALLTHROUGH
case PM_CONTEXT_DEF:
case PM_CONTEXT_DEF_PARAMS:
case PM_CONTEXT_DEF_ELSE:
@@ -15703,7 +15355,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
// block exit to the list of exits for the expression, and
// the node parsing will handle validating it instead.
assert(parser->current_block_exits != NULL);
- pm_node_list_append(parser->current_block_exits, node);
+ pm_node_list_append(parser->arena, parser->current_block_exits, node);
return;
case PM_CONTEXT_BEGIN_ELSE:
case PM_CONTEXT_BEGIN_ENSURE:
@@ -15794,7 +15446,7 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
// However, they could still become valid in a higher level context if
// there is another list above this one. In this case we'll push all of
// the block exits up to the previous list.
- pm_node_list_concat(previous_block_exits, parser->current_block_exits);
+ pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits);
parser->current_block_exits = previous_block_exits;
} else {
// If we did not match a trailing while/until and this was the last
@@ -15804,11 +15456,11 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
}
}
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
context_push(parser, PM_CONTEXT_PREDICATE);
pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, error_id, (uint16_t) (depth + 1));
// Predicates are closed by a term, a "then", or a term and then a "then".
bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -15826,15 +15478,15 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
return predicate;
}
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
pm_node_list_t current_block_exits = { 0 };
pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
pm_token_t keyword = parser->previous;
- pm_token_t then_keyword = not_provided(parser);
+ pm_token_t then_keyword = { 0 };
- pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
+ pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_COMPOSITION, context, &then_keyword, (uint16_t) (depth + 1));
pm_statements_node_t *statements = NULL;
if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
@@ -15844,15 +15496,14 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
}
- pm_token_t end_keyword = not_provided(parser);
pm_node_t *parent = NULL;
switch (context) {
case PM_CONTEXT_IF:
- parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
+ parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
break;
case PM_CONTEXT_UNLESS:
- parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
+ parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
break;
default:
assert(false && "unreachable");
@@ -15866,21 +15517,21 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
if (context == PM_CONTEXT_IF) {
while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
if (parser_end_of_line_p(parser)) {
- PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
pm_token_t elsif_keyword = parser->current;
parser_lex(parser);
- pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
+ pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_COMPOSITION, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
pm_accepts_block_stack_push(parser, true);
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
+ pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
((pm_if_node_t *) current)->subsequent = elsif;
current = elsif;
}
@@ -15899,13 +15550,13 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
switch (context) {
case PM_CONTEXT_IF:
- ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
+ ((pm_if_node_t *) current)->subsequent = UP(else_node);
break;
case PM_CONTEXT_UNLESS:
((pm_unless_node_t *) parent)->else_clause = else_node;
@@ -15916,7 +15567,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
}
} else {
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
}
// Set the appropriate end location for all of the nodes in the subtree.
@@ -15928,12 +15579,12 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
while (recursing) {
switch (PM_NODE_TYPE(current)) {
case PM_IF_NODE:
- pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
+ pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
current = ((pm_if_node_t *) current)->subsequent;
recursing = current != NULL;
break;
case PM_ELSE_NODE:
- pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
+ pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
recursing = false;
break;
default: {
@@ -15945,7 +15596,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
break;
}
case PM_CONTEXT_UNLESS:
- pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
+ pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
break;
default:
assert(false && "unreachable");
@@ -15953,8 +15604,6 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
}
pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
return parent;
}
@@ -15965,7 +15614,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
- case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
+ case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_BLOCK: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
@@ -16028,7 +15677,7 @@ PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int
* If the encoding was explicitly set through the lexing process, then we need
* to potentially mark the string's flags to indicate how to encode it.
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
parse_unescaped_encoding(const pm_parser_t *parser) {
if (parser->explicit_encoding != NULL) {
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
@@ -16060,10 +15709,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
// "aaa #{bbb} #@ccc ddd"
// ^^^^ ^ ^^^^
case PM_TOKEN_STRING_CONTENT: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
pm_node_flag_set(node, parse_unescaped_encoding(parser));
parser_lex(parser);
@@ -16090,7 +15736,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
pm_token_t opening = parser->previous;
pm_statements_node_t *statements = NULL;
- if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
+ if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
pm_accepts_block_stack_push(parser, true);
statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
@@ -16098,9 +15744,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
parser->brace_nesting = brace_nesting;
lex_state_set(parser, state);
-
expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
- pm_token_t closing = parser->previous;
// If this set of embedded statements only contains a single
// statement, then Ruby does not consider it as a possible statement
@@ -16109,7 +15753,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
}
- return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
+ return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
}
// Here the lexer has returned the beginning of an embedded variable.
@@ -16134,42 +15778,42 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
// create a global variable read node.
case PM_TOKEN_BACK_REFERENCE:
parser_lex(parser);
- variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
break;
// In this case an nth reference is being interpolated. We'll
// create a global variable read node.
case PM_TOKEN_NUMBERED_REFERENCE:
parser_lex(parser);
- variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
break;
// In this case a global variable is being interpolated. We'll
// create a global variable read node.
case PM_TOKEN_GLOBAL_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
break;
// In this case an instance variable is being interpolated.
// We'll create an instance variable read node.
case PM_TOKEN_INSTANCE_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
break;
// In this case a class variable is being interpolated. We'll
// create a class variable read node.
case PM_TOKEN_CLASS_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
break;
// We can hit here if we got an invalid token. In that case
// we'll not attempt to lex this token and instead just return a
// missing node.
default:
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
- variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
break;
}
- return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
+ return UP(pm_embedded_variable_node_create(parser, &operator, variable));
}
default:
parser_lex(parser);
@@ -16197,18 +15841,16 @@ parse_operator_symbol_name(const pm_token_t *name) {
static pm_node_t *
parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
- pm_token_t closing = not_provided(parser);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
-
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
const uint8_t *end = parse_operator_symbol_name(&parser->current);
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
parser_lex(parser);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
- pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
+ pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
- return (pm_node_t *) symbol;
+ return UP(symbol);
}
/**
@@ -16242,13 +15884,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
break;
}
- pm_token_t closing = not_provided(parser);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
- return (pm_node_t *) symbol;
+ return UP(symbol);
}
if (lex_mode->as.string.interpolation) {
@@ -16256,10 +15896,13 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
if (match1(parser, PM_TOKEN_STRING_END)) {
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
parser_lex(parser);
+ pm_token_t content = {
+ .type = PM_TOKEN_STRING_CONTENT,
+ .start = parser->previous.start,
+ .end = parser->previous.start
+ };
- pm_token_t content = not_provided(parser);
- pm_token_t closing = parser->previous;
- return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
+ return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
}
// Now we can parse the first part of the symbol.
@@ -16271,15 +15914,15 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
- return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
+ return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
}
pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
- if (part) pm_interpolated_symbol_node_append(symbol, part);
+ if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part);
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_interpolated_symbol_node_append(symbol, part);
+ pm_interpolated_symbol_node_append(parser->arena, symbol, part);
}
}
@@ -16290,8 +15933,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
}
- pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
- return (pm_node_t *) symbol;
+ pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
+ return UP(symbol);
}
pm_token_t content;
@@ -16313,13 +15956,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
// interpolated string node, so that's what we'll do here.
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
- pm_token_t bounds = not_provided(parser);
-
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
- pm_interpolated_symbol_node_append(symbol, part);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
+ pm_interpolated_symbol_node_append(parser->arena, symbol, part);
- part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
- pm_interpolated_symbol_node_append(symbol, part);
+ part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
+ pm_interpolated_symbol_node_append(parser->arena, symbol, part);
if (next_state != PM_LEX_STATE_NONE) {
lex_state_set(parser, next_state);
@@ -16328,8 +15969,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
parser_lex(parser);
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
- pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
- return (pm_node_t *) symbol;
+ pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
+ return UP(symbol);
}
} else {
content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
@@ -16346,34 +15987,29 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
}
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
+ return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
}
/**
* Parse an argument to undef which can either be a bare word, a symbol, a
* constant, or an interpolated symbol.
*/
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
switch (parser->current.type) {
- case PM_CASE_OPERATOR: {
- const pm_token_t opening = not_provided(parser);
- return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
- }
+ case PM_CASE_OPERATOR:
+ return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
case PM_CASE_KEYWORD:
case PM_TOKEN_CONSTANT:
case PM_TOKEN_IDENTIFIER:
case PM_TOKEN_METHOD_NAME: {
parser_lex(parser);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
- return (pm_node_t *) symbol;
+ return UP(symbol);
}
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
@@ -16383,7 +16019,7 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
}
default:
pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
}
}
@@ -16393,13 +16029,11 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
* we need to set the lex state to PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM
* between the first and second arguments.
*/
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
switch (parser->current.type) {
- case PM_CASE_OPERATOR: {
- const pm_token_t opening = not_provided(parser);
- return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
- }
+ case PM_CASE_OPERATOR:
+ return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
case PM_CASE_KEYWORD:
case PM_TOKEN_CONSTANT:
case PM_TOKEN_IDENTIFIER:
@@ -16407,14 +16041,11 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
parser_lex(parser);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
- return (pm_node_t *) symbol;
+ return UP(symbol);
}
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
@@ -16424,16 +16055,16 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
}
case PM_TOKEN_BACK_REFERENCE:
parser_lex(parser);
- return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ return UP(pm_back_reference_read_node_create(parser, &parser->previous));
case PM_TOKEN_NUMBERED_REFERENCE:
parser_lex(parser);
- return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
case PM_TOKEN_GLOBAL_VARIABLE:
parser_lex(parser);
- return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ return UP(pm_global_variable_read_node_create(parser, &parser->previous));
default:
pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
}
}
@@ -16445,10 +16076,10 @@ static pm_node_t *
parse_variable(pm_parser_t *parser) {
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
int depth;
- bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
+ bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
- return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
+ return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
}
pm_scope_t *current_scope = parser->current_scope;
@@ -16467,13 +16098,13 @@ parse_variable(pm_parser_t *parser) {
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
}
- pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
- pm_node_list_append(&current_scope->implicit_parameters, node);
+ pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
+ pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
return node;
- } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
- pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
- pm_node_list_append(&current_scope->implicit_parameters, node);
+ } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
+ pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
+ pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
return node;
}
@@ -16496,9 +16127,9 @@ parse_variable_call(pm_parser_t *parser) {
}
pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
- pm_node_flag_set((pm_node_t *)node, flags);
+ pm_node_flag_set(UP(node), flags);
- return (pm_node_t *) node;
+ return UP(node);
}
/**
@@ -16506,7 +16137,7 @@ parse_variable_call(pm_parser_t *parser) {
* parser. If it does not match a valid method definition name, then a missing
* token is returned.
*/
-static inline pm_token_t
+static PRISM_INLINE pm_token_t
parse_method_definition_name(pm_parser_t *parser) {
switch (parser->current.type) {
case PM_CASE_KEYWORD:
@@ -16515,7 +16146,7 @@ parse_method_definition_name(pm_parser_t *parser) {
parser_lex(parser);
return parser->previous;
case PM_TOKEN_IDENTIFIER:
- pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
+ pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
parser_lex(parser);
return parser->previous;
case PM_CASE_OPERATOR:
@@ -16523,22 +16154,31 @@ parse_method_definition_name(pm_parser_t *parser) {
parser_lex(parser);
return parser->previous;
default:
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
- return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_str(parser->current.type));
+ return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
}
}
static void
-parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
- // Get a reference to the string struct that is being held by the string
- // node. This is the value we're going to actually manipulate.
- pm_string_ensure_owned(string);
+parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) {
+ // Make a writable copy in the arena if the string isn't already writable.
+ // We keep a mutable pointer to the arena memory so we can memmove into it
+ // below without casting away const from the string's source field.
+ uint8_t *writable;
+
+ if (string->type != PM_STRING_OWNED) {
+ size_t length = pm_string_length(string);
+ writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t));
+ pm_string_constant_init(string, (const char *) writable, length);
+ } else {
+ writable = (uint8_t *) string->source;
+ }
// Now get the bounds of the existing string. We'll use this as a
// destination to move bytes into. We'll also use it for bounds checking
// since we don't require that these strings be null terminated.
size_t dest_length = pm_string_length(string);
- const uint8_t *source_cursor = (uint8_t *) string->source;
+ const uint8_t *source_cursor = writable;
const uint8_t *source_end = source_cursor + dest_length;
// We're going to move bytes backward in the string when we get leading
@@ -16562,11 +16202,24 @@ parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
dest_length--;
}
- memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
+ memmove(writable, source_cursor, (size_t) (source_end - source_cursor));
string->length = dest_length;
}
/**
+ * If we end up trimming all of the whitespace from a node and it isn't
+ * part of a line continuation, then we'll drop it from the list entirely.
+ */
+static PRISM_INLINE bool
+heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
+ if (string_node->unescaped.length == 0) {
+ const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
+ return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
+ }
+ return false;
+}
+
+/**
* Take a heredoc node that is indented by a ~ and trim the leading whitespace.
*/
static void
@@ -16576,8 +16229,7 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
bool dedent_next = true;
// Iterate over all nodes, and trim whitespace accordingly. We're going to
- // keep around two indices: a read and a write. If we end up trimming all of
- // the whitespace from a node, then we'll drop it from the list entirely.
+ // keep around two indices: a read and a write.
size_t write_index = 0;
pm_node_t *node;
@@ -16593,11 +16245,10 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
pm_string_node_t *string_node = ((pm_string_node_t *) node);
if (dedent_next) {
- parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
+ parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace);
}
- if (string_node->unescaped.length == 0) {
- pm_node_destroy(parser, node);
+ if (heredoc_dedent_discard_string_node(parser, string_node)) {
} else {
nodes->nodes[write_index++] = node;
}
@@ -16620,7 +16271,7 @@ parse_strings_empty_content(const uint8_t *location) {
/**
* Parse a set of strings that could be concatenated together.
*/
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
bool concating = false;
@@ -16647,16 +16298,14 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
pm_string_shared_init(&string->unescaped, content.start, content.end);
- node = (pm_node_t *) string;
+ node = UP(string);
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
// If we get here, then we have an end of a label immediately
// after a start. In that case we'll create an empty symbol
// node.
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
-
- pm_string_shared_init(&symbol->unescaped, content.start, content.end);
- node = (pm_node_t *) symbol;
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
+ pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
+ node = UP(symbol);
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else if (!lex_interpolation) {
@@ -16667,7 +16316,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
if (match1(parser, PM_TOKEN_EOF)) {
unescaped = PM_STRING_EMPTY;
- content = not_provided(parser);
+ content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
} else {
unescaped = parser->current_string;
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
@@ -16687,34 +16336,30 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
// be able to contain all of the parts.
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
pm_node_list_t parts = { 0 };
-
- pm_token_t delimiters = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
- pm_node_list_append(&parts, part);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
+ pm_node_list_append(parser->arena, &parts, part);
do {
- part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
- pm_node_list_append(&parts, part);
+ part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+ pm_node_list_append(parser->arena, &parts, part);
parser_lex(parser);
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
-
- pm_node_list_free(&parts);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
+ node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
} else {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_str(parser->previous.type));
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ parser->previous.type = 0;
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
}
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the string
@@ -16726,7 +16371,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
parser_lex(parser);
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
pm_node_flag_set(node, parse_unescaped_encoding(parser));
// Kind of odd behavior, but basically if we have an
@@ -16736,43 +16381,38 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
if (!accept1(parser, PM_TOKEN_STRING_END)) {
const uint8_t *location = parser->previous.end;
if (location > parser->start && location[-1] == '\n') location--;
- pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
+ pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
+ node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else {
// If we get here, then we have interpolation so we'll need
// to create a string or symbol node with interpolation.
pm_node_list_t parts = { 0 };
- pm_token_t string_opening = not_provided(parser);
- pm_token_t string_closing = not_provided(parser);
-
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
pm_node_flag_set(part, parse_unescaped_encoding(parser));
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
}
}
if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
}
-
- pm_node_list_free(&parts);
}
} else {
// If we get here, then the first part of the string is not plain
@@ -16783,22 +16423,20 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
}
}
if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
}
-
- pm_node_list_free(&parts);
}
if (current == NULL) {
@@ -16823,15 +16461,17 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
// If we haven't already created our container for concatenation,
// we'll do that now.
if (!concating) {
- concating = true;
- pm_token_t bounds = not_provided(parser);
+ if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
+ }
- pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
- pm_interpolated_string_node_append(container, current);
- current = (pm_node_t *) container;
+ concating = true;
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, container, current);
+ current = UP(container);
}
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
}
}
@@ -16853,12 +16493,12 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
static void
parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
// Skip this capture if it starts with an underscore.
- if (*location->start == '_') return;
+ if (peek_at(parser, parser->start + location->start) == '_') return;
if (pm_constant_id_list_includes(captures, capture)) {
- pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
+ pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
} else {
- pm_constant_id_list_append(captures, capture);
+ pm_constant_id_list_append(parser->arena, captures, capture);
}
}
@@ -16872,7 +16512,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
pm_token_t delimiter = parser->previous;
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+ node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
// If there is a [ or ( that follows, then this is part of a larger pattern
@@ -16893,7 +16533,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
+ expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
}
closing = parser->previous;
@@ -16905,7 +16545,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
}
closing = parser->previous;
@@ -16914,7 +16554,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
if (!inner) {
// If there was no inner pattern, then we have something like Foo() or
// Foo[]. In that case we'll create an array pattern with no requireds.
- return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
+ return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
}
// Now that we have the inner pattern, check to see if it's an array, find,
@@ -16925,15 +16565,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
case PM_ARRAY_PATTERN_NODE: {
pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = node->location.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(pattern_node, node);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
pattern_node->constant = node;
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -16941,15 +16581,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
case PM_FIND_PATTERN_NODE: {
pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = node->location.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(pattern_node, node);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
pattern_node->constant = node;
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -16957,15 +16597,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
case PM_HASH_PATTERN_NODE: {
pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = node->location.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(pattern_node, node);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
pattern_node->constant = node;
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -16978,8 +16618,8 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
// attaching its constant. In this case we'll create an array pattern and
// attach our constant to it.
pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
- pm_array_pattern_node_requireds_append(pattern_node, inner);
- return (pm_node_t *) pattern_node;
+ pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner);
+ return UP(pattern_node);
}
/**
@@ -16995,21 +16635,20 @@ parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
// will check for that here. If they do, then we'll add it to the local
// table since this pattern will cause it to become a local variable.
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
- pm_token_t identifier = parser->previous;
- pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
+ pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
int depth;
if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
- pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
+ pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
- name = (pm_node_t *) pm_local_variable_target_node_create(
+ parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+ name = UP(pm_local_variable_target_node_create(
parser,
- &PM_LOCATION_TOKEN_VALUE(&identifier),
+ &TOK2LOC(parser, &parser->previous),
constant_id,
(uint32_t) (depth == -1 ? 0 : depth)
- );
+ ));
}
// Finally we can return the created node.
@@ -17028,7 +16667,7 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
pm_node_t *value = NULL;
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
- return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
+ return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
}
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -17039,16 +16678,16 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
- value = (pm_node_t *) pm_local_variable_target_node_create(
+ parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+ value = UP(pm_local_variable_target_node_create(
parser,
- &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ &TOK2LOC(parser, &parser->previous),
constant_id,
(uint32_t) (depth == -1 ? 0 : depth)
- );
+ ));
}
- return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
+ return UP(pm_assoc_splat_node_create(parser, value, &operator));
}
/**
@@ -17085,22 +16724,24 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
static pm_node_t *
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
+ const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
+ const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
+ pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
int depth = -1;
- if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
+ if (pm_slice_is_valid_local(parser, start, end)) {
depth = pm_parser_local_depth_constant_id(parser, constant_id);
} else {
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
+ pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
- if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
+ if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
+ PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
}
}
if (depth == -1) {
- pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
+ pm_parser_local_add(parser, constant_id, start, end, 0);
}
parse_pattern_capture(parser, captures, constant_id, value_loc);
@@ -17111,7 +16752,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
(uint32_t) (depth == -1 ? 0 : depth)
);
- return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
+ return UP(pm_implicit_node_create(parser, UP(target)));
}
/**
@@ -17120,7 +16761,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
*/
static void
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
+ if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
}
}
@@ -17139,25 +16780,31 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
case PM_NO_KEYWORDS_PARAMETER_NODE:
rest = first_node;
break;
+ case PM_INTERPOLATED_SYMBOL_NODE:
case PM_SYMBOL_NODE: {
- if (pm_symbol_node_label_p(first_node)) {
- parse_pattern_hash_key(parser, &keys, first_node);
+ if (pm_symbol_node_label_p(parser, first_node)) {
+ if (PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE)) {
+ pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
+ } else {
+ parse_pattern_hash_key(parser, &keys, first_node);
+ }
+
pm_node_t *value;
if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
- // Otherwise, we will create an implicit local variable
- // target for the value.
- value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+ if (PM_NODE_TYPE_P(first_node, PM_SYMBOL_NODE)) {
+ value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+ } else {
+ value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(first_node), 0));
+ }
} else {
// Here we have a value for the first assoc in the list, so
// we will parse it now.
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
}
- pm_token_t operator = not_provided(parser);
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
-
- pm_node_list_append(&assocs, assoc);
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
+ pm_node_list_append(parser->arena, &assocs, assoc);
break;
}
}
@@ -17169,11 +16816,10 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
pm_parser_err_node(parser, first_node, diag_id);
- pm_token_t operator = not_provided(parser);
- pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
+ pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
- pm_node_list_append(&assocs, assoc);
+ pm_node_list_append(parser->arena, &assocs, assoc);
break;
}
}
@@ -17197,7 +16843,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
rest = assoc;
} else {
pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
- pm_node_list_append(&assocs, assoc);
+ pm_node_list_append(parser->arena, &assocs, assoc);
}
} else {
pm_node_t *key;
@@ -17207,36 +16853,43 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
- } else if (!pm_symbol_node_label_p(key)) {
+ } else if (!pm_symbol_node_label_p(parser, key)) {
pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
}
+ } else if (accept1(parser, PM_TOKEN_LABEL)) {
+ key = UP(pm_symbol_node_label_create(parser, &parser->previous));
} else {
expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
- key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+
+ pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
+ key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
}
parse_pattern_hash_key(parser, &keys, key);
pm_node_t *value = NULL;
if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
+ if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
+ value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
+ } else {
+ value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(key), 0));
+ }
} else {
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
}
- pm_token_t operator = not_provided(parser);
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
if (rest != NULL) {
pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
}
- pm_node_list_append(&assocs, assoc);
+ pm_node_list_append(parser->arena, &assocs, assoc);
}
}
pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
- xfree(assocs.nodes);
+ // assocs.nodes is arena-allocated; no explicit free needed.
pm_static_literals_free(&keys);
return node;
@@ -17258,13 +16911,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
- return (pm_node_t *) pm_local_variable_target_node_create(
+ parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+ return UP(pm_local_variable_target_node_create(
parser,
- &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ &TOK2LOC(parser, &parser->previous),
constant_id,
(uint32_t) (depth == -1 ? 0 : depth)
- );
+ ));
}
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
pm_token_t opening = parser->current;
@@ -17273,7 +16926,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
// If we have an empty array pattern, then we'll just return a new
// array pattern node.
- return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
+ return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
}
// Otherwise, we'll parse the inner pattern, then deal with it depending
@@ -17281,34 +16934,34 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
+ expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
pm_token_t closing = parser->previous;
switch (PM_NODE_TYPE(inner)) {
case PM_ARRAY_PATTERN_NODE: {
pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
- if (pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = opening.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
}
case PM_FIND_PATTERN_NODE: {
pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
- if (pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = opening.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -17318,8 +16971,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
}
pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
- pm_array_pattern_node_requireds_append(node, inner);
- return (pm_node_t *) node;
+ pm_array_pattern_node_requireds_append(parser->arena, node, inner);
+ return UP(node);
}
case PM_TOKEN_BRACE_LEFT: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -17339,19 +16992,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
switch (parser->current.type) {
case PM_TOKEN_LABEL:
parser_lex(parser);
- first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+ first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
break;
case PM_TOKEN_USTAR_STAR:
first_node = parse_pattern_keyword_rest(parser, captures);
break;
case PM_TOKEN_STRING_BEGIN:
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
break;
default: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_str(parser->current.type));
parser_lex(parser);
- first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ first_node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
break;
}
}
@@ -17359,18 +17012,18 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
pm_token_t closing = parser->previous;
- node->base.location.start = opening.start;
- node->base.location.end = closing.end;
+ PM_NODE_START_SET_TOKEN(parser, node, &opening);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
- node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ node->opening_loc = TOK2LOC(parser, &opening);
+ node->closing_loc = TOK2LOC(parser, &closing);
}
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UDOT_DOT:
case PM_TOKEN_UDOT_DOT_DOT: {
@@ -17381,21 +17034,27 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
// expression as the right side of the range.
switch (parser->current.type) {
case PM_CASE_PRIMITIVE: {
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
default: {
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
- pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
}
}
case PM_CASE_PRIMITIVE: {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_LABEL | PM_PARSE_ACCEPTS_DO_BLOCK, diag_id, (uint16_t) (depth + 1));
// If we found a label, we need to immediately return to the caller.
- if (pm_symbol_node_label_p(node)) return node;
+ if (pm_symbol_node_label_p(parser, node)) return node;
+
+ // Call nodes (arithmetic operations) are not allowed in patterns
+ if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
+ pm_parser_err_node(parser, node, diag_id);
+ return UP(pm_error_recovery_node_create_unexpected(parser, node));
+ }
// Now that we have a primitive, we need to check if it's part of a range.
if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
@@ -17406,11 +17065,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
// node. Otherwise, we'll create an endless range.
switch (parser->current.type) {
case PM_CASE_PRIMITIVE: {
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+ return UP(pm_range_node_create(parser, node, &operator, right));
}
default:
- return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
+ return UP(pm_range_node_create(parser, node, &operator, NULL));
}
}
@@ -17425,44 +17084,44 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
switch (parser->current.type) {
case PM_TOKEN_IDENTIFIER: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) parse_variable(parser);
+ pm_node_t *variable = UP(parse_variable(parser));
if (variable == NULL) {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
- variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
+ variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
}
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_INSTANCE_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_GLOBAL_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_NUMBERED_REFERENCE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_BACK_REFERENCE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_PARENTHESIS_LEFT: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -17471,19 +17130,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
pm_token_t lparen = parser->current;
parser_lex(parser);
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
- return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
+ return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
}
default: {
// If we get here, then we have a pin operator followed by something
// not understood. We'll create a missing node and return that.
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
- pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ pm_node_t *variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
}
}
@@ -17494,31 +17153,56 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
- return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
+ return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
}
case PM_TOKEN_CONSTANT: {
pm_token_t constant = parser->current;
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
+ pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
}
default:
pm_parser_err_current(parser, diag_id);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
+ }
+}
+
+static bool
+parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_LOCAL_VARIABLE_TARGET_NODE: {
+ pm_parser_t *parser = (pm_parser_t *) data;
+ pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
+ return false;
+ }
+ default:
+ return true;
}
}
/**
+ * When we get here, we know that we already have a syntax error, because we
+ * know we have captured a variable and that we are in an alternation.
+ */
+static void
+parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
+ pm_visit_node(node, parse_pattern_alternation_error_each, parser);
+}
+
+/**
* Parse any number of primitives joined by alternation and ended optionally by
* assignment.
*/
static pm_node_t *
parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
pm_node_t *node = first_node;
+ bool alternation = false;
- while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
- pm_token_t operator = parser->previous;
+ while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
+ if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
+ parse_pattern_alternation_error(parser, node);
+ }
switch (parser->current.type) {
case PM_TOKEN_IDENTIFIER:
@@ -17530,41 +17214,47 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
case PM_TOKEN_UDOT_DOT:
case PM_TOKEN_UDOT_DOT_DOT:
case PM_CASE_PRIMITIVE: {
- if (node == NULL) {
+ if (!alternation) {
node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
} else {
+ pm_token_t operator = parser->previous;
pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
- node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
}
break;
}
case PM_TOKEN_PARENTHESIS_LEFT:
case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
+ pm_token_t operator = parser->previous;
pm_token_t opening = parser->current;
parser_lex(parser);
pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
- pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
+ pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
- if (node == NULL) {
+ if (!alternation) {
node = right;
} else {
- node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
}
break;
}
default: {
pm_parser_err_current(parser, diag_id);
- pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
- if (node == NULL) {
+ if (!alternation) {
node = right;
} else {
- node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
}
break;
@@ -17585,15 +17275,15 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+ parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
parser,
- &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ &TOK2LOC(parser, &parser->previous),
constant_id,
(uint32_t) (depth == -1 ? 0 : depth)
);
- node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
+ node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
}
return node;
@@ -17612,8 +17302,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
switch (parser->current.type) {
case PM_TOKEN_LABEL: {
parser_lex(parser);
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
- node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
+ pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
+ node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
if (!(flags & PM_PARSE_PATTERN_TOP)) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17623,7 +17313,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
}
case PM_TOKEN_USTAR_STAR: {
node = parse_pattern_keyword_rest(parser, captures);
- node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+ node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
if (!(flags & PM_PARSE_PATTERN_TOP)) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17636,8 +17326,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
// be dynamic symbols leading to hash patterns.
node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
- if (pm_symbol_node_label_p(node)) {
- node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+ if (pm_symbol_node_label_p(parser, node)) {
+ node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
if (!(flags & PM_PARSE_PATTERN_TOP)) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17652,7 +17342,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
case PM_TOKEN_USTAR: {
if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
parser_lex(parser);
- node = (pm_node_t *) parse_pattern_rest(parser, captures);
+ node = UP(parse_pattern_rest(parser, captures));
leading_rest = true;
break;
}
@@ -17665,8 +17355,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
// If we got a dynamic label symbol, then we need to treat it like the
// beginning of a hash pattern.
- if (pm_symbol_node_label_p(node)) {
- return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+ if (pm_symbol_node_label_p(parser, node)) {
+ return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
}
if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
@@ -17674,20 +17364,20 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
// or a find pattern. We need to parse all of the patterns, put them
// into a big list, and then determine which type of node we have.
pm_node_list_t nodes = { 0 };
- pm_node_list_append(&nodes, node);
+ pm_node_list_append(parser->arena, &nodes, node);
// Gather up all of the patterns into the list.
while (accept1(parser, PM_TOKEN_COMMA)) {
// Break early here in case we have a trailing comma.
- if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
- node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
- pm_node_list_append(&nodes, node);
+ if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
+ node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+ pm_node_list_append(parser->arena, &nodes, node);
trailing_rest = true;
break;
}
if (accept1(parser, PM_TOKEN_USTAR)) {
- node = (pm_node_t *) parse_pattern_rest(parser, captures);
+ node = UP(parse_pattern_rest(parser, captures));
// If we have already parsed a splat pattern, then this is an
// error. We will continue to parse the rest of the patterns,
@@ -17701,7 +17391,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
}
- pm_node_list_append(&nodes, node);
+ pm_node_list_append(parser->arena, &nodes, node);
}
// If the first pattern and the last pattern are rest patterns, then we
@@ -17709,24 +17399,24 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
// are in between because we know we already added the appropriate
// errors. Otherwise we will create an array pattern.
if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
- node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
+ node = UP(pm_find_pattern_node_create(parser, &nodes));
if (nodes.size == 2) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
}
} else {
- node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
+ node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
if (leading_rest && trailing_rest) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
}
}
- xfree(nodes.nodes);
+ // nodes.nodes is arena-allocated; no explicit free needed.
} else if (leading_rest) {
// Otherwise, if we parsed a single splat pattern, then we know we have
// an array pattern, so we can go ahead and create that node.
- node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
+ node = UP(pm_array_pattern_node_rest_create(parser, node));
}
return node;
@@ -17737,29 +17427,33 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
* from its start bounds. If it's a compound node, then we will recursively
* apply this function to its value.
*/
-static inline void
+static PRISM_INLINE void
parse_negative_numeric(pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE: {
pm_integer_node_t *cast = (pm_integer_node_t *) node;
cast->base.location.start--;
+ cast->base.location.length++;
cast->value.negative = true;
break;
}
case PM_FLOAT_NODE: {
pm_float_node_t *cast = (pm_float_node_t *) node;
cast->base.location.start--;
+ cast->base.location.length++;
cast->value = -cast->value;
break;
}
case PM_RATIONAL_NODE: {
pm_rational_node_t *cast = (pm_rational_node_t *) node;
cast->base.location.start--;
+ cast->base.location.length++;
cast->numerator.negative = true;
break;
}
case PM_IMAGINARY_NODE:
node->location.start--;
+ node->location.length++;
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
break;
default:
@@ -17777,22 +17471,22 @@ static void
pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
switch (diag_id) {
case PM_ERR_HASH_KEY: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_str(parser->previous.type));
break;
}
case PM_ERR_HASH_VALUE:
case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
break;
}
case PM_ERR_UNARY_RECEIVER: {
- const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
+ const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_str(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
break;
}
case PM_ERR_UNARY_DISALLOWED:
case PM_ERR_EXPECT_ARGUMENT: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
break;
}
default:
@@ -17872,6 +17566,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) {
case PM_CONTEXT_BEGIN:
case PM_CONTEXT_BLOCK_BRACES:
case PM_CONTEXT_BLOCK_KEYWORDS:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
case PM_CONTEXT_CASE_IN:
case PM_CONTEXT_CASE_WHEN:
case PM_CONTEXT_DEFAULT_PARAMS:
@@ -17952,6 +17647,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
case PM_CONTEXT_BLOCK_KEYWORDS:
case PM_CONTEXT_BLOCK_ELSE:
case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
case PM_CONTEXT_BLOCK_RESCUE:
case PM_CONTEXT_CASE_IN:
case PM_CONTEXT_CASE_WHEN:
@@ -17988,67 +17684,1383 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
}
/**
- * This struct is used to pass information between the regular expression parser
- * and the error callback.
+ * Determine if a given call node looks like a "command", which means it has
+ * arguments but does not have parentheses.
+ *
+ * Concretely, all of the following must hold: the opening location is empty,
+ * the block is either absent or a block argument node, and at least one of
+ * the arguments or the block is present.
*/
-typedef struct {
- /** The parser that we are parsing the regular expression for. */
- pm_parser_t *parser;
+static PRISM_INLINE bool
+pm_call_node_command_p(const pm_call_node_t *node) {
+ return (
+ (node->opening_loc.length == 0) &&
+ (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
+ (node->arguments != NULL || node->block != NULL)
+ );
+}
- /** The start of the regular expression. */
- const uint8_t *start;
+/**
+ * Returns true if the given node is a command-style call (a method call without
+ * parentheses that has arguments), excluding operator calls (e.g., a + b) which
+ * satisfy the same structural criteria but are not commands.
+ *
+ * Node types other than calls are handled as follows: a super node counts when
+ * it has no left parenthesis and has arguments or a block; a yield node counts
+ * when it has no left parenthesis and has arguments; a rescue modifier defers
+ * to its expression; a def node with a non-empty equal_loc and a body defers
+ * to that body (to its last statement when the body is a statements node).
+ * Every other node type returns false.
+ */
+static bool
+pm_command_call_value_p(const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_CALL_NODE: {
+ const pm_call_node_t *call = (const pm_call_node_t *) node;
- /** The end of the regular expression. */
- const uint8_t *end;
+ // Command-style calls (e.g., foo bar, obj.foo bar).
+ // Attribute writes (e.g., a.b = 1) are not commands. Calls that have a receiver but no call operator are rejected here as well.
+ if (pm_call_node_command_p(call) && !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && (call->receiver == NULL || call->call_operator_loc.length > 0)) {
+ return true;
+ }
- /**
- * Whether or not the source of the regular expression is shared. This
- * impacts the location of error messages, because if it is shared then we
- * can use the location directly and if it is not, then we use the bounds of
- * the regular expression itself.
- */
- bool shared;
-} parse_regular_expression_error_data_t;
+ // A `!` or `not` prefix wrapping a command call (e.g.,
+ // `!foo bar`, `not foo bar`) is also a command-call value.
+ if (call->receiver != NULL && call->arguments == NULL && call->opening_loc.length == 0 && call->call_operator_loc.length == 0) {
+ return pm_command_call_value_p(call->receiver); // recurse into the wrapped receiver
+ }
+
+ return false;
+ }
+ case PM_SUPER_NODE: {
+ const pm_super_node_t *cast = (const pm_super_node_t *) node;
+ return cast->lparen_loc.length == 0 && (cast->arguments != NULL || cast->block != NULL);
+ }
+ case PM_YIELD_NODE: {
+ const pm_yield_node_t *cast = (const pm_yield_node_t *) node;
+ return cast->lparen_loc.length == 0 && cast->arguments != NULL;
+ }
+ case PM_RESCUE_MODIFIER_NODE:
+ return pm_command_call_value_p(((const pm_rescue_modifier_node_t *) node)->expression);
+ case PM_DEF_NODE: {
+ const pm_def_node_t *cast = (const pm_def_node_t *) node;
+ if (cast->equal_loc.length > 0 && cast->body != NULL) {
+ const pm_node_t *body = cast->body;
+ if (PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)) {
+ body = ((const pm_statements_node_t *) body)->body.nodes[((const pm_statements_node_t *) body)->body.size - 1]; // only the final statement is inspected; NOTE(review): assumes the statements list is non-empty - confirm
+ }
+ return pm_command_call_value_p(body);
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+}
/**
- * This callback is called when the regular expression parser encounters a
- * syntax error.
+ * Returns true if the given node is a block call: a command
+ * with a do-block, or any call chained (via `.`, `::`, `&.`) from such a node.
+ * Block calls can only be followed by call chaining, composition (and/or), and
+ * modifier operators.
+ *
+ * The walk stops with false as soon as it reaches a node that is not a call
+ * node, a call with a non-empty opening location, or a call that has no call
+ * operator and receiver to continue through.
*/
-static void
-parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
- parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
- pm_location_t location;
+static bool
+pm_block_call_p(const pm_node_t *node) {
+ while (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
+ const pm_call_node_t *call = (const pm_call_node_t *) node;
+ if (call->opening_loc.length > 0) return false; // a call with a non-empty opening location never qualifies
+
+ // Root: command with do-block (e.g., `foo bar do end`).
+ if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
+ return true;
+ }
+
+ // Walk up the receiver chain (e.g., `foo bar do end.baz`).
+ if (call->call_operator_loc.length > 0 && call->receiver != NULL) {
+ node = call->receiver;
+ continue;
+ }
+
+ return false;
+ }
+
+ return false;
+}
+
+/**
+ * Parse a case expression (the `case` keyword). This handles both case-when and
+ * case-in (pattern matching) forms.
+ *
+ * The first token lexed here is recorded as the `case` keyword. The result is
+ * a case node when `end` directly follows the (optional) predicate or when the
+ * first clause keyword is `when`; otherwise it is a case match node. Only the
+ * PM_PARSE_ACCEPTS_DO_BLOCK bit of `flags` is forwarded to the nested
+ * expression parses, and `depth` is passed down incremented by one.
+ *
+ * Errors added here include PM_ERR_CASE_MISSING_CONDITIONS (no clauses were
+ * parsed) and PM_ERR_CASE_MATCH_MISSING_PREDICATE (case match node without a
+ * predicate).
+ */
+static pm_node_t *
+parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+ size_t opening_newline_index = token_newline_index(parser);
+ parser_lex(parser);
+
+ pm_token_t case_keyword = parser->previous;
+ pm_node_t *predicate = NULL;
+
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); // consume any further terminators
+ predicate = NULL;
+ } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
+ predicate = NULL;
+ } else if (!token_begins_expression_p(parser->current.type)) {
+ predicate = NULL;
+ } else {
+ predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
+ while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); // consume the terminators after the predicate
+ }
+
+ if (match1(parser, PM_TOKEN_KEYWORD_END)) { // `end` right away: there are no clauses at all
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+ parser_lex(parser);
+ pop_block_exits(parser, previous_block_exits);
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+ return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
+ }
+
+ /* At this point we can create a case node, though we don't yet know if it
+ * is a case-in or case-when node. */
+ pm_node_t *node;
+
+ if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+ pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
+ pm_static_literals_t literals = { 0 };
+
+ /* At this point we've seen a when keyword, so we know this is a
+ * case-when node. We will continue to parse the when nodes until we hit
+ * the end of the list. */
+ while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+ parser_lex(parser);
+
+ pm_token_t when_keyword = parser->previous;
+ pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
+
+ do {
+ if (accept1(parser, PM_TOKEN_USTAR)) {
+ pm_token_t operator = parser->previous;
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+
+ pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
+ pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
+
+ if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break; // stop at the first error recovery node
+ } else {
+ pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
+ pm_when_node_conditions_append(parser->arena, when_node, condition);
+
+ /* If we found an error recovery node, then this is a syntax
+ * error and we should stop looping. */
+ if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break;
+
+ /* If this is a string node, then we need to mark it as
+ * frozen because when clause strings are frozen. */
+ if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
+ pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+ } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
+ pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
+ }
+
+ pm_when_clause_static_literals_add(parser, &literals, condition);
+ }
+ } while (accept1(parser, PM_TOKEN_COMMA));
+
+ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { // `then` is optional after a terminator
+ pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
+ }
+ } else {
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
+ pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
+ }
+
+ if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
+ if (statements != NULL) {
+ pm_when_node_statements_set(when_node, statements);
+ }
+ }
+
+ pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
+ }
+
+ /* If we didn't parse any conditions (in or when) then we need to
+ * indicate that we have an error. */
+ if (case_node->conditions.size == 0) {
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+ }
+
+ pm_static_literals_free(&literals);
+ node = UP(case_node);
+ } else {
+ pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
+
+ /* If this is a case-match node (i.e., it is a pattern matching case
+ * statement) then we must have a predicate. */
+ if (predicate == NULL) {
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
+ }
+
+ /* At this point we expect that we're parsing a case-in node. We will
+ * continue to parse the in nodes until we hit the end of the list. */
+ while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+
+ bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; // saved so it can be restored after the pattern
+ parser->pattern_matching_newlines = true;
+
+ lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+ parser->command_start = false;
+ parser_lex(parser);
+
+ pm_token_t in_keyword = parser->previous;
+
+ pm_constant_id_list_t captures = { 0 };
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
+
+ parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+
+ /* Since we're in the top-level of the case-in node we need to
+ * check for guard clauses in the form of `if` or `unless`
+ * statements. */
+ if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
+ pm_token_t keyword = parser->previous;
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1)); // guard predicate; shadows the outer case predicate
+ pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
+ } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
+ pm_token_t keyword = parser->previous;
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1)); // guard predicate; shadows the outer case predicate
+ pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
+ }
+
+ /* Now we need to check for the terminator of the in node's pattern.
+ * It can be a newline or semicolon optionally followed by a `then`
+ * keyword. */
+ pm_token_t then_keyword = { 0 };
+ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+ then_keyword = parser->previous;
+ }
+ } else {
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
+ then_keyword = parser->previous;
+ }
+
+ /* Now we can actually parse the statements associated with the in
+ * node. */
+ pm_statements_node_t *statements;
+ if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ statements = NULL;
+ } else {
+ statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
+ }
+
+ /* Now that we have the full pattern and statements, we can create
+ * the node and attach it to the case node. */
+ pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
+ pm_case_match_node_condition_append(parser->arena, case_node, condition);
+ }
+
+ /* If we didn't parse any conditions (in or when) then we need to
+ * indicate that we have an error. */
+ if (case_node->conditions.size == 0) {
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+ }
+
+ node = UP(case_node);
+ }
+
+ accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+ if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+ pm_token_t else_keyword = parser->previous;
+ pm_else_node_t *else_node;
+
+ if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+ else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current); // NOTE(review): the last argument is the token current after the statements, presumably the closing `end` - confirm
+ } else {
+ else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
+ }
- if (callback_data->shared) {
- location = (pm_location_t) { .start = start, .end = end };
+ if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+ pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
+ } else {
+ pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
+ }
+ }
+
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
+
+ if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+ pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
} else {
- location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
+ pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
}
- PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
+ pop_block_exits(parser, previous_block_exits);
+ return node;
}
/**
- * Parse the errors for the regular expression and add them to the parser.
+ * Parse a class definition expression (the `class` keyword). This handles both
+ * regular class definitions and singleton class definitions (`class << expr`).
+ *
+ * The first token lexed here is recorded as the `class` keyword. When `<<`
+ * follows, a singleton class node is returned; otherwise a class node is
+ * returned. In both paths a new scope is pushed for the body and popped again
+ * after the locals have been collected with pm_locals_order.
+ *
+ * For regular classes, PM_ERR_CLASS_NAME is added when the last token of the
+ * name is not a constant or when the parsed name is neither a constant path
+ * node nor a constant read node, and PM_ERR_CLASS_IN_METHOD is added when
+ * context_def_p reports true.
*/
-static void
-parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
- const pm_string_t *unescaped = &node->unescaped;
- parse_regular_expression_error_data_t error_data = {
- .parser = parser,
- .start = node->base.location.start,
- .end = node->base.location.end,
- .shared = unescaped->type == PM_STRING_SHARED
- };
+static pm_node_t *
+parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+ size_t opening_newline_index = token_newline_index(parser);
+ parser_lex(parser);
+
+ pm_token_t class_keyword = parser->previous;
+ pm_do_loop_stack_push(parser, false);
+
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ if (accept1(parser, PM_TOKEN_LESS_LESS)) { // singleton class form: `class << expression`
+ pm_token_t operator = parser->previous;
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
+
+ pm_parser_scope_push(parser, true);
+ if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type));
+ }
+
+ pm_node_t *statements = NULL;
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_accepts_block_stack_push(parser, true);
+ statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
+ pm_accepts_block_stack_pop(parser);
+ }
+
+ if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+ assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+ }
+
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+ pm_parser_scope_pop(parser);
+ pm_do_loop_stack_pop(parser);
+
+ flush_block_exits(parser, previous_block_exits); // NOTE(review): this path flushes the block exits while the regular class path pops them - presumably intentional; confirm
+ return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
+ }
+
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
+ pm_token_t name = parser->previous; // the last token consumed while parsing the class name
+ if (name.type != PM_TOKEN_CONSTANT) {
+ pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
+ }
+
+ pm_token_t inheritance_operator = { 0 };
+ pm_node_t *superclass;
+
+ if (match1(parser, PM_TOKEN_LESS)) {
+ inheritance_operator = parser->current;
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+
+ parser->command_start = true;
+ parser_lex(parser);
+
+ superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
+ } else {
+ superclass = NULL;
+ }
+
+ pm_parser_scope_push(parser, true);
+
+ if (inheritance_operator.start != NULL) { // a `<` token was recorded above, so a terminator is required after the superclass
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
+ } else {
+ accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+ }
+ pm_node_t *statements = NULL;
- pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_accepts_block_stack_push(parser, true);
+ statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
+ pm_accepts_block_stack_pop(parser);
+ }
+
+ if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+ assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+ }
+
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+ if (context_def_p(parser)) {
+ pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
+ }
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+ pm_parser_scope_pop(parser);
+ pm_do_loop_stack_pop(parser);
+
+ if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
+ pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
+ if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { // wrap the invalid name unless it already is an error recovery node
+ constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
+ }
+ }
+
+ pop_block_exits(parser, previous_block_exits);
+ return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
+}
+
+/**
+ * Parse a method definition expression (the `def` keyword).
+ *
+ * The function works in four stages: (1) the optional receiver and the method
+ * name, (2) an optional parameter list, either parenthesized or bare, (3) the
+ * body, which is either an endless body introduced by `=` or a list of
+ * statements terminated by `end`, and (4) construction of the def node from
+ * the collected tokens, parameters, body, and the locals of the method scope.
+ *
+ * @param parser The parser. `parser->current` is recorded as the `def` keyword
+ *     token before anything is lexed.
+ * @param binding_power Only consulted for versions before CRuby 4.0, to decide
+ *     whether the body of an endless method may be a command call.
+ * @param flags Parse flags. The accepts-do-block and accepts-command-call bits
+ *     are forwarded to the nested expression parsers.
+ * @param depth The current recursion depth. Every nested parse call receives
+ *     `depth + 1`.
+ * @return The result of `pm_def_node_create`, upcast to a generic node.
+ */
+static pm_node_t *
+parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
+ /* Install a fresh block-exits list for the duration of this definition.
+ * The previous list is handed back through flush_block_exits() below. */
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ pm_token_t def_keyword = parser->current;
+ size_t opening_newline_index = token_newline_index(parser);
+
+ /* The receiver and the operator (`.` or `::`) stay empty unless the
+ * definition has the form `def <receiver>.<name>`. */
+ pm_node_t *receiver = NULL;
+ pm_token_t operator = { 0 };
+ pm_token_t name;
+
+ /* This context is necessary for lexing `...` in a bare params correctly. It
+ * must be pushed before lexing the first param, so it is here. */
+ context_push(parser, PM_CONTEXT_DEF_PARAMS);
+ parser_lex(parser);
+
+ /* This will be false if the method name is not a valid identifier but could
+ * be followed by an operator. */
+ bool valid_name = true;
+
+ /* Every branch of this switch pushes exactly one parser scope for the
+ * method and assigns `name`. */
+ switch (parser->current.type) {
+ case PM_CASE_OPERATOR:
+ pm_parser_scope_push(parser, true);
+ lex_state_set(parser, PM_LEX_STATE_ENDFN);
+ parser_lex(parser);
+
+ name = parser->previous;
+ break;
+ case PM_TOKEN_IDENTIFIER: {
+ parser_lex(parser);
+
+ if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
+ /* The identifier is followed by `.` or `::`, so it is the
+ * receiver and the method name comes after the operator. */
+ receiver = parse_variable_call(parser);
+
+ pm_parser_scope_push(parser, true);
+ lex_state_set(parser, PM_LEX_STATE_FNAME);
+ parser_lex(parser);
+
+ operator = parser->previous;
+ name = parse_method_definition_name(parser);
+ } else {
+ /* Otherwise the identifier itself is the method name. */
+ pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
+ pm_parser_scope_push(parser, true);
+
+ name = parser->previous;
+ }
+
+ break;
+ }
+ case PM_TOKEN_INSTANCE_VARIABLE:
+ case PM_TOKEN_CLASS_VARIABLE:
+ case PM_TOKEN_GLOBAL_VARIABLE:
+ /* Variables are only acceptable as a receiver. If no `.` or `::`
+ * follows, an error is reported further down. */
+ valid_name = false;
+ PRISM_FALLTHROUGH
+ case PM_TOKEN_CONSTANT:
+ case PM_TOKEN_KEYWORD_NIL:
+ case PM_TOKEN_KEYWORD_SELF:
+ case PM_TOKEN_KEYWORD_TRUE:
+ case PM_TOKEN_KEYWORD_FALSE:
+ case PM_TOKEN_KEYWORD___FILE__:
+ case PM_TOKEN_KEYWORD___LINE__:
+ case PM_TOKEN_KEYWORD___ENCODING__: {
+ pm_parser_scope_push(parser, true);
+ parser_lex(parser);
+
+ pm_token_t identifier = parser->previous;
+
+ if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
+ lex_state_set(parser, PM_LEX_STATE_FNAME);
+ parser_lex(parser);
+ operator = parser->previous;
+
+ /* Build the receiver node that corresponds to the kind of token
+ * that preceded the operator. */
+ switch (identifier.type) {
+ case PM_TOKEN_CONSTANT:
+ receiver = UP(pm_constant_read_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_INSTANCE_VARIABLE:
+ receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_CLASS_VARIABLE:
+ receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_GLOBAL_VARIABLE:
+ receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD_NIL:
+ receiver = UP(pm_nil_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD_SELF:
+ receiver = UP(pm_self_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD_TRUE:
+ receiver = UP(pm_true_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD_FALSE:
+ receiver = UP(pm_false_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD___FILE__:
+ receiver = UP(pm_source_file_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD___LINE__:
+ receiver = UP(pm_source_line_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD___ENCODING__:
+ receiver = UP(pm_source_encoding_node_create(parser, &identifier));
+ break;
+ default:
+ break;
+ }
+
+ name = parse_method_definition_name(parser);
+ } else {
+ /* No operator follows, so the token is used as the method name.
+ * That is an error for the variable tokens flagged above. */
+ if (!valid_name) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type));
+ }
+
+ name = identifier;
+ }
+ break;
+ }
+ case PM_TOKEN_PARENTHESIS_LEFT: {
+ /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner
+ * expression of this parenthesis should not be processed under this
+ * context. Thus, the context is popped here. */
+ context_pop(parser);
+ parser_lex(parser);
+
+ pm_token_t lparen = parser->previous;
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
+
+ accept1(parser, PM_TOKEN_NEWLINE);
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+ pm_token_t rparen = parser->previous;
+
+ /* A parenthesized receiver must be followed by `.` or `::`. */
+ lex_state_set(parser, PM_LEX_STATE_FNAME);
+ expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
+
+ operator = parser->previous;
+ receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
+
+ /* `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same reason
+ * as described above. */
+ pm_parser_scope_push(parser, true);
+ context_push(parser, PM_CONTEXT_DEF_PARAMS);
+ name = parse_method_definition_name(parser);
+ break;
+ }
+ default:
+ pm_parser_scope_push(parser, true);
+ name = parse_method_definition_name(parser);
+ break;
+ }
+
+ /* The parentheses tokens stay zeroed unless a parenthesized parameter list
+ * is found. `lparen.start == NULL` is used below to detect that case. */
+ pm_token_t lparen = { 0 };
+ pm_token_t rparen = { 0 };
+ pm_parameters_node_t *params;
+
+ /* Every branch of this switch assigns `params` and pops the
+ * `PM_CONTEXT_DEF_PARAMS` context pushed above. */
+ bool accept_endless_def = true;
+ switch (parser->current.type) {
+ case PM_TOKEN_PARENTHESIS_LEFT: {
+ parser_lex(parser);
+ lparen = parser->previous;
+
+ if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ params = NULL;
+ } else {
+ /* https://bugs.ruby-lang.org/issues/19107 */
+ bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
+ params = parse_parameters(
+ parser,
+ PM_BINDING_POWER_DEFINED,
+ true,
+ allow_trailing_comma,
+ true,
+ true,
+ false,
+ PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+ (uint16_t) (depth + 1)
+ );
+ }
+
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->command_start = true;
+
+ context_pop(parser);
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ /* The closing parenthesis is missing. Report it and collapse
+ * `parser->previous` into a zero-width token of type 0 located
+ * at its own end, which is then recorded as `rparen`. */
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = 0;
+ }
+
+ rparen = parser->previous;
+ break;
+ }
+ case PM_CASE_PARAMETER: {
+ /* If we're about to lex a label, we need to add the label state to
+ * make sure the next newline is ignored. */
+ if (parser->current.type == PM_TOKEN_LABEL) {
+ lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
+ }
+
+ params = parse_parameters(
+ parser,
+ PM_BINDING_POWER_DEFINED,
+ false,
+ false,
+ true,
+ true,
+ false,
+ PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+ (uint16_t) (depth + 1)
+ );
+
+ /* Reject `def * = 1` and similar. We have to specifically check for
+ * them because they create ambiguity with optional arguments. */
+ accept_endless_def = false;
+
+ context_pop(parser);
+ break;
+ }
+ default: {
+ params = NULL;
+ context_pop(parser);
+ break;
+ }
+ }
+
+ /* `equal` is only set for an endless definition and `end_keyword` only for
+ * a regular one; the other stays zeroed. */
+ pm_node_t *statements = NULL;
+ pm_token_t equal = { 0 };
+ pm_token_t end_keyword = { 0 };
+
+ if (accept1(parser, PM_TOKEN_EQUAL)) {
+ /* Endless method definition: `def name(params) = expression`. */
+ if (token_is_setter_name(&name)) {
+ pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
+ }
+ if (!accept_endless_def) {
+ pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
+ }
+ /* An endless def is rejected when the current context is a default
+ * parameter value whose enclosing context is block parameters. */
+ if (
+ parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
+ parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
+ ) {
+ PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
+ }
+ equal = parser->previous;
+
+ context_push(parser, PM_CONTEXT_DEF);
+ pm_do_loop_stack_push(parser, false);
+ statements = UP(pm_statements_node_create(parser));
+
+ uint8_t allow_flags;
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+ allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
+ } else {
+ /* Allow `def foo = puts "Hello"` but not
+ * `private def foo = puts "Hello"` */
+ allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
+ }
+
+ /* Inside a def body, we push true onto the accepts_block_stack so that
+ * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block
+ * for primary-level constructs, not commands). During command argument
+ * parsing, the stack is pushed to false, causing `do` to be lexed as
+ * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless
+ * def body and instead left for the outer context. */
+ pm_accepts_block_stack_push(parser, true);
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
+ pm_accepts_block_stack_pop(parser);
+
+ /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error
+ * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is
+ * intentionally not caught here — it should bubble up to the outer
+ * context (e.g., `private def f = puts "Hello" do end` where the block
+ * attaches to `private`). */
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
+ pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
+ pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
+ }
+
+ if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+ context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
+ pm_token_t rescue_keyword = parser->previous;
+
+ /* In the Ruby grammar, the rescue value of an endless method
+ * command excludes and/or and in/=>. */
+ pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ context_pop(parser);
+
+ statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
+ }
+
+ /* A nested endless def whose body is a command call (e.g.,
+ * `def f = def g = foo bar`) is a command assignment and cannot appear
+ * as a def body. */
+ if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
+ PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ }
+
+ pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
+ pm_do_loop_stack_pop(parser);
+ context_pop(parser);
+ } else {
+ /* Regular method definition terminated by `end`. */
+ if (lparen.start == NULL) {
+ /* There was no parenthesized parameter list, so a newline or a
+ * semicolon is required before the body. */
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->command_start = true;
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
+ } else {
+ /* After a parenthesized parameter list the terminator is
+ * optional. */
+ accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+ }
+
+ pm_accepts_block_stack_push(parser, true);
+ pm_do_loop_stack_push(parser, false);
+
+ /* Only parse statements if the body does not immediately start with
+ * `rescue`, `ensure`, `else`, or `end`. */
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_accepts_block_stack_push(parser, true);
+ statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
+ pm_accepts_block_stack_pop(parser);
+ }
+
+ /* A `rescue`, `ensure`, or `else` clause here is handed, together with
+ * the statements parsed so far, to parse_rescues_implicit_begin(),
+ * whose result replaces `statements`. */
+ if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+ assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
+ }
+
+ pm_accepts_block_stack_pop(parser);
+ pm_do_loop_stack_pop(parser);
+
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
+ end_keyword = parser->previous;
+ }
+
+ /* Capture the ordered locals of the method scope before popping the scope
+ * that was pushed while parsing the name. */
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+ pm_parser_scope_pop(parser);
+
+ /* If the final character is `@` as is the case when defining methods to
+ * override the unary operators, we should ignore the @ in the same way we
+ * do for symbols. */
+ pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
+
+ flush_block_exits(parser, previous_block_exits);
+
+ /* Tokens that were never set are still zeroed here and are passed through
+ * NTOK2PTR (presumably mapping an unset token to NULL — confirm against the
+ * macro definition). */
+ return UP(pm_def_node_create(
+ parser,
+ name_id,
+ &name,
+ receiver,
+ params,
+ statements,
+ &locals,
+ &def_keyword,
+ NTOK2PTR(operator),
+ NTOK2PTR(lparen),
+ NTOK2PTR(rparen),
+ NTOK2PTR(equal),
+ NTOK2PTR(end_keyword)
+ ));
+}
+
+/**
+ * Parse a module definition expression (the `module` keyword).
+ *
+ * The name is parsed as an expression followed by any number of `::Constant`
+ * segments. If the name cannot be parsed at all, a module node without locals
+ * or body is returned right away. Otherwise the body is parsed inside a fresh
+ * scope, optionally followed by `rescue`/`ensure`/`else` clauses, and must be
+ * closed by `end`.
+ *
+ * @param parser The parser. The next token lexed is recorded as the `module`
+ *     keyword.
+ * @param flags Parse flags. Only the accepts-do-block bit is forwarded when
+ *     parsing the module name.
+ * @param depth The current recursion depth. Nested parse calls get `depth + 1`.
+ * @return The result of `pm_module_node_create`, upcast to a generic node.
+ */
+static pm_node_t *
+parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+    pm_node_list_t block_exits = { 0 };
+    pm_node_list_t *outer_block_exits = push_block_exits(parser, &block_exits);
+
+    size_t newline_index = token_newline_index(parser);
+    parser_lex(parser);
+    pm_token_t keyword = parser->previous;
+
+    pm_node_t *path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
+
+    /* The name could not be parsed: produce a module node with no locals and
+     * no body, using a zero-width token for both the name and the `end`
+     * keyword. */
+    if (PM_NODE_TYPE_P(path, PM_ERROR_RECOVERY_NODE)) {
+        pop_block_exits(parser, outer_block_exits);
+
+        pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+        return UP(pm_module_node_create(parser, NULL, &keyword, path, &missing, NULL, &missing));
+    }
+
+    /* Fold each trailing `::Constant` segment into a constant path node. */
+    while (accept1(parser, PM_TOKEN_COLON_COLON)) {
+        pm_token_t delimiter = parser->previous;
+
+        expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+        path = UP(pm_constant_path_node_create(parser, path, &delimiter, &parser->previous));
+    }
+
+    /* The last token consumed is the module name. Anything other than a
+     * constant (for example `module foo`) is a syntax error. */
+    pm_token_t name = parser->previous;
+    if (name.type != PM_TOKEN_CONSTANT) {
+        pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
+    }
+
+    /* Only constant reads, constant paths, and existing error recovery nodes
+     * are kept as-is; every other node is wrapped in an error recovery node. */
+    bool path_is_acceptable =
+        PM_NODE_TYPE_P(path, PM_CONSTANT_READ_NODE) ||
+        PM_NODE_TYPE_P(path, PM_CONSTANT_PATH_NODE) ||
+        PM_NODE_TYPE_P(path, PM_ERROR_RECOVERY_NODE);
+
+    if (!path_is_acceptable) {
+        path = UP(pm_error_recovery_node_create_unexpected(parser, path));
+    }
+
+    pm_parser_scope_push(parser, true);
+    accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
+
+    /* Statements are only parsed when the body does not start directly with
+     * `rescue`, `ensure`, `else`, or `end`. */
+    pm_node_t *statements = NULL;
+    bool body_is_empty = match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END);
+
+    if (!body_is_empty) {
+        pm_accepts_block_stack_push(parser, true);
+        statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
+        pm_accepts_block_stack_pop(parser);
+    }
+
+    if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+        parser_warn_indentation_mismatch(parser, newline_index, &keyword, false, false);
+    } else {
+        /* Hand the statements parsed so far to the rescue parser, whose
+         * result becomes the body of the module. */
+        assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+        statements = UP(parse_rescues_implicit_begin(parser, newline_index, &keyword, keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
+    }
+
+    /* The locals have to be collected before the module scope is popped. */
+    pm_constant_id_list_t locals;
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+    pm_parser_scope_pop(parser);
+
+    expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &keyword);
+
+    if (context_def_p(parser)) {
+        pm_parser_err_token(parser, &keyword, PM_ERR_MODULE_IN_METHOD);
+    }
+
+    pop_block_exits(parser, outer_block_exits);
+
+    return UP(pm_module_node_create(parser, &locals, &keyword, path, &name, statements, &parser->previous));
+}
+
+/**
+ * Parse an interpolated word array literal (`%W[...]`).
+ *
+ * Tokens are consumed until PM_TOKEN_STRING_END or PM_TOKEN_EOF. Separator
+ * tokens (PM_TOKEN_WORDS_SEP) delimit the elements. Each element starts as a
+ * plain string node and is converted into an interpolated string node as soon
+ * as a second piece of content, an embedded variable, or an embedded
+ * expression is seen for it.
+ *
+ * @param parser The parser. The next token lexed is recorded as the opening
+ *     of the array.
+ * @param depth The current recursion depth. Nested parse calls get `depth + 1`.
+ * @return The array node, upcast to a generic node.
+ */
+static pm_node_t *
+parse_string_array(pm_parser_t *parser, uint16_t depth) {
+ parser_lex(parser);
+ pm_token_t opening = parser->previous;
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+ /* This is the current node that we are parsing that will be added to the
+ * list of elements. */
+ pm_node_t *current = NULL;
+
+ while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+ switch (parser->current.type) {
+ case PM_TOKEN_WORDS_SEP: {
+ /* Reset the explicit encoding if we hit a separator since each
+ * element can have its own encoding. */
+ parser->explicit_encoding = NULL;
+
+ if (current == NULL) {
+ /* If we hit a separator before we have any content, then we
+ * don't need to do anything. */
+ } else {
+ /* If we hit a separator after we've hit content, then we
+ * need to append that content to the list and reset the
+ * current node. */
+ pm_array_node_elements_append(parser->arena, array, current);
+ current = NULL;
+ }
+
+ parser_lex(parser);
+ break;
+ }
+ case PM_TOKEN_STRING_CONTENT: {
+ /* The string node is built from the current token before the
+ * parser advances past it. */
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+ pm_node_flag_set(string, parse_unescaped_encoding(parser));
+ parser_lex(parser);
+
+ if (current == NULL) {
+ /* If we hit content and the current node is NULL, then this
+ * is the first string content we've seen. In that case
+ * we're going to create a new string node and set that to
+ * the current. */
+ current = string;
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ /* If we hit string content and the current node is an
+ * interpolated string, then we need to append the string
+ * content to the list of child nodes. */
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ /* If we hit string content and the current node is a string
+ * node, then we need to convert the current node into an
+ * interpolated string and add the string content to the
+ * list of child nodes. */
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, interpolated, current);
+ pm_interpolated_string_node_append(parser, interpolated, string);
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
+
+ break;
+ }
+ case PM_TOKEN_EMBVAR: {
+ if (current == NULL) {
+ /* If we hit an embedded variable and the current node is
+ * NULL, then this is the start of a new string. We'll set
+ * the current node to a new interpolated string. */
+ current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ /* If we hit an embedded variable and the current node is a
+ * string node, then we'll convert the current into an
+ * interpolated string and add the string node to the list
+ * of parts. */
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, interpolated, current);
+ current = UP(interpolated);
+ } else {
+ /* If we hit an embedded variable and the current node is an
+ * interpolated string, then we'll just add the embedded
+ * variable. */
+ }
+
+ /* At this point the current node is treated as an interpolated
+ * string and the parsed part is appended to it. */
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+ break;
+ }
+ case PM_TOKEN_EMBEXPR_BEGIN: {
+ if (current == NULL) {
+ /* If we hit an embedded expression and the current node is
+ * NULL, then this is the start of a new string. We'll set
+ * the current node to a new interpolated string. */
+ current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ /* If we hit an embedded expression and the current node is
+ * a string node, then we'll convert the current into an
+ * interpolated string and add the string node to the list
+ * of parts. */
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, interpolated, current);
+ current = UP(interpolated);
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ /* If we hit an embedded expression and the current node is
+ * an interpolated string, then we'll just continue on. */
+ } else {
+ assert(false && "unreachable");
+ }
+
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+ break;
+ }
+ default:
+ /* Any other token is unexpected here. The expectation of string
+ * content is checked against it with the element error id, and
+ * then one more token is lexed. */
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
+ parser_lex(parser);
+ break;
+ }
+ }
+
+ /* If we have a current node, then we need to append it to the list. */
+ if (current) {
+ pm_array_node_elements_append(parser->arena, array, current);
+ }
+
+ /* If the input ended before the closing delimiter, report the error on the
+ * opening token and close the array with a zero-width token located at the
+ * end of the previous token. */
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
+ }
+
+ pm_array_node_close_set(parser, array, &closing);
+ return UP(array);
+}
+
+/**
+ * Parse an interpolated symbol array literal (`%I[...]`).
+ *
+ * Tokens are consumed until PM_TOKEN_STRING_END or PM_TOKEN_EOF. Separator
+ * tokens (PM_TOKEN_WORDS_SEP) delimit the elements. Each element starts as a
+ * plain symbol node and is converted into an interpolated symbol node as soon
+ * as a second piece of content, an embedded variable, or an embedded
+ * expression is seen for it.
+ *
+ * @param parser The parser. The next token lexed is recorded as the opening
+ *     of the array.
+ * @param depth The current recursion depth. Nested parse calls get `depth + 1`.
+ * @return The array node, upcast to a generic node.
+ */
+static pm_node_t *
+parse_symbol_array(pm_parser_t *parser, uint16_t depth) {
+    parser_lex(parser);
+    pm_token_t opening = parser->previous;
+    pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+    /* This is the current node that we are parsing that will be added to the
+     * list of elements. */
+    pm_node_t *current = NULL;
+
+    while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+        switch (parser->current.type) {
+            case PM_TOKEN_WORDS_SEP: {
+                if (current == NULL) {
+                    /* If we hit a separator before we have any content, then we
+                     * don't need to do anything. */
+                } else {
+                    /* If we hit a separator after we've hit content, then we
+                     * need to append that content to the list and reset the
+                     * current node. */
+                    pm_array_node_elements_append(parser->arena, array, current);
+                    current = NULL;
+                }
+
+                parser_lex(parser);
+                break;
+            }
+            case PM_TOKEN_STRING_CONTENT: {
+                if (current == NULL) {
+                    /* If we hit content and the current node is NULL, then this
+                     * is the first string content we've seen. In that case
+                     * we're going to create a new symbol node and set that to
+                     * the current. */
+                    current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
+                    parser_lex(parser);
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    /* If we hit string content and the current node is an
+                     * interpolated symbol, then we need to append the string
+                     * content to the list of child nodes. */
+                    pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+                    parser_lex(parser);
+
+                    pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit string content and the current node is a symbol
+                     * node, then we need to convert the current node into an
+                     * interpolated symbol and add the string content to the
+                     * list of child nodes. */
+                    pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+
+                    /* Rebuild a content token covering the value of the
+                     * existing symbol so that it can be re-created as a string
+                     * part. */
+                    pm_token_t content = {
+                        .type = PM_TOKEN_STRING_CONTENT,
+                        .start = parser->start + cast->value_loc.start,
+                        .end = parser->start + cast->value_loc.start + cast->value_loc.length
+                    };
+
+                    pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
+
+                    /* The second part must be built from the content token that
+                     * is being handled right now, i.e. the current token. The
+                     * previous token is the content of the first part, so using
+                     * it would give the second part the first part's location. */
+                    pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+                    parser_lex(parser);
+
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
+
+                    current = UP(interpolated);
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                break;
+            }
+            case PM_TOKEN_EMBVAR: {
+                /* Tracks whether the start of the interpolated symbol has
+                 * already been set from a leading plain part. */
+                bool start_location_set = false;
+                if (current == NULL) {
+                    /* If we hit an embedded variable and the current node is
+                     * NULL, then this is the start of a new symbol. We'll set
+                     * the current node to a new interpolated symbol. */
+                    current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit an embedded variable and the current node is a
+                     * symbol node, then we'll convert the current into an
+                     * interpolated symbol and add it, as a string node, to the
+                     * list of parts. */
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+
+                    current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
+                    PM_NODE_START_SET_NODE(interpolated, current);
+                    start_location_set = true;
+                    current = UP(interpolated);
+                } else {
+                    /* If we hit an embedded variable and the current node is an
+                     * interpolated symbol, then we'll just add the embedded
+                     * variable. */
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
+                if (!start_location_set) {
+                    PM_NODE_START_SET_NODE(current, part);
+                }
+                break;
+            }
+            case PM_TOKEN_EMBEXPR_BEGIN: {
+                /* Tracks whether the start of the interpolated symbol has
+                 * already been set from a leading plain part. */
+                bool start_location_set = false;
+                if (current == NULL) {
+                    /* If we hit an embedded expression and the current node is
+                     * NULL, then this is the start of a new symbol. We'll set
+                     * the current node to a new interpolated symbol. */
+                    current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * a symbol node, then we'll convert the current into an
+                     * interpolated symbol and add it, as a string node, to the
+                     * list of parts. */
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+
+                    current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
+                    PM_NODE_START_SET_NODE(interpolated, current);
+                    start_location_set = true;
+                    current = UP(interpolated);
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * an interpolated symbol, then we'll just continue on. */
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
+                if (!start_location_set) {
+                    PM_NODE_START_SET_NODE(current, part);
+                }
+                break;
+            }
+            default:
+                /* Any other token is unexpected here. The expectation of
+                 * string content is checked against it with the element error
+                 * id, and then one more token is lexed. */
+                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
+                parser_lex(parser);
+                break;
+        }
+    }
+
+    /* If we have a current node, then we need to append it to the list. */
+    if (current) {
+        pm_array_node_elements_append(parser->arena, array, current);
+    }
+
+    /* If the input ended before the closing delimiter, report the error on the
+     * opening token and close the array with a zero-width token located at the
+     * end of the previous token. */
+    pm_token_t closing = parser->current;
+    if (match1(parser, PM_TOKEN_EOF)) {
+        pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
+        closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+    } else {
+        expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+    }
+    pm_array_node_close_set(parser, array, &closing);
+
+    return UP(array);
+}
+
+/**
+ * Parse a parenthesized expression, which could be a grouping, a multi-target
+ * assignment, or a set of statements.
+ */
+static pm_node_t *
+parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) {
+ pm_token_t opening = parser->current;
+ pm_node_flags_t paren_flags = 0;
+
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ parser_lex(parser);
+ while (true) {
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+ break;
+ }
+ }
+
+ /* If this is the end of the file or we match a right parenthesis, then we
+ * have an empty parentheses node, and we can immediately return. */
+ if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+ pop_block_exits(parser, previous_block_exits);
+ return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
+ }
+
+ /* Otherwise, we're going to parse the first statement in the list of
+ * statements within the parentheses. */
+ pm_accepts_block_stack_push(parser, true);
+ context_push(parser, PM_CONTEXT_PARENS);
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+ context_pop(parser);
+
+ /* Determine if this statement is followed by a terminator. In the case of a
+ * single statement, this is fine. But in the case of multiple statements
+ * it's required. */
+ bool terminator_found = false;
+
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ terminator_found = true;
+ paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
+ terminator_found = true;
+ }
+
+ if (terminator_found) {
+ while (true) {
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+ break;
+ }
+ }
+ }
+
+ /* If we hit a right parenthesis, then we're done parsing the parentheses
+ * node, and we can check which kind of node we should return. */
+ if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
+ lex_state_set(parser, PM_LEX_STATE_ENDARG);
+ }
+
+ parser_lex(parser);
+ pm_accepts_block_stack_pop(parser);
+ pop_block_exits(parser, previous_block_exits);
+
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+ /* If we have a single statement and are ending on a right
+ * parenthesis, then we need to check if this is possibly a multiple
+ * target node. */
+ pm_multi_target_node_t *multi_target;
+
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
+ multi_target = (pm_multi_target_node_t *) statement;
+ } else {
+ multi_target = pm_multi_target_node_create(parser);
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
+ }
+
+ multi_target->lparen_loc = TOK2LOC(parser, &opening);
+ multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
+ PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
+ PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
+
+ pm_node_t *result;
+ if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
+ result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ accept1(parser, PM_TOKEN_NEWLINE);
+ } else {
+ result = UP(multi_target);
+ }
+
+ if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+ /* All set, this is explicitly allowed by the parent context. */
+ } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
+ /* All set, we're inside a for loop and we're parsing multiple
+ * targets. */
+ } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
+ /* Multi targets are not allowed when it's not a statement
+ * level. */
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ /* Multi targets must be followed by an equal sign in order to
+ * be valid (or a right parenthesis if they are nested). */
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ }
+
+ return result;
+ }
+
+ /* If we have a single statement and are ending on a right parenthesis
+ * and we didn't return a multiple assignment node, then we can return a
+ * regular parentheses node now. */
+ pm_statements_node_t *statements = pm_statements_node_create(parser);
+ pm_statements_node_body_append(parser, statements, statement, true);
+
+ return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
+ }
+
+ /* If we have more than one statement in the set of parentheses, then we are
+ * going to parse all of them as a list of statements. We'll do that here.
+ */
+ context_push(parser, PM_CONTEXT_PARENS);
+ paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+
+ pm_statements_node_t *statements = pm_statements_node_create(parser);
+ pm_statements_node_body_append(parser, statements, statement, true);
+
+ /* If we didn't find a terminator and we didn't find a right parenthesis,
+ * then this is a syntax error. */
+ if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ }
+
+ /* Parse each statement within the parentheses. */
+ while (true) {
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+ pm_statements_node_body_append(parser, statements, node, true);
+
+ /* If we're recovering from a syntax error, then we need to stop parsing
+ * the statements now. */
+ if (parser->recovering) {
+ /* If this is the level of context where the recovery has happened,
+ * then we can mark the parser as done recovering. */
+ if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
+ break;
+ }
+
+ /* If we couldn't parse an expression at all, then we need to bail out
+ * of the loop. */
+ if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break;
+
+ /* If we successfully parsed a statement, then we are going to need a
+ * terminator to delimit them. */
+ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+ if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
+ } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ break;
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
+ /* If we're at the end of the file, then we're going to add an error
+ * after this for the ) anyway. */
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ }
+ }
+
+ context_pop(parser);
+ pm_accepts_block_stack_pop(parser);
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+
+ /* When we're parsing multi targets, we allow them to be followed by a right
+ * parenthesis if they are at the statement level. This is only possible if
+ * they are the final statement in a parentheses. We need to explicitly
+ * reject that here. */
+ {
+ pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
+
+ if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+ pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
+
+ statement = UP(multi_target);
+ statements->body.nodes[statements->body.size - 1] = statement;
+ }
+
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
+ const uint8_t *offset = parser->start + PM_NODE_END(statement);
+ pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
+ pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0));
+
+ statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
+ statements->body.nodes[statements->body.size - 1] = statement;
+
+ pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ }
+ }
+
+ pop_block_exits(parser, previous_block_exits);
+ pm_void_statements_check(parser, statements, true);
+ return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
}
/**
* Parse an expression that begins with the previous node that we just lexed.
*/
-static inline pm_node_t *
-parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+static PRISM_INLINE pm_node_t *
+parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
switch (parser->current.type) {
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
parser_lex(parser);
@@ -18077,11 +19089,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
} else {
// If there was no comma, then we need to add a syntax
// error.
- const uint8_t *location = parser->previous.end;
- PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
-
- parser->previous.start = location;
- parser->previous.type = PM_TOKEN_MISSING;
+ PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = 0;
}
}
@@ -18099,28 +19109,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
pm_parser_scope_forwarding_positionals_check(parser, &operator);
} else {
- expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ element = UP(pm_splat_node_create(parser, &operator, expression));
} else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
if (parsed_bare_hash) {
pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
}
- element = (pm_node_t *) pm_keyword_hash_node_create(parser);
+ element = UP(pm_keyword_hash_node_create(parser));
pm_static_literals_t hash_keys = { 0 };
- if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
}
pm_static_literals_free(&hash_keys);
parsed_bare_hash = true;
} else {
- element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
+ element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
- if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+ if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
if (parsed_bare_hash) {
pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
}
@@ -18129,18 +19139,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_static_literals_t hash_keys = { 0 };
pm_hash_key_static_literals_add(parser, &hash_keys, element);
- pm_token_t operator;
+ pm_token_t operator = { 0 };
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
operator = parser->previous;
- } else {
- operator = not_provided(parser);
}
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
- pm_keyword_hash_node_elements_append(hash, assoc);
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
+ pm_keyword_hash_node_elements_append(parser->arena, hash, assoc);
- element = (pm_node_t *) hash;
+ element = UP(hash);
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
}
@@ -18150,213 +19158,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
}
- pm_array_node_elements_append(array, element);
- if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
+ pm_array_node_elements_append(parser->arena, array, element);
+ if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
}
accept1(parser, PM_TOKEN_NEWLINE);
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
- pm_array_node_close_set(array, &parser->previous);
+ pm_array_node_close_set(parser, array, &parser->previous);
pm_accepts_block_stack_pop(parser);
- return (pm_node_t *) array;
+ return UP(array);
}
case PM_TOKEN_PARENTHESIS_LEFT:
- case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
- pm_token_t opening = parser->current;
-
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- parser_lex(parser);
- while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
-
- // If this is the end of the file or we match a right parenthesis, then
- // we have an empty parentheses node, and we can immediately return.
- if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
- }
-
- // Otherwise, we're going to parse the first statement in the list
- // of statements within the parentheses.
- pm_accepts_block_stack_push(parser, true);
- context_push(parser, PM_CONTEXT_PARENS);
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
- context_pop(parser);
-
- // Determine if this statement is followed by a terminator. In the
- // case of a single statement, this is fine. But in the case of
- // multiple statements it's required.
- bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- if (terminator_found) {
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
- }
-
- // If we hit a right parenthesis, then we're done parsing the
- // parentheses node, and we can check which kind of node we should
- // return.
- if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
- lex_state_set(parser, PM_LEX_STATE_ENDARG);
- }
-
- parser_lex(parser);
- pm_accepts_block_stack_pop(parser);
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
- // If we have a single statement and are ending on a right
- // parenthesis, then we need to check if this is possibly a
- // multiple target node.
- pm_multi_target_node_t *multi_target;
-
- if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
- multi_target = (pm_multi_target_node_t *) statement;
- } else {
- multi_target = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, multi_target, statement);
- }
-
- pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
-
- multi_target->lparen_loc = lparen_loc;
- multi_target->rparen_loc = rparen_loc;
- multi_target->base.location.start = lparen_loc.start;
- multi_target->base.location.end = rparen_loc.end;
-
- pm_node_t *result;
- if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
- result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
- accept1(parser, PM_TOKEN_NEWLINE);
- } else {
- result = (pm_node_t *) multi_target;
- }
-
- if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
- // All set, this is explicitly allowed by the parent
- // context.
- } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
- // All set, we're inside a for loop and we're parsing
- // multiple targets.
- } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
- // Multi targets are not allowed when it's not a
- // statement level.
- pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
- } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
- // Multi targets must be followed by an equal sign in
- // order to be valid (or a right parenthesis if they are
- // nested).
- pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
- }
-
- return result;
- }
-
- // If we have a single statement and are ending on a right parenthesis
- // and we didn't return a multiple assignment node, then we can return a
- // regular parentheses node now.
- pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(parser, statements, statement, true);
-
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
- }
-
- // If we have more than one statement in the set of parentheses,
- // then we are going to parse all of them as a list of statements.
- // We'll do that here.
- context_push(parser, PM_CONTEXT_PARENS);
- pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(parser, statements, statement, true);
-
- // If we didn't find a terminator and we didn't find a right
- // parenthesis, then this is a syntax error.
- if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
- }
-
- // Parse each statement within the parentheses.
- while (true) {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
- pm_statements_node_body_append(parser, statements, node, true);
-
- // If we're recovering from a syntax error, then we need to stop
- // parsing the statements now.
- if (parser->recovering) {
- // If this is the level of context where the recovery has
- // happened, then we can mark the parser as done recovering.
- if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
- break;
- }
-
- // If we couldn't parse an expression at all, then we need to
- // bail out of the loop.
- if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
-
- // If we successfully parsed a statement, then we are going to
- // need terminator to delimit them.
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
- if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
- } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- break;
- } else if (!match1(parser, PM_TOKEN_EOF)) {
- // If we're at the end of the file, then we're going to add
- // an error after this for the ) anyway.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
- }
- }
-
- context_pop(parser);
- pm_accepts_block_stack_pop(parser);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-
- // When we're parsing multi targets, we allow them to be followed by
- // a right parenthesis if they are at the statement level. This is
- // only possible if they are the final statement in a parentheses.
- // We need to explicitly reject that here.
- {
- pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
-
- if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
- pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, multi_target, statement);
-
- statement = (pm_node_t *) multi_target;
- statements->body.nodes[statements->body.size - 1] = statement;
- }
-
- if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
- const uint8_t *offset = statement->location.end;
- pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
- pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
-
- statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
- statements->body.nodes[statements->body.size - 1] = statement;
-
- pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
- }
- }
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- pm_void_statements_check(parser, statements, true);
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
- }
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+ return parse_parentheses(parser, binding_power, depth);
case PM_TOKEN_BRACE_LEFT: {
// If we were passed a current_hash_keys via the parser, then that
// means we're already parsing a hash and we want to share the set
@@ -18371,14 +19192,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_accepts_block_stack_push(parser, true);
parser_lex(parser);
- pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
+ pm_token_t opening = parser->previous;
+ pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
if (current_hash_keys != NULL) {
- parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
+ parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
} else {
pm_static_literals_t hash_keys = { 0 };
- parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
+ parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
pm_static_literals_free(&hash_keys);
}
@@ -18386,26 +19208,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_accepts_block_stack_pop(parser);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
- pm_hash_node_closing_loc_set(node, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
+ pm_hash_node_closing_loc_set(parser, node, &parser->previous);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_CHARACTER_LITERAL: {
- parser_lex(parser);
-
- pm_token_t opening = parser->previous;
- opening.type = PM_TOKEN_STRING_BEGIN;
- opening.end = opening.start + 1;
-
- pm_token_t content = parser->previous;
- content.type = PM_TOKEN_STRING_CONTENT;
- content.start = content.start + 1;
+ pm_node_t *node = UP(pm_string_node_create_current_string(
+ parser,
+ &(pm_token_t) {
+ .type = PM_TOKEN_STRING_BEGIN,
+ .start = parser->current.start,
+ .end = parser->current.start + 1
+ },
+ &(pm_token_t) {
+ .type = PM_TOKEN_STRING_CONTENT,
+ .start = parser->current.start + 1,
+ .end = parser->current.end
+ },
+ NULL
+ ));
- pm_token_t closing = not_provided(parser);
- pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
pm_node_flag_set(node, parse_unescaped_encoding(parser));
+ // Skip past the character literal here, since now we have handled
+ // parser->explicit_encoding correctly.
+ parser_lex(parser);
+
// Characters can be followed by strings in which case they are
// automatically concatenated.
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
@@ -18416,7 +19245,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18432,16 +19261,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// fact a method call, not a constant read.
if (
match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
- (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
+ ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
match1(parser, PM_TOKEN_BRACE_LEFT)
) {
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
+ return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
}
- pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
// If we get here, then we have a comma immediately following a
@@ -18456,7 +19285,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t delimiter = parser->previous;
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
+ pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18469,7 +19298,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t operator = parser->current;
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
// Unary .. and ... are special because these are non-associative
// operators that can also be unary operators. In this case we need
@@ -18479,23 +19308,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
}
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
case PM_TOKEN_FLOAT:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
+ return UP(pm_float_node_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_IMAGINARY:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
+ return UP(pm_float_node_imaginary_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_RATIONAL:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
+ return UP(pm_float_node_rational_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
+ return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
case PM_TOKEN_NUMBERED_REFERENCE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18505,7 +19334,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_GLOBAL_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18515,7 +19344,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_BACK_REFERENCE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18537,26 +19366,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_call_node_t *call = (pm_call_node_t *) node;
pm_arguments_t arguments = { 0 };
- if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
+ if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) {
// Since we found arguments, we need to turn off the
// variable call bit in the flags.
- pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
+ pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
call->opening_loc = arguments.opening_loc;
call->arguments = arguments.arguments;
call->closing_loc = arguments.closing_loc;
call->block = arguments.block;
- if (arguments.block != NULL) {
- call->base.location.end = arguments.block->location.end;
- } else if (arguments.closing_loc.start == NULL) {
- if (arguments.arguments != NULL) {
- call->base.location.end = arguments.arguments->base.location.end;
- } else {
- call->base.location.end = call->message_loc.end;
- }
+ const pm_location_t *end = pm_arguments_end(&arguments);
+ if (end == NULL) {
+ PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
} else {
- call->base.location.end = arguments.closing_loc.end;
+ PM_NODE_LENGTH_SET_LOCATION(call, end);
}
}
} else {
@@ -18564,19 +19388,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// can still be a method call if it is followed by arguments or
// a block, so we need to check for that here.
if (
- (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
+ ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
match1(parser, PM_TOKEN_BRACE_LEFT)
) {
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
// If we're about to convert an 'it' implicit local
// variable read into a method call, we need to remove
// it from the list of implicit local variables.
- parse_target_implicit_parameter(parser, node);
+ pm_node_unreference(parser, node);
} else {
// Otherwise, we're about to convert a regular local
// variable read into a method call, in which case we
@@ -18584,16 +19408,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// purposes of warnings.
assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
- if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
- parse_target_implicit_parameter(parser, node);
+ if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
+ pm_node_unreference(parser, node);
} else {
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
}
}
- pm_node_destroy(parser, node);
- return (pm_node_t *) fcall;
+ return UP(fcall);
}
}
@@ -18625,12 +19448,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t content = parse_strings_empty_content(parser->previous.start);
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
- node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
+ node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
} else {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
}
- node->location.end = opening.end;
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
} else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
// If we get here, then we tried to find something in the
// heredoc but couldn't actually parse anything, so we'll just
@@ -18638,7 +19461,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
//
// parse_string_part handles its own errors, so there is no need
// for us to add one here.
- node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
// If we get here, then the part that we parsed was plain string
// content and we're at the end of the heredoc, so we can return
@@ -18647,8 +19470,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_flag_set(part, parse_unescaped_encoding(parser));
pm_string_node_t *cast = (pm_string_node_t *) part;
- cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
+ cast->opening_loc = TOK2LOC(parser, &opening);
+ cast->closing_loc = TOK2LOC(parser, &parser->current);
cast->base.location = cast->opening_loc;
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -18657,21 +19480,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
- parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
+ parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace);
}
- node = (pm_node_t *) cast;
+ node = UP(cast);
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
} else {
// If we get here, then we have multiple parts in the heredoc,
// so we'll need to create an interpolated string node to hold
// them all.
pm_node_list_t parts = { 0 };
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
}
}
@@ -18682,19 +19505,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
cast->parts = parts;
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
- pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
+ pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
cast->base.location = cast->opening_loc;
- node = (pm_node_t *) cast;
+ node = UP(cast);
} else {
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
- pm_node_list_free(&parts);
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
- pm_interpolated_string_node_closing_set(cast, &parser->previous);
+ pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
cast->base.location = cast->opening_loc;
- node = (pm_node_t *) cast;
+ node = UP(cast);
}
// If this is a heredoc that is indented with a ~, then we need
@@ -18719,7 +19541,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_INSTANCE_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18728,34 +19550,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
return node;
}
case PM_TOKEN_INTEGER: {
- pm_node_flags_t base = parser->integer_base;
+ pm_node_flags_t base = parser->integer.base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_IMAGINARY: {
- pm_node_flags_t base = parser->integer_base;
+ pm_node_flags_t base = parser->integer.base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_RATIONAL: {
- pm_node_flags_t base = parser->integer_base;
+ pm_node_flags_t base = parser->integer.base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
- pm_node_flags_t base = parser->integer_base;
+ pm_node_flags_t base = parser->integer.base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
}
case PM_TOKEN_KEYWORD___ENCODING__:
parser_lex(parser);
- return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
+ return UP(pm_source_encoding_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD___FILE__:
parser_lex(parser);
- return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
+ return UP(pm_source_file_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD___LINE__:
parser_lex(parser);
- return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
+ return UP(pm_source_line_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_ALIAS: {
if (binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
@@ -18775,245 +19597,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
}
- } else {
+ } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+ old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
}
- return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
+ return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
}
case PM_SYMBOL_NODE:
case PM_INTERPOLATED_SYMBOL_NODE: {
- if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
+ if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+ old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
}
}
PRISM_FALLTHROUGH
default:
- return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
- }
- }
- case PM_TOKEN_KEYWORD_CASE: {
- size_t opening_newline_index = token_newline_index(parser);
- parser_lex(parser);
-
- pm_token_t case_keyword = parser->previous;
- pm_node_t *predicate = NULL;
-
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
- predicate = NULL;
- } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
- predicate = NULL;
- } else if (!token_begins_expression_p(parser->current.type)) {
- predicate = NULL;
- } else {
- predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
- }
-
- if (match1(parser, PM_TOKEN_KEYWORD_END)) {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
- parser_lex(parser);
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
- return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
- }
-
- // At this point we can create a case node, though we don't yet know
- // if it is a case-in or case-when node.
- pm_token_t end_keyword = not_provided(parser);
- pm_node_t *node;
-
- if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
- pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
- pm_static_literals_t literals = { 0 };
-
- // At this point we've seen a when keyword, so we know this is a
- // case-when node. We will continue to parse the when nodes
- // until we hit the end of the list.
- while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
- parser_lex(parser);
-
- pm_token_t when_keyword = parser->previous;
- pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
-
- do {
- if (accept1(parser, PM_TOKEN_USTAR)) {
- pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
-
- pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
- pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
-
- if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
- } else {
- pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
- pm_when_node_conditions_append(when_node, condition);
-
- // If we found a missing node, then this is a syntax
- // error and we should stop looping.
- if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
-
- // If this is a string node, then we need to mark it
- // as frozen because when clause strings are frozen.
- if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
- pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
- } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
- pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
- }
-
- pm_when_clause_static_literals_add(parser, &literals, condition);
- }
- } while (accept1(parser, PM_TOKEN_COMMA));
-
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
- pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
- }
- } else {
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
- pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
- }
-
- if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
- if (statements != NULL) {
- pm_when_node_statements_set(when_node, statements);
- }
- }
-
- pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
- }
-
- // If we didn't parse any conditions (in or when) then we need
- // to indicate that we have an error.
- if (case_node->conditions.size == 0) {
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
- }
-
- pm_static_literals_free(&literals);
- node = (pm_node_t *) case_node;
- } else {
- pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
-
- // If this is a case-match node (i.e., it is a pattern matching
- // case statement) then we must have a predicate.
- if (predicate == NULL) {
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
- }
-
- // At this point we expect that we're parsing a case-in node. We
- // will continue to parse the in nodes until we hit the end of
- // the list.
- while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
-
- bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
- parser->pattern_matching_newlines = true;
-
- lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
- parser->command_start = false;
- parser_lex(parser);
-
- pm_token_t in_keyword = parser->previous;
-
- pm_constant_id_list_t captures = { 0 };
- pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
-
- parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- pm_constant_id_list_free(&captures);
-
- // Since we're in the top-level of the case-in node we need
- // to check for guard clauses in the form of `if` or
- // `unless` statements.
- if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
- pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
- pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
- } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
- pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
- pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
- }
-
- // Now we need to check for the terminator of the in node's
- // pattern. It can be a newline or semicolon optionally
- // followed by a `then` keyword.
- pm_token_t then_keyword;
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
- then_keyword = parser->previous;
- } else {
- then_keyword = not_provided(parser);
- }
- } else {
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
- then_keyword = parser->previous;
- }
-
- // Now we can actually parse the statements associated with
- // the in node.
- pm_statements_node_t *statements;
- if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- statements = NULL;
- } else {
- statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
- }
-
- // Now that we have the full pattern and statements, we can
- // create the node and attach it to the case node.
- pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
- pm_case_match_node_condition_append(case_node, condition);
- }
-
- // If we didn't parse any conditions (in or when) then we need
- // to indicate that we have an error.
- if (case_node->conditions.size == 0) {
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
- }
-
- node = (pm_node_t *) case_node;
- }
-
- accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
- pm_token_t else_keyword = parser->previous;
- pm_else_node_t *else_node;
-
- if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
- else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
- } else {
- else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
- }
-
- if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
- pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
- } else {
- pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
- }
- }
-
- parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
-
- if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
- pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
- } else {
- pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
+ return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
}
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return node;
}
+ case PM_TOKEN_KEYWORD_CASE:
+ return parse_case(parser, flags, depth);
case PM_TOKEN_KEYWORD_BEGIN: {
size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
@@ -19034,15 +19638,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
-
- begin_node->base.location.end = parser->previous.end;
- pm_begin_node_end_keyword_set(begin_node, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
+ PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
+ pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) begin_node;
+ return UP(begin_node);
}
case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
pm_node_list_t current_block_exits = { 0 };
@@ -19059,16 +19660,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = parser->previous;
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
pm_context_t context = parser->current_context->context;
if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
}
flush_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
+ return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
}
case PM_TOKEN_KEYWORD_BREAK:
case PM_TOKEN_KEYWORD_NEXT:
@@ -19085,29 +19684,44 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
- parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+ pm_token_t next = parser->current;
+ parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
+
+ // Reject `foo && return bar`.
+ if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type));
+ }
+ }
+
+ // It's possible that we've parsed a block argument through our
+ // call to parse_arguments. If we found one, we should mark it
+ // as invalid and destroy it, as we don't have a place for it.
+ if (arguments.block != NULL) {
+ pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
+ pm_node_unreference(parser, arguments.block);
+ arguments.block = NULL;
}
}
switch (keyword.type) {
case PM_TOKEN_KEYWORD_BREAK: {
- pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
+ pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
if (!parser->partial_script) parse_block_exit(parser, node);
return node;
}
case PM_TOKEN_KEYWORD_NEXT: {
- pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
+ pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
if (!parser->partial_script) parse_block_exit(parser, node);
return node;
}
case PM_TOKEN_KEYWORD_RETURN: {
- pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
+ pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
parse_return(parser, node);
return node;
}
default:
assert(false && "unreachable");
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
}
}
case PM_TOKEN_KEYWORD_SUPER: {
@@ -19115,24 +19729,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t keyword = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
if (
- arguments.opening_loc.start == NULL &&
+ arguments.opening_loc.length == 0 &&
arguments.arguments == NULL &&
((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
) {
- return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
+ return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
}
- return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
+ return UP(pm_super_node_create(parser, &keyword, &arguments));
}
case PM_TOKEN_KEYWORD_YIELD: {
parser_lex(parser);
pm_token_t keyword = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1));
// It's possible that we've parsed a block argument through our
// call to parse_arguments_list. If we found one, we should mark it
@@ -19140,462 +19754,57 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// yield node.
if (arguments.block != NULL) {
pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
- pm_node_destroy(parser, arguments.block);
+ pm_node_unreference(parser, arguments.block);
arguments.block = NULL;
}
- pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
+ pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
return node;
}
- case PM_TOKEN_KEYWORD_CLASS: {
- size_t opening_newline_index = token_newline_index(parser);
- parser_lex(parser);
-
- pm_token_t class_keyword = parser->previous;
- pm_do_loop_stack_push(parser, false);
-
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- if (accept1(parser, PM_TOKEN_LESS_LESS)) {
- pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
-
- pm_parser_scope_push(parser, true);
- if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
- }
-
- pm_node_t *statements = NULL;
- if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
- }
-
- if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
- assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
- } else {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
- }
-
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
-
- pm_constant_id_list_t locals;
- pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
- pm_parser_scope_pop(parser);
- pm_do_loop_stack_pop(parser);
-
- flush_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
- }
-
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
- pm_token_t name = parser->previous;
- if (name.type != PM_TOKEN_CONSTANT) {
- pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
- }
-
- pm_token_t inheritance_operator;
- pm_node_t *superclass;
-
- if (match1(parser, PM_TOKEN_LESS)) {
- inheritance_operator = parser->current;
- lex_state_set(parser, PM_LEX_STATE_BEG);
-
- parser->command_start = true;
- parser_lex(parser);
-
- superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
- } else {
- inheritance_operator = not_provided(parser);
- superclass = NULL;
- }
-
- pm_parser_scope_push(parser, true);
-
- if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
- expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
- } else {
- accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- }
- pm_node_t *statements = NULL;
-
- if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
- }
-
- if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
- assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
- } else {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
- }
-
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
-
- if (context_def_p(parser)) {
- pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
- }
-
- pm_constant_id_list_t locals;
- pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
- pm_parser_scope_pop(parser);
- pm_do_loop_stack_pop(parser);
-
- if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
- pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
- }
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
- }
- case PM_TOKEN_KEYWORD_DEF: {
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- pm_token_t def_keyword = parser->current;
- size_t opening_newline_index = token_newline_index(parser);
-
- pm_node_t *receiver = NULL;
- pm_token_t operator = not_provided(parser);
- pm_token_t name;
-
- // This context is necessary for lexing `...` in a bare params
- // correctly. It must be pushed before lexing the first param, so it
- // is here.
- context_push(parser, PM_CONTEXT_DEF_PARAMS);
- parser_lex(parser);
-
- // This will be false if the method name is not a valid identifier
- // but could be followed by an operator.
- bool valid_name = true;
-
- switch (parser->current.type) {
- case PM_CASE_OPERATOR:
- pm_parser_scope_push(parser, true);
- lex_state_set(parser, PM_LEX_STATE_ENDFN);
- parser_lex(parser);
-
- name = parser->previous;
- break;
- case PM_TOKEN_IDENTIFIER: {
- parser_lex(parser);
-
- if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
- receiver = parse_variable_call(parser);
-
- pm_parser_scope_push(parser, true);
- lex_state_set(parser, PM_LEX_STATE_FNAME);
- parser_lex(parser);
-
- operator = parser->previous;
- name = parse_method_definition_name(parser);
- } else {
- pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
- pm_parser_scope_push(parser, true);
-
- name = parser->previous;
- }
-
- break;
- }
- case PM_TOKEN_INSTANCE_VARIABLE:
- case PM_TOKEN_CLASS_VARIABLE:
- case PM_TOKEN_GLOBAL_VARIABLE:
- valid_name = false;
- PRISM_FALLTHROUGH
- case PM_TOKEN_CONSTANT:
- case PM_TOKEN_KEYWORD_NIL:
- case PM_TOKEN_KEYWORD_SELF:
- case PM_TOKEN_KEYWORD_TRUE:
- case PM_TOKEN_KEYWORD_FALSE:
- case PM_TOKEN_KEYWORD___FILE__:
- case PM_TOKEN_KEYWORD___LINE__:
- case PM_TOKEN_KEYWORD___ENCODING__: {
- pm_parser_scope_push(parser, true);
- parser_lex(parser);
-
- pm_token_t identifier = parser->previous;
-
- if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
- lex_state_set(parser, PM_LEX_STATE_FNAME);
- parser_lex(parser);
- operator = parser->previous;
-
- switch (identifier.type) {
- case PM_TOKEN_CONSTANT:
- receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
- break;
- case PM_TOKEN_INSTANCE_VARIABLE:
- receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
- break;
- case PM_TOKEN_CLASS_VARIABLE:
- receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
- break;
- case PM_TOKEN_GLOBAL_VARIABLE:
- receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD_NIL:
- receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD_SELF:
- receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD_TRUE:
- receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD_FALSE:
- receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD___FILE__:
- receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD___LINE__:
- receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD___ENCODING__:
- receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
- break;
- default:
- break;
- }
-
- name = parse_method_definition_name(parser);
- } else {
- if (!valid_name) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
- }
-
- name = identifier;
- }
- break;
- }
- case PM_TOKEN_PARENTHESIS_LEFT: {
- // The current context is `PM_CONTEXT_DEF_PARAMS`, however
- // the inner expression of this parenthesis should not be
- // processed under this context. Thus, the context is popped
- // here.
- context_pop(parser);
- parser_lex(parser);
-
- pm_token_t lparen = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
-
- accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- pm_token_t rparen = parser->previous;
-
- lex_state_set(parser, PM_LEX_STATE_FNAME);
- expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
-
- operator = parser->previous;
- receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
-
- // To push `PM_CONTEXT_DEF_PARAMS` again is for the same
- // reason as described the above.
- pm_parser_scope_push(parser, true);
- context_push(parser, PM_CONTEXT_DEF_PARAMS);
- name = parse_method_definition_name(parser);
- break;
- }
- default:
- pm_parser_scope_push(parser, true);
- name = parse_method_definition_name(parser);
- break;
- }
-
- pm_token_t lparen;
- pm_token_t rparen;
- pm_parameters_node_t *params;
-
- switch (parser->current.type) {
- case PM_TOKEN_PARENTHESIS_LEFT: {
- parser_lex(parser);
- lparen = parser->previous;
-
- if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- params = NULL;
- } else {
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
- }
-
- lex_state_set(parser, PM_LEX_STATE_BEG);
- parser->command_start = true;
-
- context_pop(parser);
- if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
- parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
- }
-
- rparen = parser->previous;
- break;
- }
- case PM_CASE_PARAMETER: {
- // If we're about to lex a label, we need to add the label
- // state to make sure the next newline is ignored.
- if (parser->current.type == PM_TOKEN_LABEL) {
- lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
- }
-
- lparen = not_provided(parser);
- rparen = not_provided(parser);
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
-
- context_pop(parser);
- break;
- }
- default: {
- lparen = not_provided(parser);
- rparen = not_provided(parser);
- params = NULL;
-
- context_pop(parser);
- break;
- }
- }
-
- pm_node_t *statements = NULL;
- pm_token_t equal;
- pm_token_t end_keyword;
-
- if (accept1(parser, PM_TOKEN_EQUAL)) {
- if (token_is_setter_name(&name)) {
- pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
- }
- equal = parser->previous;
-
- context_push(parser, PM_CONTEXT_DEF);
- pm_do_loop_stack_push(parser, false);
- statements = (pm_node_t *) pm_statements_node_create(parser);
-
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
-
- if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
- context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
-
- pm_token_t rescue_keyword = parser->previous;
- pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
- context_pop(parser);
-
- statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
- }
-
- pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
- pm_do_loop_stack_pop(parser);
- context_pop(parser);
- end_keyword = not_provided(parser);
- } else {
- equal = not_provided(parser);
-
- if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
- lex_state_set(parser, PM_LEX_STATE_BEG);
- parser->command_start = true;
- expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
- } else {
- accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- }
-
- pm_accepts_block_stack_push(parser, true);
- pm_do_loop_stack_push(parser, false);
-
- if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
- }
-
- if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
- assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
- } else {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
- }
-
- pm_accepts_block_stack_pop(parser);
- pm_do_loop_stack_pop(parser);
-
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
- end_keyword = parser->previous;
- }
-
- pm_constant_id_list_t locals;
- pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
- pm_parser_scope_pop(parser);
-
- /**
- * If the final character is @. As is the case when defining
- * methods to override the unary operators, we should ignore
- * the @ in the same way we do for symbols.
- */
- pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
-
- flush_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_def_node_create(
- parser,
- name_id,
- &name,
- receiver,
- params,
- statements,
- &locals,
- &def_keyword,
- &operator,
- &lparen,
- &rparen,
- &equal,
- &end_keyword
- );
- }
+ case PM_TOKEN_KEYWORD_CLASS:
+ return parse_class(parser, flags, depth);
+ case PM_TOKEN_KEYWORD_DEF:
+ return parse_def(parser, binding_power, flags, depth);
case PM_TOKEN_KEYWORD_DEFINED: {
parser_lex(parser);
- pm_token_t keyword = parser->previous;
- pm_token_t lparen;
- pm_token_t rparen;
+ pm_token_t keyword = parser->previous;
+ pm_token_t lparen = { 0 };
+ pm_token_t rparen = { 0 };
pm_node_t *expression;
+
context_push(parser, PM_CONTEXT_DEFINED);
+ bool newline = accept1(parser, PM_TOKEN_NEWLINE);
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
lparen = parser->previous;
- expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
- if (parser->recovering) {
- rparen = not_provided(parser);
+ if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
+ lparen = (pm_token_t) { 0 };
} else {
- accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- rparen = parser->previous;
+ expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+
+ if (!parser->recovering) {
+ accept1(parser, PM_TOKEN_NEWLINE);
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+ rparen = parser->previous;
+ }
}
} else {
- lparen = not_provided(parser);
- rparen = not_provided(parser);
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+ expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
}
context_pop(parser);
- return (pm_node_t *) pm_defined_node_create(
+ return UP(pm_defined_node_create(
parser,
- &lparen,
+ NTOK2PTR(lparen),
expression,
- &rparen,
- &PM_LOCATION_TOKEN_VALUE(&keyword)
- );
+ NTOK2PTR(rparen),
+ &keyword
+ ));
}
case PM_TOKEN_KEYWORD_END_UPCASE: {
if (binding_power != PM_BINDING_POWER_STATEMENT) {
@@ -19613,12 +19822,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = parser->previous;
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
- return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
+ return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
}
case PM_TOKEN_KEYWORD_FALSE:
parser_lex(parser);
- return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
+ return UP(pm_false_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_FOR: {
size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
@@ -19634,15 +19843,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
+ index = UP(pm_splat_node_create(parser, &star_operator, name));
} else if (token_begins_expression_p(parser->current.type)) {
- index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+ index = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
} else {
pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
- index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
+ index = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
}
// Now, if there are multiple index expressions, parse them out.
@@ -19658,16 +19867,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
pm_token_t in_keyword = parser->previous;
- pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
+ pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
pm_do_loop_stack_pop(parser);
- pm_token_t do_keyword;
+ pm_token_t do_keyword = { 0 };
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
do_keyword = parser->previous;
} else {
- do_keyword = not_provided(parser);
if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_str(parser->current.type));
}
}
@@ -19677,13 +19885,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
- return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
+ return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
}
case PM_TOKEN_KEYWORD_IF:
if (parser_end_of_line_p(parser)) {
- PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
}
size_t opening_newline_index = token_newline_index(parser);
@@ -19700,26 +19908,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
- if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
- pm_node_destroy(parser, name);
+ if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
} else {
- pm_undef_node_append(undef, name);
+ pm_undef_node_append(parser->arena, undef, name);
while (match1(parser, PM_TOKEN_COMMA)) {
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
parser_lex(parser);
name = parse_undef_argument(parser, (uint16_t) (depth + 1));
- if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
- pm_node_destroy(parser, name);
+ if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
break;
}
- pm_undef_node_append(undef, name);
+ pm_undef_node_append(parser->arena, undef, name);
}
}
- return (pm_node_t *) undef;
+ return UP(undef);
}
case PM_TOKEN_KEYWORD_NOT: {
parser_lex(parser);
@@ -19728,28 +19934,46 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_arguments_t arguments = { 0 };
pm_node_t *receiver = NULL;
+ // The `not` keyword without parentheses is only valid in contexts
+ // where it would be parsed as an expression (i.e., at or below
+ // the `not` binding power level). In other contexts (e.g., method
+ // arguments, array elements, assignment right-hand sides),
+ // parentheses are required: `not(x)`. An exception is made for
+ // endless def bodies, where `not` is valid as both `arg` and
+ // `command` (e.g., `def f = not 1`, `def f = not foo bar`).
+ if (binding_power > PM_BINDING_POWER_NOT && !(flags & PM_PARSE_IN_ENDLESS_DEF) && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+ if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
+ pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
+ } else {
+ accept1(parser, PM_TOKEN_NEWLINE);
+ pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
+ }
+
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
+ }
+
accept1(parser, PM_TOKEN_NEWLINE);
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
pm_token_t lparen = parser->previous;
if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous);
+ receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
} else {
- arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
- receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
+ arguments.opening_loc = TOK2LOC(parser, &lparen);
+ receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
if (!parser->recovering) {
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments.closing_loc = TOK2LOC(parser, &parser->previous);
}
}
} else {
- receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
+ receiver = parse_expression(parser, PM_BINDING_POWER_NOT, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
}
- return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
+ return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
}
case PM_TOKEN_KEYWORD_UNLESS: {
size_t opening_newline_index = token_newline_index(parser);
@@ -19757,81 +19981,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
}
- case PM_TOKEN_KEYWORD_MODULE: {
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- size_t opening_newline_index = token_newline_index(parser);
- parser_lex(parser);
- pm_token_t module_keyword = parser->previous;
-
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
- pm_token_t name;
-
- // If we can recover from a syntax error that occurred while parsing
- // the name of the module, then we'll handle that here.
- if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
- }
-
- while (accept1(parser, PM_TOKEN_COLON_COLON)) {
- pm_token_t double_colon = parser->previous;
-
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
- }
-
- // Here we retrieve the name of the module. If it wasn't a constant,
- // then it's possible that `module foo` was passed, which is a
- // syntax error. We handle that here as well.
- name = parser->previous;
- if (name.type != PM_TOKEN_CONSTANT) {
- pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
- }
-
- pm_parser_scope_push(parser, true);
- accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
- pm_node_t *statements = NULL;
-
- if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
- }
-
- if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
- assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
- } else {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
- }
-
- pm_constant_id_list_t locals;
- pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
- pm_parser_scope_pop(parser);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
-
- if (context_def_p(parser)) {
- pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
- }
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
- }
+ case PM_TOKEN_KEYWORD_MODULE:
+ return parse_module(parser, flags, depth);
case PM_TOKEN_KEYWORD_NIL:
parser_lex(parser);
- return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
+ return UP(pm_nil_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_REDO: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
if (!parser->partial_script) parse_block_exit(parser, node);
return node;
@@ -19839,17 +19997,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
case PM_TOKEN_KEYWORD_RETRY: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
parse_retry(parser, node);
return node;
}
case PM_TOKEN_KEYWORD_SELF:
parser_lex(parser);
- return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
+ return UP(pm_self_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_TRUE:
parser_lex(parser);
- return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
+ return UP(pm_true_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_UNTIL: {
size_t opening_newline_index = token_newline_index(parser);
@@ -19858,16 +20016,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
pm_do_loop_stack_pop(parser);
context_pop(parser);
- pm_token_t do_keyword;
+ pm_token_t do_keyword = { 0 };
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
do_keyword = parser->previous;
} else {
- do_keyword = not_provided(parser);
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
}
@@ -19880,9 +20037,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
- return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
+ return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
}
case PM_TOKEN_KEYWORD_WHILE: {
size_t opening_newline_index = token_newline_index(parser);
@@ -19892,16 +20049,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
pm_do_loop_stack_pop(parser);
context_pop(parser);
- pm_token_t do_keyword;
+ pm_token_t do_keyword = { 0 };
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
do_keyword = parser->previous;
} else {
- do_keyword = not_provided(parser);
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
}
@@ -19914,381 +20070,122 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
- return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
+ return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
}
case PM_TOKEN_PERCENT_LOWER_I: {
parser_lex(parser);
pm_token_t opening = parser->previous;
pm_array_node_t *array = pm_array_node_create(parser, &opening);
+ pm_node_t *current = NULL;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
- }
-
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
- }
-
- pm_token_t closing = parser->current;
- if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
- }
- pm_array_node_close_set(array, &closing);
-
- return (pm_node_t *) array;
- }
- case PM_TOKEN_PERCENT_UPPER_I: {
- parser_lex(parser);
- pm_token_t opening = parser->previous;
- pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- // This is the current node that we are parsing that will be added to the
- // list of elements.
- pm_node_t *current = NULL;
-
- while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- switch (parser->current.type) {
- case PM_TOKEN_WORDS_SEP: {
- if (current == NULL) {
- // If we hit a separator before we have any content, then we don't
- // need to do anything.
- } else {
- // If we hit a separator after we've hit content, then we need to
- // append that content to the list and reset the current node.
- pm_array_node_elements_append(array, current);
- current = NULL;
- }
-
+ // Interpolation is not possible but nested heredocs can still lead to
+ // consecutive (disjoint) string tokens when the final newline is escaped.
+ while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ // Record the string node, moving to interpolation if needed.
+ if (current == NULL) {
+ current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
+ parser_lex(parser);
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+ parser_lex(parser);
+ pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+ pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+ pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
+ pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
+ pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
parser_lex(parser);
- break;
- }
- case PM_TOKEN_STRING_CONTENT: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- if (current == NULL) {
- // If we hit content and the current node is NULL, then this is
- // the first string content we've seen. In that case we're going
- // to create a new string node and set that to the current.
- current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
- parser_lex(parser);
- } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
- // If we hit string content and the current node is an
- // interpolated string, then we need to append the string content
- // to the list of child nodes.
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
- parser_lex(parser);
-
- pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
- } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
- // If we hit string content and the current node is a symbol node,
- // then we need to convert the current node into an interpolated
- // string and add the string content to the list of child nodes.
- pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
- pm_token_t bounds = not_provided(parser);
-
- pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
- pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
- pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
- parser_lex(parser);
-
- pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_symbol_node_append(interpolated, first_string);
- pm_interpolated_symbol_node_append(interpolated, second_string);
-
- xfree(current);
- current = (pm_node_t *) interpolated;
- } else {
- assert(false && "unreachable");
- }
-
- break;
- }
- case PM_TOKEN_EMBVAR: {
- bool start_location_set = false;
- if (current == NULL) {
- // If we hit an embedded variable and the current node is NULL,
- // then this is the start of a new string. We'll set the current
- // node to a new interpolated string.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
- // If we hit an embedded variable and the current node is a string
- // node, then we'll convert the current into an interpolated
- // string and add the string node to the list of parts.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-
- current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
- pm_interpolated_symbol_node_append(interpolated, current);
- interpolated->base.location.start = current->location.start;
- start_location_set = true;
- current = (pm_node_t *) interpolated;
- } else {
- // If we hit an embedded variable and the current node is an
- // interpolated string, then we'll just add the embedded variable.
- }
- pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
- pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
- if (!start_location_set) {
- current->location.start = part->location.start;
- }
- break;
- }
- case PM_TOKEN_EMBEXPR_BEGIN: {
- bool start_location_set = false;
- if (current == NULL) {
- // If we hit an embedded expression and the current node is NULL,
- // then this is the start of a new string. We'll set the current
- // node to a new interpolated string.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
- // If we hit an embedded expression and the current node is a
- // string node, then we'll convert the current into an
- // interpolated string and add the string node to the list of
- // parts.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-
- current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
- pm_interpolated_symbol_node_append(interpolated, current);
- interpolated->base.location.start = current->location.start;
- start_location_set = true;
- current = (pm_node_t *) interpolated;
- } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
- // If we hit an embedded expression and the current node is an
- // interpolated string, then we'll just continue on.
- } else {
- assert(false && "unreachable");
- }
+ pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
+ pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
- pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
- pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
- if (!start_location_set) {
- current->location.start = part->location.start;
- }
- break;
+ // current is arena-allocated so no explicit free is needed.
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
}
- default:
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
- parser_lex(parser);
- break;
}
- }
- // If we have a current node, then we need to append it to the list.
- if (current) {
- pm_array_node_elements_append(array, current);
+ if (current) {
+ pm_array_node_elements_append(parser->arena, array, current);
+ current = NULL;
+ } else {
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+ }
}
pm_token_t closing = parser->current;
if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
} else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
}
- pm_array_node_close_set(array, &closing);
+ pm_array_node_close_set(parser, array, &closing);
- return (pm_node_t *) array;
+ return UP(array);
}
+ case PM_TOKEN_PERCENT_UPPER_I:
+ return parse_symbol_array(parser, depth);
case PM_TOKEN_PERCENT_LOWER_W: {
parser_lex(parser);
pm_token_t opening = parser->previous;
pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- // skip all leading whitespaces
- accept1(parser, PM_TOKEN_WORDS_SEP);
+ pm_node_t *current = NULL;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
- pm_array_node_elements_append(array, string);
+ // Interpolation is not possible but nested heredocs can still lead to
+ // consecutive (disjoint) string tokens when the final newline is escaped.
+ while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+
+ // Record the string node, moving to interpolation if needed.
+ if (current == NULL) {
+ current = string;
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, interpolated, current);
+ pm_interpolated_string_node_append(parser, interpolated, string);
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
+ parser_lex(parser);
}
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ if (current) {
+ pm_array_node_elements_append(parser->arena, array, current);
+ current = NULL;
+ } else {
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ }
}
pm_token_t closing = parser->current;
if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
}
- pm_array_node_close_set(array, &closing);
- return (pm_node_t *) array;
- }
- case PM_TOKEN_PERCENT_UPPER_W: {
- parser_lex(parser);
- pm_token_t opening = parser->previous;
- pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- // This is the current node that we are parsing that will be added
- // to the list of elements.
- pm_node_t *current = NULL;
-
- while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- switch (parser->current.type) {
- case PM_TOKEN_WORDS_SEP: {
- // Reset the explicit encoding if we hit a separator
- // since each element can have its own encoding.
- parser->explicit_encoding = NULL;
-
- if (current == NULL) {
- // If we hit a separator before we have any content,
- // then we don't need to do anything.
- } else {
- // If we hit a separator after we've hit content,
- // then we need to append that content to the list
- // and reset the current node.
- pm_array_node_elements_append(array, current);
- current = NULL;
- }
-
- parser_lex(parser);
- break;
- }
- case PM_TOKEN_STRING_CONTENT: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
- pm_node_flag_set(string, parse_unescaped_encoding(parser));
- parser_lex(parser);
-
- if (current == NULL) {
- // If we hit content and the current node is NULL,
- // then this is the first string content we've seen.
- // In that case we're going to create a new string
- // node and set that to the current.
- current = string;
- } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
- // If we hit string content and the current node is
- // an interpolated string, then we need to append
- // the string content to the list of child nodes.
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
- } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
- // If we hit string content and the current node is
- // a string node, then we need to convert the
- // current node into an interpolated string and add
- // the string content to the list of child nodes.
- pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(interpolated, current);
- pm_interpolated_string_node_append(interpolated, string);
- current = (pm_node_t *) interpolated;
- } else {
- assert(false && "unreachable");
- }
-
- break;
- }
- case PM_TOKEN_EMBVAR: {
- if (current == NULL) {
- // If we hit an embedded variable and the current
- // node is NULL, then this is the start of a new
- // string. We'll set the current node to a new
- // interpolated string.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
- // If we hit an embedded variable and the current
- // node is a string node, then we'll convert the
- // current into an interpolated string and add the
- // string node to the list of parts.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(interpolated, current);
- current = (pm_node_t *) interpolated;
- } else {
- // If we hit an embedded variable and the current
- // node is an interpolated string, then we'll just
- // add the embedded variable.
- }
-
- pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
- break;
- }
- case PM_TOKEN_EMBEXPR_BEGIN: {
- if (current == NULL) {
- // If we hit an embedded expression and the current
- // node is NULL, then this is the start of a new
- // string. We'll set the current node to a new
- // interpolated string.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
- // If we hit an embedded expression and the current
- // node is a string node, then we'll convert the
- // current into an interpolated string and add the
- // string node to the list of parts.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(interpolated, current);
- current = (pm_node_t *) interpolated;
- } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
- // If we hit an embedded expression and the current
- // node is an interpolated string, then we'll just
- // continue on.
- } else {
- assert(false && "unreachable");
- }
-
- pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
- break;
- }
- default:
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
- parser_lex(parser);
- break;
- }
- }
-
- // If we have a current node, then we need to append it to the list.
- if (current) {
- pm_array_node_elements_append(array, current);
- }
-
- pm_token_t closing = parser->current;
- if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
- }
-
- pm_array_node_close_set(array, &closing);
- return (pm_node_t *) array;
+ pm_array_node_close_set(parser, array, &closing);
+ return UP(array);
}
+ case PM_TOKEN_PERCENT_UPPER_W:
+ return parse_string_array(parser, depth);
case PM_TOKEN_REGEXP_BEGIN: {
pm_token_t opening = parser->current;
parser_lex(parser);
@@ -20305,10 +20202,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
- pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
-
- return node;
+ pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
+ pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
+ return UP(node);
}
pm_interpolated_regular_expression_node_t *interpolated;
@@ -20320,7 +20216,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// regular expression) or if it's not then it has interpolation.
pm_string_t unescaped = parser->current_string;
pm_token_t content = parser->current;
- bool ascii_only = parser->current_regular_expression_ascii_only;
parser_lex(parser);
// If we hit an end, then we can create a regular expression
@@ -20329,26 +20224,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
- // If we're not immediately followed by a =~, then we want
- // to parse all of the errors at this point. If it is
- // followed by a =~, then it will get parsed higher up while
- // parsing the named captures as well.
+ // If we're not immediately followed by a =~, then we
+ // parse and validate now. If it is followed by a =~,
+ // then it will get parsed in the =~ handler where
+ // named captures can also be extracted.
if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
- parse_regular_expression_errors(parser, node);
+ pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
}
- pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
- return (pm_node_t *) node;
+ return UP(node);
}
// If we get here, then we have interpolation so we'll need to create
// a regular expression node with interpolation.
interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
-
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
// This is extremely strange, but the first string part of a
// regular expression will always be tagged as binary if we
@@ -20356,7 +20247,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
}
- pm_interpolated_regular_expression_node_append(interpolated, part);
+ pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
} else {
// If the first part of the body of the regular expression is not a
// string content, then we have interpolation and we need to create an
@@ -20369,20 +20260,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *part;
while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_interpolated_regular_expression_node_append(interpolated, part);
+ pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
}
}
pm_token_t closing = parser->current;
if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
} else {
expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
}
pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
- return (pm_node_t *) interpolated;
+ return UP(interpolated);
}
case PM_TOKEN_BACKTICK:
case PM_TOKEN_PERCENT_LOWER_X: {
@@ -20404,7 +20295,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
};
parser_lex(parser);
- return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
+ return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
}
pm_interpolated_x_string_node_t *node;
@@ -20419,7 +20310,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
if (match1(parser, PM_TOKEN_STRING_END)) {
- pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
pm_node_flag_set(node, parse_unescaped_encoding(parser));
parser_lex(parser);
return node;
@@ -20429,13 +20320,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// create a string node with interpolation.
node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
pm_node_flag_set(part, parse_unescaped_encoding(parser));
- pm_interpolated_xstring_node_append(node, part);
+ pm_interpolated_xstring_node_append(parser->arena, node, part);
} else {
// If the first part of the body of the string is not a string
// content, then we have interpolation and we need to create an
@@ -20446,20 +20334,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *part;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_interpolated_xstring_node_append(node, part);
+ pm_interpolated_xstring_node_append(parser->arena, node, part);
}
}
pm_token_t closing = parser->current;
if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
}
- pm_interpolated_xstring_node_closing_set(node, &closing);
+ pm_interpolated_xstring_node_closing_set(parser, node, &closing);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_USTAR: {
parser_lex(parser);
@@ -20469,17 +20357,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// still lex past it though and create a missing node place.
if (binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_prefix(parser, diag_id);
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
}
pm_token_t operator = parser->previous;
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
+ pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
if (match1(parser, PM_TOKEN_COMMA)) {
return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -20495,11 +20383,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_TILDE: {
if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20508,10 +20396,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UMINUS: {
if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20520,22 +20408,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UMINUS_NUM: {
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
pm_token_t exponent_operator = parser->previous;
- pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
- node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
- node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
+ pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+ node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
+ node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
} else {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE:
@@ -20545,7 +20433,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parse_negative_numeric(node);
break;
default:
- node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
+ node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
break;
}
}
@@ -20579,13 +20467,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
+ pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
break;
}
case PM_CASE_PARAMETER: {
pm_accepts_block_stack_push(parser, false);
- pm_token_t opening = not_provided(parser);
- block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
+ block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
break;
}
@@ -20603,39 +20490,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
opening = parser->previous;
if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
- body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
+ body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
} else {
expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
opening = parser->previous;
if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
- pm_accepts_block_stack_push(parser, true);
- body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
+ body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
}
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
- body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
+ body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
} else {
parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
}
pm_constant_id_list_t locals;
pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
- pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
+ pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
pm_parser_scope_pop(parser);
pm_accepts_block_stack_pop(parser);
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
+ return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
}
case PM_TOKEN_UPLUS: {
if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20644,13 +20529,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_STRING_BEGIN:
- return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
+ return parse_strings(parser, NULL, flags & PM_PARSE_ACCEPTS_LABEL, (uint16_t) (depth + 1));
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
@@ -20673,17 +20558,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// If we get here, then we are assuming this token is closing a
// parent context, so we'll indicate that to the user so that
// they know how we behaved.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_str(parser->current.type), context_human(recoverable));
} else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
// We're going to make a special case here, because "cannot
// parse expression" is pretty generic, and we know here that we
// have an unexpected token.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_str(parser->current.type));
} else {
pm_parser_err_prefix(parser, diag_id);
}
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
}
}
}
@@ -20698,8 +20583,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
* or any of the binary operators that can be written to a variable.
*/
static pm_node_t *
-parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
- pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
+parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ pm_node_t *value = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
+
+ // Assignments whose value is a command call (e.g., a = b c) can only
+ // be followed by modifiers (if/unless/while/until/rescue) and not by
+ // operators with higher binding power. If we find one, emit an error
+ // and skip the operator and its right-hand side.
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ parser_lex(parser);
+ parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ }
// Contradicting binding powers, the right-hand-side value of the assignment
// allows the `rescue` modifier.
@@ -20709,10 +20604,10 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
pm_token_t rescue = parser->current;
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
context_pop(parser);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
+ return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
}
return value;
@@ -20767,35 +20662,46 @@ parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
* operator that allows multiple values after it.
*/
static pm_node_t *
-parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
bool permitted = true;
if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
- pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_starred_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MODIFIER ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
parse_assignment_value_local(parser, value);
bool single_value = true;
- if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
+ // Block calls (command call + do block, e.g., `foo bar do end`) cannot
+ // be followed by a comma to form a multi-value RHS because each element
+ // of a multi-value assignment must be an `arg`, not a `block_call`.
+ if (previous_binding_power == PM_BINDING_POWER_STATEMENT && !pm_block_call_p(value) && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
single_value = false;
- pm_token_t opening = not_provided(parser);
- pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- pm_array_node_elements_append(array, value);
- value = (pm_node_t *) array;
+ pm_array_node_t *array = pm_array_node_create(parser, NULL);
+ pm_array_node_elements_append(parser->arena, array, value);
+ value = UP(array);
while (accept1(parser, PM_TOKEN_COMMA)) {
pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
- pm_array_node_elements_append(array, element);
- if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
+ pm_array_node_elements_append(parser->arena, array, element);
+ if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
parse_assignment_value_local(parser, element);
}
}
+ // Assignments whose value is a command call (e.g., a = b c) can only
+ // be followed by modifiers (if/unless/while/until/rescue) and not by
+ // operators with higher binding power. If we find one, emit an error
+ // and skip the operator and its right-hand side.
+ if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ parser_lex(parser);
+ parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ }
+
// Contradicting binding powers, the right-hand-side value of the assignment
// allows the `rescue` modifier.
if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
@@ -20810,15 +20716,15 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
// but without parentheses.
if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
pm_call_node_t *call_node = (pm_call_node_t *) value;
- if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
+ if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
accepts_command_call_inner = true;
}
}
- pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (accepts_command_call_inner ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
context_pop(parser);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
+ return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
}
return value;
@@ -20835,43 +20741,18 @@ static void
parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
// If the call carries arguments, report PM_ERR_OPERATOR_WRITE_ARGUMENTS at the
// operator token and detach the arguments from the call (the field is set to
// NULL after the node is released).
if (call_node->arguments != NULL) {
pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
- pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
+ pm_node_unreference(parser, UP(call_node->arguments));
call_node->arguments = NULL;
}
// Same handling for an attached block: report PM_ERR_OPERATOR_WRITE_BLOCK at
// the operator token, release the block node, and clear the field.
if (call_node->block != NULL) {
pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
- pm_node_destroy(parser, (pm_node_t *) call_node->block);
+ pm_node_unreference(parser, UP(call_node->block));
call_node->block = NULL;
}
}
-/**
- * This struct is used to pass information between the regular expression parser
- * and the named capture callback.
- */
-typedef struct {
- /** The parser that is parsing the regular expression. */
- pm_parser_t *parser;
-
- /** The call node wrapping the regular expression node. */
- pm_call_node_t *call;
-
- /** The match write node that is being created. */
- pm_match_write_node_t *match;
-
- /** The list of names that have been parsed. */
- pm_constant_id_list_t names;
-
- /**
- * Whether the content of the regular expression is shared. This impacts
- * whether or not we used owned constants or shared constants in the
- * constant pool for the names of the captures.
- */
- bool shared;
-} parse_regular_expression_named_capture_data_t;
-
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
cursor++;
@@ -20892,7 +20773,7 @@ pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const
return cursor;
}
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
uint8_t value = (uint8_t) (*cursor - '0');
cursor++;
@@ -20911,8 +20792,8 @@ pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, con
return cursor;
}
-static inline const uint8_t *
-pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
+static PRISM_INLINE const uint8_t *
+pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
const uint8_t *start = cursor - 1;
cursor++;
@@ -20923,7 +20804,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
if (*cursor != '{') {
size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
- uint32_t value = escape_unicode(parser, cursor, length);
+ uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
@@ -20943,7 +20824,10 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
}
size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
- uint32_t value = escape_unicode(parser, cursor, length);
+ if (length == 0) {
+ break;
+ }
+ uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
(void) pm_buffer_append_unicode_codepoint(unescaped, value);
cursor += length;
@@ -20953,7 +20837,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
}
static void
-pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
+pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
const uint8_t *end = source + length;
pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
@@ -20971,7 +20855,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8
cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
break;
case 'u':
- cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
+ cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
break;
default:
pm_buffer_append_byte(unescaped, '\\');
@@ -20993,10 +20877,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8
* capture group.
*/
static void
-parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
- parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
-
- pm_parser_t *parser = callback_data->parser;
+parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *capture, bool shared, pm_regexp_name_data_t *callback_data) {
pm_call_node_t *call = callback_data->call;
pm_constant_id_list_t *names = &callback_data->names;
@@ -21014,55 +20895,56 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
// unescaped, which is what we need.
const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
if (PRISM_UNLIKELY(cursor != NULL)) {
- pm_named_capture_escape(parser, &unescaped, source, length, cursor);
+ pm_named_capture_escape(parser, &unescaped, source, length, cursor, shared ? NULL : &call->receiver->location);
source = (const uint8_t *) pm_buffer_value(&unescaped);
length = pm_buffer_length(&unescaped);
}
- pm_location_t location;
+ const uint8_t *start;
+ const uint8_t *end;
pm_constant_id_t name;
// If the name of the capture group isn't a valid identifier, we do
// not add it to the local table.
if (!pm_slice_is_valid_local(parser, source, source + length)) {
- pm_buffer_free(&unescaped);
+ pm_buffer_cleanup(&unescaped);
return;
}
- if (callback_data->shared) {
+ if (shared) {
// If the unescaped string is a slice of the source, then we can
// copy the names directly. The pointers will line up.
- location = (pm_location_t) { .start = source, .end = source + length };
- name = pm_parser_constant_id_location(parser, location.start, location.end);
+ start = source;
+ end = source + length;
+ name = pm_parser_constant_id_raw(parser, start, end);
} else {
// Otherwise, the name is a slice of the malloc-ed owned string,
// in which case we need to copy it out into a new string.
- location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
-
- void *memory = xmalloc(length);
- if (memory == NULL) abort();
+ start = parser->start + PM_NODE_START(call->receiver);
+ end = parser->start + PM_NODE_END(call->receiver);
+ uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
memcpy(memory, source, length);
- name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
+ name = pm_parser_constant_id_owned(parser, memory, length);
}
// Add this name to the list of constants if it is valid, not duplicated,
// and not a keyword.
if (name != 0 && !pm_constant_id_list_includes(names, name)) {
- pm_constant_id_list_append(names, name);
+ pm_constant_id_list_append(parser->arena, names, name);
int depth;
if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
// If the local is not already a local but it is a keyword, then we
// do not want to add a capture for this.
if (pm_local_is_keyword((const char *) source, length)) {
- pm_buffer_free(&unescaped);
+ pm_buffer_cleanup(&unescaped);
return;
}
// If the identifier is not already a local, then we will add it to
// the local table.
- pm_parser_local_add(parser, name, location.start, location.end, 0);
+ pm_parser_local_add(parser, name, start, end, 0);
}
// Here we lazily create the MatchWriteNode since we know we're
@@ -21073,45 +20955,37 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
// Next, create the local variable target and add it to the list of
// targets for the match.
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
- pm_node_list_append(&callback_data->match->targets, target);
+ pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
+ pm_node_list_append(parser->arena, &callback_data->match->targets, target);
}
- pm_buffer_free(&unescaped);
+ pm_buffer_cleanup(&unescaped);
}
/**
- * Potentially change a =~ with a regular expression with named captures into a
- * match write node.
+ * Potentially change a =~ with an interpolated regular expression with named
+ * captures into a match write node. This is for the interpolated case where
+ * we have concatenated content rather than a regular expression node.
*/
static pm_node_t *
-parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
- parse_regular_expression_named_capture_data_t callback_data = {
- .parser = parser,
+parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
+ pm_regexp_name_data_t callback_data = {
// State handed to parse_regular_expression_named_capture: the wrapping call,
// the match write node (NULL until one is created), and the names seen so far.
.call = call,
+ .match = NULL,
.names = { 0 },
- .shared = content->type == PM_STRING_SHARED
};
- parse_regular_expression_error_data_t error_data = {
- .parser = parser,
- .start = call->receiver->location.start,
- .end = call->receiver->location.end,
- .shared = content->type == PM_STRING_SHARED
- };
-
- pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
- pm_constant_id_list_free(&callback_data.names);
// Hand the content bytes and the capture callback to the regexp scanner.
// NOTE(review): the literal `false` presumably is the `shared` flag that ends
// up as the callback's `shared` parameter — confirm against the declaration of
// pm_regexp_parse_named_captures.
+ pm_regexp_parse_named_captures(parser, pm_string_source(content), pm_string_length(content), false, extended_mode, parse_regular_expression_named_capture, &callback_data);
// The callback creates the match write node lazily, so a non-NULL match means
// at least one capture produced a target; return that node in place of the
// call. Otherwise the call is returned unchanged.
if (callback_data.match != NULL) {
- return (pm_node_t *) callback_data.match;
+ return UP(callback_data.match);
} else {
- return (pm_node_t *) call;
+ return UP(call);
}
}
-static inline pm_node_t *
-parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
+static PRISM_INLINE pm_node_t *
+parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
pm_token_t token = parser->current;
switch (token.type) {
@@ -21124,13 +20998,20 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// is parsed because it could be referenced in the value.
pm_call_node_t *call_node = (pm_call_node_t *) node;
if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
+ pm_parser_local_add_location(parser, &call_node->message_loc, 0);
}
}
PRISM_FALLTHROUGH
case PM_CASE_WRITABLE: {
+ // When we have `it = value`, we need to add `it` as a local
+ // variable before parsing the value, in case the value
+ // references the variable.
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ pm_parser_local_add_location(parser, &node->location, 0);
+ }
+
parser_lex(parser);
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
@@ -21143,8 +21024,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_multi_target_node_targets_append(parser, multi_target, node);
parser_lex(parser);
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
- return parse_write(parser, (pm_node_t *) multi_target, &token, value);
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+ return parse_write(parser, UP(multi_target), &token, value);
}
case PM_SOURCE_ENCODING_NODE:
case PM_FALSE_NODE:
@@ -21156,7 +21037,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// In these special cases, we have specific error messages
// and we will replace them with local variable writes.
parser_lex(parser);
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
return parse_unwriteable_write(parser, node, &token, value);
}
default:
@@ -21177,71 +21058,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
- parse_target_implicit_parameter(parser, node);
- pm_node_destroy(parser, node);
+ pm_node_unreference(parser, node);
return result;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
- if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
- PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
- parse_target_implicit_parameter(parser, node);
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+ PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
+ pm_node_unreference(parser, node);
}
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
- pm_node_destroy(parser, node);
return result;
}
case PM_CALL_NODE: {
@@ -21251,16 +21126,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- pm_location_t *message_loc = &cast->message_loc;
- pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
-
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
- pm_node_destroy(parser, (pm_node_t *) cast);
return result;
}
@@ -21272,8 +21144,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// this is an aref expression, and we can transform it into
// an aset expression.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ return UP(pm_index_and_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
@@ -21284,8 +21156,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ return UP(pm_call_and_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -21311,71 +21183,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
- parse_target_implicit_parameter(parser, node);
- pm_node_destroy(parser, node);
+ pm_node_unreference(parser, node);
return result;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
- if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
- PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
- parse_target_implicit_parameter(parser, node);
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+ PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
+ pm_node_unreference(parser, node);
}
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
- pm_node_destroy(parser, node);
return result;
}
case PM_CALL_NODE: {
@@ -21385,16 +21251,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- pm_location_t *message_loc = &cast->message_loc;
- pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
-
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
- pm_node_destroy(parser, (pm_node_t *) cast);
return result;
}
@@ -21406,8 +21269,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// this is an aref expression, and we can transform it into
// an aset expression.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ return UP(pm_index_or_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
@@ -21418,8 +21281,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ return UP(pm_call_or_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -21455,71 +21318,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
- parse_target_implicit_parameter(parser, node);
- pm_node_destroy(parser, node);
+ pm_node_unreference(parser, node);
return result;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
- if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
- PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
- parse_target_implicit_parameter(parser, node);
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+ PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
+ pm_node_unreference(parser, node);
}
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
- pm_node_destroy(parser, node);
return result;
}
case PM_CALL_NODE: {
@@ -21530,14 +21387,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- pm_location_t *message_loc = &cast->message_loc;
- pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
+ pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
-
- pm_node_destroy(parser, (pm_node_t *) cast);
return result;
}
@@ -21545,8 +21399,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// this is an aref expression, and we can transform it into
// an aset expression.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
@@ -21557,8 +21411,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -21571,7 +21425,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// In this case we have an operator but we don't know what it's for.
// We need to treat it as an error. For now, we'll mark it as an error
// and just skip right past it.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_str(parser->current.type));
return node;
}
}
@@ -21579,15 +21433,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_TOKEN_KEYWORD_AND: {
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
+ pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_AND ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_and_node_create(parser, node, &token, right));
}
case PM_TOKEN_KEYWORD_OR:
case PM_TOKEN_PIPE_PIPE: {
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
+ pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_OR ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_or_node_create(parser, node, &token, right));
}
case PM_TOKEN_EQUAL_TILDE: {
// Note that we _must_ parse the value before adding the local
@@ -21598,11 +21452,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
//
// In this case, `foo` should be a method call and not a local yet.
parser_lex(parser);
- pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
// By default, we're going to create a call node and then return it.
pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
- pm_node_t *result = (pm_node_t *) call;
+ pm_node_t *result = UP(call);
// If the receiver of this =~ is a regular expression node, then we
// need to introduce local variables for it based on its named
@@ -21643,14 +21497,25 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_string_t owned;
pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
- result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
- pm_string_free(&owned);
+ result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
+ pm_string_cleanup(&owned);
}
} else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
- // If we have a regular expression node, then we can just parse
- // the named captures directly off the unescaped string.
- const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
- result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
+ // If we have a regular expression node, then we can parse
+ // the named captures and validate encoding in one pass.
+ pm_regular_expression_node_t *regexp = (pm_regular_expression_node_t *) node;
+
+ pm_regexp_name_data_t name_data = {
+ .call = call,
+ .match = NULL,
+ .names = { 0 },
+ };
+
+ pm_node_flag_set(UP(regexp), pm_regexp_parse(parser, regexp, parse_regular_expression_named_capture, &name_data));
+
+ if (name_data.match != NULL) {
+ result = UP(name_data.match);
+ }
}
return result;
@@ -21682,21 +21547,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_RESCUE_MODIFIER_NODE: {
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
case PM_AND_NODE: {
pm_and_node_t *cast = (pm_and_node_t *) node;
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
case PM_OR_NODE: {
pm_or_node_t *cast = (pm_or_node_t *) node;
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
@@ -21704,20 +21569,20 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
break;
}
- pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
+ pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
}
case PM_TOKEN_GREATER:
case PM_TOKEN_GREATER_EQUAL:
case PM_TOKEN_LESS:
case PM_TOKEN_LESS_EQUAL: {
if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
- PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
}
parser_lex(parser);
- pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
+ pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
}
case PM_TOKEN_AMPERSAND_DOT:
case PM_TOKEN_DOT: {
@@ -21728,28 +21593,28 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// This if statement handles the foo.() syntax.
if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
+ return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
}
switch (PM_NODE_TYPE(node)) {
case PM_RESCUE_MODIFIER_NODE: {
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
case PM_AND_NODE: {
pm_and_node_t *cast = (pm_and_node_t *) node;
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
case PM_OR_NODE: {
pm_or_node_t *cast = (pm_or_node_t *) node;
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
@@ -21770,23 +21635,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
break;
}
default: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
- message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_str(parser->current.type));
+ message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
}
}
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
if (
(previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
arguments.arguments == NULL &&
- arguments.opening_loc.start == NULL &&
+ arguments.opening_loc.length == 0 &&
match1(parser, PM_TOKEN_COMMA)
) {
- return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
} else {
- return (pm_node_t *) call;
+ return UP(call);
}
}
case PM_TOKEN_DOT_DOT:
@@ -21795,40 +21660,40 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_node_t *right = NULL;
if (token_begins_expression_p(parser->current.type)) {
- right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ right = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
}
- return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
+ return UP(pm_range_node_create(parser, node, &token, right));
}
case PM_TOKEN_KEYWORD_IF_MODIFIER: {
pm_token_t keyword = parser->current;
parser_lex(parser);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
}
case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
pm_token_t keyword = parser->current;
parser_lex(parser);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
}
case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
parser_lex(parser);
pm_statements_node_t *statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, statements, node, true);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
}
case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
parser_lex(parser);
pm_statements_node_t *statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, statements, node, true);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
}
case PM_TOKEN_QUESTION_MARK: {
context_push(parser, PM_CONTEXT_TERNARY);
@@ -21838,7 +21703,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_token_t qmark = parser->current;
parser_lex(parser);
- pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
+ pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
if (parser->recovering) {
// If parsing the true expression of this ternary resulted in a syntax
@@ -21847,27 +21712,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// before the `expect` function call to make sure it doesn't
// accidentally move past a ':' token that occurs after the syntax
// error.
- pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
+ pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+ pm_node_t *false_expression = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
context_pop(parser);
pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
+ return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
}
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
pm_token_t colon = parser->previous;
- pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
+ pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
context_pop(parser);
pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
+ return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
}
case PM_TOKEN_COLON_COLON: {
parser_lex(parser);
@@ -21880,7 +21741,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
if (
(parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
- (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
+ ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
) {
// If we have a constant immediately following a '::' operator, then
// this can either be a constant path or a method call, depending on
@@ -21891,11 +21752,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_token_t message = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
- path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
+ path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
} else {
// Otherwise, this is a constant path. That would look like Foo::Bar.
- path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+ path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
// If this is followed by a comma then it is a multiple assignment.
@@ -21915,15 +21776,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// If we have an identifier following a '::' operator, then it is for
// sure a method call.
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
// If this is followed by a comma then it is a multiple assignment.
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
- return (pm_node_t *) call;
+ return UP(call);
}
case PM_TOKEN_PARENTHESIS_LEFT: {
// If we have a parenthesis following a '::' operator, then it is the
@@ -21931,11 +21792,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_arguments_t arguments = { 0 };
parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
+ return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
}
default: {
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+ return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
}
}
@@ -21944,31 +21805,31 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
parser_lex(parser);
accept1(parser, PM_TOKEN_NEWLINE);
- pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
context_pop(parser);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
+ return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
}
case PM_TOKEN_BRACKET_LEFT: {
parser_lex(parser);
pm_arguments_t arguments = { 0 };
- arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments.opening_loc = TOK2LOC(parser, &parser->previous);
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
pm_accepts_block_stack_push(parser, true);
- parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
+ parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
}
- arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments.closing_loc = TOK2LOC(parser, &parser->previous);
// If we have a comma after the closing bracket then this is a multiple
// assignment and we should parse the targets.
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
- return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
// If we're at the end of the arguments, we can now check if there is a
@@ -21984,17 +21845,17 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
if (block != NULL) {
if (arguments.block != NULL) {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
if (arguments.arguments == NULL) {
arguments.arguments = pm_arguments_node_create(parser);
}
- pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
+ pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block);
}
- arguments.block = (pm_node_t *) block;
+ arguments.block = UP(block);
}
- return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
+ return UP(pm_call_node_aref_create(parser, node, &arguments));
}
case PM_TOKEN_KEYWORD_IN: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -22009,9 +21870,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- pm_constant_id_list_free(&captures);
- return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
+ return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
}
case PM_TOKEN_EQUAL_GREATER: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -22026,9 +21886,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- pm_constant_id_list_free(&captures);
- return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
+ return UP(pm_match_required_node_create(parser, node, pattern, &operator));
}
default:
assert(false && "unreachable");
@@ -22041,16 +21900,83 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
#undef PM_PARSE_PATTERN_MULTI
/**
- * Determine if a given call node looks like a "command", which means it has
- * arguments but does not have parentheses.
+ * Some nodes act as statements and limit which operators can follow. This
+ * function inspects the node and the upcoming token to determine whether the
+ * expression loop should stop. It is called both after prefix parsing and after
+ * each infix operator.
+ *
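+ * The function body performs no writes of its own: it only reads the node's
+ * type and flags, consults pm_command_call_value_p() and pm_block_call_p(), and
+ * compares against the left binding power of the current token.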
+ *
+ * Returns true if the expression loop should stop (i.e., the next operator
+ * should not be consumed).
*/
-static inline bool
-pm_call_node_command_p(const pm_call_node_t *node) {
- return (
- (node->opening_loc.start == NULL) &&
- (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
- (node->arguments != NULL || node->block != NULL)
- );
+static bool
+parse_expression_terminator(pm_parser_t *parser, pm_node_t *node) {
+ pm_binding_power_t left = pm_binding_powers[parser->current.type].left;
+
+ switch (PM_NODE_TYPE(node)) {
+ case PM_MULTI_WRITE_NODE:
+ case PM_RETURN_NODE:
+ case PM_BREAK_NODE:
+ case PM_NEXT_NODE:
+ return left > PM_BINDING_POWER_MODIFIER;
+ case PM_CLASS_VARIABLE_WRITE_NODE:
+ case PM_CONSTANT_PATH_WRITE_NODE:
+ case PM_CONSTANT_WRITE_NODE:
+ case PM_GLOBAL_VARIABLE_WRITE_NODE:
+ case PM_INSTANCE_VARIABLE_WRITE_NODE:
+ case PM_LOCAL_VARIABLE_WRITE_NODE:
+ return PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && left > PM_BINDING_POWER_MODIFIER;
+ case PM_CALL_NODE: {
+ // Calls with an implicit array on the right-hand side are
+ // statements and can only be followed by modifiers.
+ if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)) {
+ return left > PM_BINDING_POWER_MODIFIER;
+ }
+
+ // Command-style calls (including block commands like
+ // `foo bar do end`) can only be followed by composition
+ // (and/or) and modifier (if/unless/etc.) operators.
+ if (pm_command_call_value_p(node)) {
+ return left > PM_BINDING_POWER_COMPOSITION;
+ }
+
+ // A block call (command with do-block, or any call chained
+ // from one) can only be followed by call chaining (., ::,
+ // &.), composition (and/or), and modifier operators.
+ if (pm_block_call_p(node)) {
+ return left > PM_BINDING_POWER_COMPOSITION && left < PM_BINDING_POWER_CALL;
+ }
+
+ return false;
+ }
+ case PM_SUPER_NODE:
+ case PM_YIELD_NODE:
+ // Command-style super/yield (without parens) can only be followed
+ // by composition and modifier operators.
+ if (pm_command_call_value_p(node)) {
+ return left > PM_BINDING_POWER_COMPOSITION;
+ }
+ return false;
+ case PM_DEF_NODE:
+ // An endless method whose body is a command-style call (e.g.,
+ // `def f = foo bar`) is a command assignment and can only be
+ // followed by modifiers.
+ return left > PM_BINDING_POWER_MODIFIER && pm_command_call_value_p(node);
+ case PM_RESCUE_MODIFIER_NODE:
+ // A rescue modifier whose handler is a pattern match (=> or in)
+ // produces a statement and cannot be followed by operators above
+ // the modifier level.
+ if (left > PM_BINDING_POWER_MODIFIER) {
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+ pm_node_t *rescue_expression = cast->rescue_expression;
+ return PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE);
+ }
+ return false;
+ default:
+ return false;
+ }
}
/**
@@ -22062,46 +21988,40 @@ pm_call_node_command_p(const pm_call_node_t *node) {
* determine if they need to perform additional cleanup.
*/
static pm_node_t *
-parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
}
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, flags, diag_id, depth);
+ // Some prefix nodes are statements and can only be followed by modifiers
+ // (if/unless/while/until/rescue) or nothing at all. We check these cheaply
+ // here before entering the infix loop.
switch (PM_NODE_TYPE(node)) {
- case PM_MISSING_NODE:
- // If we found a syntax error, then the type of node returned by
- // parse_expression_prefix is going to be a missing node.
+ case PM_ERROR_RECOVERY_NODE:
return node;
case PM_PRE_EXECUTION_NODE:
+ return node;
case PM_POST_EXECUTION_NODE:
case PM_ALIAS_GLOBAL_VARIABLE_NODE:
case PM_ALIAS_METHOD_NODE:
- case PM_MULTI_WRITE_NODE:
case PM_UNDEF_NODE:
- // These expressions are statements, and cannot be followed by
- // operators (except modifiers).
if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
return node;
}
break;
case PM_CALL_NODE:
- // If we have a call node, then we need to check if it looks like a
- // method call without parentheses that contains arguments. If it
- // does, then it has different rules for parsing infix operators,
- // namely that it only accepts composition (and/or) and modifiers
- // (if/unless/etc.).
- if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
+ case PM_SUPER_NODE:
+ case PM_YIELD_NODE:
+ case PM_DEF_NODE:
+ if (parse_expression_terminator(parser, node)) {
return node;
}
break;
case PM_SYMBOL_NODE:
- // If we have a symbol node that is being parsed as a label, then we
- // need to immediately return, because there should never be an
- // infix operator following this node.
- if (pm_symbol_node_label_p(node)) {
+ if (pm_symbol_node_label_p(parser, node)) {
return node;
}
break;
@@ -22109,8 +22029,8 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
break;
}
- // Otherwise we'll look and see if the next token can be parsed as an infix
- // operator. If it can, then we'll parse it using parse_expression_infix.
+ // Look and see if the next token can be parsed as an infix operator. If it
+ // can, then we'll parse it using parse_expression_infix.
pm_binding_powers_t current_binding_powers;
pm_token_type_t current_token_type;
@@ -22120,39 +22040,8 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
binding_power <= current_binding_powers.left &&
current_binding_powers.binary
) {
- node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
-
- switch (PM_NODE_TYPE(node)) {
- case PM_MULTI_WRITE_NODE:
- // Multi-write nodes are statements, and cannot be followed by
- // operators except modifiers.
- if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
- return node;
- }
- break;
- case PM_CLASS_VARIABLE_WRITE_NODE:
- case PM_CONSTANT_PATH_WRITE_NODE:
- case PM_CONSTANT_WRITE_NODE:
- case PM_GLOBAL_VARIABLE_WRITE_NODE:
- case PM_INSTANCE_VARIABLE_WRITE_NODE:
- case PM_LOCAL_VARIABLE_WRITE_NODE:
- // These expressions are statements, by virtue of the right-hand
- // side of their write being an implicit array.
- if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
- return node;
- }
- break;
- case PM_CALL_NODE:
- // These expressions are also statements, by virtue of the
- // right-hand side of the expression (i.e., the last argument to
- // the call node) being an implicit array.
- if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
- return node;
- }
- break;
- default:
- break;
- }
+ node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, flags, (uint16_t) (depth + 1));
+ if (parse_expression_terminator(parser, node)) return node;
// If the operator is nonassoc and we should not be able to parse the
// upcoming infix operator, break.
@@ -22160,7 +22049,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
// If this is a non-assoc operator and we are about to parse the
// exact same operator, then we need to add an error.
if (match1(parser, current_token_type)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
break;
}
@@ -22173,7 +22062,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
//
if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
break;
}
@@ -22185,7 +22074,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
}
}
- if (accepts_command_call) {
+ if (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) {
// A command-style method call is only accepted on method chains.
// Thus, we check whether the parsed node can continue method chains.
// The method chain can continue if the parsed node is one of the following five kinds:
@@ -22200,29 +22089,29 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
if (
// (1) foo[1]
!(
- cast->call_operator_loc.start == NULL &&
- cast->message_loc.start != NULL &&
- cast->message_loc.start[0] == '[' &&
- cast->message_loc.end[-1] == ']'
+ cast->call_operator_loc.length == 0 &&
+ cast->message_loc.length > 0 &&
+ parser->start[cast->message_loc.start] == '[' &&
+ parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
) &&
// (2) foo.bar
!(
- cast->call_operator_loc.start != NULL &&
+ cast->call_operator_loc.length > 0 &&
cast->arguments == NULL &&
cast->block == NULL &&
- cast->opening_loc.start == NULL
+ cast->opening_loc.length == 0
) &&
// (3) foo.bar(1)
!(
- cast->call_operator_loc.start != NULL &&
- cast->opening_loc.start != NULL
+ cast->call_operator_loc.length > 0 &&
+ cast->opening_loc.length > 0
) &&
// (4) foo.bar do end
!(
cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
)
) {
- accepts_command_call = false;
+ flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
}
break;
}
@@ -22230,10 +22119,21 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
case PM_CONSTANT_PATH_NODE:
break;
default:
- accepts_command_call = false;
+ flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
break;
}
}
+
+ if (context_terminator(parser->current_context->context, &parser->current)) {
+ pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
+ if (
+ !next_binding_powers.binary ||
+ binding_power > next_binding_powers.left ||
+ (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
+ ) {
+ return node;
+ }
+ }
}
return node;
@@ -22252,15 +22152,16 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
pm_arguments_node_arguments_append(
+ parser->arena,
arguments,
- (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
);
- pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
+ pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
parser,
arguments,
pm_parser_constant_id_constant(parser, "print", 5)
- ), true);
+ )), true);
}
if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
@@ -22271,47 +22172,49 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
pm_arguments_node_arguments_append(
+ parser->arena,
arguments,
- (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
);
pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
- pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
+ pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
parser,
pm_parser_constant_id_constant(parser, "$F", 2),
- (pm_node_t *) call
+ UP(call)
);
- pm_statements_node_body_prepend(statements, (pm_node_t *) write);
+ pm_statements_node_body_prepend(parser->arena, statements, UP(write));
}
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
pm_arguments_node_arguments_append(
+ parser->arena,
arguments,
- (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
);
if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
- pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
+ pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create(
parser,
- (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
- &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
- (pm_node_t *) pm_true_node_synthesized_create(parser)
- ));
+ UP(pm_symbol_node_synthesized_create(parser, "chomp")),
+ NULL,
+ UP(pm_true_node_synthesized_create(parser))
+ )));
- pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
- pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
+ pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords));
+ pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
}
pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
+ pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
parser,
- (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
+ UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
statements
- ), true);
+ )), true);
statements = wrapped_statements;
}
@@ -22355,7 +22258,6 @@ parse_program(pm_parser_t *parser) {
statements = wrap_statements(parser, statements);
} else {
flush_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
}
// If this is an empty file, then we're still going to parse all of the
@@ -22363,10 +22265,10 @@ parse_program(pm_parser_t *parser) {
// correct the location information.
if (statements == NULL) {
statements = pm_statements_node_create(parser);
- pm_statements_node_location_set(statements, parser->start, parser->start);
+ statements->base.location = (pm_location_t) { 0 };
}
- return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
+ return UP(pm_program_node_create(parser, &locals, statements));
}
/******************************************************************************/
@@ -22375,8 +22277,8 @@ parse_program(pm_parser_t *parser) {
/**
* A vendored version of strnstr that is used to find a substring within a
- * string with a given length. This function is used to search for the Ruby
- * engine name within a shebang when the -x option is passed to Ruby.
+ * string with a given length. This function is used to search for "ruby"
+ * within a shebang when the -x option is passed to Ruby.
*
* The only modification that we made here is that we don't do NULL byte checks
* because we know the little parameter will not have a NULL byte and we allow
@@ -22386,7 +22288,7 @@ static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
size_t little_length = strlen(little);
- for (const char *big_end = big + big_length; big < big_end; big++) {
+ for (const char *max = big + big_length - little_length; big <= max; big++) {
if (*big == *little && memcmp(big, little, little_length) == 0) return big;
}
@@ -22404,7 +22306,7 @@ pm_strnstr(const char *big, const char *little, size_t big_length) {
static void
pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
- pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
+ pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
}
}
#endif
@@ -22439,11 +22341,14 @@ pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const c
/**
* Initialize a parser with the given start and end pointers.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
+void
+pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
+ assert(arena != NULL);
assert(source != NULL);
*parser = (pm_parser_t) {
+ .arena = arena,
+ .metadata_arena = { 0 },
.node_id = 0,
.lex_state = PM_LEX_STATE_BEG,
.enclosure_nesting = 0,
@@ -22462,7 +22367,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
.next_start = NULL,
.heredoc_end = NULL,
- .data_loc = { .start = NULL, .end = NULL },
+ .data_loc = { 0 },
.comment_list = { 0 },
.magic_comment_list = { 0 },
.warning_list = { 0 },
@@ -22472,11 +22377,11 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.encoding = PM_ENCODING_UTF_8_ENTRY,
.encoding_changed_callback = NULL,
.encoding_comment_start = source,
- .lex_callback = NULL,
+ .lex_callback = { 0 },
.filepath = { 0 },
.constant_pool = { 0 },
- .newline_list = { 0 },
- .integer_base = 0,
+ .line_offsets = { 0 },
+ .integer = { 0 },
.current_string = PM_STRING_EMPTY,
.start_line = 1,
.explicit_encoding = NULL,
@@ -22485,6 +22390,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.partial_script = false,
.command_start = true,
.recovering = false,
+ .continuable = true,
.encoding_locked = false,
.encoding_changed = false,
.pattern_matching_newlines = false,
@@ -22492,32 +22398,30 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.current_block_exits = NULL,
.semantic_token_seen = false,
.frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
- .current_regular_expression_ascii_only = false,
.warn_mismatched_indentation = true
};
- // Initialize the constant pool. We're going to completely guess as to the
- // number of constants that we'll need based on the size of the input. The
- // ratio we chose here is actually less arbitrary than you might think.
- //
- // We took ~50K Ruby files and measured the size of the file versus the
- // number of constants that were found in those files. Then we found the
- // average and standard deviation of the ratios of constants/bytesize. Then
- // we added 1.34 standard deviations to the average to get a ratio that
- // would fit 75% of the files (for a two-tailed distribution). This works
- // because there was about a 0.77 correlation and the distribution was
- // roughly normal.
- //
- // This ratio will need to change if we add more constants to the constant
- // pool for another node type.
- uint32_t constant_size = ((uint32_t) size) / 95;
- pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
-
- // Initialize the newline list. Similar to the constant pool, we're going to
- // guess at the number of newlines that we'll need based on the size of the
- // input.
+ /* Pre-size the arenas based on input size to reduce the number of block
+ * allocations (and the kernel page zeroing they trigger). The ratios were
+ * measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
+ * The reserve call is a no-op when the capacity is at or below the default
+ * arena block size, so small inputs don't waste an extra allocation. */
+ if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
+ if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
+
+ /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the
+ * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135.
+ * We use 120 as a balance between over-allocation waste and resize
+ * frequency. Resizes are cheap with arena allocation, so we lean toward
+ * under-estimating. */
+ uint32_t constant_size = ((uint32_t) size) / 120;
+ pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
+
+ /* Initialize the line offset list. Similar to the constant pool, we are
+ * going to estimate the number of newlines that we will need based on the
+ * size of the input. */
size_t newline_size = size / 22;
- pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
+ pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
// If options were provided to this parse, establish them here.
if (options != NULL) {
@@ -22554,7 +22458,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
- const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
+ const pm_options_scope_t *scope = pm_options_scope(options, scope_index);
pm_parser_scope_push(parser, scope_index == 0);
// Scopes given from the outside are not allowed to have numbered
@@ -22562,20 +22466,24 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
- const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
+ const pm_string_t *local = pm_options_scope_local(scope, local_index);
const uint8_t *source = pm_string_source(local);
size_t length = pm_string_length(local);
- void *allocated = xmalloc(length);
- if (allocated == NULL) continue;
-
+ uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1);
memcpy(allocated, source, length);
- pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
+ pm_parser_local_add_owned(parser, allocated, length);
}
}
}
+ // Now that we have established the user-provided options, check if
+ // a version was given and parse as the latest version otherwise.
+ if (parser->version == PM_OPTIONS_VERSION_UNSET) {
+ parser->version = PM_OPTIONS_VERSION_LATEST;
+ }
+
pm_accepts_block_stack_push(parser, true);
// Skip past the UTF-8 BOM if it exists.
@@ -22609,8 +22517,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
// If the shebang does not include "ruby" and this is the main script being
// parsed, then we will start searching the file for a shebang that does
// contain "ruby" as if -x were passed on the command line.
- const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
- size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+ size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
const char *engine;
@@ -22629,7 +22537,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
}
search_shebang = false;
- } else if (options->main_script && !parser->parsing_eval) {
+ } else if (options != NULL && options->main_script && !parser->parsing_eval) {
search_shebang = true;
}
}
@@ -22650,7 +22558,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
const uint8_t *newline = next_newline(cursor, parser->end - cursor);
while (newline != NULL) {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
cursor = newline + 1;
newline = next_newline(cursor, parser->end - cursor);
@@ -22679,8 +22587,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
} else {
- pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
- pm_newline_list_clear(&parser->newline_list);
+ pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
+ pm_line_offset_list_clear(&parser->line_offsets);
}
}
@@ -22691,56 +22599,28 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
}
/**
- * Register a callback that will be called whenever prism changes the encoding
- * it is using to parse based on the magic comment.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
- parser->encoding_changed_callback = callback;
-}
-
-/**
- * Free all of the memory associated with the comment list.
- */
-static inline void
-pm_comment_list_free(pm_list_t *list) {
- pm_list_node_t *node, *next;
-
- for (node = list->head; node != NULL; node = next) {
- next = node->next;
-
- pm_comment_t *comment = (pm_comment_t *) node;
- xfree(comment);
- }
-}
-
-/**
- * Free all of the memory associated with the magic comment list.
+ * Allocate and initialize a parser for the given source buffer and size.
+ *
+ * The resulting parser must eventually be freed with `pm_parser_free()`. The
+ * arena is caller-owned and must outlive the parser — `pm_parser_cleanup()`
+ * does not free the arena.
*/
-static inline void
-pm_magic_comment_list_free(pm_list_t *list) {
- pm_list_node_t *node, *next;
-
- for (node = list->head; node != NULL; node = next) {
- next = node->next;
+pm_parser_t *
+pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) {
+ pm_parser_t *parser = (pm_parser_t *) xmalloc(sizeof(pm_parser_t));
+ if (parser == NULL) abort();
- pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
- xfree(magic_comment);
- }
+ pm_parser_init(arena, parser, source, size, options);
+ return parser;
}
/**
* Free any memory associated with the given parser.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_parser_free(pm_parser_t *parser) {
- pm_string_free(&parser->filepath);
- pm_diagnostic_list_free(&parser->error_list);
- pm_diagnostic_list_free(&parser->warning_list);
- pm_comment_list_free(&parser->comment_list);
- pm_magic_comment_list_free(&parser->magic_comment_list);
- pm_constant_pool_free(&parser->constant_pool);
- pm_newline_list_free(&parser->newline_list);
+void
+pm_parser_cleanup(pm_parser_t *parser) {
+ pm_string_cleanup(&parser->filepath);
+ pm_arena_cleanup(&parser->metadata_arena);
while (parser->current_scope != NULL) {
// Normally, popping the scope doesn't free the locals since it is
@@ -22756,145 +22636,224 @@ pm_parser_free(pm_parser_t *parser) {
}
/**
- * Parse the Ruby source associated with the given parser and return the tree.
+ * Free both the memory held by the given parser and the parser itself.
*/
-PRISM_EXPORTED_FUNCTION pm_node_t *
-pm_parse(pm_parser_t *parser) {
- return parse_program(parser);
+void
+pm_parser_free(pm_parser_t *parser) {
+ pm_parser_cleanup(parser);
+ xfree_sized(parser, sizeof(pm_parser_t));
}
/**
- * Read into the stream until the gets callback returns false. If the last read
- * line from the stream matches an __END__ marker, then halt and return false,
- * otherwise return true.
+ * Returns true if the given diagnostic ID represents an error that cannot be
+ * fixed by appending more input. These are errors where the existing source
+ * contains definitively invalid syntax (as opposed to merely incomplete input).
*/
static bool
-pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
-#define LINE_SIZE 4096
- char line[LINE_SIZE];
-
- while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
- size_t length = LINE_SIZE;
- while (length > 0 && line[length - 1] == '\n') length--;
-
- if (length == LINE_SIZE) {
- // If we read a line that is the maximum size and it doesn't end
- // with a newline, then we'll just append it to the buffer and
- // continue reading.
- length--;
- pm_buffer_append_string(buffer, line, length);
- continue;
- }
-
- // Append the line to the buffer.
- length--;
- pm_buffer_append_string(buffer, line, length);
-
- // Check if the line matches the __END__ marker. If it does, then stop
- // reading and return false. In most circumstances, this means we should
- // stop reading from the stream so that the DATA constant can pick it
- // up.
- switch (length) {
- case 7:
- if (strncmp(line, "__END__", 7) == 0) return false;
- break;
- case 8:
- if (strncmp(line, "__END__\n", 8) == 0) return false;
- break;
- case 9:
- if (strncmp(line, "__END__\r\n", 9) == 0) return false;
- break;
- }
+pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) {
+ switch (diag_id) {
+ case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR:
+ case PM_ERR_BEGIN_UPCASE_BRACE:
+ case PM_ERR_CLASS_VARIABLE_BARE:
+ case PM_ERR_END_UPCASE_BRACE:
+ case PM_ERR_ESCAPE_INVALID_HEXADECIMAL:
+ case PM_ERR_ESCAPE_INVALID_UNICODE_LIST:
+ case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT:
+ case PM_ERR_EXPRESSION_NOT_WRITABLE:
+ case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF:
+ case PM_ERR_FLOAT_PARSE:
+ case PM_ERR_GLOBAL_VARIABLE_BARE:
+ case PM_ERR_HASH_KEY:
+ case PM_ERR_HEREDOC_IDENTIFIER:
+ case PM_ERR_INSTANCE_VARIABLE_BARE:
+ case PM_ERR_INVALID_BLOCK_EXIT:
+ case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT:
+ case PM_ERR_INVALID_FLOAT_EXPONENT:
+ case PM_ERR_INVALID_NUMBER_BINARY:
+ case PM_ERR_INVALID_NUMBER_DECIMAL:
+ case PM_ERR_INVALID_NUMBER_HEXADECIMAL:
+ case PM_ERR_INVALID_NUMBER_OCTAL:
+ case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING:
+ case PM_ERR_NO_LOCAL_VARIABLE:
+ case PM_ERR_PARAMETER_ORDER:
+ case PM_ERR_STATEMENT_UNDEF:
+ case PM_ERR_VOID_EXPRESSION:
+ return true;
+ default:
+ return false;
}
-
- return true;
-#undef LINE_SIZE
}
/**
- * Determine if there was an unterminated heredoc at the end of the input, which
- * would mean the stream isn't finished and we should keep reading.
+ * Determine whether the source parsed by the given parser could become valid if
+ * more input were appended. This is used by tools like IRB to decide whether to
+ * prompt for continuation or to display an error.
+ *
+ * The parser starts with continuable=true. This function scans all errors to
+ * detect two categories of non-continuable errors:
+ *
+ * 1. Fatal errors: errors like invalid number literals or bare global variables
+ * that indicate definitively invalid syntax. These are only considered fatal
+ * if they occur before EOF (at EOF they could be from truncated input, e.g.
+ * `"\x` is an incomplete hex escape).
*
- * For the other lex modes we can check if the lex mode has been closed, but for
- * heredocs when we hit EOF we close the lex mode and then go back to parse the
- * rest of the line after the heredoc declaration so that we get more of the
- * syntax tree.
+ * 2. Stray tokens: unexpected_token_ignore and unexpected_token_close_context
+ * errors indicate tokens that don't belong. A stray token is a cascade
+ * effect (and does not prevent continuability) if:
+ *
+ * a. A non-stray, non-fatal error appeared earlier in the error list at a
+ * strictly earlier source position (the stray was caused by a preceding
+ * parse failure, e.g. a truncated heredoc), OR
+ * b. The stray token is at EOF, starts after position 0 (there is valid
+ * code before it), and either is a single byte (likely a truncated
+ * token like `\`) or there are non-stray errors elsewhere.
+ *
+ * Closing delimiters (`)`, `]`, `}`) at EOF are always genuinely stray —
+ * they are complete tokens and cannot become part of a longer valid
+ * construct by appending more input.
+ *
+ * c. The stray token is `=` at the start of a line, which could be the
+ * beginning of `=begin` (an embedded document). The remaining bytes
+ * after `=` may parse as an identifier, so the error is not at EOF,
+ * but the construct is genuinely incomplete.
*/
-static bool
-pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
- pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
+static void
+pm_parse_continuable(pm_parser_t *parser) {
+ // If there are no errors then there is nothing to continue.
+ if (parser->error_list.size == 0) {
+ parser->continuable = false;
+ return;
+ }
- for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
- if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
- return true;
+ if (!parser->continuable) return;
+
+ size_t source_length = (size_t) (parser->end - parser->start);
+
+ // First pass: check if there are any non-stray, non-fatal errors.
+ bool has_non_stray_error = false;
+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+ if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) {
+ has_non_stray_error = true;
+ break;
}
}
- return false;
-}
+ // Second pass: check each error. We track the minimum source position
+ // among non-stray, non-fatal errors seen so far in list order, which
+ // lets us detect cascade stray tokens.
+ size_t non_stray_min_start = SIZE_MAX;
-/**
- * Parse a stream of Ruby source and return the tree.
- *
- * Prism is designed around having the entire source in memory at once, but you
- * can stream stdin in to Ruby so we need to support a streaming API.
- */
-PRISM_EXPORTED_FUNCTION pm_node_t *
-pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
- pm_buffer_init(buffer);
+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+ size_t error_start = (size_t) error->location.start;
+ size_t error_end = error_start + (size_t) error->location.length;
+ bool at_eof = error_end >= source_length;
- bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
- pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
- pm_node_t *node = pm_parse(parser);
+ // Fatal errors are non-continuable unless they occur at EOF.
+ if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) {
+ parser->continuable = false;
+ return;
+ }
- while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
- pm_node_destroy(parser, node);
- eof = pm_parse_stream_read(buffer, stream, stream_fgets);
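+ // Track the positions of non-stray errors in list order. Fatal errors
+ // located at EOF also reach this point and are tracked, because only
+ // fatal errors before EOF return early above.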
+ if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE &&
+ error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) {
+ if (error_start < non_stray_min_start) non_stray_min_start = error_start;
+ continue;
+ }
- pm_parser_free(parser);
- pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
- node = pm_parse(parser);
+ // This is a stray token. Determine if it is a cascade effect
+ // of a preceding error or genuinely stray.
+
+ // Rule (a): a non-stray error was seen earlier in the list at a
+ // strictly earlier position — this stray is a cascade effect.
+ if (non_stray_min_start < error_start) continue;
+
+ // Rule (b): this stray is at EOF with valid code before it.
+ // Single-byte stray tokens at EOF (like `\` for line continuation)
+ // are likely truncated tokens. Multi-byte stray tokens (like the
+ // keyword `end`) need additional evidence that they are cascade
+ // effects (i.e. non-stray errors exist elsewhere).
+ if (at_eof && error_start > 0) {
+ // Exception: closing delimiters at EOF are genuinely stray.
+ if (error->location.length == 1) {
+ const uint8_t *byte = parser->start + error_start;
+ if (*byte == ')' || *byte == ']' || *byte == '}') {
+ parser->continuable = false;
+ return;
+ }
+
+ // Single-byte non-delimiter stray at EOF: cascade.
+ continue;
+ }
+
+ // Multi-byte stray at EOF: cascade only if there are
+ // non-stray errors (evidence of a preceding parse failure).
+ if (has_non_stray_error) continue;
+ }
+
+ // Rule (c): a stray `=` at the start of a line could be the
+ // beginning of an embedded document (`=begin`). The remaining
+ // bytes after `=` parse as an identifier, so the error is not
+ // at EOF, but the construct is genuinely incomplete.
+ if (error->location.length == 1) {
+ const uint8_t *byte = parser->start + error_start;
+ if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue;
+ }
+
+ // This stray token is genuinely non-continuable.
+ parser->continuable = false;
+ return;
}
+}
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ */
+pm_node_t *
+pm_parse(pm_parser_t *parser) {
+ pm_node_t *node = parse_program(parser);
+ pm_parse_continuable(parser);
return node;
}
/**
- * Parse the source and return true if it parses without errors or warnings.
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * Prism is designed around having the entire source in memory at once, but
+ * source can also be streamed into Ruby over stdin, so we need to support a
+ * streaming API.
*/
-PRISM_EXPORTED_FUNCTION bool
-pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
- pm_options_t options = { 0 };
- pm_options_read(&options, data);
+pm_node_t *
+pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) {
+ bool eof = pm_source_stream_read(source);
- pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
+ pm_parser_t *tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
+ pm_node_t *node = pm_parse(tmp);
- pm_node_t *node = pm_parse(&parser);
- pm_node_destroy(&parser, node);
+ while (!eof && tmp->error_list.size > 0) {
+ eof = pm_source_stream_read(source);
- bool result = parser.error_list.size == 0;
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_free(tmp);
+ pm_arena_cleanup(arena);
- return result;
+ tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
+ node = pm_parse(tmp);
+ }
+
+ *parser = tmp;
+ return node;
}
#undef PM_CASE_KEYWORD
#undef PM_CASE_OPERATOR
#undef PM_CASE_WRITABLE
#undef PM_STRING_EMPTY
-#undef PM_LOCATION_NODE_BASE_VALUE
-#undef PM_LOCATION_NODE_VALUE
-#undef PM_LOCATION_NULL_VALUE
-#undef PM_LOCATION_TOKEN_VALUE
// We optionally support serializing to a binary string. For systems that don't
// want or need this functionality, it can be turned off with the
// PRISM_EXCLUDE_SERIALIZATION define.
#ifndef PRISM_EXCLUDE_SERIALIZATION
-static inline void
+static PRISM_INLINE void
pm_serialize_header(pm_buffer_t *buffer) {
pm_buffer_append_string(buffer, "PRISM", 5);
pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
@@ -22906,7 +22865,7 @@ pm_serialize_header(pm_buffer_t *buffer) {
/**
* Serialize the AST represented by the given node to the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_serialize_header(buffer);
pm_serialize_content(parser, node, buffer);
@@ -22917,13 +22876,14 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
* Parse and serialize the AST represented by the given source to the given
* buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
pm_options_read(&options, data);
+ pm_arena_t arena = { 0 };
pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
+ pm_parser_init(&arena, &parser, source, size, &options);
pm_node_t *node = pm_parse(&parser);
@@ -22931,216 +22891,53 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
pm_serialize_content(&parser, node, buffer);
pm_buffer_append_byte(buffer, '\0');
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
}
/**
* Parse and serialize the AST represented by the source that is read out of the
 * given stream into the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
- pm_parser_t parser;
+void
+pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) {
+ pm_arena_t arena = { 0 };
+ pm_parser_t *parser;
pm_options_t options = { 0 };
pm_options_read(&options, data);
- pm_buffer_t parser_buffer;
- pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
+ pm_node_t *node = pm_parse_stream(&parser, &arena, source, &options);
pm_serialize_header(buffer);
- pm_serialize_content(&parser, node, buffer);
+ pm_serialize_content(parser, node, buffer);
pm_buffer_append_byte(buffer, '\0');
- pm_node_destroy(&parser, node);
- pm_buffer_free(&parser_buffer);
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_free(parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
}
/**
* Parse and serialize the comments in the given source to the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
pm_options_read(&options, data);
+ pm_arena_t arena = { 0 };
pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
+ pm_parser_init(&arena, &parser, source, size, &options);
- pm_node_t *node = pm_parse(&parser);
+ pm_parse(&parser);
pm_serialize_header(buffer);
pm_serialize_encoding(parser.encoding, buffer);
pm_buffer_append_varsint(buffer, parser.start_line);
- pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
+ pm_serialize_comment_list(&parser.comment_list, buffer);
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
}
#endif
-
-/******************************************************************************/
-/* Slice queries for the Ruby API */
-/******************************************************************************/
-
-/** The category of slice returned from pm_slice_type. */
-typedef enum {
- /** Returned when the given encoding name is invalid. */
- PM_SLICE_TYPE_ERROR = -1,
-
- /** Returned when no other types apply to the slice. */
- PM_SLICE_TYPE_NONE,
-
- /** Returned when the slice is a valid local variable name. */
- PM_SLICE_TYPE_LOCAL,
-
- /** Returned when the slice is a valid constant name. */
- PM_SLICE_TYPE_CONSTANT,
-
- /** Returned when the slice is a valid method name. */
- PM_SLICE_TYPE_METHOD_NAME
-} pm_slice_type_t;
-
-/**
- * Check that the slice is a valid local variable name or constant.
- */
-pm_slice_type_t
-pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
- // first, get the right encoding object
- const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
- if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
-
- // check that there is at least one character
- if (length == 0) return PM_SLICE_TYPE_NONE;
-
- size_t width;
- if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
- // valid because alphabetical
- } else if (*source == '_') {
- // valid because underscore
- width = 1;
- } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
- // valid because multibyte
- } else {
- // invalid because no match
- return PM_SLICE_TYPE_NONE;
- }
-
- // determine the type of the slice based on the first character
- const uint8_t *end = source + length;
- pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
-
- // next, iterate through all of the bytes of the string to ensure that they
- // are all valid identifier characters
- source += width;
-
- while (source < end) {
- if ((width = encoding->alnum_char(source, end - source)) != 0) {
- // valid because alphanumeric
- source += width;
- } else if (*source == '_') {
- // valid because underscore
- source++;
- } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
- // valid because multibyte
- source += width;
- } else {
- // invalid because no match
- break;
- }
- }
-
- // accept a ! or ? at the end of the slice as a method name
- if (*source == '!' || *source == '?' || *source == '=') {
- source++;
- result = PM_SLICE_TYPE_METHOD_NAME;
- }
-
- // valid if we are at the end of the slice
- return source == end ? result : PM_SLICE_TYPE_NONE;
-}
-
-/**
- * Check that the slice is a valid local variable name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
- switch (pm_slice_type(source, length, encoding_name)) {
- case PM_SLICE_TYPE_ERROR:
- return PM_STRING_QUERY_ERROR;
- case PM_SLICE_TYPE_NONE:
- case PM_SLICE_TYPE_CONSTANT:
- case PM_SLICE_TYPE_METHOD_NAME:
- return PM_STRING_QUERY_FALSE;
- case PM_SLICE_TYPE_LOCAL:
- return PM_STRING_QUERY_TRUE;
- }
-
- assert(false && "unreachable");
- return PM_STRING_QUERY_FALSE;
-}
-
-/**
- * Check that the slice is a valid constant name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
- switch (pm_slice_type(source, length, encoding_name)) {
- case PM_SLICE_TYPE_ERROR:
- return PM_STRING_QUERY_ERROR;
- case PM_SLICE_TYPE_NONE:
- case PM_SLICE_TYPE_LOCAL:
- case PM_SLICE_TYPE_METHOD_NAME:
- return PM_STRING_QUERY_FALSE;
- case PM_SLICE_TYPE_CONSTANT:
- return PM_STRING_QUERY_TRUE;
- }
-
- assert(false && "unreachable");
- return PM_STRING_QUERY_FALSE;
-}
-
-/**
- * Check that the slice is a valid method name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
-#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
-#define C1(c) (*source == c)
-#define C2(s) (memcmp(source, s, 2) == 0)
-#define C3(s) (memcmp(source, s, 3) == 0)
-
- switch (pm_slice_type(source, length, encoding_name)) {
- case PM_SLICE_TYPE_ERROR:
- return PM_STRING_QUERY_ERROR;
- case PM_SLICE_TYPE_NONE:
- break;
- case PM_SLICE_TYPE_LOCAL:
- // numbered parameters are not valid method names
- return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
- case PM_SLICE_TYPE_CONSTANT:
- // all constants are valid method names
- case PM_SLICE_TYPE_METHOD_NAME:
- // all method names are valid method names
- return PM_STRING_QUERY_TRUE;
- }
-
- switch (length) {
- case 1:
- return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
- case 2:
- return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
- case 3:
- return B(C3("===") || C3("<=>") || C3("[]="));
- default:
- return PM_STRING_QUERY_FALSE;
- }
-
-#undef B
-#undef C1
-#undef C2
-#undef C3
-}