summaryrefslogtreecommitdiff
path: root/prism/prism.c
diff options
context:
space:
mode:
Diffstat (limited to 'prism/prism.c')
-rw-r--r--prism/prism.c3012
1 files changed, 1640 insertions, 1372 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 2815723ebd..3b10c3aa18 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -14,180 +14,6 @@ pm_version(void) {
*/
#define PM_TAB_WHITESPACE_SIZE 8
-#ifndef PM_DEBUG_LOGGING
-/**
- * Debugging logging will provide you with additional debugging functions as
- * well as automatically replace some functions with their debugging
- * counterparts.
- */
-#define PM_DEBUG_LOGGING 0
-#endif
-
-#if PM_DEBUG_LOGGING
-
-/******************************************************************************/
-/* Debugging */
-/******************************************************************************/
-
-PRISM_ATTRIBUTE_UNUSED static const char *
-debug_context(pm_context_t context) {
- switch (context) {
- case PM_CONTEXT_BEGIN: return "BEGIN";
- case PM_CONTEXT_BEGIN_ENSURE: return "BEGIN_ENSURE";
- case PM_CONTEXT_BEGIN_ELSE: return "BEGIN_ELSE";
- case PM_CONTEXT_BEGIN_RESCUE: return "BEGIN_RESCUE";
- case PM_CONTEXT_BLOCK_BRACES: return "BLOCK_BRACES";
- case PM_CONTEXT_BLOCK_KEYWORDS: return "BLOCK_KEYWORDS";
- case PM_CONTEXT_BLOCK_ENSURE: return "BLOCK_ENSURE";
- case PM_CONTEXT_BLOCK_ELSE: return "BLOCK_ELSE";
- case PM_CONTEXT_BLOCK_RESCUE: return "BLOCK_RESCUE";
- case PM_CONTEXT_CASE_IN: return "CASE_IN";
- case PM_CONTEXT_CASE_WHEN: return "CASE_WHEN";
- case PM_CONTEXT_CLASS: return "CLASS";
- case PM_CONTEXT_CLASS_ELSE: return "CLASS_ELSE";
- case PM_CONTEXT_CLASS_ENSURE: return "CLASS_ENSURE";
- case PM_CONTEXT_CLASS_RESCUE: return "CLASS_RESCUE";
- case PM_CONTEXT_DEF: return "DEF";
- case PM_CONTEXT_DEF_PARAMS: return "DEF_PARAMS";
- case PM_CONTEXT_DEF_ENSURE: return "DEF_ENSURE";
- case PM_CONTEXT_DEF_ELSE: return "DEF_ELSE";
- case PM_CONTEXT_DEF_RESCUE: return "DEF_RESCUE";
- case PM_CONTEXT_DEFAULT_PARAMS: return "DEFAULT_PARAMS";
- case PM_CONTEXT_DEFINED: return "DEFINED";
- case PM_CONTEXT_ELSE: return "ELSE";
- case PM_CONTEXT_ELSIF: return "ELSIF";
- case PM_CONTEXT_EMBEXPR: return "EMBEXPR";
- case PM_CONTEXT_FOR_INDEX: return "FOR_INDEX";
- case PM_CONTEXT_FOR: return "FOR";
- case PM_CONTEXT_IF: return "IF";
- case PM_CONTEXT_LAMBDA_BRACES: return "LAMBDA_BRACES";
- case PM_CONTEXT_LAMBDA_DO_END: return "LAMBDA_DO_END";
- case PM_CONTEXT_LAMBDA_ENSURE: return "LAMBDA_ENSURE";
- case PM_CONTEXT_LAMBDA_ELSE: return "LAMBDA_ELSE";
- case PM_CONTEXT_LAMBDA_RESCUE: return "LAMBDA_RESCUE";
- case PM_CONTEXT_MAIN: return "MAIN";
- case PM_CONTEXT_MODULE: return "MODULE";
- case PM_CONTEXT_MODULE_ELSE: return "MODULE_ELSE";
- case PM_CONTEXT_MODULE_ENSURE: return "MODULE_ENSURE";
- case PM_CONTEXT_MODULE_RESCUE: return "MODULE_RESCUE";
- case PM_CONTEXT_NONE: return "NONE";
- case PM_CONTEXT_PARENS: return "PARENS";
- case PM_CONTEXT_POSTEXE: return "POSTEXE";
- case PM_CONTEXT_PREDICATE: return "PREDICATE";
- case PM_CONTEXT_PREEXE: return "PREEXE";
- case PM_CONTEXT_RESCUE_MODIFIER: return "RESCUE_MODIFIER";
- case PM_CONTEXT_SCLASS: return "SCLASS";
- case PM_CONTEXT_SCLASS_ENSURE: return "SCLASS_ENSURE";
- case PM_CONTEXT_SCLASS_ELSE: return "SCLASS_ELSE";
- case PM_CONTEXT_SCLASS_RESCUE: return "SCLASS_RESCUE";
- case PM_CONTEXT_TERNARY: return "TERNARY";
- case PM_CONTEXT_UNLESS: return "UNLESS";
- case PM_CONTEXT_UNTIL: return "UNTIL";
- case PM_CONTEXT_WHILE: return "WHILE";
- }
- return NULL;
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_contexts(pm_parser_t *parser) {
- pm_context_node_t *context_node = parser->current_context;
- fprintf(stderr, "CONTEXTS: ");
-
- if (context_node != NULL) {
- while (context_node != NULL) {
- fprintf(stderr, "%s", debug_context(context_node->context));
- context_node = context_node->prev;
- if (context_node != NULL) {
- fprintf(stderr, " <- ");
- }
- }
- } else {
- fprintf(stderr, "NONE");
- }
-
- fprintf(stderr, "\n");
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_node(const pm_parser_t *parser, const pm_node_t *node) {
- pm_buffer_t output_buffer = { 0 };
- pm_prettyprint(&output_buffer, parser, node);
-
- fprintf(stderr, "%.*s", (int) output_buffer.length, output_buffer.value);
- pm_buffer_free(&output_buffer);
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_lex_mode(pm_parser_t *parser) {
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
- bool first = true;
-
- while (lex_mode != NULL) {
- if (first) {
- first = false;
- } else {
- fprintf(stderr, " <- ");
- }
-
- switch (lex_mode->mode) {
- case PM_LEX_DEFAULT: fprintf(stderr, "DEFAULT"); break;
- case PM_LEX_EMBEXPR: fprintf(stderr, "EMBEXPR"); break;
- case PM_LEX_EMBVAR: fprintf(stderr, "EMBVAR"); break;
- case PM_LEX_HEREDOC: fprintf(stderr, "HEREDOC"); break;
- case PM_LEX_LIST: fprintf(stderr, "LIST (terminator=%c, interpolation=%d)", lex_mode->as.list.terminator, lex_mode->as.list.interpolation); break;
- case PM_LEX_REGEXP: fprintf(stderr, "REGEXP (terminator=%c)", lex_mode->as.regexp.terminator); break;
- case PM_LEX_STRING: fprintf(stderr, "STRING (terminator=%c, interpolation=%d)", lex_mode->as.string.terminator, lex_mode->as.string.interpolation); break;
- }
-
- lex_mode = lex_mode->prev;
- }
-
- fprintf(stderr, "\n");
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_state(pm_parser_t *parser) {
- fprintf(stderr, "STATE: ");
- bool first = true;
-
- if (parser->lex_state == PM_LEX_STATE_NONE) {
- fprintf(stderr, "NONE\n");
- return;
- }
-
-#define CHECK_STATE(state) \
- if (parser->lex_state & state) { \
- if (!first) fprintf(stderr, "|"); \
- fprintf(stderr, "%s", #state); \
- first = false; \
- }
-
- CHECK_STATE(PM_LEX_STATE_BEG)
- CHECK_STATE(PM_LEX_STATE_END)
- CHECK_STATE(PM_LEX_STATE_ENDARG)
- CHECK_STATE(PM_LEX_STATE_ENDFN)
- CHECK_STATE(PM_LEX_STATE_ARG)
- CHECK_STATE(PM_LEX_STATE_CMDARG)
- CHECK_STATE(PM_LEX_STATE_MID)
- CHECK_STATE(PM_LEX_STATE_FNAME)
- CHECK_STATE(PM_LEX_STATE_DOT)
- CHECK_STATE(PM_LEX_STATE_CLASS)
- CHECK_STATE(PM_LEX_STATE_LABEL)
- CHECK_STATE(PM_LEX_STATE_LABELED)
- CHECK_STATE(PM_LEX_STATE_FITEM)
-
-#undef CHECK_STATE
-
- fprintf(stderr, "\n");
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_token(pm_token_t * token) {
- fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
-}
-
-#endif
-
// Macros for min/max.
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
@@ -423,7 +249,7 @@ lex_mode_pop(pm_parser_t *parser) {
* This is the equivalent of IS_lex_state in CRuby.
*/
static inline bool
-lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
+lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
return parser->lex_state & state;
}
@@ -491,8 +317,52 @@ lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
parser->lex_state = state;
}
+#ifndef PM_DEBUG_LOGGING
+/**
+ * Debugging logging will print additional information to stderr whenever the
+ * lexer state changes.
+ */
+#define PM_DEBUG_LOGGING 0
+#endif
+
#if PM_DEBUG_LOGGING
-static inline void
+PRISM_ATTRIBUTE_UNUSED static void
+debug_state(pm_parser_t *parser) {
+ fprintf(stderr, "STATE: ");
+ bool first = true;
+
+ if (parser->lex_state == PM_LEX_STATE_NONE) {
+ fprintf(stderr, "NONE\n");
+ return;
+ }
+
+#define CHECK_STATE(state) \
+ if (parser->lex_state & state) { \
+ if (!first) fprintf(stderr, "|"); \
+ fprintf(stderr, "%s", #state); \
+ first = false; \
+ }
+
+ CHECK_STATE(PM_LEX_STATE_BEG)
+ CHECK_STATE(PM_LEX_STATE_END)
+ CHECK_STATE(PM_LEX_STATE_ENDARG)
+ CHECK_STATE(PM_LEX_STATE_ENDFN)
+ CHECK_STATE(PM_LEX_STATE_ARG)
+ CHECK_STATE(PM_LEX_STATE_CMDARG)
+ CHECK_STATE(PM_LEX_STATE_MID)
+ CHECK_STATE(PM_LEX_STATE_FNAME)
+ CHECK_STATE(PM_LEX_STATE_DOT)
+ CHECK_STATE(PM_LEX_STATE_CLASS)
+ CHECK_STATE(PM_LEX_STATE_LABEL)
+ CHECK_STATE(PM_LEX_STATE_LABELED)
+ CHECK_STATE(PM_LEX_STATE_FITEM)
+
+#undef CHECK_STATE
+
+ fprintf(stderr, "\n");
+}
+
+static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
debug_state(parser);
@@ -672,6 +542,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
+/**
+ * Add an error for an expected heredoc terminator. This is a special function
+ * only because it grabs its location off of a lex mode instead of a node or a
+ * token.
+ */
+static void
+pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
+ const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
+ size_t ident_length = lex_mode->as.heredoc.ident_length;
+
+ PM_PARSER_ERR_FORMAT(
+ parser,
+ ident_start,
+ ident_start + ident_length,
+ PM_ERR_HEREDOC_TERM,
+ (int) ident_length,
+ (const char *) ident_start
+ );
+}
+
/******************************************************************************/
/* Scope-related functions */
/******************************************************************************/
@@ -688,7 +578,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
.previous = parser->current_scope,
.locals = { 0 },
.parameters = PM_SCOPE_PARAMETERS_NONE,
- .numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_NONE,
+ .implicit_parameters = { 0 },
.shareable_constant = (closed || parser->current_scope == NULL) ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
.closed = closed
};
@@ -729,42 +619,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
return scope;
}
-static void
-pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag) {
+typedef enum {
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
+} pm_scope_forwarding_param_check_result_t;
+
+static pm_scope_forwarding_param_check_result_t
+pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
pm_scope_t *scope = parser->current_scope;
- while (scope) {
+ bool conflict = false;
+
+ while (scope != NULL) {
if (scope->parameters & mask) {
- if (!scope->closed) {
- pm_parser_err_token(parser, token, diag);
- return;
+ if (scope->closed) {
+ if (conflict) {
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
+ } else {
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
+ }
}
- return;
+
+ conflict = true;
}
+
if (scope->closed) break;
scope = scope->previous;
}
- pm_parser_err_token(parser, token, diag);
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
}
-static inline void
+static void
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
+ // Pass.
+ break;
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
+ break;
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
+ break;
+ }
}
-static inline void
+static void
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
+ // Pass.
+ break;
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
+ break;
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
+ break;
+ }
}
-static inline void
-pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token) {
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+static void
+pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
+ // Pass.
+ break;
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
+ // This shouldn't happen, because ... is not allowed in the
+ // declaration of blocks. If we get here, we assume we already have
+ // an error for this.
+ break;
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+ break;
+ }
}
-static inline void
+static void
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
+ // Pass.
+ break;
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
+ break;
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
+ break;
+ }
}
/**
@@ -1108,6 +1053,31 @@ pm_check_value_expression(pm_node_t *node) {
return NULL;
case PM_BEGIN_NODE: {
pm_begin_node_t *cast = (pm_begin_node_t *) node;
+
+ if (cast->statements == NULL && cast->ensure_clause != NULL) {
+ node = (pm_node_t *) cast->ensure_clause;
+ }
+ else {
+ if (cast->rescue_clause != NULL) {
+ if (cast->rescue_clause->statements == NULL) {
+ return NULL;
+ }
+ else if (cast->else_clause != NULL) {
+ node = (pm_node_t *) cast->else_clause;
+ }
+ else {
+ node = (pm_node_t *) cast->statements;
+ }
+ }
+ else {
+ node = (pm_node_t *) cast->statements;
+ }
+ }
+
+ break;
+ }
+ case PM_ENSURE_NODE: {
+ pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
node = (pm_node_t *) cast->statements;
break;
}
@@ -1405,7 +1375,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
static inline void
pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
if (pm_conditional_predicate_warn_write_literal_p(node)) {
- pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3_0 : PM_WARN_EQUAL_IN_CONDITIONAL);
+ pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
}
}
@@ -1555,7 +1525,7 @@ not_provided(pm_parser_t *parser) {
return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
}
-#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
+#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
@@ -1683,7 +1653,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
* it's important that it be as fast as possible.
*/
static inline size_t
-char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
+char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
if (parser->encoding_changed) {
size_t width;
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
@@ -2752,8 +2722,7 @@ static pm_call_node_t *
pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
- node->base.location.start = parser->start;
- node->base.location.end = parser->start;
+ node->base.location = PM_LOCATION_NULL_VALUE(parser);
node->arguments = arguments;
node->name = name;
@@ -2924,6 +2893,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
}
/**
+ * Validate that index expressions do not have keywords or blocks if we are
+ * parsing as Ruby 3.4+.
+ */
+static void
+pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
+ if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
+ pm_node_t *node;
+ PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
+ if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
+ pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
+ break;
+ }
+ }
+ }
+
+ if (block != NULL) {
+ pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
+ }
+ }
+}
+
+/**
* Allocate and initialize a new IndexAndWriteNode node.
*/
static pm_index_and_write_node_t *
@@ -2931,6 +2923,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
+ pm_index_arguments_check(parser, target->arguments, target->block);
+
*node = (pm_index_and_write_node_t) {
{
.type = PM_INDEX_AND_WRITE_NODE,
@@ -2980,8 +2974,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
.message_loc = target->message_loc,
.read_name = 0,
.write_name = target->name,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
};
@@ -3002,6 +2996,8 @@ static pm_index_operator_write_node_t *
pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
+ pm_index_arguments_check(parser, target->arguments, target->block);
+
*node = (pm_index_operator_write_node_t) {
{
.type = PM_INDEX_OPERATOR_WRITE_NODE,
@@ -3017,8 +3013,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
.arguments = target->arguments,
.closing_loc = target->closing_loc,
.block = target->block,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
};
@@ -3075,6 +3071,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
+ pm_index_arguments_check(parser, target->arguments, target->block);
+
*node = (pm_index_or_write_node_t) {
{
.type = PM_INDEX_OR_WRITE_NODE,
@@ -3139,6 +3137,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
pm_node_flags_t flags = target->base.flags;
+ pm_index_arguments_check(parser, target->arguments, target->block);
+
*node = (pm_index_target_node_t) {
{
.type = PM_INDEX_TARGET_NODE,
@@ -3358,9 +3358,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
},
.name = target->name,
.name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -3474,9 +3474,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
}
},
.target = target,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -3510,22 +3510,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
* Allocate and initialize a new ConstantPathNode node.
*/
static pm_constant_path_node_t *
-pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
+pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
pm_assert_value_expression(parser, parent);
-
pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
+ pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
+ if (name_token->type == PM_TOKEN_CONSTANT) {
+ name = pm_parser_constant_id_token(parser, name_token);
+ }
+
*node = (pm_constant_path_node_t) {
{
.type = PM_CONSTANT_PATH_NODE,
.location = {
.start = parent == NULL ? delimiter->start : parent->location.start,
- .end = child->location.end
+ .end = name_token->end
},
},
.parent = parent,
- .child = child,
- .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
+ .name = name,
+ .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
};
return node;
@@ -3596,9 +3601,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
},
.name = target->name,
.name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -4180,7 +4185,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
}
/**
- * Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token.
+ * Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
*/
static pm_rational_node_t *
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
@@ -4190,16 +4195,44 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
*node = (pm_rational_node_t) {
{
.type = PM_RATIONAL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
+ .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
.location = PM_LOCATION_TOKEN_VALUE(token)
},
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
- .type = PM_TOKEN_FLOAT,
- .start = token->start,
- .end = token->end - 1
- }))
+ .numerator = { 0 },
+ .denominator = { 0 }
};
+ const uint8_t *start = token->start;
+ const uint8_t *end = token->end - 1; // r
+
+ while (start < end && *start == '0') start++; // 0.1 -> .1
+ while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
+
+ size_t length = (size_t) (end - start);
+ if (length == 1) {
+ node->denominator.value = 1;
+ return node;
+ }
+
+ const uint8_t *point = memchr(start, '.', length);
+ assert(point && "should have a decimal point");
+
+ uint8_t *digits = malloc(length);
+ if (digits == NULL) {
+ fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
+ abort();
+ }
+
+ memcpy(digits, start, (unsigned long) (point - start));
+ memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
+ pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
+
+ digits[0] = '1';
+ if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
+ pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
+ free(digits);
+
+ pm_integers_reduce(&node->numerator, &node->denominator);
return node;
}
@@ -4449,9 +4482,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
},
.name = pm_global_variable_write_name(parser, target),
.name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -4510,7 +4543,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
*node = (pm_global_variable_read_node_t) {
{
.type = PM_GLOBAL_VARIABLE_READ_NODE,
- .location = { .start = parser->start, .end = parser->start }
+ .location = PM_LOCATION_NULL_VALUE(parser)
},
.name = name
};
@@ -4552,11 +4585,11 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
*node = (pm_global_variable_write_node_t) {
{
.type = PM_GLOBAL_VARIABLE_WRITE_NODE,
- .location = { .start = parser->start, .end = parser->start }
+ .location = PM_LOCATION_NULL_VALUE(parser)
},
.name = name,
- .name_loc = { .start = parser->start, .end = parser->start },
- .operator_loc = { .start = parser->start, .end = parser->start },
+ .name_loc = PM_LOCATION_NULL_VALUE(parser),
+ .operator_loc = PM_LOCATION_NULL_VALUE(parser),
.value = value
};
@@ -4833,7 +4866,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons
}
/**
- * Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL
+ * Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
* token.
*/
static pm_rational_node_t *
@@ -4844,16 +4877,24 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
*node = (pm_rational_node_t) {
{
.type = PM_RATIONAL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
+ .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
.location = PM_LOCATION_TOKEN_VALUE(token)
},
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
- .type = PM_TOKEN_INTEGER,
- .start = token->start,
- .end = token->end - 1
- }))
+ .numerator = { 0 },
+ .denominator = { .value = 1, 0 }
};
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
+ switch (base) {
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+ default: assert(false && "unreachable"); break;
+ }
+
+ pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
+
return node;
}
@@ -4957,9 +4998,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
},
.name = target->name,
.name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -5034,6 +5075,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
}
/**
+ * Append a part into a list of string parts. Importantly this handles nested
+ * interpolated strings by not necessarily removing the marker for static
+ * literals.
+ */
+static void
+pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
+ switch (PM_NODE_TYPE(part)) {
+ case PM_STRING_NODE:
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
+ break;
+ case PM_EMBEDDED_STATEMENTS_NODE: {
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
+
+ if (embedded == NULL) {
+ // If there are no statements or more than one statement, then
+ // we lose the static literal flag.
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
+ // If the embedded statement is a string, then we can keep the
+ // static literal flag and mark the string as frozen.
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
+ // If the embedded statement is an interpolated string and it's
+ // a static literal, then we can keep the static literal flag.
+ } else {
+ // Otherwise we lose the static literal flag.
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
+ }
+
+ break;
+ }
+ case PM_EMBEDDED_VARIABLE_NODE:
+ pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
+ break;
+ default:
+ assert(false && "unexpected node type");
+ break;
+ }
+
+ pm_node_list_append(parts, part);
+}
+
+/**
* Allocate a new InterpolatedRegularExpressionNode node.
*/
static pm_interpolated_regular_expression_node_t *
@@ -5066,54 +5151,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
node->base.location.end = part->location.end;
}
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
- }
-
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
- }
-
- pm_node_list_append(&node->parts, part);
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
}
static inline void
pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
node->base.location.end = closing->end;
- pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
+ pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
}
/**
* Append a part to an InterpolatedStringNode node.
+ *
+ * This has some somewhat complicated semantics, because we need to update
+ * multiple flags that have somewhat confusing interactions.
+ *
+ * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
+ * single static literal string that can be pushed onto the stack on its own.
+ * Note that this does not by itself determine whether the string will be
+ * frozen; the instructions in CRuby will be either putobject or putstring,
+ * depending on the combination of `--enable-frozen-string-literal`,
+ * `# frozen_string_literal: true`, and whether or not there is interpolation.
+ *
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
+ * explicitly frozen. This will only happen if the string is comprised entirely
+ * of string parts that are themselves static literals and frozen.
+ *
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
+ * be explicitly marked as mutable. This will happen from
+ * `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
+ * is necessary to indicate that the string should be left up to the runtime,
+ * which could potentially use a chilled string otherwise.
*/
static inline void
-pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
+pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
+#define CLEAR_FLAGS(node) \
+ node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
+
+#define MUTABLE_FLAGS(node) \
+ node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
+
if (node->parts.size == 0 && node->opening_loc.start == NULL) {
node->base.location.start = part->location.start;
}
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
- }
+ node->base.location.end = MAX(node->base.location.end, part->location.end);
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
+ switch (PM_NODE_TYPE(part)) {
+ case PM_STRING_NODE:
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
+ break;
+ case PM_INTERPOLATED_STRING_NODE:
+ if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
+ // If the string that we're concatenating is a static literal,
+ // then we can keep the static literal flag for this string.
+ } else {
+ // Otherwise, we lose the static literal flag here and we should
+ // also clear the mutability flags.
+ CLEAR_FLAGS(node);
+ }
+ break;
+ case PM_EMBEDDED_STATEMENTS_NODE: {
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
+
+ if (embedded == NULL) {
+ // If we're embedding multiple statements or no statements, then
+ // the string is no longer a static literal.
+ CLEAR_FLAGS(node);
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
+ // If the embedded statement is a string, then we can mark that
+ // string as frozen and static literal, and not touch the static
+ // literal status of this string.
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
+
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
+ MUTABLE_FLAGS(node);
+ }
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
+ // If the embedded statement is an interpolated string, but that
+ // string is marked as static literal, then we can keep our
+ // static literal status for this string.
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
+ MUTABLE_FLAGS(node);
+ }
+ } else {
+ // In all other cases, we lose the static literal flag here and
+ // also clear the explicit frozen/mutable flags.
+ CLEAR_FLAGS(node);
+ }
+
+ break;
+ }
+ case PM_EMBEDDED_VARIABLE_NODE:
+ // Embedded variables clear static literal, which means we also
+ // should clear the mutability flags.
+ CLEAR_FLAGS(node);
+ break;
+ default:
+ assert(false && "unexpected node type");
+ break;
}
pm_node_list_append(&node->parts, part);
- node->base.location.end = MAX(node->base.location.end, part->location.end);
- if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
- switch (parser->frozen_string_literal) {
- case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
- break;
- case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
- break;
- }
- }
+#undef CLEAR_FLAGS
+#undef MUTABLE_FLAGS
}
/**
@@ -5122,11 +5266,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
static pm_interpolated_string_node_t *
pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
+ pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
+
+ switch (parser->frozen_string_literal) {
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
+ break;
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
+ break;
+ }
*node = (pm_interpolated_string_node_t) {
{
.type = PM_INTERPOLATED_STRING_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
+ .flags = flags,
.location = {
.start = opening->start,
.end = closing->end,
@@ -5140,7 +5294,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
if (parts != NULL) {
pm_node_t *part;
PM_NODE_LIST_FOREACH(parts, index, part) {
- pm_interpolated_string_node_append(parser, node, part);
+ pm_interpolated_string_node_append(node, part);
}
}
@@ -5162,15 +5316,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
node->base.location.start = part->location.start;
}
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
- }
-
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
- }
-
- pm_node_list_append(&node->parts, part);
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
node->base.location.end = MAX(node->base.location.end, part->location.end);
}
@@ -5236,11 +5382,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
static inline void
pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
- }
-
- pm_node_list_append(&node->parts, part);
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
node->base.location.end = part->location.end;
}
@@ -5251,6 +5393,23 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
}
/**
+ * Create a local variable read that is reading the implicit 'it' variable.
+ */
+static pm_it_local_variable_read_node_t *
+pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
+ pm_it_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_it_local_variable_read_node_t);
+
+ *node = (pm_it_local_variable_read_node_t) {
+ {
+ .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
+ .location = PM_LOCATION_TOKEN_VALUE(name)
+ }
+ };
+
+ return node;
+}
+
+/**
* Allocate and initialize a new ItParametersNode node.
*/
static pm_it_parameters_node_t *
@@ -5452,10 +5611,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
}
},
.name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
.name = name,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
.depth = depth
};
@@ -5563,28 +5722,6 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
}
/**
- * Returns true if the given node is `it` default parameter.
- */
-static inline bool
-pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
- // Check if it's a local variable reference
- if (node->type != PM_CALL_NODE) {
- return false;
- }
-
- // Check if it's a variable call
- pm_call_node_t *call_node = (pm_call_node_t *) node;
- if (!PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- return false;
- }
-
- // Check if it's called `it`
- pm_constant_id_t id = ((pm_call_node_t *)node)->name;
- pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
- return pm_token_is_it(constant->start, constant->start + constant->length);
-}
-
-/**
* Returns true if the given bounds comprise a numbered parameter (i.e., they
* are of the form /^_\d$/).
*/
@@ -6734,7 +6871,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
case PM_REDO_NODE:
case PM_RETRY_NODE:
case PM_RETURN_NODE:
- pm_parser_warn_node(parser, previous, PM_WARN_UNREACHABLE_STATEMENT);
+ pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
break;
default:
break;
@@ -6841,7 +6978,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
}
/**
- * Read through the contents of a string and check if it consists solely of US ASCII code points.
+ * Read through the contents of a string and check if it consists solely of
+ * US-ASCII code points.
*/
static bool
pm_ascii_only_p(const pm_string_t *contents) {
@@ -6856,26 +6994,71 @@ pm_ascii_only_p(const pm_string_t *contents) {
}
/**
+ * Validate that the contents of the given symbol are all valid UTF-8.
+ */
+static void
+parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
+ size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
+
+ if (width == 0) {
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+ break;
+ }
+
+ cursor += width;
+ }
+}
+
+/**
+ * Validate that the contents of the given symbol are all valid in the encoding
+ * of the parser.
+ */
+static void
+parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
+ const pm_encoding_t *encoding = parser->encoding;
+
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
+ size_t width = encoding->char_width(cursor, end - cursor);
+
+ if (width == 0) {
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+ break;
+ }
+
+ cursor += width;
+ }
+}
+
+/**
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
* points. Otherwise, the encoding may be explicitly set with an escape
* sequence.
+ *
+ * If the validate flag is set, then it will check the contents of the symbol
+ * to ensure that all characters are valid in the encoding.
*/
static inline pm_node_flags_t
-parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
+parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
if (parser->explicit_encoding != NULL) {
// A Symbol may optionally have its encoding explicitly set. This will
// happen if an escape sequence results in a non-ASCII code point.
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
+ } else if (validate) {
+ parse_symbol_encoding_validate_other(parser, location, contents);
}
} else if (pm_ascii_only_p(contents)) {
// Ruby stipulates that all source files must use an ASCII-compatible
// encoding. Thus, all symbols appearing in source are eligible for
// "downgrading" to US-ASCII.
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
+ } else if (validate) {
+ parse_symbol_encoding_validate_other(parser, location, contents);
}
return 0;
@@ -7043,7 +7226,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
*/
static pm_symbol_node_t *
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
parser->current_string = PM_STRING_EMPTY;
return node;
}
@@ -7065,7 +7248,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
assert((label.end - label.start) >= 0);
pm_string_shared_init(&node->unescaped, label.start, label.end);
- pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
break;
}
@@ -7097,9 +7280,9 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
{
.type = PM_SYMBOL_NODE,
.flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
- .location = { .start = parser->start, .end = parser->start }
+ .location = PM_LOCATION_NULL_VALUE(parser)
},
- .value_loc = { .start = parser->start, .end = parser->start },
+ .value_loc = PM_LOCATION_NULL_VALUE(parser),
.unescaped = { 0 }
};
@@ -7150,7 +7333,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
.unescaped = node->unescaped
};
- pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
+ pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
+ pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
// We are explicitly _not_ using pm_node_destroy here because we don't want
// to trash the unescaped string. We could instead copy the string if we
@@ -7499,10 +7683,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
*node = (pm_while_node_t) {
{
.type = PM_WHILE_NODE,
- .location = { .start = parser->start, .end = parser->start }
+ .location = PM_LOCATION_NULL_VALUE(parser)
},
- .keyword_loc = { .start = parser->start, .end = parser->start },
- .closing_loc = { .start = parser->start, .end = parser->start },
+ .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
+ .closing_loc = PM_LOCATION_NULL_VALUE(parser),
.predicate = predicate,
.statements = statements
};
@@ -7658,51 +7842,6 @@ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t leng
}
/**
- * Create a local variable read that is reading the implicit 'it' variable.
- */
-static pm_local_variable_read_node_t *
-pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *name) {
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
- return NULL;
- }
-
- if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED) {
- pm_parser_err_token(parser, name, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
- return NULL;
- }
-
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IT;
-
- pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
- pm_parser_local_add(parser, name_id, name->start, name->end, 0);
-
- return pm_local_variable_read_node_create_constant_id(parser, name, name_id, 0, false);
-}
-
-/**
- * Convert a `it` variable call node to a node for `it` default parameter.
- */
-static pm_node_t *
-pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
- if (
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
- !parser->current_scope->closed &&
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
- pm_node_is_it(parser, node)
- ) {
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
-
- if (read != NULL) {
- pm_node_destroy(parser, node);
- node = (pm_node_t *) read;
- }
- }
-
- return node;
-}
-
-/**
* Add a parameter name to the current scope and check whether the name of the
* parameter is unique or not.
*
@@ -7737,6 +7876,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
pm_scope_t *scope = parser->current_scope;
parser->current_scope = scope->previous;
pm_locals_free(&scope->locals);
+ pm_node_list_free(&scope->implicit_parameters);
xfree(scope);
}
@@ -7808,7 +7948,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
* is beyond the end of the source then return '\0'.
*/
static inline uint8_t
-peek_at(pm_parser_t *parser, const uint8_t *cursor) {
+peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
if (cursor < parser->end) {
return *cursor;
} else {
@@ -7831,7 +7971,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
* that position is beyond the end of the source then return '\0'.
*/
static inline uint8_t
-peek(pm_parser_t *parser) {
+peek(const pm_parser_t *parser) {
return peek_at(parser, parser->current.end);
}
@@ -7897,6 +8037,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
}
/**
+ * This is equivalent to the predicate of warn_balanced in CRuby.
+ */
+static inline bool
+ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
+ return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
+}
+
+/**
* Here we're going to check if this is a "magic" comment, and perform whatever
* actions are necessary for it here.
*/
@@ -8113,7 +8261,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// We only want to attempt to compare against encoding comments if it's
// the first line in the file (or the second in the case of a shebang).
- if (parser->current.start == parser->encoding_comment_start) {
+ if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
if (
(key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
(key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
@@ -8135,7 +8283,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// If we have hit a ractor pragma, attempt to lex that.
uint32_t value_length = (uint32_t) (value_end - value_start);
if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
- if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
+ const uint8_t *cursor = parser->current.start;
+ while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
+
+ if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
+ } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
} else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
@@ -8209,6 +8362,8 @@ context_terminator(pm_context_t context, pm_token_t *token) {
case PM_CONTEXT_MODULE_ENSURE:
case PM_CONTEXT_SCLASS_ENSURE:
return token->type == PM_TOKEN_KEYWORD_END;
+ case PM_CONTEXT_LOOP_PREDICATE:
+ return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
case PM_CONTEXT_FOR_INDEX:
return token->type == PM_TOKEN_KEYWORD_IN;
case PM_CONTEXT_CASE_WHEN:
@@ -8381,6 +8536,7 @@ context_human(pm_context_t context) {
case PM_CONTEXT_IF: return "if statement";
case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
+ case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
case PM_CONTEXT_MAIN: return "top level context";
case PM_CONTEXT_MODULE: return "module definition";
case PM_CONTEXT_PARENS: return "parentheses";
@@ -8410,10 +8566,11 @@ context_human(pm_context_t context) {
/* Specific token lexers */
/******************************************************************************/
-static void
-pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
+static inline void
+pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
if (invalid != NULL) {
- pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
+ pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
+ pm_parser_err(parser, invalid, invalid + 1, diag_id);
}
}
@@ -8421,7 +8578,7 @@ static size_t
pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
const uint8_t *invalid = NULL;
size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
- pm_strspn_number_validate(parser, invalid);
+ pm_strspn_number_validate(parser, string, length, invalid);
return length;
}
@@ -8429,7 +8586,7 @@ static size_t
pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
const uint8_t *invalid = NULL;
size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
- pm_strspn_number_validate(parser, invalid);
+ pm_strspn_number_validate(parser, string, length, invalid);
return length;
}
@@ -8437,7 +8594,7 @@ static size_t
pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
const uint8_t *invalid = NULL;
size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
- pm_strspn_number_validate(parser, invalid);
+ pm_strspn_number_validate(parser, string, length, invalid);
return length;
}
@@ -8445,7 +8602,7 @@ static size_t
pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
const uint8_t *invalid = NULL;
size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
- pm_strspn_number_validate(parser, invalid);
+ pm_strspn_number_validate(parser, string, length, invalid);
return length;
}
@@ -8591,6 +8748,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
type = lex_optional_float_suffix(parser, seen_e);
}
+ // At this point we have a completed number, but we want to provide the user
+ // with a good experience if they put an additional .xxx fractional
+ // component on the end, so we'll check for that here.
+ if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
+ const uint8_t *fraction_start = parser->current.end;
+ const uint8_t *fraction_end = parser->current.end + 2;
+ fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
+ pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
+ }
+
return type;
}
@@ -8683,7 +8850,7 @@ lex_global_variable(pm_parser_t *parser) {
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
// $0 isn't allowed to be followed by anything.
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
}
@@ -8719,9 +8886,9 @@ lex_global_variable(pm_parser_t *parser) {
} else {
// If we get here, then we have a $ followed by something that
// isn't recognized as a global variable.
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
- size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+ const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
}
return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9092,12 +9259,20 @@ escape_hexadecimal_digit(const uint8_t value) {
* validated.
*/
static inline uint32_t
-escape_unicode(const uint8_t *string, size_t length) {
+escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
uint32_t value = 0;
for (size_t index = 0; index < length; index++) {
if (index != 0) value <<= 4;
value |= escape_hexadecimal_digit(string[index]);
}
+
+ // Here we're going to verify that the value is actually a valid Unicode
+ // codepoint and not a UTF-16 surrogate (U+D800 through U+DFFF).
+ if (value >= 0xD800 && value <= 0xDFFF) {
+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
+ return 0xFFFD;
+ }
+
return value;
}
@@ -9106,7 +9281,7 @@ escape_unicode(const uint8_t *string, size_t length) {
*/
static inline uint8_t
escape_byte(uint8_t value, const uint8_t flags) {
- if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
+ if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
return value;
}
@@ -9206,22 +9381,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
static inline void
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(regular_expression_buffer, (const uint8_t *) "\\x", 2);
-
- uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
- uint8_t byte2 = (uint8_t) (byte & 0xF);
-
- if (byte1 >= 0xA) {
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
- } else {
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
- }
-
- if (byte2 >= 0xA) {
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
- } else {
- pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
- }
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
}
escape_write_byte_encoded(parser, buffer, byte);
@@ -9256,57 +9416,57 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
switch (peek(parser)) {
case '\\': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
return;
}
case '\'': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
return;
}
case 'a': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
return;
}
case 'b': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
return;
}
case 'e': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
return;
}
case 'f': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
return;
}
case 'n': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
return;
}
case 'r': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
return;
}
case 's': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
return;
}
case 't': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
return;
}
case 'v': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
return;
}
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -9323,7 +9483,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
}
}
- escape_write_byte_encoded(parser, buffer, value);
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
return;
}
case 'x': {
@@ -9342,8 +9502,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
parser->current.end++;
}
+ value = escape_byte(value, flags);
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
+ } else {
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+ }
}
escape_write_byte_encoded(parser, buffer, value);
@@ -9357,22 +9522,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
const uint8_t *start = parser->current.end - 1;
parser->current.end++;
- if (
- (parser->current.end + 4 <= parser->end) &&
- pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
- pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
- pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
- pm_char_is_hexadecimal_digit(parser->current.end[3])
- ) {
- uint32_t value = escape_unicode(parser->current.end, 4);
-
- if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
- }
- escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
-
- parser->current.end += 4;
- } else if (peek(parser) == '{') {
+ if (peek(parser) == '{') {
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
parser->current.end++;
@@ -9390,7 +9540,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
} else if (hexadecimal_length == 0) {
// there are no hexadecimal characters
- pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
return;
}
@@ -9400,7 +9551,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
extra_codepoints_start = unicode_start;
}
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9422,7 +9573,21 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
}
} else {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
+ size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
+
+ if (length == 4) {
+ uint32_t value = escape_unicode(parser, parser->current.end, 4);
+
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
+ }
+
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
+ parser->current.end += 4;
+ } else {
+ parser->current.end += length;
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
+ }
}
return;
@@ -9447,6 +9612,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
}
parser->current.end++;
+
+ if (match(parser, 'u') || match(parser, 'U')) {
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ return;
+ }
+
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
return;
case ' ':
@@ -9474,7 +9645,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
case 'C': {
parser->current.end++;
if (peek(parser) != '-') {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}
@@ -9497,6 +9669,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
}
parser->current.end++;
+
+ if (match(parser, 'u') || match(parser, 'U')) {
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ return;
+ }
+
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
return;
case ' ':
@@ -9511,7 +9689,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
default: {
if (!char_is_ascii_printable(peeked)) {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}
@@ -9524,7 +9703,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
case 'M': {
parser->current.end++;
if (peek(parser) != '-') {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
return;
}
@@ -9542,6 +9722,12 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
}
parser->current.end++;
+
+ if (match(parser, 'u') || match(parser, 'U')) {
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ return;
+ }
+
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
return;
case ' ':
@@ -9556,7 +9742,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
default:
if (!char_is_ascii_printable(peeked)) {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
return;
}
@@ -9676,8 +9863,8 @@ lex_at_variable(pm_parser_t *parser) {
}
} else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0) {
- diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3_0 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3_0;
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
+ diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
}
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
@@ -10251,7 +10438,9 @@ parser_lex(pm_parser_t *parser) {
// pass and we're at the start of the file, then we need
// to do another pass to potentially find other patterns
// for encoding comments.
- if (length >= 10) parser_lex_magic_comment_encoding(parser);
+ if (length >= 10 && !parser->encoding_locked) {
+ parser_lex_magic_comment_encoding(parser);
+ }
}
lexed_comment = true;
@@ -10517,6 +10706,8 @@ parser_lex(pm_parser_t *parser) {
type = PM_TOKEN_USTAR_STAR;
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_USTAR_STAR;
+ } else if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -10540,6 +10731,8 @@ parser_lex(pm_parser_t *parser) {
type = PM_TOKEN_USTAR;
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_USTAR;
+ } else if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -10656,13 +10849,17 @@ parser_lex(pm_parser_t *parser) {
// If we have quotes, then we're going to go until we find the
// end quote.
while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
+ if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
parser->current.end++;
}
}
size_t ident_length = (size_t) (parser->current.end - ident_start);
+ bool ident_error = false;
+
if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
- // TODO: handle unterminated heredoc
+ pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
+ ident_error = true;
}
parser->explicit_encoding = NULL;
@@ -10687,7 +10884,7 @@ parser_lex(pm_parser_t *parser) {
// this is not a valid heredoc declaration. In this case we
// will add an error, but we will still return a heredoc
// start.
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
+ if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
body_start = parser->end;
} else {
// Otherwise, we want to indicate that the body of the
@@ -10710,6 +10907,10 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_LESS_LESS_EQUAL);
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
+ }
+
if (lex_state_operator_p(parser)) {
lex_state_set(parser, PM_LEX_STATE_ARG);
} else {
@@ -10823,6 +11024,8 @@ parser_lex(pm_parser_t *parser) {
type = PM_TOKEN_UAMPERSAND;
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_UAMPERSAND;
+ } else if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -10897,6 +11100,10 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_UPLUS);
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
+ }
+
lex_state_set(parser, PM_LEX_STATE_BEG);
LEX(PM_TOKEN_PLUS);
}
@@ -10934,6 +11141,10 @@ parser_lex(pm_parser_t *parser) {
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
+ }
+
lex_state_set(parser, PM_LEX_STATE_BEG);
LEX(PM_TOKEN_MINUS);
}
@@ -11032,6 +11243,10 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_REGEXP_BEGIN);
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
+ }
+
if (lex_state_operator_p(parser)) {
lex_state_set(parser, PM_LEX_STATE_ARG);
} else {
@@ -11067,7 +11282,7 @@ parser_lex(pm_parser_t *parser) {
// operator because we don't want to move into the string
// lex mode unnecessarily.
if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
- pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
+ pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
LEX(PM_TOKEN_PERCENT);
}
@@ -11086,10 +11301,7 @@ parser_lex(pm_parser_t *parser) {
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
-
- if (parser->current.end < parser->end) {
- LEX(PM_TOKEN_STRING_BEGIN);
- }
+ LEX(PM_TOKEN_STRING_BEGIN);
}
// Delimiters for %-literals cannot be alphanumeric. We
@@ -11216,6 +11428,10 @@ parser_lex(pm_parser_t *parser) {
}
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
+ }
+
lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
LEX(PM_TOKEN_PERCENT);
}
@@ -12014,7 +12230,7 @@ parser_lex(pm_parser_t *parser) {
// terminator) but still continue parsing so that content after the
// declaration of the heredoc can be parsed.
if (parser->current.end >= parser->end) {
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
+ pm_parser_err_heredoc_term(parser, lex_mode);
parser->next_start = lex_mode->as.heredoc.next_start;
parser->heredoc_end = parser->current.end;
lex_state_set(parser, PM_LEX_STATE_END);
@@ -12026,9 +12242,10 @@ parser_lex(pm_parser_t *parser) {
// If we are immediately following a newline and we have hit the
// terminator, then we need to return the ending of the heredoc.
- if (!line_continuation && current_token_starts_line(parser)) {
+ if (current_token_starts_line(parser)) {
const uint8_t *start = parser->current.start;
- if (start + ident_length <= parser->end) {
+
+ if (!line_continuation && (start + ident_length <= parser->end)) {
const uint8_t *newline = next_newline(start, parser->end - start);
const uint8_t *ident_end = newline;
const uint8_t *terminator_end = newline;
@@ -12184,11 +12401,8 @@ parser_lex(pm_parser_t *parser) {
}
parser->current.end = breakpoint + 1;
-
- if (!was_line_continuation) {
- pm_token_buffer_flush(parser, &token_buffer);
- LEX(PM_TOKEN_STRING_CONTENT);
- }
+ pm_token_buffer_flush(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
}
// Otherwise we hit a newline and it wasn't followed by
@@ -12653,6 +12867,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
parser->previous.type = PM_TOKEN_MISSING;
}
+/**
+ * Consume the terminator of a heredoc and leave the heredoc lex mode.
+ *
+ * When the current token is PM_TOKEN_HEREDOC_END, the lex mode is popped and
+ * the next token is lexed. Otherwise a heredoc terminator error is reported
+ * for the given lex mode, the lex mode is popped, and the previous token is
+ * rewritten as an empty PM_TOKEN_MISSING token.
+ */
+static void
+expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
+    if (!match1(parser, PM_TOKEN_HEREDOC_END)) {
+        // The error helper receives lex_mode before the mode is popped; keep
+        // this ordering.
+        pm_parser_err_heredoc_term(parser, lex_mode);
+        lex_mode_pop(parser);
+
+        // Collapse the previous token to an empty range and mark it missing.
+        parser->previous.start = parser->previous.end;
+        parser->previous.type = PM_TOKEN_MISSING;
+        return;
+    }
+
+    // The terminator is present: leave the heredoc mode and move on.
+    lex_mode_pop(parser);
+    parser_lex(parser);
+}
+
static pm_node_t *
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
@@ -12781,24 +13012,100 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
}
/**
+ * Certain expressions are not targetable, but in order to provide a better
+ * experience we give a specific error message. In order to maintain as much
+ * information in the tree as possible, we replace them with local variable
+ * writes.
+ */
+static pm_node_t *
+parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
+    // Report the diagnostic that matches the kind of node being assigned to.
+    // Any other node type is converted without an additional error.
+    switch (PM_NODE_TYPE(target)) {
+        case PM_SOURCE_ENCODING_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING);
+            break;
+        case PM_FALSE_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE);
+            break;
+        case PM_SOURCE_FILE_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE);
+            break;
+        case PM_SOURCE_LINE_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE);
+            break;
+        case PM_NIL_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL);
+            break;
+        case PM_SELF_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF);
+            break;
+        case PM_TRUE_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE);
+            break;
+        default:
+            break;
+    }
+
+    // Build the replacement from the target's source range before the
+    // original node is destroyed.
+    pm_location_t *location = &target->location;
+    pm_constant_id_t name = pm_parser_constant_id_location(parser, location->start, location->end);
+    pm_node_t *replacement = (pm_node_t *) pm_local_variable_target_node_create(parser, location, name, 0);
+
+    pm_node_destroy(parser, target);
+    return replacement;
+}
+
+/**
+ * When an implicit local variable is written to or targeted, it becomes a
+ * regular, named local variable. This function removes it from the list of
+ * implicit parameters when that happens.
+ *
+ * Only the first entry that is pointer-equal to the given node is removed. If
+ * the node is not in the list, the list is left unchanged.
+ *
+ * @param parser The parser whose current scope holds the list.
+ * @param node The node to remove from the list of implicit parameters.
+ */
+static void
+parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
+    pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
+
+    for (size_t index = 0; index < implicit_parameters->size; index++) {
+        if (implicit_parameters->nodes[index] == node) {
+            // If the node is not the last one in the list, we need to shift the
+            // remaining nodes down to fill the gap. This is extremely unlikely
+            // to happen.
+            //
+            // The source and destination ranges overlap as soon as two or
+            // more nodes follow the removed one, so this must be memmove:
+            // memcpy on overlapping ranges is undefined behavior.
+            if (index != implicit_parameters->size - 1) {
+                memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
+            }
+
+            implicit_parameters->size--;
+            break;
+        }
+    }
+}
+
+/**
* Convert the given node into a valid target node.
+ *
+ * @param multiple Whether or not this target is part of a larger set of
+ * targets. If it is, then the &. operator is not allowed.
+ * @param splat_parent Whether or not this target is a child of a splat
+ * target. If it is, then fewer patterns are allowed.
*/
static pm_node_t *
-parse_target(pm_parser_t *parser, pm_node_t *target) {
+parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
switch (PM_NODE_TYPE(target)) {
case PM_MISSING_NODE:
return target;
+ case PM_SOURCE_ENCODING_NODE:
+ case PM_FALSE_NODE:
+ case PM_SOURCE_FILE_NODE:
+ case PM_SOURCE_LINE_NODE:
+ case PM_NIL_NODE:
+ case PM_SELF_NODE:
+ case PM_TRUE_NODE: {
+ // In these special cases, we have specific error messages and we
+ // will replace them with local variable writes.
+ return parse_unwriteable_target(parser, target);
+ }
case PM_CLASS_VARIABLE_READ_NODE:
assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
target->type = PM_CLASS_VARIABLE_TARGET_NODE;
return target;
case PM_CONSTANT_PATH_NODE:
+ if (context_def_p(parser)) {
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
+ }
+
assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
target->type = PM_CONSTANT_PATH_TARGET_NODE;
+
return target;
case PM_CONSTANT_READ_NODE:
+ if (context_def_p(parser)) {
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
+ }
+
assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
target->type = PM_CONSTANT_TARGET_NODE;
+
return target;
case PM_BACK_REFERENCE_READ_NODE:
case PM_NUMBERED_REFERENCE_READ_NODE:
@@ -12809,7 +13116,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
return target;
case PM_LOCAL_VARIABLE_READ_NODE: {
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
+ parse_target_implicit_parameter(parser, target);
+ }
const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
uint32_t name = cast->name;
@@ -12821,17 +13131,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
return target;
}
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+ pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
+
+ parse_target_implicit_parameter(parser, target);
+ pm_node_destroy(parser, target);
+
+ return node;
+ }
case PM_INSTANCE_VARIABLE_READ_NODE:
assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
return target;
case PM_MULTI_TARGET_NODE:
+ if (splat_parent) {
+ // Multi target is not accepted in all positions. If this is one
+ // of them, then we need to add an error.
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ }
+
return target;
case PM_SPLAT_NODE: {
pm_splat_node_t *splat = (pm_splat_node_t *) target;
if (splat->expression != NULL) {
- splat->expression = parse_target(parser, splat->expression);
+ splat->expression = parse_target(parser, splat->expression, multiple, true);
}
return (pm_node_t *) splat;
@@ -12869,6 +13194,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
}
if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
+ if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
+ pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
+ }
+
parse_write_name(parser, &call->name);
return (pm_node_t *) pm_call_target_node_create(parser, call);
}
@@ -12896,10 +13225,11 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
* assignment.
*/
static pm_node_t *
-parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
- pm_node_t *result = parse_target(parser, target);
+parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
+ pm_node_t *result = parse_target(parser, target, multiple, false);
- // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
+ // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
+ // parens after the targets.
if (
!match1(parser, PM_TOKEN_EQUAL) &&
!(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
@@ -12942,13 +13272,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
}
case PM_CONSTANT_PATH_NODE: {
pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
+
+ if (context_def_p(parser)) {
+ pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
+ }
+
return parse_shareable_constant_write(parser, node);
}
case PM_CONSTANT_READ_NODE: {
pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
+
if (context_def_p(parser)) {
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
}
+
pm_node_destroy(parser, target);
return parse_shareable_constant_write(parser, node);
}
@@ -12962,18 +13299,34 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
return (pm_node_t *) node;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
pm_constant_id_t name = local_read->name;
+ pm_location_t name_loc = target->location;
+
uint32_t depth = local_read->depth;
- pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
+ pm_scope_t *scope = pm_parser_scope_find(parser, depth);
- pm_location_t name_loc = target->location;
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
+ pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
+ parse_target_implicit_parameter(parser, target);
+ }
+
+ pm_locals_unread(&scope->locals, name);
pm_node_destroy(parser, target);
return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
}
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+ pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
+
+ parse_target_implicit_parameter(parser, target);
+ pm_node_destroy(parser, target);
+
+ return node;
+ }
case PM_INSTANCE_VARIABLE_READ_NODE: {
pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
pm_node_destroy(parser, target);
@@ -13127,7 +13480,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
while (accept1(parser, PM_TOKEN_COMMA)) {
if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13143,7 +13496,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
if (token_begins_expression_p(parser->current.type)) {
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
- name = parse_target(parser, name);
+ name = parse_target(parser, name, true, true);
}
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13151,7 +13504,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
has_rest = true;
} else if (token_begins_expression_p(parser->current.type)) {
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
- target = parse_target(parser, target);
+ target = parse_target(parser, target, true, false);
pm_multi_target_node_targets_append(parser, result, target);
} else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13188,8 +13541,8 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
*/
static pm_statements_node_t *
parse_statements(pm_parser_t *parser, pm_context_t context) {
- // First, skip past any optional terminators that might be at the beginning of
- // the statements.
+ // First, skip past any optional terminators that might be at the beginning
+ // of the statements.
while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
// If we have a terminator, then we can just return NULL.
@@ -13205,20 +13558,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
pm_statements_node_body_append(parser, statements, node);
- // If we're recovering from a syntax error, then we need to stop parsing the
- // statements now.
+ // If we're recovering from a syntax error, then we need to stop parsing
+ // the statements now.
if (parser->recovering) {
- // If this is the level of context where the recovery has happened, then
- // we can mark the parser as done recovering.
+ // If this is the level of context where the recovery has happened,
+ // then we can mark the parser as done recovering.
if (context_terminator(context, &parser->current)) parser->recovering = false;
break;
}
- // If we have a terminator, then we will parse all consecutive terminators
- // and then continue parsing the statements list.
+ // If we have a terminator, then we will parse all consecutive
+ // terminators and then continue parsing the statements list.
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- // If we have a terminator, then we will continue parsing the statements
- // list.
+ // If we have a terminator, then we will continue parsing the
+ // statements list.
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
if (context_terminator(context, &parser->current)) break;
@@ -13226,27 +13579,28 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
continue;
}
- // At this point we have a list of statements that are not terminated by a
- // newline or semicolon. At this point we need to check if we're at the end
- // of the statements list. If we are, then we should break out of the loop.
+ // At this point we have a list of statements that are not terminated by
+ // a newline or semicolon. At this point we need to check if we're at
+ // the end of the statements list. If we are, then we should break out
+ // of the loop.
if (context_terminator(context, &parser->current)) break;
// At this point, we have a syntax error, because the statement was not
// terminated by a newline or semicolon, and we're not at the end of the
- // statements list. Ideally we should scan forward to determine if we should
- // insert a missing terminator or break out of parsing the statements list
- // at this point.
+ // statements list. Ideally we should scan forward to determine if we
+ // should insert a missing terminator or break out of parsing the
+ // statements list at this point.
//
- // We don't have that yet, so instead we'll do a more naive approach. If we
- // were unable to parse an expression, then we will skip past this token and
- // continue parsing the statements list. Otherwise we'll add an error and
- // continue parsing the statements list.
+ // We don't have that yet, so instead we'll do a more naive approach. If
+ // we were unable to parse an expression, then we will skip past this
+ // token and continue parsing the statements list. Otherwise we'll add
+ // an error and continue parsing the statements list.
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
parser_lex(parser);
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
if (context_terminator(context, &parser->current)) break;
- } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+ } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
// This is an inlined version of accept1 because the error that we
// want to add has varargs. If this happens again, we should
// probably extract a helper function.
@@ -13268,7 +13622,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
*/
static void
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
- const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
if (duplicated != NULL) {
pm_buffer_t buffer = { 0 };
@@ -13294,13 +13648,16 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
*/
static void
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
+ pm_node_t *previous;
+
+ if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
pm_diagnostic_list_append_format(
&parser->warning_list,
node->location.start,
node->location.end,
PM_WARN_DUPLICATED_WHEN_CLAUSE,
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
+ pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
+ pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
);
}
}
@@ -13486,9 +13843,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
parse_arguments_append(parser, arguments, argument);
- if (contains_keyword_splat) {
- pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
- }
+
+ pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
pm_static_literals_free(&hash_keys);
parsed_bare_hash = true;
@@ -13566,7 +13924,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
}
+ bool contains_keywords = false;
bool contains_keyword_splat = false;
+
if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
if (parsed_bare_hash) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
@@ -13580,6 +13940,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
}
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
+ contains_keywords = true;
// Create the set of static literals for this hash.
pm_static_literals_t hash_keys = { 0 };
@@ -13608,9 +13969,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
}
parse_arguments_append(parser, arguments, argument);
- if (contains_keyword_splat) {
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
- }
+
+ pm_node_flags_t flags = 0;
+ if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
+
break;
}
}
@@ -13723,7 +14087,6 @@ typedef enum {
PM_PARAMETERS_ORDER_OPTIONAL,
PM_PARAMETERS_ORDER_NAMED,
PM_PARAMETERS_ORDER_NONE,
-
} pm_parameters_order_t;
/**
@@ -13748,31 +14111,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
* Check if current parameter follows valid parameters ordering. If not it adds
* an error to the list without stopping the parsing, otherwise sets the
* parameters state to the one corresponding to the current parameter.
+ *
+ * It returns true if it was successful, and false otherwise.
*/
-static void
+static bool
update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
pm_parameters_order_t state = parameters_ordering[token->type];
- if (state == PM_PARAMETERS_NO_CHANGE) return;
+ if (state == PM_PARAMETERS_NO_CHANGE) return true;
// If we see another ordered argument after an optional argument, we switch to the
// after-optional state and stay in it for as long as we keep seeing ordered arguments.
if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
*current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
- return;
+ return true;
} else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
- return;
+ return true;
}
if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
- }
-
- if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
+ return false;
+ } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
+ pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
+ return false;
+ } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
// We know what transition we failed on, so we can provide a better error here.
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
- } else if (state < *current) {
- *current = state;
+ return false;
}
+
+ if (state < *current) *current = state;
+ return true;
}
/**
@@ -13841,27 +14210,22 @@ parse_parameters(
pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
}
- if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
- update_parameter_state(parser, &parser->current, &order);
- parser_lex(parser);
+ bool succeeded = update_parameter_state(parser, &parser->current, &order);
+ parser_lex(parser);
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
+ pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
- pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
- if (params->keyword_rest != NULL) {
- // If we already have a keyword rest parameter, then we replace it with the
- // forwarding parameter and move the keyword rest parameter to the posts list.
- pm_node_t *keyword_rest = params->keyword_rest;
- pm_parameters_node_posts_append(params, keyword_rest);
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
- params->keyword_rest = NULL;
- }
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
- } else {
- update_parameter_state(parser, &parser->current, &order);
- parser_lex(parser);
+ if (params->keyword_rest != NULL) {
+ // If we already have a keyword rest parameter, then we replace it with the
+ // forwarding parameter and move the keyword rest parameter to the posts list.
+ pm_node_t *keyword_rest = params->keyword_rest;
+ pm_parameters_node_posts_append(params, keyword_rest);
+ if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
+ params->keyword_rest = NULL;
}
+ pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
break;
}
case PM_TOKEN_CLASS_VARIABLE:
@@ -13905,7 +14269,7 @@ parse_parameters(
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
@@ -13918,7 +14282,7 @@ parse_parameters(
// If the value of the parameter increased the number of
// reads of that parameter, then we need to warn that we
// have a circular definition.
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
+ if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
}
@@ -13956,6 +14320,12 @@ parse_parameters(
pm_token_t local = name;
local.end -= 1;
+ if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
+ pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+ } else if (local.end[-1] == '!' || local.end[-1] == '?') {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
+ }
+
bool repeated = pm_parser_parameter_name_check(parser, &local);
pm_parser_local_add_token(parser, &local, 1);
@@ -13991,10 +14361,10 @@ parse_parameters(
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
- uint32_t reads = pm_locals_reads(&parser->current_scope->locals, name_id);
+ uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
- if (pm_locals_reads(&parser->current_scope->locals, name_id) != reads) {
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
}
@@ -14031,6 +14401,7 @@ parse_parameters(
pm_token_t operator = parser->previous;
pm_token_t name;
bool repeated = false;
+
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
name = parser->previous;
repeated = pm_parser_parameter_name_check(parser, &name);
@@ -14044,6 +14415,7 @@ parse_parameters(
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
+
if (params->rest == NULL) {
pm_parameters_node_rest_set(params, param);
} else {
@@ -14055,6 +14427,7 @@ parse_parameters(
}
case PM_TOKEN_STAR_STAR:
case PM_TOKEN_USTAR_STAR: {
+ pm_parameters_order_t previous_order = order;
update_parameter_state(parser, &parser->current, &order);
parser_lex(parser);
@@ -14062,6 +14435,10 @@ parse_parameters(
pm_node_t *param;
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
+ if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
+ }
+
param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
} else {
pm_token_t name;
@@ -14159,7 +14536,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
pm_rescue_node_operator_set(rescue, &parser->previous);
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
- reference = parse_target(parser, reference);
+ reference = parse_target(parser, reference, false, false);
pm_rescue_node_reference_set(rescue, reference);
break;
@@ -14189,7 +14566,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
pm_rescue_node_operator_set(rescue, &parser->previous);
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
- reference = parse_target(parser, reference);
+ reference = parse_target(parser, reference, false, false);
pm_rescue_node_reference_set(rescue, reference);
break;
@@ -14395,37 +14772,107 @@ parse_block_parameters(
}
/**
+ * Return true if any of the visible scopes to the current context are using
+ * numbered parameters.
+ *
+ * The search starts at the scope that encloses the current one and walks
+ * outward through the previous scopes. It stops when it runs out of scopes or
+ * reaches a closed scope (the closed scope itself is not inspected). A scope
+ * counts as using numbered parameters when it has the
+ * PM_SCOPE_PARAMETERS_NUMBERED_FOUND flag set.
+ */
+static bool
+outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
+ if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
+ }
+
+ return false;
+}
+
+/**
+ * These are the names of the various numbered parameters. We have them here so
+ * that when we insert them into the constant pool we can use a constant string
+ * and not have to allocate.
+ *
+ * The name of numbered parameter N (1 through 9) is stored at index N - 1, and
+ * every name is exactly two bytes long.
+ */
+static const char * const pm_numbered_parameter_names[] = {
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
+};
+
+/**
* Return the node that should be used in the parameters field of a block-like
* (block or lambda) node, depending on the kind of parameters that were
* declared in the current scope.
+ *
+ * The result is chosen as follows:
+ *
+ * - If ordinary parameters were given (parameters != NULL), they are returned
+ *   unchanged. If the scope also recorded implicit parameters, an error is
+ *   reported against the first of them.
+ * - If the scope recorded no implicit parameters, NULL is returned.
+ * - Otherwise every recorded implicit parameter is validated. Numbered
+ *   parameters and `it` may not be mixed, and a numbered parameter is an error
+ *   when an enclosing scope already uses numbered parameters or when this
+ *   scope carries the PM_SCOPE_PARAMETERS_NUMBERED_INNER flag.
+ * - If a valid numbered parameter was seen, the enclosing open scopes are
+ *   flagged with PM_SCOPE_PARAMETERS_NUMBERED_INNER and a numbered parameters
+ *   node is returned that spans opening to closing and carries the highest
+ *   number seen.
+ * - If `it` was seen instead, an it parameters node is returned.
+ * - If validation produced only errors, NULL is returned.
*/
static pm_node_t *
parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
- uint8_t masked = parser->current_scope->parameters & PM_SCOPE_PARAMETERS_TYPE_MASK;
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
+
+ // If we have ordinary parameters, then we will return them as the set of
+ // parameters.
+ if (parameters != NULL) {
+ // If we also have implicit parameters, then this is an error.
+ if (implicit_parameters->size > 0) {
+ pm_node_t *node = implicit_parameters->nodes[0];
+
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
+ } else {
+ assert(false && "unreachable");
+ }
+ }
- if (masked == PM_SCOPE_PARAMETERS_NONE) {
- assert(parameters == NULL);
- return NULL;
- } else if (masked == PM_SCOPE_PARAMETERS_ORDINARY) {
- assert(parameters != NULL);
return parameters;
- } else if (masked == PM_SCOPE_PARAMETERS_NUMBERED) {
- assert(parameters == NULL);
+ }
+
+ // If we don't have any implicit parameters, then the set of parameters is
+ // NULL.
+ if (implicit_parameters->size == 0) {
+ return NULL;
+ }
- int8_t maximum = parser->current_scope->numbered_parameters;
- if (maximum > 0) {
- const pm_location_t location = { .start = opening->start, .end = closing->end };
- return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, (uint8_t) maximum);
+ // If we don't have ordinary parameters, then we now must validate our set
+ // of implicit parameters. We can only have numbered parameters or it, but
+ // they cannot be mixed.
+ uint8_t numbered_parameter = 0;
+ bool it_parameter = false;
+
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
+ pm_node_t *node = implicit_parameters->nodes[index];
+
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
+ if (it_parameter) {
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
+ } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
+ } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+ numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
+ } else {
+ assert(false && "unreachable");
+ }
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ if (numbered_parameter > 0) {
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
+ } else {
+ it_parameter = true;
+ }
}
+ }
- return NULL;
- } else if (masked == PM_SCOPE_PARAMETERS_IT) {
- assert(parameters == NULL);
+ if (numbered_parameter > 0) {
+ // Go through the parent scopes and mark them as being disallowed from
+ // using numbered parameters because this inner scope is using them.
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
+ scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
+ }
+
+ const pm_location_t location = { .start = opening->start, .end = closing->end };
+ return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
+ }
+
+ if (it_parameter) {
return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
- } else {
- assert(false && "unreachable");
- return NULL;
}
+
+ return NULL;
}
/**
@@ -14442,9 +14889,6 @@ parse_block(pm_parser_t *parser) {
pm_block_parameters_node_t *block_parameters = NULL;
if (accept1(parser, PM_TOKEN_PIPE)) {
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
-
pm_token_t block_parameters_opening = parser->previous;
if (match1(parser, PM_TOKEN_PIPE)) {
block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
@@ -14513,7 +14957,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
} else {
pm_accepts_block_stack_push(parser, true);
- parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
@@ -14531,7 +14975,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
// If we get here, then the subsequent token cannot be used as an infix
// operator. In this case we assume the subsequent token is part of an
// argument to this method call.
- parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
// If we are done with the arguments and still have not consumed the comma,
// then we have a trailing comma where we need to check whether it is
@@ -14562,11 +15006,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
if (arguments->block == NULL && !arguments->has_forwarding) {
arguments->block = (pm_node_t *) block;
} else {
- if (arguments->has_forwarding) {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
- } else {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
- }
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
+
if (arguments->block != NULL) {
if (arguments->arguments == NULL) {
arguments->arguments = pm_arguments_node_create(parser);
@@ -14604,6 +15045,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node, const char *type) {
case PM_CONTEXT_LAMBDA_ELSE:
case PM_CONTEXT_LAMBDA_ENSURE:
case PM_CONTEXT_LAMBDA_RESCUE:
+ case PM_CONTEXT_LOOP_PREDICATE:
case PM_CONTEXT_POSTEXE:
case PM_CONTEXT_UNTIL:
case PM_CONTEXT_WHILE:
@@ -14945,7 +15387,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
- case PM_NUMBERED_REFERENCE_READ_NODE
+ case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
// Assert here that the flags are the same so that we can safely switch the type
// of the node without having to move the flags.
@@ -15003,6 +15445,10 @@ parse_string_part(pm_parser_t *parser) {
// "aaa #{bbb} #@ccc ddd"
// ^^^^^^
case PM_TOKEN_EMBEXPR_BEGIN: {
+ // Ruby disallows seeing encoding around interpolation in strings,
+ // even though it is known at parse time.
+ parser->explicit_encoding = NULL;
+
pm_lex_state_t state = parser->lex_state;
int brace_nesting = parser->brace_nesting;
@@ -15025,6 +15471,13 @@ parse_string_part(pm_parser_t *parser) {
expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
pm_token_t closing = parser->previous;
+ // If this set of embedded statements only contains a single
+ // statement, then Ruby does not consider it as a possible statement
+ // that could emit a line event.
+ if (statements != NULL && statements->body.size == 1) {
+ pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
+ }
+
return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
}
@@ -15035,6 +15488,10 @@ parse_string_part(pm_parser_t *parser) {
// "aaa #{bbb} #@ccc ddd"
// ^^^^^
case PM_TOKEN_EMBVAR: {
+ // Ruby disallows seeing encoding around interpolation in strings,
+ // even though it is known at parse time.
+ parser->explicit_encoding = NULL;
+
lex_state_set(parser, PM_LEX_STATE_BEG);
parser_lex(parser);
@@ -15158,7 +15615,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
return (pm_node_t *) symbol;
}
@@ -15258,7 +15715,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
}
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
}
/**
@@ -15283,7 +15740,7 @@ parse_undef_argument(pm_parser_t *parser) {
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
return (pm_node_t *) symbol;
}
@@ -15324,7 +15781,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
return (pm_node_t *) symbol;
}
@@ -15350,74 +15807,43 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
}
/**
- * Return true if any of the visible scopes to the current context are using
- * numbered parameters.
- */
-static bool
-outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
- for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
- if (scope->numbered_parameters > 0) return true;
- }
-
- return false;
-}
-
-/**
- * These are the names of the various numbered parameters. We have them here so
- * that when we insert them into the constant pool we can use a constant string
- * and not have to allocate.
- */
-static const char * const pm_numbered_parameter_names[] = {
- "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
-};
-
-/**
* Parse an identifier into a local variable read or an implicit parameter read
* (a numbered parameter or `it`). If no such variable is found, it returns NULL
* instead.
*/
-static pm_local_variable_read_node_t *
+static pm_node_t *
parse_variable(pm_parser_t *parser) {
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
int depth;
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
- return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
+
+ if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
+ return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
}
pm_scope_t *current_scope = parser->current_scope;
- if (!current_scope->closed && current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
- // Now that we know we have a numbered parameter, we need to check
- // if it's allowed in this context. If it is, then we will create a
- // local variable read. If it's not, then we'll create a normal call
- // node but add an error.
- if (current_scope->parameters & PM_SCOPE_PARAMETERS_ORDINARY) {
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
- } else if (current_scope->parameters & PM_SCOPE_PARAMETERS_IT) {
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_IT);
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
- } else {
- // Indicate that this scope is using numbered params so that child
- // scopes cannot. We subtract the value for the character '0' to get
- // the actual integer value of the number (only _1 through _9 are
- // valid).
- int8_t numbered_parameters = (int8_t) (parser->previous.start[1] - '0');
- current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED;
-
- if (numbered_parameters > current_scope->numbered_parameters) {
- current_scope->numbered_parameters = numbered_parameters;
+ if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
+ if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
+ // When you use a numbered parameter, it implies the existence of
+ // all of the locals that exist before it. For example, referencing
+ // _2 means that _1 must exist. Therefore here we loop through all
+ // of the possibilities and add them into the constant pool.
+ uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
+ for (uint8_t number = 1; number <= maximum; number++) {
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
}
- // When you use a numbered parameter, it implies the existence
- // of all of the locals that exist before it. For example,
- // referencing _2 means that _1 must exist. Therefore here we
- // loop through all of the possibilities and add them into the
- // constant pool.
- for (int8_t numbered_param = 1; numbered_param <= numbered_parameters - 1; numbered_param++) {
- pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_param - 1], 2);
+ if (!match1(parser, PM_TOKEN_EQUAL)) {
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
}
- // Finally we can create the local variable read node.
- pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
- return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
+ pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
+ pm_node_list_append(&current_scope->implicit_parameters, node);
+
+ return node;
+ } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
+ pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
+ pm_node_list_append(&current_scope->implicit_parameters, node);
+
+ return node;
}
}
@@ -15432,8 +15858,8 @@ parse_variable_call(pm_parser_t *parser) {
pm_node_flags_t flags = 0;
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
- pm_local_variable_read_node_t *node = parse_variable(parser);
- if (node != NULL) return (pm_node_t *) node;
+ pm_node_t *node = parse_variable(parser);
+ if (node != NULL) return node;
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
}
@@ -15551,8 +15977,236 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
nodes->size = write_index;
}
+/**
+ * Return a string content token at a particular location that is empty.
+ *
+ * The token has type PM_TOKEN_STRING_CONTENT and both its start and its end
+ * point at the given location, so it has zero length.
+ */
+static pm_token_t
+parse_strings_empty_content(const uint8_t *location) {
+ return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
+}
+
+/**
+ * Parse a set of strings that could be concatenated together.
+ *
+ * The parser must be positioned on a PM_TOKEN_STRING_BEGIN token. Each
+ * adjacent string literal is parsed in turn for as long as the current token
+ * is PM_TOKEN_STRING_BEGIN. A literal becomes a string node, a symbol node (if
+ * it ends in a label terminator), an interpolated string node, or an
+ * interpolated symbol node.
+ *
+ * The current argument is the node that the parsed literals should be
+ * concatenated onto, or NULL if there is none yet. When current is NULL, the
+ * first parsed node becomes the result, unless it is a symbol or interpolated
+ * symbol, in which case it is returned immediately because it cannot be
+ * concatenated with anything. When there is already a current node, an
+ * interpolated string node is created once as a container, the existing node
+ * is appended to it, and every further parsed node is appended as well. A
+ * node that is neither a string nor an interpolated string in that position
+ * reports PM_ERR_STRING_CONCATENATION but is still appended.
+ *
+ * Returns the resulting node: the single literal, the concatenation container,
+ * or the early-returned symbol.
+ */
+static inline pm_node_t *
+parse_strings(pm_parser_t *parser, pm_node_t *current) {
+ assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
+
+ bool concating = false;
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
+
+ while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
+ pm_node_t *node = NULL;
+
+ // Here we have found a string literal. We'll parse it and add it to
+ // the list of strings.
+ const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+ assert(lex_mode->mode == PM_LEX_STRING);
+ bool lex_interpolation = lex_mode->as.string.interpolation;
+
+ pm_token_t opening = parser->current;
+ parser_lex(parser);
+
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
+ // If we get here, then we have an end immediately after a
+ // start. In that case we'll create an empty content token and
+ // return an uninterpolated string.
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
+ pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
+
+ pm_string_shared_init(&string->unescaped, content.start, content.end);
+ node = (pm_node_t *) string;
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
+ // If we get here, then we have an end of a label immediately
+ // after a start. In that case we'll create an empty symbol
+ // node.
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
+
+ pm_string_shared_init(&symbol->unescaped, content.start, content.end);
+ node = (pm_node_t *) symbol;
+ } else if (!lex_interpolation) {
+ // If we don't accept interpolation then we expect the string to
+ // start with a single string content node.
+ pm_string_t unescaped;
+ pm_token_t content;
+
+ if (match1(parser, PM_TOKEN_EOF)) {
+ unescaped = PM_STRING_EMPTY;
+ content = not_provided(parser);
+ } else {
+ unescaped = parser->current_string;
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
+ content = parser->previous;
+ }
+
+ // It is unfortunately possible to have multiple string content
+ // nodes in a row in the case that there's heredoc content in
+ // the middle of the string, like this cursed example:
+ //
+ // <<-END+'b
+ // a
+ // END
+ // c'+'d'
+ //
+ // In that case we need to switch to an interpolated string to
+ // be able to contain all of the parts.
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_node_list_t parts = { 0 };
+
+ pm_token_t delimiters = not_provided(parser);
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
+ pm_node_list_append(&parts, part);
+
+ do {
+ part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
+ pm_node_list_append(&parts, part);
+ parser_lex(parser);
+ } while (match1(parser, PM_TOKEN_STRING_CONTENT));
+
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+
+ pm_node_list_free(&parts);
+ } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ } else {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ }
+ } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ // In this case we've hit string content so we know the string
+ // at least has something in it. We'll need to check if the
+ // following token is the end (in which case we can return a
+ // plain string) or if it's not then it has interpolation.
+ pm_token_t content = parser->current;
+ pm_string_t unescaped = parser->current_string;
+ parser_lex(parser);
+
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
+
+ // Kind of odd behavior, but basically if we have an
+ // unterminated string and it ends in a newline, we back up one
+ // character so that the error message is on the last line of
+ // content in the string.
+ if (!accept1(parser, PM_TOKEN_STRING_END)) {
+ const uint8_t *location = parser->previous.end;
+ if (location > parser->start && location[-1] == '\n') location--;
+ pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
+
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ }
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
+ } else {
+ // If we get here, then we have interpolation so we'll need
+ // to create a string or symbol node with interpolation.
+ pm_node_list_t parts = { 0 };
+ pm_token_t string_opening = not_provided(parser);
+ pm_token_t string_closing = not_provided(parser);
+
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
+ pm_node_list_append(&parts, part);
+
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
+ if ((part = parse_string_part(parser)) != NULL) {
+ pm_node_list_append(&parts, part);
+ }
+ }
+
+ if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ }
+
+ pm_node_list_free(&parts);
+ }
+ } else {
+ // If we get here, then the first part of the string is not plain
+ // string content, in which case we need to parse the string as an
+ // interpolated string.
+ pm_node_list_t parts = { 0 };
+ pm_node_t *part;
+
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
+ if ((part = parse_string_part(parser)) != NULL) {
+ pm_node_list_append(&parts, part);
+ }
+ }
+
+ if (accept1(parser, PM_TOKEN_LABEL_END)) {
+ node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ }
+
+ pm_node_list_free(&parts);
+ }
+
+ if (current == NULL) {
+ // If the node we just parsed is a symbol node, then we can't
+ // concatenate it with anything else, so we can now return that
+ // node.
+ if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
+ return node;
+ }
+
+ // If we don't already have a node, then it's fine and we can just
+ // set the result to be the node we just parsed.
+ current = node;
+ } else {
+ // Otherwise we need to check the type of the node we just parsed.
+ // If it cannot be concatenated with the previous node, then we'll
+ // need to add a syntax error.
+ if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
+ pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
+ }
+
+ // If we haven't already created our container for concatenation,
+ // we'll do that now.
+ if (!concating) {
+ concating = true;
+ pm_token_t bounds = not_provided(parser);
+
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
+ pm_interpolated_string_node_append(container, current);
+ current = (pm_node_t *) container;
+ }
+
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
+ }
+ }
+
+ return current;
+}
+
+#define PM_PARSE_PATTERN_SINGLE 0
+#define PM_PARSE_PATTERN_TOP 1
+#define PM_PARSE_PATTERN_MULTI 2
+
static pm_node_t *
-parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
+parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
/**
* Add the newly created local to the list of captures for this pattern matching
@@ -15581,9 +16235,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
pm_token_t delimiter = parser->previous;
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
-
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
+ node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
}
// If there is a [ or ( that follows, then this is part of a larger pattern
@@ -15602,7 +16254,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
accept1(parser, PM_TOKEN_NEWLINE);
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
}
@@ -15614,7 +16266,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
accept1(parser, PM_TOKEN_NEWLINE);
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
}
@@ -15763,20 +16415,51 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
}
/**
+ * Check that the slice of the source given by the bounds parameters constitutes
+ * a valid local variable name.
+ */
+static bool
+pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+ ptrdiff_t length = end - start;
+ if (length == 0) return false;
+
+ // First ensure that it starts with a valid identifier starting character.
+ size_t width = char_is_identifier_start(parser, start);
+ if (width == 0) return false;
+
+ // Next, ensure that it's not an uppercase character.
+ if (parser->encoding_changed) {
+ if (parser->encoding->isupper_char(start, length)) return false;
+ } else {
+ if (pm_encoding_utf_8_isupper_char(start, length)) return false;
+ }
+
+ // Next, iterate through all of the bytes of the string to ensure that they
+ // are all valid identifier characters.
+ const uint8_t *cursor = start + width;
+ while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
+ return cursor == end;
+}
+
+/**
* Create an implicit node for the value of a hash pattern that has omitted the
* value. This will use an implicit local variable target.
*/
static pm_node_t *
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
+ pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
int depth = -1;
- if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
- } else {
+
+ if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
depth = pm_parser_local_depth_constant_id(parser, constant_id);
+ } else {
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
+
+ if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
+ }
}
if (depth == -1) {
@@ -15800,7 +16483,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
*/
static void
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
}
}
@@ -15831,7 +16514,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
} else {
// Here we have a value for the first assoc in the list, so
// we will parse it now.
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
}
pm_token_t operator = not_provided(parser);
@@ -15846,7 +16529,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
// If we get anything else, then this is an error. For this we'll
// create a missing node for the value and create an assoc node for
// the first node in the list.
- pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
+ pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
+ pm_parser_err_node(parser, first_node, diag_id);
pm_token_t operator = not_provided(parser);
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
@@ -15874,8 +16558,20 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
pm_node_list_append(&assocs, assoc);
}
} else {
- expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+ pm_node_t *key;
+
+ if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
+ key = parse_strings(parser, NULL);
+
+ if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
+ } else if (!pm_symbol_node_label_p(key)) {
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
+ }
+ } else {
+ expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
+ key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+ }
parse_pattern_hash_key(parser, &keys, key);
pm_node_t *value = NULL;
@@ -15883,7 +16579,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
} else {
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
}
pm_token_t operator = not_provided(parser);
@@ -15940,7 +16636,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
// Otherwise, we'll parse the inner pattern, then deal with it depending
// on the type it returns.
- pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
+ pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
@@ -16007,11 +16703,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
first_node = parse_pattern_keyword_rest(parser, captures);
break;
case PM_TOKEN_STRING_BEGIN:
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
break;
default: {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
parser_lex(parser);
- pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
break;
@@ -16087,19 +16783,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
if (variable == NULL) {
- if (
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
- !parser->current_scope->closed &&
- (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
- pm_token_is_it(parser->previous.start, parser->previous.end)
- ) {
- pm_local_variable_read_node_t *read = pm_local_variable_read_node_create_it(parser, &parser->previous);
- if (read == NULL) read = pm_local_variable_read_node_create(parser, &parser->previous, 0);
- variable = (pm_node_t *) read;
- } else {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
- variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
- }
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
+ variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
}
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
@@ -16162,8 +16847,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
parser_lex(parser);
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
- pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
+ pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
}
@@ -16214,7 +16898,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
pm_token_t opening = parser->current;
parser_lex(parser);
- pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
+ pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
@@ -16273,7 +16957,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
* Parse a pattern matching expression.
*/
static pm_node_t *
-parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) {
+parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
pm_node_t *node = NULL;
bool leading_rest = false;
@@ -16283,14 +16967,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
case PM_TOKEN_LABEL: {
parser_lex(parser);
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
- return (pm_node_t *) parse_pattern_hash(parser, captures, key);
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
+
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
+ }
+
+ return node;
}
case PM_TOKEN_USTAR_STAR: {
node = parse_pattern_keyword_rest(parser, captures);
- return (pm_node_t *) parse_pattern_hash(parser, captures, node);
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
+
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
+ }
+
+ return node;
}
case PM_TOKEN_USTAR: {
- if (top_pattern) {
+ if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
parser_lex(parser);
node = (pm_node_t *) parse_pattern_rest(parser, captures);
leading_rest = true;
@@ -16309,7 +17005,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
return (pm_node_t *) parse_pattern_hash(parser, captures, node);
}
- if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
+ if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
// If we have a comma, then we are now parsing either an array pattern or a
// find pattern. We need to parse all of the patterns, put them into a big
// list, and then determine which type of node we have.
@@ -16383,10 +17079,12 @@ parse_negative_numeric(pm_node_t *node) {
cast->value = -cast->value;
break;
}
- case PM_RATIONAL_NODE:
- node->location.start--;
- parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
+ case PM_RATIONAL_NODE: {
+ pm_rational_node_t *cast = (pm_rational_node_t *) node;
+ cast->base.location.start--;
+ cast->numerator.negative = true;
break;
+ }
case PM_IMAGINARY_NODE:
node->location.start--;
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
@@ -16398,217 +17096,6 @@ parse_negative_numeric(pm_node_t *node) {
}
/**
- * Return a string content token at a particular location that is empty.
- */
-static pm_token_t
-parse_strings_empty_content(const uint8_t *location) {
- return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
-}
-
-/**
- * Parse a set of strings that could be concatenated together.
- */
-static inline pm_node_t *
-parse_strings(pm_parser_t *parser, pm_node_t *current) {
- assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
-
- bool concating = false;
- bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
-
- while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
- pm_node_t *node = NULL;
-
- // Here we have found a string literal. We'll parse it and add it to
- // the list of strings.
- const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
- assert(lex_mode->mode == PM_LEX_STRING);
- bool lex_interpolation = lex_mode->as.string.interpolation;
-
- pm_token_t opening = parser->current;
- parser_lex(parser);
-
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
- // If we get here, then we have an end immediately after a
- // start. In that case we'll create an empty content token and
- // return an uninterpolated string.
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
- pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
-
- pm_string_shared_init(&string->unescaped, content.start, content.end);
- node = (pm_node_t *) string;
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
- // If we get here, then we have an end of a label immediately
- // after a start. In that case we'll create an empty symbol
- // node.
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
-
- pm_string_shared_init(&symbol->unescaped, content.start, content.end);
- node = (pm_node_t *) symbol;
- } else if (!lex_interpolation) {
- // If we don't accept interpolation then we expect the string to
- // start with a single string content node.
- pm_string_t unescaped;
- pm_token_t content;
- if (match1(parser, PM_TOKEN_EOF)) {
- unescaped = PM_STRING_EMPTY;
- content = not_provided(parser);
- } else {
- unescaped = parser->current_string;
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
- content = parser->previous;
- }
-
- // It is unfortunately possible to have multiple string content
- // nodes in a row in the case that there's heredoc content in
- // the middle of the string, like this cursed example:
- //
- // <<-END+'b
- // a
- // END
- // c'+'d'
- //
- // In that case we need to switch to an interpolated string to
- // be able to contain all of the parts.
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- pm_node_list_t parts = { 0 };
-
- pm_token_t delimiters = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
- pm_node_list_append(&parts, part);
-
- do {
- part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
- pm_node_list_append(&parts, part);
- parser_lex(parser);
- } while (match1(parser, PM_TOKEN_STRING_CONTENT));
-
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
-
- pm_node_list_free(&parts);
- } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
- } else if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
- } else if (accept1(parser, PM_TOKEN_STRING_END)) {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
- } else {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
- parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
- }
- } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- // In this case we've hit string content so we know the string
- // at least has something in it. We'll need to check if the
- // following token is the end (in which case we can return a
- // plain string) or if it's not then it has interpolation.
- pm_token_t content = parser->current;
- pm_string_t unescaped = parser->current_string;
- parser_lex(parser);
-
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
- pm_node_flag_set(node, parse_unescaped_encoding(parser));
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
- } else {
- // If we get here, then we have interpolation so we'll need
- // to create a string or symbol node with interpolation.
- pm_node_list_t parts = { 0 };
- pm_token_t string_opening = not_provided(parser);
- pm_token_t string_closing = not_provided(parser);
-
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
- pm_node_flag_set(part, parse_unescaped_encoding(parser));
- pm_node_list_append(&parts, part);
-
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
- pm_node_list_append(&parts, part);
- }
- }
-
- if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
- } else if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
- }
-
- pm_node_list_free(&parts);
- }
- } else {
- // If we get here, then the first part of the string is not plain
- // string content, in which case we need to parse the string as an
- // interpolated string.
- pm_node_list_t parts = { 0 };
- pm_node_t *part;
-
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
- pm_node_list_append(&parts, part);
- }
- }
-
- if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
- } else if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
- }
-
- pm_node_list_free(&parts);
- }
-
- if (current == NULL) {
- // If the node we just parsed is a symbol node, then we can't
- // concatenate it with anything else, so we can now return that
- // node.
- if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
- return node;
- }
-
- // If we don't already have a node, then it's fine and we can just
- // set the result to be the node we just parsed.
- current = node;
- } else {
- // Otherwise we need to check the type of the node we just parsed.
- // If it cannot be concatenated with the previous node, then we'll
- // need to add a syntax error.
- if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
- pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
- }
-
- // If we haven't already created our container for concatenation,
- // we'll do that now.
- if (!concating) {
- concating = true;
- pm_token_t bounds = not_provided(parser);
-
- pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
- pm_interpolated_string_node_append(parser, container, current);
- current = (pm_node_t *) container;
- }
-
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
- }
- }
-
- return current;
-}
-
-/**
* Append an error to the error list on the parser using the given diagnostic
* ID. This function is a specialization that handles formatting the specific
* kind of error that is being appended.
@@ -16620,6 +17107,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
break;
}
+ case PM_ERR_HASH_VALUE:
+ case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+ break;
+ }
case PM_ERR_UNARY_RECEIVER: {
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
@@ -16702,6 +17194,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) {
case PM_CONTEXT_IF:
case PM_CONTEXT_LAMBDA_BRACES:
case PM_CONTEXT_LAMBDA_DO_END:
+ case PM_CONTEXT_LOOP_PREDICATE:
case PM_CONTEXT_PARENS:
case PM_CONTEXT_POSTEXE:
case PM_CONTEXT_PREDICATE:
@@ -16780,6 +17273,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
case PM_CONTEXT_LAMBDA_ELSE:
case PM_CONTEXT_LAMBDA_ENSURE:
case PM_CONTEXT_LAMBDA_RESCUE:
+ case PM_CONTEXT_LOOP_PREDICATE:
case PM_CONTEXT_PARENS:
case PM_CONTEXT_POSTEXE:
case PM_CONTEXT_PREDICATE:
@@ -16799,6 +17293,63 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
}
/**
+ * This struct is used to pass information between the regular expression parser
+ * and the error callback.
+ */
+typedef struct {
+ /** The parser that we are parsing the regular expression for. */
+ pm_parser_t *parser;
+
+ /** The start of the regular expression. */
+ const uint8_t *start;
+
+ /** The end of the regular expression. */
+ const uint8_t *end;
+
+ /**
+ * Whether or not the source of the regular expression is shared. This
+ * impacts the location of error messages, because if it is shared then we
+ * can use the location directly and if it is not, then we use the bounds of
+ * the regular expression itself.
+ */
+ bool shared;
+} parse_regular_expression_error_data_t;
+
+/**
+ * This callback is called when the regular expression parser encounters a
+ * syntax error.
+ */
+static void
+parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
+ parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
+ pm_location_t location;
+
+ if (callback_data->shared) {
+ location = (pm_location_t) { .start = start, .end = end };
+ } else {
+ location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
+ }
+
+ PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
+}
+
+/**
+ * Parse the errors for the regular expression and add them to the parser.
+ */
+static void
+parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
+ const pm_string_t *unescaped = &node->unescaped;
+ parse_regular_expression_error_data_t error_data = {
+ .parser = parser,
+ .start = node->base.location.start,
+ .end = node->base.location.end,
+ .shared = unescaped->type == PM_STRING_SHARED
+ };
+
+ pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
+}
+
+/**
* Parse an expression that begins with the previous node that we just lexed.
*/
static inline pm_node_t *
@@ -16818,8 +17369,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
break;
}
- if (pm_array_node_size(array) != 0) {
- expect1(parser, PM_TOKEN_COMMA, PM_ERR_ARRAY_SEPARATOR);
+ // Ensure that we have a comma between elements in the array.
+ if ((pm_array_node_size(array) != 0) && !accept1(parser, PM_TOKEN_COMMA)) {
+ const uint8_t *location = parser->previous.end;
+ PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
+
+ parser->previous.start = location;
+ parser->previous.type = PM_TOKEN_MISSING;
}
// If we have a right bracket immediately following a comma,
@@ -16976,7 +17532,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
return (pm_node_t *) multi_target;
}
- return parse_target_validate(parser, (pm_node_t *) multi_target);
+ return parse_target_validate(parser, (pm_node_t *) multi_target, false);
}
// If we have a single statement and are ending on a right parenthesis
@@ -16997,7 +17553,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// If we didn't find a terminator and we didn't find a right
// parenthesis, then this is a syntax error.
- if (!terminator_found) {
+ if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
}
@@ -17026,7 +17582,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
break;
- } else {
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
+ // If we're at the end of the file, then we're going to add
+ // an error after this for the ) anyway.
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
}
}
@@ -17137,12 +17695,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_UCOLON_COLON: {
parser_lex(parser);
-
pm_token_t delimiter = parser->previous;
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
- pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter, constant);
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+ pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
@@ -17247,8 +17803,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
) {
pm_arguments_t arguments = { 0 };
parse_arguments_list(parser, &arguments, true, accepts_command_call);
-
pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
+
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ // If we're about to convert an 'it' implicit local
+ // variable read into a method call, we need to remove
+ // it from the list of implicit local variables.
+ parse_target_implicit_parameter(parser, node);
+ } else {
+ // Otherwise, we're about to convert a regular local
+ // variable read into a method call, in which case we
+ // need to indicate that this was not a read for the
+ // purposes of warnings.
+ assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
+
+ if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
+ parse_target_implicit_parameter(parser, node);
+ } else {
+ pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
+ pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
+ }
+ }
+
pm_node_destroy(parser, node);
return (pm_node_t *) fcall;
}
@@ -17256,31 +17832,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
- } else {
- // Check if `it` is not going to be assigned.
- switch (parser->current.type) {
- case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
- case PM_TOKEN_AMPERSAND_EQUAL:
- case PM_TOKEN_CARET_EQUAL:
- case PM_TOKEN_EQUAL:
- case PM_TOKEN_GREATER_GREATER_EQUAL:
- case PM_TOKEN_LESS_LESS_EQUAL:
- case PM_TOKEN_MINUS_EQUAL:
- case PM_TOKEN_PARENTHESIS_RIGHT:
- case PM_TOKEN_PERCENT_EQUAL:
- case PM_TOKEN_PIPE_EQUAL:
- case PM_TOKEN_PIPE_PIPE_EQUAL:
- case PM_TOKEN_PLUS_EQUAL:
- case PM_TOKEN_SLASH_EQUAL:
- case PM_TOKEN_STAR_EQUAL:
- case PM_TOKEN_STAR_STAR_EQUAL:
- break;
- default:
- // Once we know it's neither a method call nor an
- // assignment, we can finally create `it` default
- // parameter.
- node = pm_node_check_it(parser, node);
- }
}
return node;
@@ -17302,8 +17853,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
// If we get here, then we have an empty heredoc. We'll create
// an empty content token and return an empty string node.
- lex_mode_pop(parser);
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+ expect1_heredoc_term(parser, lex_mode);
pm_token_t content = parse_strings_empty_content(parser->previous.start);
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -17344,8 +17894,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
node = (pm_node_t *) cast;
- lex_mode_pop(parser);
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+ expect1_heredoc_term(parser, lex_mode);
} else {
// If we get here, then we have multiple parts in the heredoc,
// so we'll need to create an interpolated string node to hold
@@ -17367,20 +17916,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
cast->parts = parts;
- lex_mode_pop(parser);
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
-
+ expect1_heredoc_term(parser, lex_mode);
pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
+
cast->base.location = cast->opening_loc;
node = (pm_node_t *) cast;
} else {
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
pm_node_list_free(&parts);
- lex_mode_pop(parser);
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
-
+ expect1_heredoc_term(parser, lex_mode);
pm_interpolated_string_node_closing_set(cast, &parser->previous);
+
cast->base.location = cast->opening_loc;
node = (pm_node_t *) cast;
}
@@ -17545,6 +18092,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// as frozen because when clause strings are frozen.
if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+ } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
+ pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
}
pm_when_clause_static_literals_add(parser, &literals, condition);
@@ -17601,7 +18150,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t in_keyword = parser->previous;
pm_constant_id_list_t captures = { 0 };
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
pm_constant_id_list_free(&captures);
@@ -17630,7 +18179,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
then_keyword = not_provided(parser);
}
} else {
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
then_keyword = parser->previous;
}
@@ -17830,7 +18379,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (accept1(parser, PM_TOKEN_LESS_LESS)) {
pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
pm_parser_scope_push(parser, true);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -17950,7 +18499,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
receiver = parse_variable_call(parser);
- receiver = pm_node_check_it(parser, receiver);
pm_parser_scope_push(parser, true);
lex_state_set(parser, PM_LEX_STATE_FNAME);
@@ -18084,7 +18632,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
lex_state_set(parser, PM_LEX_STATE_BEG);
parser->command_start = true;
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ }
+
rparen = parser->previous;
break;
}
@@ -18282,7 +18835,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match1(parser, PM_TOKEN_COMMA)) {
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
} else {
- index = parse_target(parser, index);
+ index = parse_target(parser, index, false, false);
}
context_pop(parser);
@@ -18404,9 +18957,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t double_colon = parser->previous;
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
-
- constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
+ constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
}
// Here we retrieve the name of the module. If it wasn't a constant,
@@ -18473,12 +19024,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
case PM_TOKEN_KEYWORD_UNTIL: {
+ context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
pm_do_loop_stack_push(parser, true);
+
parser_lex(parser);
pm_token_t keyword = parser->previous;
-
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
+
pm_do_loop_stack_pop(parser);
+ context_pop(parser);
expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
pm_statements_node_t *statements = NULL;
@@ -18494,12 +19048,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
}
case PM_TOKEN_KEYWORD_WHILE: {
+ context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
pm_do_loop_stack_push(parser, true);
+
parser_lex(parser);
pm_token_t keyword = parser->previous;
-
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
+
pm_do_loop_stack_pop(parser);
+ context_pop(parser);
expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
pm_statements_node_t *statements = NULL;
@@ -18786,15 +19343,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// If we hit string content and the current node is
// an interpolated string, then we need to append
// the string content to the list of child nodes.
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
// If we hit string content and the current node is
// a string node, then we need to convert the
// current node into an interpolated string and add
// the string content to the list of child nodes.
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(parser, interpolated, current);
- pm_interpolated_string_node_append(parser, interpolated, string);
+ pm_interpolated_string_node_append(interpolated, current);
+ pm_interpolated_string_node_append(interpolated, string);
current = (pm_node_t *) interpolated;
} else {
assert(false && "unreachable");
@@ -18819,7 +19376,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(parser, interpolated, current);
+ pm_interpolated_string_node_append(interpolated, current);
current = (pm_node_t *) interpolated;
} else {
// If we hit an embedded variable and the current
@@ -18828,7 +19385,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_node_t *part = parse_string_part(parser);
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
break;
}
case PM_TOKEN_EMBEXPR_BEGIN: {
@@ -18848,7 +19405,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(parser, interpolated, current);
+ pm_interpolated_string_node_append(interpolated, current);
current = (pm_node_t *) interpolated;
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
// If we hit an embedded expression and the current
@@ -18859,7 +19416,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_node_t *part = parse_string_part(parser);
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
break;
}
default:
@@ -18919,13 +19476,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
bool ascii_only = parser->current_regular_expression_ascii_only;
parser_lex(parser);
- // If we hit an end, then we can create a regular expression node
- // without interpolation, which can be represented more succinctly and
- // more easily compiled.
+ // If we hit an end, then we can create a regular expression
+ // node without interpolation, which can be represented more
+ // succinctly and more easily compiled.
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
- pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
- pm_node_flag_set(node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->flags));
- return node;
+ pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+
+ // If we're not immediately followed by a =~, then we want
+ // to parse all of the errors at this point. If it is
+ // followed by a =~, then it will get parsed higher up while
+ // parsing the named captures as well.
+ if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
+ parse_regular_expression_errors(parser, node);
+ }
+
+ pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
+ return (pm_node_t *) node;
}
// If we get here, then we have interpolation so we'll need to create
@@ -18935,6 +19501,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
+
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ // This is extremely strange, but the first string part of a
+ // regular expression will always be tagged as binary if we
+ // are in a US-ASCII file, no matter its contents.
+ pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
+ }
+
pm_interpolated_regular_expression_node_append(interpolated, part);
} else {
// If the first part of the body of the regular expression is not a
@@ -19063,7 +19637,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match1(parser, PM_TOKEN_COMMA)) {
return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
} else {
- return parse_target_validate(parser, splat);
+ return parse_target_validate(parser, splat, true);
}
}
case PM_TOKEN_BANG: {
@@ -19135,9 +19709,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
switch (parser->current.type) {
case PM_TOKEN_PARENTHESIS_LEFT: {
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
-
pm_token_t opening = parser->current;
parser_lex(parser);
@@ -19154,9 +19725,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
break;
}
case PM_CASE_PARAMETER: {
- assert(parser->current_scope->parameters == PM_SCOPE_PARAMETERS_NONE);
- parser->current_scope->parameters = PM_SCOPE_PARAMETERS_ORDINARY;
-
pm_accepts_block_stack_push(parser, false);
pm_token_t opening = not_provided(parser);
block_parameters = parse_block_parameters(parser, false, &opening, true);
@@ -19335,10 +19903,15 @@ parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
*/
static pm_node_t *
parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
+ bool permitted = true;
+ if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
+
pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
- parse_assignment_value_local(parser, value);
+ if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
+ parse_assignment_value_local(parser, value);
bool single_value = true;
+
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
single_value = false;
@@ -19409,122 +19982,126 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
}
/**
- * Returns true if the name of the capture group is a valid local variable that
- * can be written to.
+ * This struct is used to pass information between the regular expression parser
+ * and the named capture callback.
*/
-static bool
-parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
- if (length == 0) {
- return false;
- }
+typedef struct {
+ /** The parser that is parsing the regular expression. */
+ pm_parser_t *parser;
- // First ensure that it starts with a valid identifier starting character.
- size_t width = char_is_identifier_start(parser, source);
- if (!width) {
- return false;
- }
+ /** The call node wrapping the regular expression node. */
+ pm_call_node_t *call;
- // Next, ensure that it's not an uppercase character.
- if (parser->encoding_changed) {
- if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
- } else {
- if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
- }
+ /** The match write node that is being created. */
+ pm_match_write_node_t *match;
- // Next, iterate through all of the bytes of the string to ensure that they
- // are all valid identifier characters.
- const uint8_t *cursor = source + width;
- while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
- cursor += width;
- }
+ /** The list of names that have been parsed. */
+ pm_constant_id_list_t names;
- return cursor == source + length;
-}
+ /**
+ * Whether the content of the regular expression is shared. This impacts
+ * whether or not we used owned constants or shared constants in the
+ * constant pool for the names of the captures.
+ */
+ bool shared;
+} parse_regular_expression_named_capture_data_t;
/**
- * Potentially change a =~ with a regular expression with named captures into a
- * match write node.
+ * This callback is called when the regular expression parser encounters a named
+ * capture group.
*/
-static pm_node_t *
-parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
- pm_string_list_t named_captures = { 0 };
- pm_node_t *result;
+static void
+parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
+ parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
- if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
- // Since we should not create a MatchWriteNode when all capture names
- // are invalid, creating a MatchWriteNode is delaid here.
- pm_match_write_node_t *match = NULL;
- pm_constant_id_list_t names = { 0 };
+ pm_parser_t *parser = callback_data->parser;
+ pm_call_node_t *call = callback_data->call;
+ pm_constant_id_list_t *names = &callback_data->names;
- for (size_t index = 0; index < named_captures.length; index++) {
- pm_string_t *string = &named_captures.strings[index];
+ const uint8_t *source = pm_string_source(capture);
+ size_t length = pm_string_length(capture);
- const uint8_t *source = pm_string_source(string);
- size_t length = pm_string_length(string);
+ pm_location_t location;
+ pm_constant_id_t name;
- pm_location_t location;
- pm_constant_id_t name;
+ // If the name of the capture group isn't a valid identifier, we do
+ // not add it to the local table.
+ if (!pm_slice_is_valid_local(parser, source, source + length)) return;
- // If the name of the capture group isn't a valid identifier, we do
- // not add it to the local table.
- if (!parse_regular_expression_named_capture(parser, source, length)) continue;
+ if (callback_data->shared) {
+ // If the unescaped string is a slice of the source, then we can
+ // copy the names directly. The pointers will line up.
+ location = (pm_location_t) { .start = source, .end = source + length };
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
+ } else {
+ // Otherwise, the name is a slice of the malloc-ed owned string,
+ // in which case we need to copy it out into a new string.
+ location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
- if (content->type == PM_STRING_SHARED) {
- // If the unescaped string is a slice of the source, then we can
- // copy the names directly. The pointers will line up.
- location = (pm_location_t) { .start = source, .end = source + length };
- name = pm_parser_constant_id_location(parser, location.start, location.end);
- } else {
- // Otherwise, the name is a slice of the malloc-ed owned string,
- // in which case we need to copy it out into a new string.
- location = call->receiver->location;
+ void *memory = xmalloc(length);
+ if (memory == NULL) abort();
- void *memory = xmalloc(length);
- if (memory == NULL) abort();
+ memcpy(memory, source, length);
+ name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
+ }
- memcpy(memory, source, length);
- name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
- }
+ // Add this name to the list of constants if it is valid, not duplicated,
+ // and not a keyword.
+ if (name != 0 && !pm_constant_id_list_includes(names, name)) {
+ pm_constant_id_list_append(names, name);
- if (name != 0) {
- // We dont want to create duplicate targets if the capture name
- // is duplicated.
- if (pm_constant_id_list_includes(&names, name)) continue;
- pm_constant_id_list_append(&names, name);
+ int depth;
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
+ // If the local is not already a local but it is a keyword, then we
+ // do not want to add a capture for this.
+ if (pm_local_is_keyword((const char *) source, length)) return;
- int depth;
- if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
- // If the identifier is not already a local, then we'll add
- // it to the local table unless it's a keyword.
- if (pm_local_is_keyword((const char *) source, length)) continue;
+ // If the identifier is not already a local, then we will add it to
+ // the local table.
+ pm_parser_local_add(parser, name, location.start, location.end, 0);
+ }
- pm_parser_local_add(parser, name, location.start, location.end, 0);
- }
+ // Here we lazily create the MatchWriteNode since we know we're
+ // about to add a target.
+ if (callback_data->match == NULL) {
+ callback_data->match = pm_match_write_node_create(parser, call);
+ }
- // Here we lazily create the MatchWriteNode since we know we're
- // about to add a target.
- if (match == NULL) match = pm_match_write_node_create(parser, call);
+ // Next, create the local variable target and add it to the list of
+ // targets for the match.
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
+ pm_node_list_append(&callback_data->match->targets, target);
+ }
+}
- // Next, create the local variable target and add it to the
- // list of targets for the match.
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
- pm_node_list_append(&match->targets, target);
- }
- }
+/**
+ * Potentially change a =~ with a regular expression with named captures into a
+ * match write node.
+ */
+static pm_node_t *
+parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
+ parse_regular_expression_named_capture_data_t callback_data = {
+ .parser = parser,
+ .call = call,
+ .names = { 0 },
+ .shared = content->type == PM_STRING_SHARED
+ };
- if (match != NULL) {
- result = (pm_node_t *) match;
- } else {
- result = (pm_node_t *) call;
- }
+ parse_regular_expression_error_data_t error_data = {
+ .parser = parser,
+ .start = call->receiver->location.start,
+ .end = call->receiver->location.end,
+ .shared = content->type == PM_STRING_SHARED
+ };
+
+ pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
+ pm_constant_id_list_free(&callback_data.names);
- pm_constant_id_list_free(&names);
+ if (callback_data.match != NULL) {
+ return (pm_node_t *) callback_data.match;
} else {
- result = (pm_node_t *) call;
+ return (pm_node_t *) call;
}
-
- pm_string_list_free(&named_captures);
- return result;
}
static inline pm_node_t *
@@ -19641,7 +20218,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
return result;
}
case PM_CALL_NODE: {
- parser_lex(parser);
pm_call_node_t *cast = (pm_call_node_t *) node;
// If we have a vcall (a method with no arguments and no
@@ -19652,6 +20228,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ parser_lex(parser);
+
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
@@ -19659,6 +20237,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
return result;
}
+ // Move past the token here so that we have already added
+ // the local variable by this point.
+ parser_lex(parser);
+
// If there is no call operator and the message is "[]" then
// this is an aref expression, and we can transform it into
// an aset expression.
@@ -19754,7 +20336,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
return result;
}
case PM_CALL_NODE: {
- parser_lex(parser);
pm_call_node_t *cast = (pm_call_node_t *) node;
// If we have a vcall (a method with no arguments and no
@@ -19765,6 +20346,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ parser_lex(parser);
+
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
@@ -19772,6 +20355,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
return result;
}
+ // Move past the token here so that we have already added
+ // the local variable by this point.
+ parser_lex(parser);
+
// If there is no call operator and the message is "[]" then
// this is an aref expression, and we can transform it into
// an aset expression.
@@ -19925,7 +20512,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// In this case we have an operator but we don't know what it's for.
// We need to treat it as an error. For now, we'll mark it as an error
// and just skip right past it.
- pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
return node;
}
}
@@ -20181,7 +20768,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
if (
(parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
- (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
) {
// If we have a constant immediately following a '::' operator, then
// this can either be a constant path or a method call, depending on
@@ -20196,8 +20783,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
} else {
// Otherwise, this is a constant path. That would look like Foo::Bar.
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
- path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
+ path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
}
// If this is followed by a comma then it is a multiple assignment.
@@ -20236,9 +20822,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
}
default: {
- pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
- return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+ return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
}
}
}
@@ -20309,7 +20894,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
parser_lex(parser);
pm_constant_id_list_t captures = { 0 };
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
pm_constant_id_list_free(&captures);
@@ -20326,7 +20911,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
parser_lex(parser);
pm_constant_id_list_t captures = { 0 };
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
pm_constant_id_list_free(&captures);
@@ -20339,6 +20924,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
}
+#undef PM_PARSE_PATTERN_SINGLE
+#undef PM_PARSE_PATTERN_TOP
+#undef PM_PARSE_PATTERN_MULTI
+
/**
* Parse an expression at the given point of the parser using the given binding
* power to parse subsequent chains. If this function finds a syntax error, it
@@ -20657,6 +21246,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.parsing_eval = false,
.command_start = true,
.recovering = false,
+ .encoding_locked = false,
.encoding_changed = false,
.pattern_matching_newlines = false,
.in_keyword_arg = false,
@@ -20704,6 +21294,9 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
}
+ // encoding_locked option
+ parser->encoding_locked = options->encoding_locked;
+
// frozen_string_literal option
parser->frozen_string_literal = options->frozen_string_literal;
@@ -20722,7 +21315,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
// Scopes given from the outside are not allowed to have numbered
// parameters.
- parser->current_scope->numbered_parameters = PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED;
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
@@ -21110,328 +21703,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
}
#endif
-
-/** An error that is going to be formatted into the output. */
-typedef struct {
- /** A pointer to the diagnostic that was generated during parsing. */
- pm_diagnostic_t *error;
-
- /** The start line of the diagnostic message. */
- int32_t line;
-
- /** The column start of the diagnostic message. */
- uint32_t column_start;
-
- /** The column end of the diagnostic message. */
- uint32_t column_end;
-} pm_error_t;
-
-/** The format that will be used to format the errors into the output. */
-typedef struct {
- /** The prefix that will be used for line numbers. */
- const char *number_prefix;
-
- /** The prefix that will be used for blank lines. */
- const char *blank_prefix;
-
- /** The divider that will be used between sections of source code. */
- const char *divider;
-
- /** The length of the blank prefix. */
- size_t blank_prefix_length;
-
- /** The length of the divider. */
- size_t divider_length;
-} pm_error_format_t;
-
-#define PM_COLOR_GRAY "\033[38;5;102m"
-#define PM_COLOR_RED "\033[1;31m"
-#define PM_COLOR_RESET "\033[m"
-
-static inline pm_error_t *
-pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
- pm_error_t *errors = xcalloc(error_list->size, sizeof(pm_error_t));
- if (errors == NULL) return NULL;
-
- int32_t start_line = parser->start_line;
- for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
- pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
- pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
-
- // We're going to insert this error into the array in sorted order. We
- // do this by finding the first error that has a line number greater
- // than the current error and then inserting the current error before
- // that one.
- size_t index = 0;
- while (
- (index < error_list->size) &&
- (errors[index].error != NULL) &&
- (
- (errors[index].line < start.line) ||
- ((errors[index].line == start.line) && (errors[index].column_start < start.column))
- )
- ) index++;
-
- // Now we're going to shift all of the errors after this one down one
- // index to make room for the new error.
- if (index + 1 < error_list->size) {
- memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
- }
-
- // Finally, we'll insert the error into the array.
- uint32_t column_end;
- if (start.line == end.line) {
- column_end = end.column;
- } else {
- column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
- }
-
- // Ensure we have at least one column of error.
- if (start.column == column_end) column_end++;
-
- errors[index] = (pm_error_t) {
- .error = error,
- .line = start.line,
- .column_start = start.column,
- .column_end = column_end
- };
- }
-
- return errors;
-}
-
-static inline void
-pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
- int32_t line_delta = line - parser->start_line;
- assert(line_delta >= 0);
-
- size_t index = (size_t) line_delta;
- assert(index < newline_list->size);
-
- const uint8_t *start = &parser->start[newline_list->offsets[index]];
- const uint8_t *end;
-
- if (index >= newline_list->size - 1) {
- end = parser->end;
- } else {
- end = &parser->start[newline_list->offsets[index + 1]];
- }
-
- pm_buffer_append_format(buffer, number_prefix, line);
- pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
-
- if (end == parser->end && end[-1] != '\n') {
- pm_buffer_append_string(buffer, "\n", 1);
- }
-}
-
-/**
- * Format the errors on the parser into the given buffer.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
- assert(error_list->size != 0);
-
- // First, we're going to sort all of the errors by line number using an
- // insertion sort into a newly allocated array.
- const int32_t start_line = parser->start_line;
- const pm_newline_list_t *newline_list = &parser->newline_list;
-
- pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
- if (errors == NULL) return;
-
- // Now we're going to determine how we're going to format line numbers and
- // blank lines based on the maximum number of digits in the line numbers
- // that are going to be displaid.
- pm_error_format_t error_format;
- int32_t first_line_number = errors[0].line;
- int32_t last_line_number = errors[error_list->size - 1].line;
-
- // If we have a maximum line number that is negative, then we're going to
- // use the absolute value for comparison but multiple by 10 to additionally
- // have a column for the negative sign.
- if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
- if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
- int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
-
- if (max_line_number < 10) {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%1" PRIi32 " | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~\n"
- };
- }
- } else if (max_line_number < 100) {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%2" PRIi32 " | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~~\n"
- };
- }
- } else if (max_line_number < 1000) {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%3" PRIi32 " | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~~~\n"
- };
- }
- } else if (max_line_number < 10000) {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%4" PRIi32 " | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~~~~\n"
- };
- }
- } else {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%5" PRIi32 " | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~~~~\n"
- };
- }
- }
-
- error_format.blank_prefix_length = strlen(error_format.blank_prefix);
- error_format.divider_length = strlen(error_format.divider);
-
- // Now we're going to iterate through every error in our error list and
- // display it. While we're iterating, we will display some padding lines of
- // the source before the error to give some context. We'll be careful not to
- // display the same line twice in case the errors are close enough in the
- // source.
- int32_t last_line = parser->start_line - 1;
- const pm_encoding_t *encoding = parser->encoding;
-
- for (size_t index = 0; index < error_list->size; index++) {
- pm_error_t *error = &errors[index];
-
- // Here we determine how many lines of padding of the source to display,
- // based on the difference from the last line that was displaid.
- if (error->line - last_line > 1) {
- if (error->line - last_line > 2) {
- if ((index != 0) && (error->line - last_line > 3)) {
- pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
- }
-
- pm_buffer_append_string(buffer, " ", 2);
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
- }
-
- pm_buffer_append_string(buffer, " ", 2);
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
- }
-
- // If this is the first error or we're on a new line, then we'll display
- // the line that has the error in it.
- if ((index == 0) || (error->line != last_line)) {
- if (colorize) {
- pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
- } else {
- pm_buffer_append_string(buffer, "> ", 2);
- }
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
- }
-
- const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
- if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
-
- // Now we'll display the actual error message. We'll do this by first
- // putting the prefix to the line, then a bunch of blank spaces
- // depending on the column, then as many carets as we need to display
- // the width of the error, then the error message itself.
- //
- // Note that this doesn't take into account the width of the actual
- // character when displaid in the terminal. For some east-asian
- // languages or emoji, this means it can be thrown off pretty badly. We
- // will need to solve this eventually.
- pm_buffer_append_string(buffer, " ", 2);
- pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
-
- size_t column = 0;
- while (column < error->column_end) {
- if (column < error->column_start) {
- pm_buffer_append_byte(buffer, ' ');
- } else {
- const uint8_t caret = column == error->column_start ? '^' : '~';
-
- if (colorize) {
- pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
- pm_buffer_append_byte(buffer, caret);
- pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
- } else {
- pm_buffer_append_byte(buffer, caret);
- }
- }
-
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
- column += (char_width == 0 ? 1 : char_width);
- }
-
- if (inline_messages) {
- pm_buffer_append_byte(buffer, ' ');
- assert(error->error != NULL);
-
- const char *message = error->error->message;
- pm_buffer_append_string(buffer, message, strlen(message));
- }
-
- pm_buffer_append_byte(buffer, '\n');
-
- // Here we determine how many lines of padding to display after the
- // error, depending on where the next error is in source.
- last_line = error->line;
- int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
-
- if (next_line - last_line > 1) {
- pm_buffer_append_string(buffer, " ", 2);
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
- }
-
- if (next_line - last_line > 1) {
- pm_buffer_append_string(buffer, " ", 2);
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
- }
- }
-
- // Finally, we'll free the array of errors that we allocated.
- xfree(errors);
-}
-
-#undef PM_COLOR_GRAY
-#undef PM_COLOR_RED
-#undef PM_COLOR_RESET