summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hiroya Fujinami <make.just.on@gmail.com> 2023-11-19 11:05:16 +0900
committer git <svn-admin@ruby-lang.org> 2023-11-19 02:05:21 +0000
commit 8b4b0b71777ff3371c2b4e7b8feca047f8f64511 (patch)
tree ee85a1e3b77f9e3e6211839036bf91cfd5f81835
parent 5fea1d08bdeb120029f24e677f0cdcbb390dbbe0 (diff)
[ruby/prism] Don't add an invalid identifier capture to locals
(https://github.com/ruby/prism/pull/1836)

* Don't add an invalid identifier capture to locals

  Fix https://github.com/ruby/prism/pull/1815

* Delay creating a MatchWriteNode

  https://github.com/ruby/prism/pull/1836#discussion_r1393716600

https://github.com/ruby/prism/commit/635f595a36
-rw-r--r-- prism/prism.c 41
-rw-r--r-- test/prism/fixtures/regex.txt 4
-rw-r--r-- test/prism/snapshots/regex.txt 70
3 files changed, 106 insertions, 9 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 2b027a9bde..850b35c3c7 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -15686,6 +15686,25 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
}
}
+static bool
+name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) {
+ if (length == 0) {
+ return false;
+ }
+
+ size_t width = char_is_identifier_start(parser, source);
+ if (!width) {
+ return false;
+ }
+
+ uint8_t *cursor = ((uint8_t *)source) + width;
+ while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
+ cursor += width;
+ }
+
+ return cursor == source + length;
+}
+
/**
* Potentially change a =~ with a regular expression with named captures into a
* match write node.
@@ -15696,7 +15715,9 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
pm_node_t *result;
if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
- pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
+ // Since we should not create a MatchWriteNode when all capture names are invalid,
+ // creating a MatchWriteNode is delayed here.
+ pm_match_write_node_t *match = NULL;
for (size_t index = 0; index < named_captures.length; index++) {
pm_string_t *name = &named_captures.strings[index];
@@ -15704,6 +15725,10 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
const uint8_t *source = pm_string_source(name);
size_t length = pm_string_length(name);
+ if (!name_is_identifier(parser, source, length)) {
+ continue;
+ }
+
pm_constant_id_t local;
if (content->type == PM_STRING_SHARED) {
// If the unescaped string is a slice of the source, then we can
@@ -15731,10 +15756,22 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
}
}
+ if (match == NULL) {
+ match = pm_match_write_node_create(parser, call);
+ }
+
+ if (pm_constant_id_list_includes(&match->locals, local)) {
+ continue;
+ }
+
pm_constant_id_list_append(&match->locals, local);
}
- result = (pm_node_t *) match;
+ if (match != NULL) {
+ result = (pm_node_t *) match;
+ } else {
+ result = (pm_node_t *) call;
+ }
} else {
result = (pm_node_t *) call;
}
diff --git a/test/prism/fixtures/regex.txt b/test/prism/fixtures/regex.txt
index 1f33b810ac..c27e022d01 100644
--- a/test/prism/fixtures/regex.txt
+++ b/test/prism/fixtures/regex.txt
@@ -31,3 +31,7 @@ foo /bar/
/(?<a\
b>)/ =~ ""; ab
+
+/(?<abc>)(?<abc>)/ =~ ""; abc
+
+/(?<a b>)/ =~ ""
diff --git a/test/prism/snapshots/regex.txt b/test/prism/snapshots/regex.txt
index d1d913c88c..722006c998 100644
--- a/test/prism/snapshots/regex.txt
+++ b/test/prism/snapshots/regex.txt
@@ -1,8 +1,8 @@
-@ ProgramNode (location: (1,0)-(33,14))
-├── locals: [:foo, :ab]
+@ ProgramNode (location: (1,0)-(37,16))
+├── locals: [:foo, :ab, :abc]
└── statements:
- @ StatementsNode (location: (1,0)-(33,14))
- └── body: (length: 16)
+ @ StatementsNode (location: (1,0)-(37,16))
+ └── body: (length: 19)
├── @ CallNode (location: (1,0)-(1,9))
│ ├── receiver: ∅
│ ├── call_operator_loc: ∅
@@ -238,6 +238,62 @@
│ │ ├── flags: ∅
│ │ └── name: :=~
│ └── locals: [:ab]
- └── @ LocalVariableReadNode (location: (33,12)-(33,14))
- ├── name: :ab
- └── depth: 0
+ ├── @ LocalVariableReadNode (location: (33,12)-(33,14))
+ │ ├── name: :ab
+ │ └── depth: 0
+ ├── @ MatchWriteNode (location: (35,0)-(35,24))
+ │ ├── call:
+ │ │ @ CallNode (location: (35,0)-(35,24))
+ │ │ ├── receiver:
+ │ │ │ @ RegularExpressionNode (location: (35,0)-(35,18))
+ │ │ │ ├── opening_loc: (35,0)-(35,1) = "/"
+ │ │ │ ├── content_loc: (35,1)-(35,17) = "(?<abc>)(?<abc>)"
+ │ │ │ ├── closing_loc: (35,17)-(35,18) = "/"
+ │ │ │ ├── unescaped: "(?<abc>)(?<abc>)"
+ │ │ │ └── flags: ∅
+ │ │ ├── call_operator_loc: ∅
+ │ │ ├── message_loc: (35,19)-(35,21) = "=~"
+ │ │ ├── opening_loc: ∅
+ │ │ ├── arguments:
+ │ │ │ @ ArgumentsNode (location: (35,22)-(35,24))
+ │ │ │ ├── arguments: (length: 1)
+ │ │ │ │ └── @ StringNode (location: (35,22)-(35,24))
+ │ │ │ │ ├── flags: ∅
+ │ │ │ │ ├── opening_loc: (35,22)-(35,23) = "\""
+ │ │ │ │ ├── content_loc: (35,23)-(35,23) = ""
+ │ │ │ │ ├── closing_loc: (35,23)-(35,24) = "\""
+ │ │ │ │ └── unescaped: ""
+ │ │ │ └── flags: ∅
+ │ │ ├── closing_loc: ∅
+ │ │ ├── block: ∅
+ │ │ ├── flags: ∅
+ │ │ └── name: :=~
+ │ └── locals: [:abc]
+ ├── @ LocalVariableReadNode (location: (35,26)-(35,29))
+ │ ├── name: :abc
+ │ └── depth: 0
+ └── @ CallNode (location: (37,0)-(37,16))
+ ├── receiver:
+ │ @ RegularExpressionNode (location: (37,0)-(37,10))
+ │ ├── opening_loc: (37,0)-(37,1) = "/"
+ │ ├── content_loc: (37,1)-(37,9) = "(?<a b>)"
+ │ ├── closing_loc: (37,9)-(37,10) = "/"
+ │ ├── unescaped: "(?<a b>)"
+ │ └── flags: ∅
+ ├── call_operator_loc: ∅
+ ├── message_loc: (37,11)-(37,13) = "=~"
+ ├── opening_loc: ∅
+ ├── arguments:
+ │ @ ArgumentsNode (location: (37,14)-(37,16))
+ │ ├── arguments: (length: 1)
+ │ │ └── @ StringNode (location: (37,14)-(37,16))
+ │ │ ├── flags: ∅
+ │ │ ├── opening_loc: (37,14)-(37,15) = "\""
+ │ │ ├── content_loc: (37,15)-(37,15) = ""
+ │ │ ├── closing_loc: (37,15)-(37,16) = "\""
+ │ │ └── unescaped: ""
+ │ └── flags: ∅
+ ├── closing_loc: ∅
+ ├── block: ∅
+ ├── flags: ∅
+ └── name: :=~