diff options
| author | Hiroya Fujinami <make.just.on@gmail.com> | 2023-11-19 11:05:16 +0900 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2023-11-19 02:05:21 +0000 |
| commit | 8b4b0b71777ff3371c2b4e7b8feca047f8f64511 (patch) | |
| tree | ee85a1e3b77f9e3e6211839036bf91cfd5f81835 | |
| parent | 5fea1d08bdeb120029f24e677f0cdcbb390dbbe0 (diff) | |
[ruby/prism] Don't add an invalid identifier capture to locals
(https://github.com/ruby/prism/pull/1836)
* Don't add an invalid identifier capture to locals
Fix https://github.com/ruby/prism/pull/1815
* Delay creating a MatchWriteNode
https://github.com/ruby/prism/pull/1836#discussion_r1393716600
https://github.com/ruby/prism/commit/635f595a36
| -rw-r--r-- | prism/prism.c | 41 |
| -rw-r--r-- | test/prism/fixtures/regex.txt | 4 |
| -rw-r--r-- | test/prism/snapshots/regex.txt | 70 |
3 files changed, 106 insertions, 9 deletions
diff --git a/prism/prism.c b/prism/prism.c index 2b027a9bde..850b35c3c7 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -15686,6 +15686,25 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const } } +static bool +name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) { + if (length == 0) { + return false; + } + + size_t width = char_is_identifier_start(parser, source); + if (!width) { + return false; + } + + uint8_t *cursor = ((uint8_t *)source) + width; + while (cursor < source + length && (width = char_is_identifier(parser, cursor))) { + cursor += width; + } + + return cursor == source + length; +} + /** * Potentially change a =~ with a regular expression with named captures into a * match write node. @@ -15696,7 +15715,9 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * pm_node_t *result; if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) { - pm_match_write_node_t *match = pm_match_write_node_create(parser, call); + // Since we should not create a MatchWriteNode when all capture names are invalid, + // creating a MatchWriteNode is delayed here. 
+ pm_match_write_node_t *match = NULL; for (size_t index = 0; index < named_captures.length; index++) { pm_string_t *name = &named_captures.strings[index]; @@ -15704,6 +15725,10 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * const uint8_t *source = pm_string_source(name); size_t length = pm_string_length(name); + if (!name_is_identifier(parser, source, length)) { + continue; + } + pm_constant_id_t local; if (content->type == PM_STRING_SHARED) { // If the unescaped string is a slice of the source, then we can @@ -15731,10 +15756,22 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * } } + if (match == NULL) { + match = pm_match_write_node_create(parser, call); + } + + if (pm_constant_id_list_includes(&match->locals, local)) { + continue; + } + pm_constant_id_list_append(&match->locals, local); } - result = (pm_node_t *) match; + if (match != NULL) { + result = (pm_node_t *) match; + } else { + result = (pm_node_t *) call; + } } else { result = (pm_node_t *) call; } diff --git a/test/prism/fixtures/regex.txt b/test/prism/fixtures/regex.txt index 1f33b810ac..c27e022d01 100644 --- a/test/prism/fixtures/regex.txt +++ b/test/prism/fixtures/regex.txt @@ -31,3 +31,7 @@ foo /bar/ /(?<a\ b>)/ =~ ""; ab + +/(?<abc>)(?<abc>)/ =~ ""; abc + +/(?<a b>)/ =~ "" diff --git a/test/prism/snapshots/regex.txt b/test/prism/snapshots/regex.txt index d1d913c88c..722006c998 100644 --- a/test/prism/snapshots/regex.txt +++ b/test/prism/snapshots/regex.txt @@ -1,8 +1,8 @@ -@ ProgramNode (location: (1,0)-(33,14)) -├── locals: [:foo, :ab] +@ ProgramNode (location: (1,0)-(37,16)) +├── locals: [:foo, :ab, :abc] └── statements: - @ StatementsNode (location: (1,0)-(33,14)) - └── body: (length: 16) + @ StatementsNode (location: (1,0)-(37,16)) + └── body: (length: 19) ├── @ CallNode (location: (1,0)-(1,9)) │ ├── receiver: ∅ │ ├── call_operator_loc: ∅ @@ -238,6 +238,62 @@ │ │ ├── flags: ∅ │ │ └── name: :=~ │ └── locals: [:ab] - └── @ 
LocalVariableReadNode (location: (33,12)-(33,14)) - ├── name: :ab - └── depth: 0 + ├── @ LocalVariableReadNode (location: (33,12)-(33,14)) + │ ├── name: :ab + │ └── depth: 0 + ├── @ MatchWriteNode (location: (35,0)-(35,24)) + │ ├── call: + │ │ @ CallNode (location: (35,0)-(35,24)) + │ │ ├── receiver: + │ │ │ @ RegularExpressionNode (location: (35,0)-(35,18)) + │ │ │ ├── opening_loc: (35,0)-(35,1) = "/" + │ │ │ ├── content_loc: (35,1)-(35,17) = "(?<abc>)(?<abc>)" + │ │ │ ├── closing_loc: (35,17)-(35,18) = "/" + │ │ │ ├── unescaped: "(?<abc>)(?<abc>)" + │ │ │ └── flags: ∅ + │ │ ├── call_operator_loc: ∅ + │ │ ├── message_loc: (35,19)-(35,21) = "=~" + │ │ ├── opening_loc: ∅ + │ │ ├── arguments: + │ │ │ @ ArgumentsNode (location: (35,22)-(35,24)) + │ │ │ ├── arguments: (length: 1) + │ │ │ │ └── @ StringNode (location: (35,22)-(35,24)) + │ │ │ │ ├── flags: ∅ + │ │ │ │ ├── opening_loc: (35,22)-(35,23) = "\"" + │ │ │ │ ├── content_loc: (35,23)-(35,23) = "" + │ │ │ │ ├── closing_loc: (35,23)-(35,24) = "\"" + │ │ │ │ └── unescaped: "" + │ │ │ └── flags: ∅ + │ │ ├── closing_loc: ∅ + │ │ ├── block: ∅ + │ │ ├── flags: ∅ + │ │ └── name: :=~ + │ └── locals: [:abc] + ├── @ LocalVariableReadNode (location: (35,26)-(35,29)) + │ ├── name: :abc + │ └── depth: 0 + └── @ CallNode (location: (37,0)-(37,16)) + ├── receiver: + │ @ RegularExpressionNode (location: (37,0)-(37,10)) + │ ├── opening_loc: (37,0)-(37,1) = "/" + │ ├── content_loc: (37,1)-(37,9) = "(?<a b>)" + │ ├── closing_loc: (37,9)-(37,10) = "/" + │ ├── unescaped: "(?<a b>)" + │ └── flags: ∅ + ├── call_operator_loc: ∅ + ├── message_loc: (37,11)-(37,13) = "=~" + ├── opening_loc: ∅ + ├── arguments: + │ @ ArgumentsNode (location: (37,14)-(37,16)) + │ ├── arguments: (length: 1) + │ │ └── @ StringNode (location: (37,14)-(37,16)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (37,14)-(37,15) = "\"" + │ │ ├── content_loc: (37,15)-(37,15) = "" + │ │ ├── closing_loc: (37,15)-(37,16) = "\"" + │ │ └── unescaped: "" + │ └── flags: ∅ + ├── 
closing_loc: ∅ + ├── block: ∅ + ├── flags: ∅ + └── name: :=~ |
