diff options
author | Kevin Newton <kddnewton@gmail.com> | 2023-11-09 22:59:35 -0500 |
---|---|---|
committer | Kevin Newton <kddnewton@gmail.com> | 2023-11-20 18:00:44 -0500 |
commit | e269096d15a4488a8ae34aca38752989e61304c7 (patch) | |
tree | 7e92471855d8210d9b3d623437a664d8f378ef63 | |
parent | fa547cd70243710e90bf57377402cbbdf910efa3 (diff) |
[ruby/prism] Replace match write locals with match write targets
https://github.com/ruby/prism/commit/eec1862967
-rw-r--r-- | prism/config.yml | 4 | ||||
-rw-r--r-- | prism/prism.c | 109 | ||||
-rw-r--r-- | test/prism/fixtures/regex.txt | 3 | ||||
-rw-r--r-- | test/prism/snapshots/regex.txt | 126 | ||||
-rw-r--r-- | test/prism/snapshots/spanning_heredoc.txt | 5 | ||||
-rw-r--r-- | test/prism/snapshots/whitequark/lvar_injecting_match.txt | 5 |
6 files changed, 186 insertions, 66 deletions
diff --git a/prism/config.yml b/prism/config.yml index 6f12a51344..052901734a 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -1880,8 +1880,8 @@ nodes: - name: call type: node kind: CallNode - - name: locals - type: constant[] + - name: targets + type: node[] comment: | Represents writing local variables using a regular expression match with named capture groups. diff --git a/prism/prism.c b/prism/prism.c index f041fb5167..84a28b9345 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -4028,26 +4028,41 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui } /** - * Allocate and initialize a new LocalVariableTargetNode node. + * Allocate and initialize a new LocalVariableTargetNode node with the given + * name and depth. */ static pm_local_variable_target_node_t * -pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) { +pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) { pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t); - pm_refute_numbered_parameter(parser, name->start, name->end); *node = (pm_local_variable_target_node_t) { { .type = PM_LOCAL_VARIABLE_TARGET_NODE, - .location = PM_LOCATION_TOKEN_VALUE(name) + .location = *location }, - .name = pm_parser_constant_id_token(parser, name), - .depth = 0 + .name = name, + .depth = depth }; return node; } /** + * Allocate and initialize a new LocalVariableTargetNode node. + */ +static pm_local_variable_target_node_t * +pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) { + pm_refute_numbered_parameter(parser, name->start, name->end); + + return pm_local_variable_target_node_create_values( + parser, + &(pm_location_t) { .start = name->start, .end = name->end }, + pm_parser_constant_id_token(parser, name), + 0 + ); +} + +/** * Allocate and initialize a new MatchPredicateNode node. */ static pm_match_predicate_node_t * @@ -4109,10 +4124,10 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) { .type = PM_MATCH_WRITE_NODE, .location = call->base.location }, - .call = call + .call = call, + .targets = { 0 } }; - pm_constant_id_list_init(&node->locals); return node; } @@ -5694,17 +5709,16 @@ pm_parser_scope_push_transparent(pm_parser_t *parser) { } /** - * Check if the current scope has a given local variables. + * Check if any of the currently visible scopes contain a local variable + * described by the given constant id. */ static int -pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { - pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, token); +pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) { pm_scope_t *scope = parser->current_scope; int depth = 0; while (scope != NULL) { - if (!scope->transparent && - pm_constant_id_list_includes(&scope->locals, constant_id)) return depth; + if (!scope->transparent && pm_constant_id_list_includes(&scope->locals, constant_id)) return depth; if (scope->closed) break; scope = scope->previous; @@ -5715,6 +5729,16 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { } /** + * Check if any of the currently visible scopes contain a local variable + * described by the given token. This function implicitly inserts a constant + * into the constant pool. + */ +static inline int +pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { + return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token)); +} + +/** * Add a constant id to the local table of the current scope. */ static inline void @@ -15766,37 +15790,40 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * pm_node_t *result; if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) { - // Since we should not create a MatchWriteNode when all capture names are invalid, - // creating a MatchWriteNode is delayed here. + // Since we should not create a MatchWriteNode when all capture names + // are invalid, creating a MatchWriteNode is delayed here. pm_match_write_node_t *match = NULL; + pm_constant_id_list_t names = { 0 }; for (size_t index = 0; index < named_captures.length; index++) { - pm_string_t *name = &named_captures.strings[index]; + pm_string_t *string = &named_captures.strings[index]; - const uint8_t *source = pm_string_source(name); - size_t length = pm_string_length(name); + const uint8_t *source = pm_string_source(string); + size_t length = pm_string_length(string); - if (!name_is_identifier(parser, source, length)) { - continue; - } + pm_location_t location; + pm_constant_id_t name; + + // If the name of the capture group isn't a valid identifier, we do + // not add it to the local table. + if (!name_is_identifier(parser, source, length)) continue; - pm_constant_id_t local; if (content->type == PM_STRING_SHARED) { // If the unescaped string is a slice of the source, then we can // copy the names directly. The pointers will line up. - local = pm_parser_local_add_location(parser, source, source + length); + location = (pm_location_t) { .start = source, .end = source + length }; + name = pm_parser_constant_id_location(parser, location.start, location.end); pm_refute_numbered_parameter(parser, source, source + length); } else { // Otherwise, the name is a slice of the malloc-ed owned string, // in which case we need to copy it out into a new string. + location = call->receiver->location; + void *memory = malloc(length); if (memory == NULL) abort(); memcpy(memory, source, length); - - // This silences clang analyzer warning about leak of memory pointed by `memory`. - // NOLINTNEXTLINE(clang-analyzer-*) - local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length); + name = pm_parser_constant_id_owned(parser, (const uint8_t *) memory, length); if (pm_token_is_numbered_parameter(source, source + length)) { const pm_location_t *location = &call->receiver->location; @@ -15804,15 +15831,27 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * } } - if (match == NULL) { - match = pm_match_write_node_create(parser, call); - } + if (name != 0) { + // We dont want to create duplicate targets if the capture name + // is duplicated. + if (pm_constant_id_list_includes(&names, name)) continue; + pm_constant_id_list_append(&names, name); - if (pm_constant_id_list_includes(&match->locals, local)) { - continue; - } + // Here we lazily create the MatchWriteNode since we know we're + // about to add a target. + if (match == NULL) match = pm_match_write_node_create(parser, call); - pm_constant_id_list_append(&match->locals, local); + // First, find the depth of the local that is being assigned. + int depth; + if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) { + pm_parser_local_add(parser, name); + } + + // Next, create the local variable target and add it to the + // list of targets for the match. + pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_values(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth); + pm_node_list_append(&match->targets, target); + } } if (match != NULL) { @@ -15820,6 +15859,8 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * } else { result = (pm_node_t *) call; } + + pm_constant_id_list_free(&names); } else { result = (pm_node_t *) call; } diff --git a/test/prism/fixtures/regex.txt b/test/prism/fixtures/regex.txt index c27e022d01..ef2f6d45a3 100644 --- a/test/prism/fixtures/regex.txt +++ b/test/prism/fixtures/regex.txt @@ -35,3 +35,6 @@ b>)/ =~ ""; ab /(?<abc>)(?<abc>)/ =~ ""; abc /(?<a b>)/ =~ "" + +a = 1 +tap { /(?<a>)/ =~ to_s } diff --git a/test/prism/snapshots/regex.txt b/test/prism/snapshots/regex.txt index 722006c998..b69e5cc31e 100644 --- a/test/prism/snapshots/regex.txt +++ b/test/prism/snapshots/regex.txt @@ -1,8 +1,8 @@ -@ ProgramNode (location: (1,0)-(37,16)) -├── locals: [:foo, :ab, :abc] +@ ProgramNode (location: (1,0)-(40,24)) +├── locals: [:foo, :ab, :abc, :a] └── statements: - @ StatementsNode (location: (1,0)-(37,16)) - └── body: (length: 19) + @ StatementsNode (location: (1,0)-(40,24)) + └── body: (length: 21) ├── @ CallNode (location: (1,0)-(1,9)) │ ├── receiver: ∅ │ ├── call_operator_loc: ∅ @@ -116,7 +116,10 @@ │ │ │ │ ├── block: ∅ │ │ │ │ ├── flags: ∅ │ │ │ │ └── name: :=~ - │ │ │ └── locals: [:foo] + │ │ │ └── targets: (length: 1) + │ │ │ └── @ LocalVariableTargetNode (location: (11,5)-(11,8)) + │ │ │ ├── name: :foo + │ │ │ └── depth: 0 │ │ └── @ LocalVariableReadNode (location: (11,23)-(11,26)) │ │ ├── name: :foo │ │ └── depth: 0 @@ -237,7 +240,10 @@ │ │ ├── block: ∅ │ │ ├── flags: ∅ │ │ └── name: :=~ - │ └── locals: [:ab] + │ └── targets: (length: 1) + │ └── @ LocalVariableTargetNode (location: (32,0)-(33,4)) + │ ├── name: :ab + │ └── depth: 0 ├── @ LocalVariableReadNode (location: (33,12)-(33,14)) │ ├── name: :ab │ └── depth: 0 @@ -268,32 +274,96 @@ │ │ ├── block: ∅ │ │ ├── flags: ∅ │ │ └── name: :=~ - │ └── locals: [:abc] + │ └── targets: (length: 1) + │ └── @ LocalVariableTargetNode (location: (35,4)-(35,7)) + │ ├── name: :abc + │ └── depth: 0 ├── @ LocalVariableReadNode (location: (35,26)-(35,29)) │ ├── name: :abc │ └── depth: 0 - └── @ CallNode (location: (37,0)-(37,16)) - ├── receiver: - │ @ RegularExpressionNode (location: (37,0)-(37,10)) - │ ├── opening_loc: (37,0)-(37,1) = "/" - │ ├── content_loc: (37,1)-(37,9) = "(?<a b>)" - │ ├── closing_loc: (37,9)-(37,10) = "/" - │ ├── unescaped: "(?<a b>)" - │ └── flags: ∅ + ├── @ CallNode (location: (37,0)-(37,16)) + │ ├── receiver: + │ │ @ RegularExpressionNode (location: (37,0)-(37,10)) + │ │ ├── opening_loc: (37,0)-(37,1) = "/" + │ │ ├── content_loc: (37,1)-(37,9) = "(?<a b>)" + │ │ ├── closing_loc: (37,9)-(37,10) = "/" + │ │ ├── unescaped: "(?<a b>)" + │ │ └── flags: ∅ + │ ├── call_operator_loc: ∅ + │ ├── message_loc: (37,11)-(37,13) = "=~" + │ ├── opening_loc: ∅ + │ ├── arguments: + │ │ @ ArgumentsNode (location: (37,14)-(37,16)) + │ │ ├── arguments: (length: 1) + │ │ │ └── @ StringNode (location: (37,14)-(37,16)) + │ │ │ ├── flags: ∅ + │ │ │ ├── opening_loc: (37,14)-(37,15) = "\"" + │ │ │ ├── content_loc: (37,15)-(37,15) = "" + │ │ │ ├── closing_loc: (37,15)-(37,16) = "\"" + │ │ │ └── unescaped: "" + │ │ └── flags: ∅ + │ ├── closing_loc: ∅ + │ ├── block: ∅ + │ ├── flags: ∅ + │ └── name: :=~ + ├── @ LocalVariableWriteNode (location: (39,0)-(39,5)) + │ ├── name: :a + │ ├── depth: 0 + │ ├── name_loc: (39,0)-(39,1) = "a" + │ ├── value: + │ │ @ IntegerNode (location: (39,4)-(39,5)) + │ │ └── flags: decimal + │ └── operator_loc: (39,2)-(39,3) = "=" + └── @ CallNode (location: (40,0)-(40,24)) + ├── receiver: ∅ ├── call_operator_loc: ∅ - ├── message_loc: (37,11)-(37,13) = "=~" + ├── message_loc: (40,0)-(40,3) = "tap" ├── opening_loc: ∅ - ├── arguments: - │ @ ArgumentsNode (location: (37,14)-(37,16)) - │ ├── arguments: (length: 1) - │ │ └── @ StringNode (location: (37,14)-(37,16)) - │ │ ├── flags: ∅ - │ │ ├── opening_loc: (37,14)-(37,15) = "\"" - │ │ ├── content_loc: (37,15)-(37,15) = "" - │ │ ├── closing_loc: (37,15)-(37,16) = "\"" - │ │ └── unescaped: "" - │ └── flags: ∅ + ├── arguments: ∅ ├── closing_loc: ∅ - ├── block: ∅ + ├── block: + │ @ BlockNode (location: (40,4)-(40,24)) + │ ├── locals: [] + │ ├── parameters: ∅ + │ ├── body: + │ │ @ StatementsNode (location: (40,6)-(40,22)) + │ │ └── body: (length: 1) + │ │ └── @ MatchWriteNode (location: (40,6)-(40,22)) + │ │ ├── call: + │ │ │ @ CallNode (location: (40,6)-(40,22)) + │ │ │ ├── receiver: + │ │ │ │ @ RegularExpressionNode (location: (40,6)-(40,14)) + │ │ │ │ ├── opening_loc: (40,6)-(40,7) = "/" + │ │ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)" + │ │ │ │ ├── closing_loc: (40,13)-(40,14) = "/" + │ │ │ │ ├── unescaped: "(?<a>)" + │ │ │ │ └── flags: ∅ + │ │ │ ├── call_operator_loc: ∅ + │ │ │ ├── message_loc: (40,15)-(40,17) = "=~" + │ │ │ ├── opening_loc: ∅ + │ │ │ ├── arguments: + │ │ │ │ @ ArgumentsNode (location: (40,18)-(40,22)) + │ │ │ │ ├── arguments: (length: 1) + │ │ │ │ │ └── @ CallNode (location: (40,18)-(40,22)) + │ │ │ │ │ ├── receiver: ∅ + │ │ │ │ │ ├── call_operator_loc: ∅ + │ │ │ │ │ ├── message_loc: (40,18)-(40,22) = "to_s" + │ │ │ │ │ ├── opening_loc: ∅ + │ │ │ │ │ ├── arguments: ∅ + │ │ │ │ │ ├── closing_loc: ∅ + │ │ │ │ │ ├── block: ∅ + │ │ │ │ │ ├── flags: variable_call + │ │ │ │ │ └── name: :to_s + │ │ │ │ └── flags: ∅ + │ │ │ ├── closing_loc: ∅ + │ │ │ ├── block: ∅ + │ │ │ ├── flags: ∅ + │ │ │ └── name: :=~ + │ │ └── targets: (length: 1) + │ │ └── @ LocalVariableTargetNode (location: (40,10)-(40,11)) + │ │ ├── name: :a + │ │ └── depth: 1 + │ ├── opening_loc: (40,4)-(40,5) = "{" + │ └── closing_loc: (40,23)-(40,24) = "}" ├── flags: ∅ - └── name: :=~ + └── name: :tap diff --git a/test/prism/snapshots/spanning_heredoc.txt b/test/prism/snapshots/spanning_heredoc.txt index f28aeb815a..ed9c03e608 100644 --- a/test/prism/snapshots/spanning_heredoc.txt +++ b/test/prism/snapshots/spanning_heredoc.txt @@ -352,4 +352,7 @@ │ ├── block: ∅ │ ├── flags: ∅ │ └── name: :=~ - └── locals: [:a] + └── targets: (length: 1) + └── @ LocalVariableTargetNode (location: (53,5)-(55,7)) + ├── name: :a + └── depth: 0 diff --git a/test/prism/snapshots/whitequark/lvar_injecting_match.txt b/test/prism/snapshots/whitequark/lvar_injecting_match.txt index 0d615cc11b..de6d5eb99a 100644 --- a/test/prism/snapshots/whitequark/lvar_injecting_match.txt +++ b/test/prism/snapshots/whitequark/lvar_injecting_match.txt @@ -30,7 +30,10 @@ │ │ ├── block: ∅ │ │ ├── flags: ∅ │ │ └── name: :=~ - │ └── locals: [:match] + │ └── targets: (length: 1) + │ └── @ LocalVariableTargetNode (location: (1,4)-(1,9)) + │ ├── name: :match + │ └── depth: 0 └── @ LocalVariableReadNode (location: (1,26)-(1,31)) ├── name: :match └── depth: 0 |