summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-11-09 22:59:35 -0500
committerKevin Newton <kddnewton@gmail.com>2023-11-20 18:00:44 -0500
commite269096d15a4488a8ae34aca38752989e61304c7 (patch)
tree7e92471855d8210d9b3d623437a664d8f378ef63
parentfa547cd70243710e90bf57377402cbbdf910efa3 (diff)
[ruby/prism] Replace match write locals with match write targets
https://github.com/ruby/prism/commit/eec1862967
-rw-r--r--prism/config.yml4
-rw-r--r--prism/prism.c109
-rw-r--r--test/prism/fixtures/regex.txt3
-rw-r--r--test/prism/snapshots/regex.txt126
-rw-r--r--test/prism/snapshots/spanning_heredoc.txt5
-rw-r--r--test/prism/snapshots/whitequark/lvar_injecting_match.txt5
6 files changed, 186 insertions, 66 deletions
diff --git a/prism/config.yml b/prism/config.yml
index 6f12a51344..052901734a 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -1880,8 +1880,8 @@ nodes:
- name: call
type: node
kind: CallNode
- - name: locals
- type: constant[]
+ - name: targets
+ type: node[]
comment: |
Represents writing local variables using a regular expression match with
named capture groups.
diff --git a/prism/prism.c b/prism/prism.c
index f041fb5167..84a28b9345 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -4028,26 +4028,41 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui
}
/**
- * Allocate and initialize a new LocalVariableTargetNode node.
+ * Allocate and initialize a new LocalVariableTargetNode node with the given
+ * name and depth.
*/
static pm_local_variable_target_node_t *
-pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
+pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t);
- pm_refute_numbered_parameter(parser, name->start, name->end);
*node = (pm_local_variable_target_node_t) {
{
.type = PM_LOCAL_VARIABLE_TARGET_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(name)
+ .location = *location
},
- .name = pm_parser_constant_id_token(parser, name),
- .depth = 0
+ .name = name,
+ .depth = depth
};
return node;
}
/**
+ * Allocate and initialize a new LocalVariableTargetNode node.
+ */
+static pm_local_variable_target_node_t *
+pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
+ pm_refute_numbered_parameter(parser, name->start, name->end);
+
+ return pm_local_variable_target_node_create_values(
+ parser,
+ &(pm_location_t) { .start = name->start, .end = name->end },
+ pm_parser_constant_id_token(parser, name),
+ 0
+ );
+}
+
+/**
* Allocate and initialize a new MatchPredicateNode node.
*/
static pm_match_predicate_node_t *
@@ -4109,10 +4124,10 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
.type = PM_MATCH_WRITE_NODE,
.location = call->base.location
},
- .call = call
+ .call = call,
+ .targets = { 0 }
};
- pm_constant_id_list_init(&node->locals);
return node;
}
@@ -5694,17 +5709,16 @@ pm_parser_scope_push_transparent(pm_parser_t *parser) {
}
/**
- * Check if the current scope has a given local variables.
+ * Check if any of the currently visible scopes contain a local variable
+ * described by the given constant id.
*/
static int
-pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
- pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, token);
+pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
pm_scope_t *scope = parser->current_scope;
int depth = 0;
while (scope != NULL) {
- if (!scope->transparent &&
- pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
+ if (!scope->transparent && pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
if (scope->closed) break;
scope = scope->previous;
@@ -5715,6 +5729,16 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
}
/**
+ * Check if any of the currently visible scopes contain a local variable
+ * described by the given token. This function implicitly inserts a constant
+ * into the constant pool.
+ */
+static inline int
+pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
+ return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
+}
+
+/**
* Add a constant id to the local table of the current scope.
*/
static inline void
@@ -15766,37 +15790,40 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
pm_node_t *result;
if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
- // Since we should not create a MatchWriteNode when all capture names are invalid,
- // creating a MatchWriteNode is delayed here.
+ // Since we should not create a MatchWriteNode when all capture names
+ // are invalid, creating a MatchWriteNode is delayed here.
pm_match_write_node_t *match = NULL;
+ pm_constant_id_list_t names = { 0 };
for (size_t index = 0; index < named_captures.length; index++) {
- pm_string_t *name = &named_captures.strings[index];
+ pm_string_t *string = &named_captures.strings[index];
- const uint8_t *source = pm_string_source(name);
- size_t length = pm_string_length(name);
+ const uint8_t *source = pm_string_source(string);
+ size_t length = pm_string_length(string);
- if (!name_is_identifier(parser, source, length)) {
- continue;
- }
+ pm_location_t location;
+ pm_constant_id_t name;
+
+ // If the name of the capture group isn't a valid identifier, we do
+ // not add it to the local table.
+ if (!name_is_identifier(parser, source, length)) continue;
- pm_constant_id_t local;
if (content->type == PM_STRING_SHARED) {
// If the unescaped string is a slice of the source, then we can
// copy the names directly. The pointers will line up.
- local = pm_parser_local_add_location(parser, source, source + length);
+ location = (pm_location_t) { .start = source, .end = source + length };
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
pm_refute_numbered_parameter(parser, source, source + length);
} else {
// Otherwise, the name is a slice of the malloc-ed owned string,
// in which case we need to copy it out into a new string.
+ location = call->receiver->location;
+
void *memory = malloc(length);
if (memory == NULL) abort();
memcpy(memory, source, length);
-
- // This silences clang analyzer warning about leak of memory pointed by `memory`.
- // NOLINTNEXTLINE(clang-analyzer-*)
- local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
+ name = pm_parser_constant_id_owned(parser, (const uint8_t *) memory, length);
if (pm_token_is_numbered_parameter(source, source + length)) {
const pm_location_t *location = &call->receiver->location;
@@ -15804,15 +15831,27 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
}
}
- if (match == NULL) {
- match = pm_match_write_node_create(parser, call);
- }
+ if (name != 0) {
+ // We dont want to create duplicate targets if the capture name
+ // is duplicated.
+ if (pm_constant_id_list_includes(&names, name)) continue;
+ pm_constant_id_list_append(&names, name);
- if (pm_constant_id_list_includes(&match->locals, local)) {
- continue;
- }
+ // Here we lazily create the MatchWriteNode since we know we're
+ // about to add a target.
+ if (match == NULL) match = pm_match_write_node_create(parser, call);
- pm_constant_id_list_append(&match->locals, local);
+ // First, find the depth of the local that is being assigned.
+ int depth;
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
+ pm_parser_local_add(parser, name);
+ }
+
+ // Next, create the local variable target and add it to the
+ // list of targets for the match.
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_values(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
+ pm_node_list_append(&match->targets, target);
+ }
}
if (match != NULL) {
@@ -15820,6 +15859,8 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
} else {
result = (pm_node_t *) call;
}
+
+ pm_constant_id_list_free(&names);
} else {
result = (pm_node_t *) call;
}
diff --git a/test/prism/fixtures/regex.txt b/test/prism/fixtures/regex.txt
index c27e022d01..ef2f6d45a3 100644
--- a/test/prism/fixtures/regex.txt
+++ b/test/prism/fixtures/regex.txt
@@ -35,3 +35,6 @@ b>)/ =~ ""; ab
/(?<abc>)(?<abc>)/ =~ ""; abc
/(?<a b>)/ =~ ""
+
+a = 1
+tap { /(?<a>)/ =~ to_s }
diff --git a/test/prism/snapshots/regex.txt b/test/prism/snapshots/regex.txt
index 722006c998..b69e5cc31e 100644
--- a/test/prism/snapshots/regex.txt
+++ b/test/prism/snapshots/regex.txt
@@ -1,8 +1,8 @@
-@ ProgramNode (location: (1,0)-(37,16))
-├── locals: [:foo, :ab, :abc]
+@ ProgramNode (location: (1,0)-(40,24))
+├── locals: [:foo, :ab, :abc, :a]
└── statements:
- @ StatementsNode (location: (1,0)-(37,16))
- └── body: (length: 19)
+ @ StatementsNode (location: (1,0)-(40,24))
+ └── body: (length: 21)
├── @ CallNode (location: (1,0)-(1,9))
│ ├── receiver: ∅
│ ├── call_operator_loc: ∅
@@ -116,7 +116,10 @@
│ │ │ │ ├── block: ∅
│ │ │ │ ├── flags: ∅
│ │ │ │ └── name: :=~
- │ │ │ └── locals: [:foo]
+ │ │ │ └── targets: (length: 1)
+ │ │ │ └── @ LocalVariableTargetNode (location: (11,5)-(11,8))
+ │ │ │ ├── name: :foo
+ │ │ │ └── depth: 0
│ │ └── @ LocalVariableReadNode (location: (11,23)-(11,26))
│ │ ├── name: :foo
│ │ └── depth: 0
@@ -237,7 +240,10 @@
│ │ ├── block: ∅
│ │ ├── flags: ∅
│ │ └── name: :=~
- │ └── locals: [:ab]
+ │ └── targets: (length: 1)
+ │ └── @ LocalVariableTargetNode (location: (32,0)-(33,4))
+ │ ├── name: :ab
+ │ └── depth: 0
├── @ LocalVariableReadNode (location: (33,12)-(33,14))
│ ├── name: :ab
│ └── depth: 0
@@ -268,32 +274,96 @@
│ │ ├── block: ∅
│ │ ├── flags: ∅
│ │ └── name: :=~
- │ └── locals: [:abc]
+ │ └── targets: (length: 1)
+ │ └── @ LocalVariableTargetNode (location: (35,4)-(35,7))
+ │ ├── name: :abc
+ │ └── depth: 0
├── @ LocalVariableReadNode (location: (35,26)-(35,29))
│ ├── name: :abc
│ └── depth: 0
- └── @ CallNode (location: (37,0)-(37,16))
- ├── receiver:
- │ @ RegularExpressionNode (location: (37,0)-(37,10))
- │ ├── opening_loc: (37,0)-(37,1) = "/"
- │ ├── content_loc: (37,1)-(37,9) = "(?<a b>)"
- │ ├── closing_loc: (37,9)-(37,10) = "/"
- │ ├── unescaped: "(?<a b>)"
- │ └── flags: ∅
+ ├── @ CallNode (location: (37,0)-(37,16))
+ │ ├── receiver:
+ │ │ @ RegularExpressionNode (location: (37,0)-(37,10))
+ │ │ ├── opening_loc: (37,0)-(37,1) = "/"
+ │ │ ├── content_loc: (37,1)-(37,9) = "(?<a b>)"
+ │ │ ├── closing_loc: (37,9)-(37,10) = "/"
+ │ │ ├── unescaped: "(?<a b>)"
+ │ │ └── flags: ∅
+ │ ├── call_operator_loc: ∅
+ │ ├── message_loc: (37,11)-(37,13) = "=~"
+ │ ├── opening_loc: ∅
+ │ ├── arguments:
+ │ │ @ ArgumentsNode (location: (37,14)-(37,16))
+ │ │ ├── arguments: (length: 1)
+ │ │ │ └── @ StringNode (location: (37,14)-(37,16))
+ │ │ │ ├── flags: ∅
+ │ │ │ ├── opening_loc: (37,14)-(37,15) = "\""
+ │ │ │ ├── content_loc: (37,15)-(37,15) = ""
+ │ │ │ ├── closing_loc: (37,15)-(37,16) = "\""
+ │ │ │ └── unescaped: ""
+ │ │ └── flags: ∅
+ │ ├── closing_loc: ∅
+ │ ├── block: ∅
+ │ ├── flags: ∅
+ │ └── name: :=~
+ ├── @ LocalVariableWriteNode (location: (39,0)-(39,5))
+ │ ├── name: :a
+ │ ├── depth: 0
+ │ ├── name_loc: (39,0)-(39,1) = "a"
+ │ ├── value:
+ │ │ @ IntegerNode (location: (39,4)-(39,5))
+ │ │ └── flags: decimal
+ │ └── operator_loc: (39,2)-(39,3) = "="
+ └── @ CallNode (location: (40,0)-(40,24))
+ ├── receiver: ∅
├── call_operator_loc: ∅
- ├── message_loc: (37,11)-(37,13) = "=~"
+ ├── message_loc: (40,0)-(40,3) = "tap"
├── opening_loc: ∅
- ├── arguments:
- │ @ ArgumentsNode (location: (37,14)-(37,16))
- │ ├── arguments: (length: 1)
- │ │ └── @ StringNode (location: (37,14)-(37,16))
- │ │ ├── flags: ∅
- │ │ ├── opening_loc: (37,14)-(37,15) = "\""
- │ │ ├── content_loc: (37,15)-(37,15) = ""
- │ │ ├── closing_loc: (37,15)-(37,16) = "\""
- │ │ └── unescaped: ""
- │ └── flags: ∅
+ ├── arguments: ∅
├── closing_loc: ∅
- ├── block: ∅
+ ├── block:
+ │ @ BlockNode (location: (40,4)-(40,24))
+ │ ├── locals: []
+ │ ├── parameters: ∅
+ │ ├── body:
+ │ │ @ StatementsNode (location: (40,6)-(40,22))
+ │ │ └── body: (length: 1)
+ │ │ └── @ MatchWriteNode (location: (40,6)-(40,22))
+ │ │ ├── call:
+ │ │ │ @ CallNode (location: (40,6)-(40,22))
+ │ │ │ ├── receiver:
+ │ │ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
+ │ │ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
+ │ │ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
+ │ │ │ │ ├── closing_loc: (40,13)-(40,14) = "/"
+ │ │ │ │ ├── unescaped: "(?<a>)"
+ │ │ │ │ └── flags: ∅
+ │ │ │ ├── call_operator_loc: ∅
+ │ │ │ ├── message_loc: (40,15)-(40,17) = "=~"
+ │ │ │ ├── opening_loc: ∅
+ │ │ │ ├── arguments:
+ │ │ │ │ @ ArgumentsNode (location: (40,18)-(40,22))
+ │ │ │ │ ├── arguments: (length: 1)
+ │ │ │ │ │ └── @ CallNode (location: (40,18)-(40,22))
+ │ │ │ │ │ ├── receiver: ∅
+ │ │ │ │ │ ├── call_operator_loc: ∅
+ │ │ │ │ │ ├── message_loc: (40,18)-(40,22) = "to_s"
+ │ │ │ │ │ ├── opening_loc: ∅
+ │ │ │ │ │ ├── arguments: ∅
+ │ │ │ │ │ ├── closing_loc: ∅
+ │ │ │ │ │ ├── block: ∅
+ │ │ │ │ │ ├── flags: variable_call
+ │ │ │ │ │ └── name: :to_s
+ │ │ │ │ └── flags: ∅
+ │ │ │ ├── closing_loc: ∅
+ │ │ │ ├── block: ∅
+ │ │ │ ├── flags: ∅
+ │ │ │ └── name: :=~
+ │ │ └── targets: (length: 1)
+ │ │ └── @ LocalVariableTargetNode (location: (40,10)-(40,11))
+ │ │ ├── name: :a
+ │ │ └── depth: 1
+ │ ├── opening_loc: (40,4)-(40,5) = "{"
+ │ └── closing_loc: (40,23)-(40,24) = "}"
├── flags: ∅
- └── name: :=~
+ └── name: :tap
diff --git a/test/prism/snapshots/spanning_heredoc.txt b/test/prism/snapshots/spanning_heredoc.txt
index f28aeb815a..ed9c03e608 100644
--- a/test/prism/snapshots/spanning_heredoc.txt
+++ b/test/prism/snapshots/spanning_heredoc.txt
@@ -352,4 +352,7 @@
│ ├── block: ∅
│ ├── flags: ∅
│ └── name: :=~
- └── locals: [:a]
+ └── targets: (length: 1)
+ └── @ LocalVariableTargetNode (location: (53,5)-(55,7))
+ ├── name: :a
+ └── depth: 0
diff --git a/test/prism/snapshots/whitequark/lvar_injecting_match.txt b/test/prism/snapshots/whitequark/lvar_injecting_match.txt
index 0d615cc11b..de6d5eb99a 100644
--- a/test/prism/snapshots/whitequark/lvar_injecting_match.txt
+++ b/test/prism/snapshots/whitequark/lvar_injecting_match.txt
@@ -30,7 +30,10 @@
│ │ ├── block: ∅
│ │ ├── flags: ∅
│ │ └── name: :=~
- │ └── locals: [:match]
+ │ └── targets: (length: 1)
+ │ └── @ LocalVariableTargetNode (location: (1,4)-(1,9))
+ │ ├── name: :match
+ │ └── depth: 0
└── @ LocalVariableReadNode (location: (1,26)-(1,31))
├── name: :match
└── depth: 0