summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--prism/prism.c110
-rw-r--r--test/prism/fixtures/regex.txt4
-rw-r--r--test/prism/snapshots/regex.txt187
3 files changed, 237 insertions, 64 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 9d9aec00d5..1cfcf704bb 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -1184,6 +1184,77 @@ token_is_setter_name(pm_token_t *token) {
);
}
+/**
+ * Returns true if the given local variable name (`length` bytes at `source`) is one of the keywords compared against below.
+ */
+static bool
+pm_local_is_keyword(const char *source, size_t length) {
+#define KEYWORD(name) if (memcmp(source, name, length) == 0) return true
+ // KEYWORD(name) is only used under the `case` equal to strlen(name), so each memcmp compares the whole keyword.
+ switch (length) {
+ case 2:
+ switch (source[0]) {
+ case 'd': KEYWORD("do"); return false;
+ case 'i': KEYWORD("if"); KEYWORD("in"); return false;
+ case 'o': KEYWORD("or"); return false;
+ default: return false;
+ }
+ case 3:
+ switch (source[0]) {
+ case 'a': KEYWORD("and"); return false;
+ case 'd': KEYWORD("def"); return false;
+ case 'e': KEYWORD("end"); return false;
+ case 'f': KEYWORD("for"); return false;
+ case 'n': KEYWORD("nil"); KEYWORD("not"); return false;
+ default: return false;
+ }
+ case 4:
+ switch (source[0]) {
+ case 'c': KEYWORD("case"); return false;
+ case 'e': KEYWORD("else"); return false;
+ case 'n': KEYWORD("next"); return false;
+ case 'r': KEYWORD("redo"); return false;
+ case 's': KEYWORD("self"); return false;
+ case 't': KEYWORD("then"); KEYWORD("true"); return false;
+ case 'w': KEYWORD("when"); return false;
+ default: return false;
+ }
+ case 5:
+ switch (source[0]) {
+ case 'a': KEYWORD("alias"); return false;
+ case 'b': KEYWORD("begin"); KEYWORD("break"); return false;
+ case 'c': KEYWORD("class"); return false;
+ case 'e': KEYWORD("elsif"); return false;
+ case 'f': KEYWORD("false"); return false;
+ case 'r': KEYWORD("retry"); return false;
+ case 's': KEYWORD("super"); return false;
+ case 'u': KEYWORD("undef"); KEYWORD("until"); return false;
+ case 'w': KEYWORD("while"); return false;
+ case 'y': KEYWORD("yield"); return false;
+ default: return false;
+ }
+ case 6:
+ switch (source[0]) {
+ case 'e': KEYWORD("ensure"); return false;
+ case 'm': KEYWORD("module"); return false;
+ case 'r': KEYWORD("rescue"); KEYWORD("return"); return false;
+ case 'u': KEYWORD("unless"); return false;
+ default: return false;
+ }
+ case 8:
+ KEYWORD("__LINE__");
+ KEYWORD("__FILE__");
+ return false;
+ case 12:
+ KEYWORD("__ENCODING__");
+ return false;
+ default:
+ return false;
+ }
+
+#undef KEYWORD
+}
+
/******************************************************************************/
/* Node flag handling functions */
/******************************************************************************/
@@ -10576,19 +10647,19 @@ parser_lex(pm_parser_t *parser) {
pm_token_type_t type = lex_identifier(parser, previous_command_start);
- // If we've hit a __END__ and it was at the start of the line or the
- // start of the file and it is followed by either a \n or a \r\n, then
- // this is the last token of the file.
+ // If we've hit a __END__ and it was at the start of the
+ // line or the start of the file and it is followed by
+ // either a \n or a \r\n, then this is the last token of the
+ // file.
if (
((parser->current.end - parser->current.start) == 7) &&
current_token_starts_line(parser) &&
(memcmp(parser->current.start, "__END__", 7) == 0) &&
(parser->current.end == parser->end || match_eol(parser))
- )
- {
- // Since we know we're about to add an __END__ comment, we know we
- // need to add all of the newlines to get the correct column
- // information for it.
+ ) {
+ // Since we know we're about to add an __END__ comment,
+ // we know we need to add all of the newlines to get the
+ // correct column information for it.
const uint8_t *cursor = parser->current.end;
while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
pm_newline_list_append(&parser->newline_list, cursor++);
@@ -18006,22 +18077,39 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
}
}
+/**
+ * Returns true if the name of the capture group is a valid local variable that
+ * can be written to.
+ */
static bool
-name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) {
+parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
if (length == 0) {
return false;
}
+ // First ensure that it starts with a valid identifier starting character.
size_t width = char_is_identifier_start(parser, source);
if (!width) {
return false;
}
- uint8_t *cursor = ((uint8_t *)source) + width;
+ // Next, ensure that it's not an uppercase character.
+ if (parser->encoding_changed) {
+ if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
+ } else {
+ if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
+ }
+
+ // Next, iterate through all of the bytes of the string to ensure that they
+ // are all valid identifier characters.
+ const uint8_t *cursor = source + width;
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
cursor += width;
}
+ // Finally, validate that the identifier is not a keyword.
+ if (pm_local_is_keyword((const char *) source, length)) return false;
+
return cursor == source + length;
}
@@ -18051,7 +18139,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
// If the name of the capture group isn't a valid identifier, we do
// not add it to the local table.
- if (!name_is_identifier(parser, source, length)) continue;
+ if (!parse_regular_expression_named_capture(parser, source, length)) continue;
if (content->type == PM_STRING_SHARED) {
// If the unescaped string is a slice of the source, then we can
diff --git a/test/prism/fixtures/regex.txt b/test/prism/fixtures/regex.txt
index ef2f6d45a3..1010ffedc3 100644
--- a/test/prism/fixtures/regex.txt
+++ b/test/prism/fixtures/regex.txt
@@ -38,3 +38,7 @@ b>)/ =~ ""; ab
a = 1
tap { /(?<a>)/ =~ to_s }
+
+/(?<foo>)/ =~ ""
+/(?<Foo>)/ =~ ""
+/(?<nil>)/ =~ ""
diff --git a/test/prism/snapshots/regex.txt b/test/prism/snapshots/regex.txt
index d07ab8c5e7..44657260c5 100644
--- a/test/prism/snapshots/regex.txt
+++ b/test/prism/snapshots/regex.txt
@@ -1,8 +1,8 @@
-@ ProgramNode (location: (1,0)-(40,24))
+@ ProgramNode (location: (1,0)-(44,16))
├── locals: [:foo, :ab, :abc, :a]
└── statements:
- @ StatementsNode (location: (1,0)-(40,24))
- └── body: (length: 21)
+ @ StatementsNode (location: (1,0)-(44,16))
+ └── body: (length: 24)
├── @ CallNode (location: (1,0)-(1,9))
│ ├── flags: ignore_visibility
│ ├── receiver: ∅
@@ -316,56 +316,137 @@
│ │ ├── flags: decimal
│ │ └── value: 1
│ └── operator_loc: (39,2)-(39,3) = "="
- └── @ CallNode (location: (40,0)-(40,24))
- ├── flags: ignore_visibility
- ├── receiver: ∅
+ ├── @ CallNode (location: (40,0)-(40,24))
+ │ ├── flags: ignore_visibility
+ │ ├── receiver: ∅
+ │ ├── call_operator_loc: ∅
+ │ ├── name: :tap
+ │ ├── message_loc: (40,0)-(40,3) = "tap"
+ │ ├── opening_loc: ∅
+ │ ├── arguments: ∅
+ │ ├── closing_loc: ∅
+ │ └── block:
+ │ @ BlockNode (location: (40,4)-(40,24))
+ │ ├── locals: []
+ │ ├── parameters: ∅
+ │ ├── body:
+ │ │ @ StatementsNode (location: (40,6)-(40,22))
+ │ │ └── body: (length: 1)
+ │ │ └── @ MatchWriteNode (location: (40,6)-(40,22))
+ │ │ ├── call:
+ │ │ │ @ CallNode (location: (40,6)-(40,22))
+ │ │ │ ├── flags: ∅
+ │ │ │ ├── receiver:
+ │ │ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
+ │ │ │ │ ├── flags: forced_us_ascii_encoding
+ │ │ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
+ │ │ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
+ │ │ │ │ ├── closing_loc: (40,13)-(40,14) = "/"
+ │ │ │ │ └── unescaped: "(?<a>)"
+ │ │ │ ├── call_operator_loc: ∅
+ │ │ │ ├── name: :=~
+ │ │ │ ├── message_loc: (40,15)-(40,17) = "=~"
+ │ │ │ ├── opening_loc: ∅
+ │ │ │ ├── arguments:
+ │ │ │ │ @ ArgumentsNode (location: (40,18)-(40,22))
+ │ │ │ │ ├── flags: ∅
+ │ │ │ │ └── arguments: (length: 1)
+ │ │ │ │ └── @ CallNode (location: (40,18)-(40,22))
+ │ │ │ │ ├── flags: variable_call, ignore_visibility
+ │ │ │ │ ├── receiver: ∅
+ │ │ │ │ ├── call_operator_loc: ∅
+ │ │ │ │ ├── name: :to_s
+ │ │ │ │ ├── message_loc: (40,18)-(40,22) = "to_s"
+ │ │ │ │ ├── opening_loc: ∅
+ │ │ │ │ ├── arguments: ∅
+ │ │ │ │ ├── closing_loc: ∅
+ │ │ │ │ └── block: ∅
+ │ │ │ ├── closing_loc: ∅
+ │ │ │ └── block: ∅
+ │ │ └── targets: (length: 1)
+ │ │ └── @ LocalVariableTargetNode (location: (40,10)-(40,11))
+ │ │ ├── name: :a
+ │ │ └── depth: 1
+ │ ├── opening_loc: (40,4)-(40,5) = "{"
+ │ └── closing_loc: (40,23)-(40,24) = "}"
+ ├── @ MatchWriteNode (location: (42,0)-(42,16))
+ │ ├── call:
+ │ │ @ CallNode (location: (42,0)-(42,16))
+ │ │ ├── flags: ∅
+ │ │ ├── receiver:
+ │ │ │ @ RegularExpressionNode (location: (42,0)-(42,10))
+ │ │ │ ├── flags: forced_us_ascii_encoding
+ │ │ │ ├── opening_loc: (42,0)-(42,1) = "/"
+ │ │ │ ├── content_loc: (42,1)-(42,9) = "(?<foo>)"
+ │ │ │ ├── closing_loc: (42,9)-(42,10) = "/"
+ │ │ │ └── unescaped: "(?<foo>)"
+ │ │ ├── call_operator_loc: ∅
+ │ │ ├── name: :=~
+ │ │ ├── message_loc: (42,11)-(42,13) = "=~"
+ │ │ ├── opening_loc: ∅
+ │ │ ├── arguments:
+ │ │ │ @ ArgumentsNode (location: (42,14)-(42,16))
+ │ │ │ ├── flags: ∅
+ │ │ │ └── arguments: (length: 1)
+ │ │ │ └── @ StringNode (location: (42,14)-(42,16))
+ │ │ │ ├── flags: ∅
+ │ │ │ ├── opening_loc: (42,14)-(42,15) = "\""
+ │ │ │ ├── content_loc: (42,15)-(42,15) = ""
+ │ │ │ ├── closing_loc: (42,15)-(42,16) = "\""
+ │ │ │ └── unescaped: ""
+ │ │ ├── closing_loc: ∅
+ │ │ └── block: ∅
+ │ └── targets: (length: 1)
+ │ └── @ LocalVariableTargetNode (location: (42,4)-(42,7))
+ │ ├── name: :foo
+ │ └── depth: 0
+ ├── @ CallNode (location: (43,0)-(43,16))
+ │ ├── flags: ∅
+ │ ├── receiver:
+ │ │ @ RegularExpressionNode (location: (43,0)-(43,10))
+ │ │ ├── flags: forced_us_ascii_encoding
+ │ │ ├── opening_loc: (43,0)-(43,1) = "/"
+ │ │ ├── content_loc: (43,1)-(43,9) = "(?<Foo>)"
+ │ │ ├── closing_loc: (43,9)-(43,10) = "/"
+ │ │ └── unescaped: "(?<Foo>)"
+ │ ├── call_operator_loc: ∅
+ │ ├── name: :=~
+ │ ├── message_loc: (43,11)-(43,13) = "=~"
+ │ ├── opening_loc: ∅
+ │ ├── arguments:
+ │ │ @ ArgumentsNode (location: (43,14)-(43,16))
+ │ │ ├── flags: ∅
+ │ │ └── arguments: (length: 1)
+ │ │ └── @ StringNode (location: (43,14)-(43,16))
+ │ │ ├── flags: ∅
+ │ │ ├── opening_loc: (43,14)-(43,15) = "\""
+ │ │ ├── content_loc: (43,15)-(43,15) = ""
+ │ │ ├── closing_loc: (43,15)-(43,16) = "\""
+ │ │ └── unescaped: ""
+ │ ├── closing_loc: ∅
+ │ └── block: ∅
+ └── @ CallNode (location: (44,0)-(44,16))
+ ├── flags: ∅
+ ├── receiver:
+ │ @ RegularExpressionNode (location: (44,0)-(44,10))
+ │ ├── flags: forced_us_ascii_encoding
+ │ ├── opening_loc: (44,0)-(44,1) = "/"
+ │ ├── content_loc: (44,1)-(44,9) = "(?<nil>)"
+ │ ├── closing_loc: (44,9)-(44,10) = "/"
+ │ └── unescaped: "(?<nil>)"
├── call_operator_loc: ∅
- ├── name: :tap
- ├── message_loc: (40,0)-(40,3) = "tap"
+ ├── name: :=~
+ ├── message_loc: (44,11)-(44,13) = "=~"
├── opening_loc: ∅
- ├── arguments: ∅
+ ├── arguments:
+ │ @ ArgumentsNode (location: (44,14)-(44,16))
+ │ ├── flags: ∅
+ │ └── arguments: (length: 1)
+ │ └── @ StringNode (location: (44,14)-(44,16))
+ │ ├── flags: ∅
+ │ ├── opening_loc: (44,14)-(44,15) = "\""
+ │ ├── content_loc: (44,15)-(44,15) = ""
+ │ ├── closing_loc: (44,15)-(44,16) = "\""
+ │ └── unescaped: ""
├── closing_loc: ∅
- └── block:
- @ BlockNode (location: (40,4)-(40,24))
- ├── locals: []
- ├── parameters: ∅
- ├── body:
- │ @ StatementsNode (location: (40,6)-(40,22))
- │ └── body: (length: 1)
- │ └── @ MatchWriteNode (location: (40,6)-(40,22))
- │ ├── call:
- │ │ @ CallNode (location: (40,6)-(40,22))
- │ │ ├── flags: ∅
- │ │ ├── receiver:
- │ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
- │ │ │ ├── flags: forced_us_ascii_encoding
- │ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
- │ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
- │ │ │ ├── closing_loc: (40,13)-(40,14) = "/"
- │ │ │ └── unescaped: "(?<a>)"
- │ │ ├── call_operator_loc: ∅
- │ │ ├── name: :=~
- │ │ ├── message_loc: (40,15)-(40,17) = "=~"
- │ │ ├── opening_loc: ∅
- │ │ ├── arguments:
- │ │ │ @ ArgumentsNode (location: (40,18)-(40,22))
- │ │ │ ├── flags: ∅
- │ │ │ └── arguments: (length: 1)
- │ │ │ └── @ CallNode (location: (40,18)-(40,22))
- │ │ │ ├── flags: variable_call, ignore_visibility
- │ │ │ ├── receiver: ∅
- │ │ │ ├── call_operator_loc: ∅
- │ │ │ ├── name: :to_s
- │ │ │ ├── message_loc: (40,18)-(40,22) = "to_s"
- │ │ │ ├── opening_loc: ∅
- │ │ │ ├── arguments: ∅
- │ │ │ ├── closing_loc: ∅
- │ │ │ └── block: ∅
- │ │ ├── closing_loc: ∅
- │ │ └── block: ∅
- │ └── targets: (length: 1)
- │ └── @ LocalVariableTargetNode (location: (40,10)-(40,11))
- │ ├── name: :a
- │ └── depth: 1
- ├── opening_loc: (40,4)-(40,5) = "{"
- └── closing_loc: (40,23)-(40,24) = "}"
+ └── block: ∅