diff options
-rw-r--r-- | prism/prism.c | 110 | ||||
-rw-r--r-- | test/prism/fixtures/regex.txt | 4 | ||||
-rw-r--r-- | test/prism/snapshots/regex.txt | 187 |
3 files changed, 237 insertions, 64 deletions
diff --git a/prism/prism.c b/prism/prism.c index 9d9aec00d5..1cfcf704bb 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -1184,6 +1184,77 @@ token_is_setter_name(pm_token_t *token) { ); } +/** + * Returns true if the given local variable is a keyword. + */ +static bool +pm_local_is_keyword(const char *source, size_t length) { +#define KEYWORD(name) if (memcmp(source, name, length) == 0) return true + + switch (length) { + case 2: + switch (source[0]) { + case 'd': KEYWORD("do"); return false; + case 'i': KEYWORD("if"); KEYWORD("in"); return false; + case 'o': KEYWORD("or"); return false; + default: return false; + } + case 3: + switch (source[0]) { + case 'a': KEYWORD("and"); return false; + case 'd': KEYWORD("def"); return false; + case 'e': KEYWORD("end"); return false; + case 'f': KEYWORD("for"); return false; + case 'n': KEYWORD("nil"); KEYWORD("not"); return false; + default: return false; + } + case 4: + switch (source[0]) { + case 'c': KEYWORD("case"); return false; + case 'e': KEYWORD("else"); return false; + case 'n': KEYWORD("next"); return false; + case 'r': KEYWORD("redo"); return false; + case 's': KEYWORD("self"); return false; + case 't': KEYWORD("then"); KEYWORD("true"); return false; + case 'w': KEYWORD("when"); return false; + default: return false; + } + case 5: + switch (source[0]) { + case 'a': KEYWORD("alias"); return false; + case 'b': KEYWORD("begin"); KEYWORD("break"); return false; + case 'c': KEYWORD("class"); return false; + case 'e': KEYWORD("elsif"); return false; + case 'f': KEYWORD("false"); return false; + case 'r': KEYWORD("retry"); return false; + case 's': KEYWORD("super"); return false; + case 'u': KEYWORD("undef"); KEYWORD("until"); return false; + case 'w': KEYWORD("while"); return false; + case 'y': KEYWORD("yield"); return false; + default: return false; + } + case 6: + switch (source[0]) { + case 'e': KEYWORD("ensure"); return false; + case 'm': KEYWORD("module"); return false; + case 'r': KEYWORD("rescue"); 
KEYWORD("return"); return false; + case 'u': KEYWORD("unless"); return false; + default: return false; + } + case 8: + KEYWORD("__LINE__"); + KEYWORD("__FILE__"); + return false; + case 12: + KEYWORD("__ENCODING__"); + return false; + default: + return false; + } + +#undef KEYWORD +} + /******************************************************************************/ /* Node flag handling functions */ /******************************************************************************/ @@ -10576,19 +10647,19 @@ parser_lex(pm_parser_t *parser) { pm_token_type_t type = lex_identifier(parser, previous_command_start); - // If we've hit a __END__ and it was at the start of the line or the - // start of the file and it is followed by either a \n or a \r\n, then - // this is the last token of the file. + // If we've hit a __END__ and it was at the start of the + // line or the start of the file and it is followed by + // either a \n or a \r\n, then this is the last token of the + // file. if ( ((parser->current.end - parser->current.start) == 7) && current_token_starts_line(parser) && (memcmp(parser->current.start, "__END__", 7) == 0) && (parser->current.end == parser->end || match_eol(parser)) - ) - { - // Since we know we're about to add an __END__ comment, we know we - // need to add all of the newlines to get the correct column - // information for it. + ) { + // Since we know we're about to add an __END__ comment, + // we know we need to add all of the newlines to get the + // correct column information for it. const uint8_t *cursor = parser->current.end; while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) { pm_newline_list_append(&parser->newline_list, cursor++); @@ -18006,22 +18077,39 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const } } +/** + * Returns true if the name of the capture group is a valid local variable that + * can be written to. 
+ */ static bool -name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) { +parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) { if (length == 0) { return false; } + // First ensure that it starts with a valid identifier starting character. size_t width = char_is_identifier_start(parser, source); if (!width) { return false; } - uint8_t *cursor = ((uint8_t *)source) + width; + // Next, ensure that it's not an uppercase character. + if (parser->encoding_changed) { + if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false; + } else { + if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false; + } + + // Next, iterate through all of the bytes of the string to ensure that they + // are all valid identifier characters. + const uint8_t *cursor = source + width; while (cursor < source + length && (width = char_is_identifier(parser, cursor))) { cursor += width; } + // Finally, validate that the identifier is not a keyword. + if (pm_local_is_keyword((const char *) source, length)) return false; + return cursor == source + length; } @@ -18051,7 +18139,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * // If the name of the capture group isn't a valid identifier, we do // not add it to the local table. 
- if (!name_is_identifier(parser, source, length)) continue; + if (!parse_regular_expression_named_capture(parser, source, length)) continue; if (content->type == PM_STRING_SHARED) { // If the unescaped string is a slice of the source, then we can diff --git a/test/prism/fixtures/regex.txt b/test/prism/fixtures/regex.txt index ef2f6d45a3..1010ffedc3 100644 --- a/test/prism/fixtures/regex.txt +++ b/test/prism/fixtures/regex.txt @@ -38,3 +38,7 @@ b>)/ =~ ""; ab a = 1 tap { /(?<a>)/ =~ to_s } + +/(?<foo>)/ =~ "" +/(?<Foo>)/ =~ "" +/(?<nil>)/ =~ "" diff --git a/test/prism/snapshots/regex.txt b/test/prism/snapshots/regex.txt index d07ab8c5e7..44657260c5 100644 --- a/test/prism/snapshots/regex.txt +++ b/test/prism/snapshots/regex.txt @@ -1,8 +1,8 @@ -@ ProgramNode (location: (1,0)-(40,24)) +@ ProgramNode (location: (1,0)-(44,16)) ├── locals: [:foo, :ab, :abc, :a] └── statements: - @ StatementsNode (location: (1,0)-(40,24)) - └── body: (length: 21) + @ StatementsNode (location: (1,0)-(44,16)) + └── body: (length: 24) ├── @ CallNode (location: (1,0)-(1,9)) │ ├── flags: ignore_visibility │ ├── receiver: ∅ @@ -316,56 +316,137 @@ │ │ ├── flags: decimal │ │ └── value: 1 │ └── operator_loc: (39,2)-(39,3) = "=" - └── @ CallNode (location: (40,0)-(40,24)) - ├── flags: ignore_visibility - ├── receiver: ∅ + ├── @ CallNode (location: (40,0)-(40,24)) + │ ├── flags: ignore_visibility + │ ├── receiver: ∅ + │ ├── call_operator_loc: ∅ + │ ├── name: :tap + │ ├── message_loc: (40,0)-(40,3) = "tap" + │ ├── opening_loc: ∅ + │ ├── arguments: ∅ + │ ├── closing_loc: ∅ + │ └── block: + │ @ BlockNode (location: (40,4)-(40,24)) + │ ├── locals: [] + │ ├── parameters: ∅ + │ ├── body: + │ │ @ StatementsNode (location: (40,6)-(40,22)) + │ │ └── body: (length: 1) + │ │ └── @ MatchWriteNode (location: (40,6)-(40,22)) + │ │ ├── call: + │ │ │ @ CallNode (location: (40,6)-(40,22)) + │ │ │ ├── flags: ∅ + │ │ │ ├── receiver: + │ │ │ │ @ RegularExpressionNode (location: (40,6)-(40,14)) + │ │ │ │ ├── flags: 
forced_us_ascii_encoding + │ │ │ │ ├── opening_loc: (40,6)-(40,7) = "/" + │ │ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)" + │ │ │ │ ├── closing_loc: (40,13)-(40,14) = "/" + │ │ │ │ └── unescaped: "(?<a>)" + │ │ │ ├── call_operator_loc: ∅ + │ │ │ ├── name: :=~ + │ │ │ ├── message_loc: (40,15)-(40,17) = "=~" + │ │ │ ├── opening_loc: ∅ + │ │ │ ├── arguments: + │ │ │ │ @ ArgumentsNode (location: (40,18)-(40,22)) + │ │ │ │ ├── flags: ∅ + │ │ │ │ └── arguments: (length: 1) + │ │ │ │ └── @ CallNode (location: (40,18)-(40,22)) + │ │ │ │ ├── flags: variable_call, ignore_visibility + │ │ │ │ ├── receiver: ∅ + │ │ │ │ ├── call_operator_loc: ∅ + │ │ │ │ ├── name: :to_s + │ │ │ │ ├── message_loc: (40,18)-(40,22) = "to_s" + │ │ │ │ ├── opening_loc: ∅ + │ │ │ │ ├── arguments: ∅ + │ │ │ │ ├── closing_loc: ∅ + │ │ │ │ └── block: ∅ + │ │ │ ├── closing_loc: ∅ + │ │ │ └── block: ∅ + │ │ └── targets: (length: 1) + │ │ └── @ LocalVariableTargetNode (location: (40,10)-(40,11)) + │ │ ├── name: :a + │ │ └── depth: 1 + │ ├── opening_loc: (40,4)-(40,5) = "{" + │ └── closing_loc: (40,23)-(40,24) = "}" + ├── @ MatchWriteNode (location: (42,0)-(42,16)) + │ ├── call: + │ │ @ CallNode (location: (42,0)-(42,16)) + │ │ ├── flags: ∅ + │ │ ├── receiver: + │ │ │ @ RegularExpressionNode (location: (42,0)-(42,10)) + │ │ │ ├── flags: forced_us_ascii_encoding + │ │ │ ├── opening_loc: (42,0)-(42,1) = "/" + │ │ │ ├── content_loc: (42,1)-(42,9) = "(?<foo>)" + │ │ │ ├── closing_loc: (42,9)-(42,10) = "/" + │ │ │ └── unescaped: "(?<foo>)" + │ │ ├── call_operator_loc: ∅ + │ │ ├── name: :=~ + │ │ ├── message_loc: (42,11)-(42,13) = "=~" + │ │ ├── opening_loc: ∅ + │ │ ├── arguments: + │ │ │ @ ArgumentsNode (location: (42,14)-(42,16)) + │ │ │ ├── flags: ∅ + │ │ │ └── arguments: (length: 1) + │ │ │ └── @ StringNode (location: (42,14)-(42,16)) + │ │ │ ├── flags: ∅ + │ │ │ ├── opening_loc: (42,14)-(42,15) = "\"" + │ │ │ ├── content_loc: (42,15)-(42,15) = "" + │ │ │ ├── closing_loc: (42,15)-(42,16) = "\"" + │ │ │ └── 
unescaped: "" + │ │ ├── closing_loc: ∅ + │ │ └── block: ∅ + │ └── targets: (length: 1) + │ └── @ LocalVariableTargetNode (location: (42,4)-(42,7)) + │ ├── name: :foo + │ └── depth: 0 + ├── @ CallNode (location: (43,0)-(43,16)) + │ ├── flags: ∅ + │ ├── receiver: + │ │ @ RegularExpressionNode (location: (43,0)-(43,10)) + │ │ ├── flags: forced_us_ascii_encoding + │ │ ├── opening_loc: (43,0)-(43,1) = "/" + │ │ ├── content_loc: (43,1)-(43,9) = "(?<Foo>)" + │ │ ├── closing_loc: (43,9)-(43,10) = "/" + │ │ └── unescaped: "(?<Foo>)" + │ ├── call_operator_loc: ∅ + │ ├── name: :=~ + │ ├── message_loc: (43,11)-(43,13) = "=~" + │ ├── opening_loc: ∅ + │ ├── arguments: + │ │ @ ArgumentsNode (location: (43,14)-(43,16)) + │ │ ├── flags: ∅ + │ │ └── arguments: (length: 1) + │ │ └── @ StringNode (location: (43,14)-(43,16)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (43,14)-(43,15) = "\"" + │ │ ├── content_loc: (43,15)-(43,15) = "" + │ │ ├── closing_loc: (43,15)-(43,16) = "\"" + │ │ └── unescaped: "" + │ ├── closing_loc: ∅ + │ └── block: ∅ + └── @ CallNode (location: (44,0)-(44,16)) + ├── flags: ∅ + ├── receiver: + │ @ RegularExpressionNode (location: (44,0)-(44,10)) + │ ├── flags: forced_us_ascii_encoding + │ ├── opening_loc: (44,0)-(44,1) = "/" + │ ├── content_loc: (44,1)-(44,9) = "(?<nil>)" + │ ├── closing_loc: (44,9)-(44,10) = "/" + │ └── unescaped: "(?<nil>)" ├── call_operator_loc: ∅ - ├── name: :tap - ├── message_loc: (40,0)-(40,3) = "tap" + ├── name: :=~ + ├── message_loc: (44,11)-(44,13) = "=~" ├── opening_loc: ∅ - ├── arguments: ∅ + ├── arguments: + │ @ ArgumentsNode (location: (44,14)-(44,16)) + │ ├── flags: ∅ + │ └── arguments: (length: 1) + │ └── @ StringNode (location: (44,14)-(44,16)) + │ ├── flags: ∅ + │ ├── opening_loc: (44,14)-(44,15) = "\"" + │ ├── content_loc: (44,15)-(44,15) = "" + │ ├── closing_loc: (44,15)-(44,16) = "\"" + │ └── unescaped: "" ├── closing_loc: ∅ - └── block: - @ BlockNode (location: (40,4)-(40,24)) - ├── locals: [] - ├── parameters: ∅ - ├── body: - 
│ @ StatementsNode (location: (40,6)-(40,22)) - │ └── body: (length: 1) - │ └── @ MatchWriteNode (location: (40,6)-(40,22)) - │ ├── call: - │ │ @ CallNode (location: (40,6)-(40,22)) - │ │ ├── flags: ∅ - │ │ ├── receiver: - │ │ │ @ RegularExpressionNode (location: (40,6)-(40,14)) - │ │ │ ├── flags: forced_us_ascii_encoding - │ │ │ ├── opening_loc: (40,6)-(40,7) = "/" - │ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)" - │ │ │ ├── closing_loc: (40,13)-(40,14) = "/" - │ │ │ └── unescaped: "(?<a>)" - │ │ ├── call_operator_loc: ∅ - │ │ ├── name: :=~ - │ │ ├── message_loc: (40,15)-(40,17) = "=~" - │ │ ├── opening_loc: ∅ - │ │ ├── arguments: - │ │ │ @ ArgumentsNode (location: (40,18)-(40,22)) - │ │ │ ├── flags: ∅ - │ │ │ └── arguments: (length: 1) - │ │ │ └── @ CallNode (location: (40,18)-(40,22)) - │ │ │ ├── flags: variable_call, ignore_visibility - │ │ │ ├── receiver: ∅ - │ │ │ ├── call_operator_loc: ∅ - │ │ │ ├── name: :to_s - │ │ │ ├── message_loc: (40,18)-(40,22) = "to_s" - │ │ │ ├── opening_loc: ∅ - │ │ │ ├── arguments: ∅ - │ │ │ ├── closing_loc: ∅ - │ │ │ └── block: ∅ - │ │ ├── closing_loc: ∅ - │ │ └── block: ∅ - │ └── targets: (length: 1) - │ └── @ LocalVariableTargetNode (location: (40,10)-(40,11)) - │ ├── name: :a - │ └── depth: 1 - ├── opening_loc: (40,4)-(40,5) = "{" - └── closing_loc: (40,23)-(40,24) = "}" + └── block: ∅ |