summaryrefslogtreecommitdiff
path: root/prism
diff options
context:
space:
mode:
Diffstat (limited to 'prism')
-rw-r--r--prism/api_pack.c9
-rw-r--r--prism/config.yml2373
-rw-r--r--prism/defines.h168
-rw-r--r--prism/diagnostic.c358
-rw-r--r--prism/diagnostic.h300
-rw-r--r--prism/encoding.c592
-rw-r--r--prism/encoding.h41
-rw-r--r--prism/extension.c1013
-rw-r--r--prism/extension.h11
-rw-r--r--prism/node.h106
-rw-r--r--prism/options.c163
-rw-r--r--prism/options.h298
-rw-r--r--prism/pack.c50
-rw-r--r--prism/pack.h11
-rw-r--r--prism/parser.h332
-rw-r--r--prism/prettyprint.h8
-rw-r--r--prism/prism.c13457
-rw-r--r--prism/prism.h163
-rw-r--r--prism/regexp.c298
-rw-r--r--prism/regexp.h26
-rw-r--r--prism/srcs.mk150
-rw-r--r--prism/srcs.mk.in48
-rw-r--r--prism/static_literals.c617
-rw-r--r--prism/static_literals.h121
-rw-r--r--prism/templates/ext/prism/api_node.c.erb217
-rw-r--r--prism/templates/include/prism/ast.h.erb100
-rw-r--r--prism/templates/include/prism/diagnostic.h.erb130
-rw-r--r--prism/templates/lib/prism/compiler.rb.erb8
-rw-r--r--prism/templates/lib/prism/dispatcher.rb.erb16
-rw-r--r--prism/templates/lib/prism/dot_visitor.rb.erb27
-rw-r--r--prism/templates/lib/prism/dsl.rb.erb120
-rw-r--r--prism/templates/lib/prism/inspect_visitor.rb.erb131
-rw-r--r--prism/templates/lib/prism/mutation_compiler.rb.erb4
-rw-r--r--prism/templates/lib/prism/node.rb.erb507
-rw-r--r--prism/templates/lib/prism/reflection.rb.erb136
-rw-r--r--prism/templates/lib/prism/serialize.rb.erb618
-rw-r--r--prism/templates/lib/prism/visitor.rb.erb11
-rw-r--r--prism/templates/src/diagnostic.c.erb526
-rw-r--r--prism/templates/src/node.c.erb341
-rw-r--r--prism/templates/src/prettyprint.c.erb123
-rw-r--r--prism/templates/src/serialize.c.erb122
-rw-r--r--prism/templates/src/token_type.c.erb357
-rwxr-xr-xprism/templates/template.rb936
-rw-r--r--prism/util/pm_buffer.c207
-rw-r--r--prism/util/pm_buffer.h85
-rw-r--r--prism/util/pm_char.c10
-rw-r--r--prism/util/pm_char.h3
-rw-r--r--prism/util/pm_constant_pool.c85
-rw-r--r--prism/util/pm_constant_pool.h38
-rw-r--r--prism/util/pm_integer.c670
-rw-r--r--prism/util/pm_integer.h130
-rw-r--r--prism/util/pm_list.c2
-rw-r--r--prism/util/pm_list.h8
-rw-r--r--prism/util/pm_newline_list.c57
-rw-r--r--prism/util/pm_newline_list.h26
-rw-r--r--prism/util/pm_state_stack.c25
-rw-r--r--prism/util/pm_state_stack.h42
-rw-r--r--prism/util/pm_string.c259
-rw-r--r--prism/util/pm_string.h70
-rw-r--r--prism/util/pm_string_list.c28
-rw-r--r--prism/util/pm_string_list.h44
-rw-r--r--prism/util/pm_strncasecmp.c14
-rw-r--r--prism/util/pm_strpbrk.c162
-rw-r--r--prism/util/pm_strpbrk.h5
-rw-r--r--prism/version.h6
65 files changed, 19736 insertions, 7383 deletions
diff --git a/prism/api_pack.c b/prism/api_pack.c
index c9f0b18a39..98509ae65c 100644
--- a/prism/api_pack.c
+++ b/prism/api_pack.c
@@ -1,5 +1,12 @@
#include "prism/extension.h"
+#ifdef PRISM_EXCLUDE_PACK
+
+void
+Init_prism_pack(void) {}
+
+#else
+
static VALUE rb_cPrism;
static VALUE rb_cPrismPack;
static VALUE rb_cPrismPackDirective;
@@ -265,3 +272,5 @@ Init_prism_pack(void) {
pack_symbol = ID2SYM(rb_intern("pack"));
unpack_symbol = ID2SYM(rb_intern("unpack"));
}
+
+#endif
diff --git a/prism/config.yml b/prism/config.yml
index 748729ec6b..4e5b077a35 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -1,11 +1,368 @@
+errors:
+ - ALIAS_ARGUMENT
+ - ALIAS_ARGUMENT_NUMBERED_REFERENCE
+ - AMPAMPEQ_MULTI_ASSIGN
+ - ARGUMENT_AFTER_BLOCK
+ - ARGUMENT_AFTER_FORWARDING_ELLIPSES
+ - ARGUMENT_BARE_HASH
+ - ARGUMENT_BLOCK_FORWARDING
+ - ARGUMENT_BLOCK_MULTI
+ - ARGUMENT_CONFLICT_AMPERSAND
+ - ARGUMENT_CONFLICT_STAR
+ - ARGUMENT_CONFLICT_STAR_STAR
+ - ARGUMENT_FORMAL_CLASS
+ - ARGUMENT_FORMAL_CONSTANT
+ - ARGUMENT_FORMAL_GLOBAL
+ - ARGUMENT_FORMAL_IVAR
+ - ARGUMENT_FORWARDING_UNBOUND
+ - ARGUMENT_NO_FORWARDING_AMPERSAND
+ - ARGUMENT_NO_FORWARDING_ELLIPSES
+ - ARGUMENT_NO_FORWARDING_STAR
+ - ARGUMENT_NO_FORWARDING_STAR_STAR
+ - ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT
+ - ARGUMENT_SPLAT_AFTER_SPLAT
+ - ARGUMENT_TERM_PAREN
+ - ARGUMENT_UNEXPECTED_BLOCK
+ - ARRAY_ELEMENT
+ - ARRAY_EXPRESSION
+ - ARRAY_EXPRESSION_AFTER_STAR
+ - ARRAY_SEPARATOR
+ - ARRAY_TERM
+ - BEGIN_LONELY_ELSE
+ - BEGIN_TERM
+ - BEGIN_UPCASE_BRACE
+ - BEGIN_UPCASE_TERM
+ - BEGIN_UPCASE_TOPLEVEL
+ - BLOCK_PARAM_LOCAL_VARIABLE
+ - BLOCK_PARAM_PIPE_TERM
+ - BLOCK_TERM_BRACE
+ - BLOCK_TERM_END
+ - CANNOT_PARSE_EXPRESSION
+ - CANNOT_PARSE_STRING_PART
+ - CASE_EXPRESSION_AFTER_CASE
+ - CASE_EXPRESSION_AFTER_WHEN
+ - CASE_MATCH_MISSING_PREDICATE
+ - CASE_MISSING_CONDITIONS
+ - CASE_TERM
+ - CLASS_IN_METHOD
+ - CLASS_NAME
+ - CLASS_SUPERCLASS
+ - CLASS_TERM
+ - CLASS_UNEXPECTED_END
+ - CLASS_VARIABLE_BARE
+ - CONDITIONAL_ELSIF_PREDICATE
+ - CONDITIONAL_IF_PREDICATE
+ - CONDITIONAL_PREDICATE_TERM
+ - CONDITIONAL_TERM
+ - CONDITIONAL_TERM_ELSE
+ - CONDITIONAL_UNLESS_PREDICATE
+ - CONDITIONAL_UNTIL_PREDICATE
+ - CONDITIONAL_WHILE_PREDICATE
+ - CONSTANT_PATH_COLON_COLON_CONSTANT
+ - DEF_ENDLESS
+ - DEF_ENDLESS_PARAMETERS
+ - DEF_ENDLESS_SETTER
+ - DEF_NAME
+ - DEF_PARAMS_TERM
+ - DEF_PARAMS_TERM_PAREN
+ - DEF_RECEIVER
+ - DEF_RECEIVER_TERM
+ - DEF_TERM
+ - DEFINED_EXPRESSION
+ - EMBDOC_TERM
+ - EMBEXPR_END
+ - EMBVAR_INVALID
+ - END_UPCASE_BRACE
+ - END_UPCASE_TERM
+ - ESCAPE_INVALID_CONTROL
+ - ESCAPE_INVALID_CONTROL_REPEAT
+ - ESCAPE_INVALID_HEXADECIMAL
+ - ESCAPE_INVALID_META
+ - ESCAPE_INVALID_META_REPEAT
+ - ESCAPE_INVALID_UNICODE
+ - ESCAPE_INVALID_UNICODE_CM_FLAGS
+ - ESCAPE_INVALID_UNICODE_LIST
+ - ESCAPE_INVALID_UNICODE_LITERAL
+ - ESCAPE_INVALID_UNICODE_LONG
+ - ESCAPE_INVALID_UNICODE_SHORT
+ - ESCAPE_INVALID_UNICODE_TERM
+ - EXPECT_ARGUMENT
+ - EXPECT_EOL_AFTER_STATEMENT
+ - EXPECT_EXPRESSION_AFTER_AMPAMPEQ
+ - EXPECT_EXPRESSION_AFTER_COMMA
+ - EXPECT_EXPRESSION_AFTER_EQUAL
+ - EXPECT_EXPRESSION_AFTER_LESS_LESS
+ - EXPECT_EXPRESSION_AFTER_LPAREN
+ - EXPECT_EXPRESSION_AFTER_OPERATOR
+ - EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ
+ - EXPECT_EXPRESSION_AFTER_QUESTION
+ - EXPECT_EXPRESSION_AFTER_SPLAT
+ - EXPECT_EXPRESSION_AFTER_SPLAT_HASH
+ - EXPECT_EXPRESSION_AFTER_STAR
+ - EXPECT_FOR_DELIMITER
+ - EXPECT_IDENT_REQ_PARAMETER
+ - EXPECT_IN_DELIMITER
+ - EXPECT_LPAREN_AFTER_NOT_LPAREN
+ - EXPECT_LPAREN_AFTER_NOT_OTHER
+ - EXPECT_LPAREN_REQ_PARAMETER
+ - EXPECT_MESSAGE
+ - EXPECT_RBRACKET
+ - EXPECT_RPAREN
+ - EXPECT_RPAREN_AFTER_MULTI
+ - EXPECT_RPAREN_REQ_PARAMETER
+ - EXPECT_SINGLETON_CLASS_DELIMITER
+ - EXPECT_STRING_CONTENT
+ - EXPECT_WHEN_DELIMITER
+ - EXPRESSION_BARE_HASH
+ - EXPRESSION_NOT_WRITABLE
+ - EXPRESSION_NOT_WRITABLE_ENCODING
+ - EXPRESSION_NOT_WRITABLE_FALSE
+ - EXPRESSION_NOT_WRITABLE_FILE
+ - EXPRESSION_NOT_WRITABLE_LINE
+ - EXPRESSION_NOT_WRITABLE_NIL
+ - EXPRESSION_NOT_WRITABLE_NUMBERED
+ - EXPRESSION_NOT_WRITABLE_SELF
+ - EXPRESSION_NOT_WRITABLE_TRUE
+ - FLOAT_PARSE
+ - FOR_COLLECTION
+ - FOR_IN
+ - FOR_INDEX
+ - FOR_TERM
+ - GLOBAL_VARIABLE_BARE
+ - HASH_EXPRESSION_AFTER_LABEL
+ - HASH_KEY
+ - HASH_ROCKET
+ - HASH_TERM
+ - HASH_VALUE
+ - HEREDOC_IDENTIFIER
+ - HEREDOC_TERM
+ - INCOMPLETE_QUESTION_MARK
+ - INCOMPLETE_VARIABLE_CLASS
+ - INCOMPLETE_VARIABLE_CLASS_3_3
+ - INCOMPLETE_VARIABLE_INSTANCE
+ - INCOMPLETE_VARIABLE_INSTANCE_3_3
+ - INSTANCE_VARIABLE_BARE
+ - INVALID_BLOCK_EXIT
+ - INVALID_CHARACTER
+ - INVALID_COMMA
+ - INVALID_ENCODING_MAGIC_COMMENT
+ - INVALID_ESCAPE_CHARACTER
+ - INVALID_FLOAT_EXPONENT
+ - INVALID_LOCAL_VARIABLE_READ
+ - INVALID_LOCAL_VARIABLE_WRITE
+ - INVALID_MULTIBYTE_CHAR
+ - INVALID_MULTIBYTE_CHARACTER
+ - INVALID_MULTIBYTE_ESCAPE
+ - INVALID_NUMBER_BINARY
+ - INVALID_NUMBER_DECIMAL
+ - INVALID_NUMBER_FRACTION
+ - INVALID_NUMBER_HEXADECIMAL
+ - INVALID_NUMBER_OCTAL
+ - INVALID_NUMBER_UNDERSCORE_INNER
+ - INVALID_NUMBER_UNDERSCORE_TRAILING
+ - INVALID_PERCENT
+ - INVALID_PERCENT_EOF
+ - INVALID_PRINTABLE_CHARACTER
+ - INVALID_RETRY_AFTER_ELSE
+ - INVALID_RETRY_AFTER_ENSURE
+ - INVALID_RETRY_WITHOUT_RESCUE
+ - INVALID_SYMBOL
+ - INVALID_VARIABLE_GLOBAL
+ - INVALID_VARIABLE_GLOBAL_3_3
+ - INVALID_YIELD
+ - IT_NOT_ALLOWED_NUMBERED
+ - IT_NOT_ALLOWED_ORDINARY
+ - LAMBDA_OPEN
+ - LAMBDA_TERM_BRACE
+ - LAMBDA_TERM_END
+ - LIST_I_LOWER_ELEMENT
+ - LIST_I_LOWER_TERM
+ - LIST_I_UPPER_ELEMENT
+ - LIST_I_UPPER_TERM
+ - LIST_W_LOWER_ELEMENT
+ - LIST_W_LOWER_TERM
+ - LIST_W_UPPER_ELEMENT
+ - LIST_W_UPPER_TERM
+ - MALLOC_FAILED
+ - MIXED_ENCODING
+ - MODULE_IN_METHOD
+ - MODULE_NAME
+ - MODULE_TERM
+ - MULTI_ASSIGN_MULTI_SPLATS
+ - MULTI_ASSIGN_UNEXPECTED_REST
+ - NESTING_TOO_DEEP
+ - NO_LOCAL_VARIABLE
+ - NON_ASSOCIATIVE_OPERATOR
+ - NOT_EXPRESSION
+ - NUMBER_LITERAL_UNDERSCORE
+ - NUMBERED_PARAMETER_INNER_BLOCK
+ - NUMBERED_PARAMETER_IT
+ - NUMBERED_PARAMETER_ORDINARY
+ - NUMBERED_PARAMETER_OUTER_BLOCK
+ - OPERATOR_MULTI_ASSIGN
+ - OPERATOR_WRITE_ARGUMENTS
+ - OPERATOR_WRITE_BLOCK
+ - PARAMETER_ASSOC_SPLAT_MULTI
+ - PARAMETER_BLOCK_MULTI
+ - PARAMETER_CIRCULAR
+ - PARAMETER_FORWARDING_AFTER_REST
+ - PARAMETER_METHOD_NAME
+ - PARAMETER_NAME_DUPLICATED
+ - PARAMETER_NO_DEFAULT
+ - PARAMETER_NO_DEFAULT_KW
+ - PARAMETER_NUMBERED_RESERVED
+ - PARAMETER_ORDER
+ - PARAMETER_SPLAT_MULTI
+ - PARAMETER_STAR
+ - PARAMETER_UNEXPECTED_FWD
+ - PARAMETER_UNEXPECTED_NO_KW
+ - PARAMETER_WILD_LOOSE_COMMA
+ - PATTERN_ARRAY_MULTIPLE_RESTS
+ - PATTERN_CAPTURE_DUPLICATE
+ - PATTERN_CAPTURE_IN_ALTERNATIVE
+ - PATTERN_EXPRESSION_AFTER_BRACKET
+ - PATTERN_EXPRESSION_AFTER_COMMA
+ - PATTERN_EXPRESSION_AFTER_HROCKET
+ - PATTERN_EXPRESSION_AFTER_IN
+ - PATTERN_EXPRESSION_AFTER_KEY
+ - PATTERN_EXPRESSION_AFTER_PAREN
+ - PATTERN_EXPRESSION_AFTER_PIN
+ - PATTERN_EXPRESSION_AFTER_PIPE
+ - PATTERN_EXPRESSION_AFTER_RANGE
+ - PATTERN_EXPRESSION_AFTER_REST
+ - PATTERN_FIND_MISSING_INNER
+ - PATTERN_HASH_IMPLICIT
+ - PATTERN_HASH_KEY
+ - PATTERN_HASH_KEY_DUPLICATE
+ - PATTERN_HASH_KEY_INTERPOLATED
+ - PATTERN_HASH_KEY_LABEL
+ - PATTERN_HASH_KEY_LOCALS
+ - PATTERN_IDENT_AFTER_HROCKET
+ - PATTERN_LABEL_AFTER_COMMA
+ - PATTERN_REST
+ - PATTERN_TERM_BRACE
+ - PATTERN_TERM_BRACKET
+ - PATTERN_TERM_PAREN
+ - PIPEPIPEEQ_MULTI_ASSIGN
+ - REGEXP_ENCODING_OPTION_MISMATCH
+ - REGEXP_INCOMPAT_CHAR_ENCODING
+ - REGEXP_INVALID_UNICODE_RANGE
+ - REGEXP_NON_ESCAPED_MBC
+ - REGEXP_PARSE_ERROR
+ - REGEXP_TERM
+ - REGEXP_UNKNOWN_OPTIONS
+ - REGEXP_UTF8_CHAR_NON_UTF8_REGEXP
+ - RESCUE_EXPRESSION
+ - RESCUE_MODIFIER_VALUE
+ - RESCUE_TERM
+ - RESCUE_VARIABLE
+ - RETURN_INVALID
+ - SCRIPT_NOT_FOUND
+ - SINGLETON_FOR_LITERALS
+ - STATEMENT_ALIAS
+ - STATEMENT_POSTEXE_END
+ - STATEMENT_PREEXE_BEGIN
+ - STATEMENT_UNDEF
+ - STRING_CONCATENATION
+ - STRING_INTERPOLATED_TERM
+ - STRING_LITERAL_EOF
+ - STRING_LITERAL_TERM
+ - SYMBOL_INVALID
+ - SYMBOL_TERM_DYNAMIC
+ - SYMBOL_TERM_INTERPOLATED
+ - TERNARY_COLON
+ - TERNARY_EXPRESSION_FALSE
+ - TERNARY_EXPRESSION_TRUE
+ - UNARY_DISALLOWED
+ - UNARY_RECEIVER
+ - UNDEF_ARGUMENT
+ - UNEXPECTED_BLOCK_ARGUMENT
+ - UNEXPECTED_INDEX_BLOCK
+ - UNEXPECTED_INDEX_KEYWORDS
+ - UNEXPECTED_LABEL
+ - UNEXPECTED_MULTI_WRITE
+ - UNEXPECTED_PARAMETER_DEFAULT_VALUE
+ - UNEXPECTED_RANGE_OPERATOR
+ - UNEXPECTED_SAFE_NAVIGATION
+ - UNEXPECTED_TOKEN_CLOSE_CONTEXT
+ - UNEXPECTED_TOKEN_IGNORE
+ - UNTIL_TERM
+ - VOID_EXPRESSION
+ - WHILE_TERM
+ - WRITE_TARGET_IN_METHOD
+ - WRITE_TARGET_READONLY
+ - WRITE_TARGET_UNEXPECTED
+ - XSTRING_TERM
+warnings:
+ - AMBIGUOUS_BINARY_OPERATOR
+ - AMBIGUOUS_FIRST_ARGUMENT_MINUS
+ - AMBIGUOUS_FIRST_ARGUMENT_PLUS
+ - AMBIGUOUS_PREFIX_AMPERSAND
+ - AMBIGUOUS_PREFIX_STAR
+ - AMBIGUOUS_PREFIX_STAR_STAR
+ - AMBIGUOUS_SLASH
+ - COMPARISON_AFTER_COMPARISON
+ - DOT_DOT_DOT_EOL
+ - EQUAL_IN_CONDITIONAL
+ - EQUAL_IN_CONDITIONAL_3_3
+ - END_IN_METHOD
+ - DUPLICATED_HASH_KEY
+ - DUPLICATED_WHEN_CLAUSE
+ - FLOAT_OUT_OF_RANGE
+ - IGNORED_FROZEN_STRING_LITERAL
+ - INDENTATION_MISMATCH
+ - INTEGER_IN_FLIP_FLOP
+ - INVALID_CHARACTER
+ - INVALID_MAGIC_COMMENT_VALUE
+ - INVALID_NUMBERED_REFERENCE
+ - KEYWORD_EOL
+ - LITERAL_IN_CONDITION_DEFAULT
+ - LITERAL_IN_CONDITION_VERBOSE
+ - SHAREABLE_CONSTANT_VALUE_LINE
+ - SHEBANG_CARRIAGE_RETURN
+ - UNEXPECTED_CARRIAGE_RETURN
+ - UNREACHABLE_STATEMENT
+ - UNUSED_LOCAL_VARIABLE
+ - VOID_STATEMENT
tokens:
+ # The order of the tokens at the beginning is important, because we use them
+ # for a lookup table.
- name: EOF
value: 1
comment: final token in the file
- - name: MISSING
- comment: "a token that was expected but not found"
- - name: NOT_PROVIDED
- comment: "a token that was not present but it is okay"
+ - name: BRACE_RIGHT
+ comment: "}"
+ - name: COMMA
+ comment: ","
+ - name: EMBEXPR_END
+ comment: "}"
+ - name: KEYWORD_DO
+ comment: "do"
+ - name: KEYWORD_ELSE
+ comment: "else"
+ - name: KEYWORD_ELSIF
+ comment: "elsif"
+ - name: KEYWORD_END
+ comment: "end"
+ - name: KEYWORD_ENSURE
+ comment: "ensure"
+ - name: KEYWORD_IN
+ comment: "in"
+ - name: KEYWORD_RESCUE
+ comment: "rescue"
+ - name: KEYWORD_THEN
+ comment: "then"
+ - name: KEYWORD_WHEN
+ comment: "when"
+ - name: NEWLINE
+ comment: "a newline character outside of other tokens"
+ - name: PARENTHESIS_RIGHT
+ comment: ")"
+ - name: PIPE
+ comment: "|"
+ - name: SEMICOLON
+ comment: ";"
+ # Tokens from here on are not used for lookup, and can be in any order.
- name: AMPERSAND
comment: "&"
- name: AMPERSAND_AMPERSAND
@@ -28,8 +385,6 @@ tokens:
comment: "!~"
- name: BRACE_LEFT
comment: "{"
- - name: BRACE_RIGHT
- comment: "}"
- name: BRACKET_LEFT
comment: "["
- name: BRACKET_LEFT_ARRAY
@@ -52,8 +407,6 @@ tokens:
comment: ":"
- name: COLON_COLON
comment: "::"
- - name: COMMA
- comment: ","
- name: COMMENT
comment: "a comment"
- name: CONSTANT
@@ -72,8 +425,6 @@ tokens:
comment: "a line inside of embedded documentation"
- name: EMBEXPR_BEGIN
comment: "#{"
- - name: EMBEXPR_END
- comment: "}"
- name: EMBVAR
comment: "#"
- name: EQUAL
@@ -140,20 +491,10 @@ tokens:
comment: "def"
- name: KEYWORD_DEFINED
comment: "defined?"
- - name: KEYWORD_DO
- comment: "do"
- name: KEYWORD_DO_LOOP
comment: "do keyword for a predicate in a while, until, or for loop"
- - name: KEYWORD_ELSE
- comment: "else"
- - name: KEYWORD_ELSIF
- comment: "elsif"
- - name: KEYWORD_END
- comment: "end"
- name: KEYWORD_END_UPCASE
comment: "END"
- - name: KEYWORD_ENSURE
- comment: "ensure"
- name: KEYWORD_FALSE
comment: "false"
- name: KEYWORD_FOR
@@ -162,8 +503,6 @@ tokens:
comment: "if"
- name: KEYWORD_IF_MODIFIER
comment: "if in the modifier form"
- - name: KEYWORD_IN
- comment: "in"
- name: KEYWORD_MODULE
comment: "module"
- name: KEYWORD_NEXT
@@ -176,8 +515,6 @@ tokens:
comment: "or"
- name: KEYWORD_REDO
comment: "redo"
- - name: KEYWORD_RESCUE
- comment: "rescue"
- name: KEYWORD_RESCUE_MODIFIER
comment: "rescue in the modifier form"
- name: KEYWORD_RETRY
@@ -188,8 +525,6 @@ tokens:
comment: "self"
- name: KEYWORD_SUPER
comment: "super"
- - name: KEYWORD_THEN
- comment: "then"
- name: KEYWORD_TRUE
comment: "true"
- name: KEYWORD_UNDEF
@@ -202,8 +537,6 @@ tokens:
comment: "until"
- name: KEYWORD_UNTIL_MODIFIER
comment: "until in the modifier form"
- - name: KEYWORD_WHEN
- comment: "when"
- name: KEYWORD_WHILE
comment: "while"
- name: KEYWORD_WHILE_MODIFIER
@@ -240,16 +573,12 @@ tokens:
comment: "-="
- name: MINUS_GREATER
comment: "->"
- - name: NEWLINE
- comment: "a newline character outside of other tokens"
- name: NUMBERED_REFERENCE
comment: "a numbered reference to a capture group in the previous regular expression match"
- name: PARENTHESIS_LEFT
comment: "("
- name: PARENTHESIS_LEFT_PARENTHESES
comment: "( for a parentheses node"
- - name: PARENTHESIS_RIGHT
- comment: ")"
- name: PERCENT
comment: "%"
- name: PERCENT_EQUAL
@@ -264,8 +593,6 @@ tokens:
comment: "%I"
- name: PERCENT_UPPER_W
comment: "%W"
- - name: PIPE
- comment: "|"
- name: PIPE_EQUAL
comment: "|="
- name: PIPE_PIPE
@@ -282,8 +609,6 @@ tokens:
comment: "the beginning of a regular expression"
- name: REGEXP_END
comment: "the end of a regular expression"
- - name: SEMICOLON
- comment: ";"
- name: SLASH
comment: "/"
- name: SLASH_EQUAL
@@ -328,11 +653,23 @@ tokens:
comment: "a separator between words in a list"
- name: __END__
comment: "marker for the point in the file at which the parser should stop"
+ - name: MISSING
+ comment: "a token that was expected but not found"
+ - name: NOT_PROVIDED
+ comment: "a token that was not present but it is okay"
flags:
- name: ArgumentsNodeFlags
values:
+ - name: CONTAINS_FORWARDING
+ comment: "if the arguments contain forwarding"
+ - name: CONTAINS_KEYWORDS
+ comment: "if the arguments contain keywords"
- name: CONTAINS_KEYWORD_SPLAT
- comment: "if arguments contain keyword splat"
+ comment: "if the arguments contain a keyword splat"
+ - name: CONTAINS_SPLAT
+ comment: "if the arguments contain a splat"
+ - name: CONTAINS_MULTIPLE_SPLATS
+ comment: "if the arguments contain multiple splats"
comment: Flags for arguments nodes.
- name: ArrayNodeFlags
values:
@@ -368,6 +705,13 @@ flags:
- name: HEXADECIMAL
comment: "0x prefix"
comment: Flags for integer nodes that correspond to the base of the integer.
+ - name: InterpolatedStringNodeFlags
+ values:
+ - name: FROZEN
+ comment: "frozen by virtue of a `frozen_string_literal: true` comment or `--enable-frozen-string-literal`; only for adjacent string literals like `'a' 'b'`"
+ - name: MUTABLE
+ comment: "mutable by virtue of a `frozen_string_literal: false` comment or `--disable-frozen-string-literal`; only for adjacent string literals like `'a' 'b'`"
+ comment: Flags for interpolated string nodes that indicate mutability if they are also marked as literals.
- name: KeywordHashNodeFlags
values:
- name: SYMBOL_KEYS
@@ -383,6 +727,11 @@ flags:
- name: REPEATED_PARAMETER
comment: "a parameter name that has been repeated in the method signature"
comment: Flags for parameter nodes.
+ - name: ParenthesesNodeFlags
+ values:
+ - name: MULTIPLE_STATEMENTS
+ comment: "parentheses that contain multiple potentially void statements"
+ comment: Flags for parentheses nodes.
- name: RangeFlags
values:
- name: EXCLUDE_END
@@ -413,6 +762,15 @@ flags:
- name: FORCED_US_ASCII_ENCODING
comment: "internal bytes forced the encoding to US-ASCII"
comment: Flags for regular expression and match last line nodes.
+ - name: ShareableConstantNodeFlags
+ values:
+ - name: LITERAL
+ comment: "constant writes that should be modified with shareable constant value literal"
+ - name: EXPERIMENTAL_EVERYTHING
+ comment: "constant writes that should be modified with shareable constant value experimental everything"
+ - name: EXPERIMENTAL_COPY
+ comment: "constant writes that should be modified with shareable constant value experimental copy"
+ comment: Flags for shareable constant nodes.
- name: StringFlags
values:
- name: FORCED_UTF8_ENCODING
@@ -420,7 +778,9 @@ flags:
- name: FORCED_BINARY_ENCODING
comment: "internal bytes forced the encoding to binary"
- name: FROZEN
- comment: "frozen by virtue of a `frozen_string_literal` comment"
+ comment: "frozen by virtue of a `frozen_string_literal: true` comment or `--enable-frozen-string-literal`"
+ - name: MUTABLE
+ comment: "mutable by virtue of a `frozen_string_literal: false` comment or `--disable-frozen-string-literal`"
comment: Flags for string nodes.
- name: SymbolFlags
values:
@@ -436,10 +796,35 @@ nodes:
fields:
- name: new_name
type: node
+ kind:
+ - GlobalVariableReadNode
+ - BackReferenceReadNode
+ - NumberedReferenceReadNode
+ comment: |
+ Represents the new name of the global variable that can be used after aliasing.
+
+ alias $foo $bar
+ ^^^^
- name: old_name
type: node
+ kind:
+ - GlobalVariableReadNode
+ - BackReferenceReadNode
+ - NumberedReferenceReadNode
+ - on error: SymbolNode # alias $a b
+ - on error: MissingNode # alias $a 42
+ comment: |
+ Represents the old name of the global variable that can be used before aliasing.
+
+ alias $foo $bar
+ ^^^^
- name: keyword_loc
type: location
+ comment: |
+ The location of the `alias` keyword.
+
+ alias $foo $bar
+ ^^^^^
comment: |
Represents the use of the `alias` keyword to alias a global variable.
@@ -449,10 +834,45 @@ nodes:
fields:
- name: new_name
type: node
+ kind:
+ - SymbolNode
+ - InterpolatedSymbolNode
+ comment: |
+ Represents the new name of the method that will be aliased.
+
+ alias foo bar
+ ^^^
+
+ alias :foo :bar
+ ^^^^
+
+ alias :"#{foo}" :"#{bar}"
+ ^^^^^^^^^
- name: old_name
type: node
+ kind:
+ - SymbolNode
+ - InterpolatedSymbolNode
+ - on error: GlobalVariableReadNode # alias a $b
+ - on error: MissingNode # alias a 42
+ comment: |
+ Represents the old name of the method that will be aliased.
+
+ alias foo bar
+ ^^^
+
+ alias :foo :bar
+ ^^^^
+
+ alias :"#{foo}" :"#{bar}"
+ ^^^^^^^^^
- name: keyword_loc
type: location
+ comment: |
+ Represents the location of the `alias` keyword.
+
+ alias foo bar
+ ^^^^^
comment: |
Represents the use of the `alias` keyword to alias a method.
@@ -462,10 +882,27 @@ nodes:
fields:
- name: left
type: node
+ kind: pattern expression
+ comment: |
+ Represents the left side of the expression.
+
+ foo => bar | baz
+ ^^^
- name: right
type: node
+ kind: pattern expression
+ comment: |
+ Represents the right side of the expression.
+
+ foo => bar | baz
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ Represents the alternation operator location.
+
+ foo => bar | baz
+ ^
comment: |
Represents an alternation pattern in pattern matching.
@@ -475,41 +912,81 @@ nodes:
fields:
- name: left
type: node
+ kind: non-void expression
+ comment: |
+ Represents the left side of the expression. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ left and right
+ ^^^^
+
+ 1 && 2
+ ^
- name: right
type: node
+ kind: Node
+ comment: |
+ Represents the right side of the expression.
+
+ left && right
+ ^^^^^
+
+ 1 and 2
+ ^
- name: operator_loc
type: location
+ comment: |
+ The location of the `and` keyword or the `&&` operator.
+
+ left and right
+ ^^^
comment: |
Represents the use of the `&&` operator or the `and` keyword.
left and right
^^^^^^^^^^^^^^
- name: ArgumentsNode
+ flags: ArgumentsNodeFlags
fields:
- - name: flags
- type: flags
- kind: ArgumentsNodeFlags
- name: arguments
type: node[]
+ kind: non-void expression
+ comment: |
+ The list of arguments, if present. These can be any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ foo(bar, baz)
+ ^^^^^^^^
comment: |
Represents a set of arguments to a method or a keyword.
return foo, bar, baz
^^^^^^^^^^^^^
- name: ArrayNode
+ flags: ArrayNodeFlags
fields:
- - name: flags
- type: flags
- kind: ArrayNodeFlags
- name: elements
type: node[]
+ kind: non-void expression
+ comment: Represents the list of zero or more [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression) within the array.
- name: opening_loc
type: location?
+ comment: |
+ Represents the optional source location for the opening token.
+
+ [1,2,3] # "["
+ %w[foo bar baz] # "%w["
+ %I(apple orange banana) # "%I("
+ foo = 1, 2, 3 # nil
- name: closing_loc
type: location?
+ comment: |
+ Represents the optional source location for the closing token.
+
+ [1,2,3] # "]"
+ %w[foo bar baz] # "]"
+ %I(apple orange banana) # ")"
+ foo = 1, 2, 3 # nil
comment: |
- Represents an array literal. This can be a regular array using brackets or
- a special array using % like %w or %i.
+ Represents an array literal. This can be a regular array using brackets or a special array using % like %w or %i.
[1, 2, 3]
^^^^^^^^^
@@ -517,16 +994,58 @@ nodes:
fields:
- name: constant
type: node?
+ kind:
+ - ConstantPathNode
+ - ConstantReadNode
+ comment: |
+ Represents the optional constant preceding the Array.
+
+ foo in Bar[]
+ ^^^
+
+ foo in Bar[1, 2, 3]
+ ^^^
+
+ foo in Bar::Baz[1, 2, 3]
+ ^^^^^^^^
- name: requireds
type: node[]
+ kind: pattern expression
+ comment: |
+ Represents the required elements of the array pattern.
+
+ foo in [1, 2]
+ ^ ^
- name: rest
type: node?
+ kind: pattern expression
+ comment: |
+ Represents the rest element of the array pattern.
+
+ foo in *bar
+ ^^^^
- name: posts
type: node[]
+ kind: pattern expression
+ comment: |
+ Represents the elements after the rest element of the array pattern.
+
+ foo in *bar, baz
+ ^^^
- name: opening_loc
type: location?
+ comment: |
+ Represents the opening location of the array pattern.
+
+ foo in [1, 2]
+ ^
- name: closing_loc
type: location?
+ comment: |
+ Represents the closing location of the array pattern.
+
+ foo in [1, 2]
+ ^
comment: |
Represents an array pattern in pattern matching.
@@ -536,8 +1055,8 @@ nodes:
foo in [1, 2]
^^^^^^^^^^^^^
- foo in *1
- ^^^^^^^^^
+ foo in *bar
+ ^^^^^^^^^^^
foo in Bar[]
^^^^^^^^^^^^
@@ -548,8 +1067,9 @@ nodes:
fields:
- name: key
type: node
+ kind: non-void expression
comment: |
- The key of the association. This can be any node that represents a non-void expression.
+ The key of the association. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
{ a: b }
^
@@ -560,11 +1080,10 @@ nodes:
{ def a; end => 1 }
^^^^^^^^^^
- name: value
- type: node?
+ type: node
+ kind: non-void expression
comment: |
- The value of the association, if present. This can be any node that
- represents a non-void expression. It can be optionally omitted if this
- node is an element in a `HashPatternNode`.
+ The value of the association. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
{ foo => bar }
^^^
@@ -587,9 +1106,9 @@ nodes:
fields:
- name: value
type: node?
+ kind: non-void expression
comment: |
- The value to be splatted, if present. Will be missing when keyword
- rest argument forwarding is used.
+ The value to be splatted, if present. Will be missing when keyword rest argument forwarding is used.
{ **foo }
^^^
@@ -609,6 +1128,12 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the back-reference variable, including the leading `$`.
+
+ $& # name `:$&`
+
+ $+ # name `:$+`
comment: |
Represents reading a reference to a field in the previous match.
@@ -618,20 +1143,50 @@ nodes:
fields:
- name: begin_keyword_loc
type: location?
+ comment: |
+ Represents the location of the `begin` keyword.
+
+ begin x end
+ ^^^^^
- name: statements
type: node?
kind: StatementsNode
+ comment: |
+ Represents the statements within the begin block.
+
+ begin x end
+ ^
- name: rescue_clause
type: node?
kind: RescueNode
+ comment: |
+ Represents the rescue clause within the begin block.
+
+ begin x; rescue y; end
+ ^^^^^^^^
- name: else_clause
type: node?
kind: ElseNode
+ comment: |
+ Represents the else clause within the begin block.
+
+ begin x; rescue y; else z; end
+ ^^^^^^
- name: ensure_clause
type: node?
kind: EnsureNode
+ comment: |
+ Represents the ensure clause within the begin block.
+
+ begin x; ensure y; end
+ ^^^^^^^^
- name: end_keyword_loc
type: location?
+ comment: |
+ Represents the location of the `end` keyword.
+
+ begin x end
+ ^^^
newline: false
comment: |
Represents a begin statement.
@@ -644,20 +1199,34 @@ nodes:
fields:
- name: expression
type: node?
+ kind: non-void expression
+ comment: |
+ The expression that is being passed as a block argument. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ foo(&args)
+ ^^^^^
- name: operator_loc
type: location
+ comment: |
+ Represents the location of the `&` operator.
+
+ foo(&args)
+ ^
comment: |
- Represents block method arguments.
+ Represents a block argument using `&`.
bar(&args)
^^^^^^^^^^
- name: BlockLocalVariableNode
+ flags: ParameterFlags
fields:
- - name: flags
- type: flags
- kind: ParameterFlags
- name: name
type: constant
+ comment: |
+ The name of the block local variable.
+
+ a { |; b| } # name `:b`
+ ^
comment: |
Represents a block local variable.
@@ -667,34 +1236,83 @@ nodes:
fields:
- name: locals
type: constant[]
- - name: locals_body_index
- type: uint32
+ comment: |
+ The local variables declared in the block.
+
+ [1, 2, 3].each { |i| puts x } # locals: [:i]
+ ^
- name: parameters
type: node?
+ kind:
+ - BlockParametersNode
+ - NumberedParametersNode
+ - ItParametersNode
+ comment: |
+ The parameters of the block.
+
+ [1, 2, 3].each { |i| puts x }
+ ^^^
+ [1, 2, 3].each { puts _1 }
+ ^^^^^^^^^^^
+ [1, 2, 3].each { puts it }
+ ^^^^^^^^^^^
- name: body
type: node?
+ kind:
+ - StatementsNode
+ - BeginNode
+ comment: |
+ The body of the block.
+
+ [1, 2, 3].each { |i| puts x }
+ ^^^^^^
- name: opening_loc
type: location
+ comment: |
+ Represents the location of the opening `{` or `do`.
+
+ [1, 2, 3].each { |i| puts x }
+ ^
- name: closing_loc
type: location
+ comment: |
+ Represents the location of the closing `}` or `end`.
+
+ [1, 2, 3].each { |i| puts x }
+ ^
comment: |
Represents a block of ruby code.
- [1, 2, 3].each { |i| puts x }
- ^^^^^^^^^^^^^^
+ [1, 2, 3].each { |i| puts x }
+ ^^^^^^^^^^^^^^
- name: BlockParameterNode
+ flags: ParameterFlags
fields:
- - name: flags
- type: flags
- kind: ParameterFlags
- name: name
type: constant?
+ comment: |
+ The name of the block parameter.
+
+ def a(&b) # name `:b`
+ ^
+ end
- name: name_loc
type: location?
+ comment: |
+ Represents the location of the block parameter name.
+
+ def a(&b)
+ ^
- name: operator_loc
type: location
+ comment: |
+ Represents the location of the `&` operator.
+
+ def a(&b)
+ ^
+ end
comment: |
- Represents a block parameter to a method, block, or lambda definition.
+ Represents a block parameter of a method, block, or lambda definition.
def a(&b)
^^
@@ -704,12 +1322,49 @@ nodes:
- name: parameters
type: node?
kind: ParametersNode
+ comment: |
+ Represents the parameters of the block.
+
+ -> (a, b = 1; local) { }
+ ^^^^^^^^
+
+ foo do |a, b = 1; local|
+ ^^^^^^^^
+ end
- name: locals
type: node[]
+ kind: BlockLocalVariableNode
+ comment: |
+ Represents the local variables of the block.
+
+ -> (a, b = 1; local) { }
+ ^^^^^
+
+ foo do |a, b = 1; local|
+ ^^^^^
+ end
- name: opening_loc
type: location?
+ comment: |
+ Represents the opening location of the block parameters.
+
+ -> (a, b = 1; local) { }
+ ^
+
+ foo do |a, b = 1; local|
+ ^
+ end
- name: closing_loc
type: location?
+ comment: |
+ Represents the closing location of the block parameters.
+
+ -> (a, b = 1; local) { }
+ ^
+
+ foo do |a, b = 1; local|
+ ^
+ end
comment: |
Represents a block's parameters declaration.
@@ -724,48 +1379,90 @@ nodes:
- name: arguments
type: node?
kind: ArgumentsNode
+ comment: |
+ The arguments to the break statement, if present. These can be any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ break foo
+ ^^^
- name: keyword_loc
type: location
+ comment: |
+ The location of the `break` keyword.
+
+ break foo
+ ^^^^^
comment: |
Represents the use of the `break` keyword.
break foo
^^^^^^^^^
- name: CallAndWriteNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node?
+ kind: non-void expression
+ comment: |
+ The object that the method is being called on. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ foo.bar &&= value
+ ^^^
- name: call_operator_loc
type: location?
+ comment: |
+ Represents the location of the call operator.
+
+ foo.bar &&= value
+ ^
- name: message_loc
type: location?
+ comment: |
+ Represents the location of the message.
+
+ foo.bar &&= value
+ ^^^
- name: read_name
type: constant
+ comment: |
+ Represents the name of the method being called.
+
+ foo.bar &&= value # read_name `:bar`
+ ^^^
- name: write_name
type: constant
+ comment: |
+ Represents the name of the method being written to.
+
+ foo.bar &&= value # write_name `:bar=`
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ Represents the location of the operator.
+
+ foo.bar &&= value
+ ^^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ Represents the value being assigned.
+
+ foo.bar &&= value
+ ^^^^^
comment: |
Represents the use of the `&&=` operator on a call.
foo.bar &&= value
^^^^^^^^^^^^^^^^^
- name: CallNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node?
+ kind: non-void expression
comment: |
- The object that the method is being called on. This can be either
- `nil` or a node representing any kind of expression that returns a
- non-void value.
+ The object that the method is being called on. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
foo.bar
^^^
@@ -777,19 +1474,69 @@ nodes:
^^^
- name: call_operator_loc
type: location?
+ comment: |
+ Represents the location of the call operator.
+
+ foo.bar
+ ^
+
+ foo&.bar
+ ^^
- name: name
type: constant
+ comment: |
+ Represents the name of the method being called.
+
+ foo.bar # name `:bar`
+ ^^^
- name: message_loc
type: location?
+ comment: |
+ Represents the location of the message.
+
+ foo.bar
+ ^^^
- name: opening_loc
type: location?
+ comment: |
+ Represents the location of the left parenthesis.
+ foo(bar)
+ ^
- name: arguments
type: node?
kind: ArgumentsNode
+ comment: |
+ Represents the arguments to the method call. These can be any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ foo(bar)
+ ^^^
- name: closing_loc
type: location?
+ comment: |
+ Represents the location of the right parenthesis.
+
+ foo(bar)
+ ^
+ - name: equal_loc
+ type: location?
+ comment: |
+ Represents the location of the equal sign, in the case that this is an attribute write.
+
+ foo.bar = value
+ ^
+
+ foo[bar] = value
+ ^
- name: block
type: node?
+ kind:
+ - BlockNode
+ - BlockArgumentNode
+ comment: |
+ Represents the block that is being passed to the method.
+
+ foo { |a| a }
+ ^^^^^^^^^
comment: |
Represents a method call, in all of the various forms that can take.
@@ -811,68 +1558,162 @@ nodes:
foo&.bar
^^^^^^^^
- name: CallOperatorWriteNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node?
+ kind: non-void expression
+ comment: |
+ The object that the method is being called on. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ foo.bar += value
+ ^^^
- name: call_operator_loc
type: location?
+ comment: |
+ Represents the location of the call operator.
+
+ foo.bar += value
+ ^
- name: message_loc
type: location?
+ comment: |
+ Represents the location of the message.
+
+ foo.bar += value
+ ^^^
- name: read_name
type: constant
+ comment: |
+ Represents the name of the method being called.
+
+ foo.bar += value # read_name `:bar`
+ ^^^
- name: write_name
type: constant
- - name: operator
+ comment: |
+ Represents the name of the method being written to.
+
+ foo.bar += value # write_name `:bar=`
+ ^^^
+ - name: binary_operator
type: constant
- - name: operator_loc
+ comment: |
+ Represents the binary operator being used.
+
+ foo.bar += value # binary_operator `:+`
+ ^
+ - name: binary_operator_loc
type: location
+ comment: |
+ Represents the location of the binary operator.
+
+ foo.bar += value
+ ^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ Represents the value being assigned.
+
+ foo.bar += value
+ ^^^^^
comment: |
Represents the use of an assignment operator on a call.
foo.bar += baz
^^^^^^^^^^^^^^
- name: CallOrWriteNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node?
+ kind: non-void expression
+ comment: |
+ The object that the method is being called on. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ foo.bar ||= value
+ ^^^
- name: call_operator_loc
type: location?
+ comment: |
+ Represents the location of the call operator.
+
+ foo.bar ||= value
+ ^
- name: message_loc
type: location?
+ comment: |
+ Represents the location of the message.
+
+ foo.bar ||= value
+ ^^^
- name: read_name
type: constant
+ comment: |
+ Represents the name of the method being called.
+
+ foo.bar ||= value # read_name `:bar`
+ ^^^
- name: write_name
type: constant
+ comment: |
+ Represents the name of the method being written to.
+
+ foo.bar ||= value # write_name `:bar=`
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ Represents the location of the operator.
+
+ foo.bar ||= value
+ ^^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ Represents the value being assigned.
+
+ foo.bar ||= value
+ ^^^^^
comment: |
Represents the use of the `||=` operator on a call.
foo.bar ||= value
^^^^^^^^^^^^^^^^^
- name: CallTargetNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node
+ kind: non-void expression
+ comment: |
+ The object that the method is being called on. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ foo.bar = 1
+ ^^^
- name: call_operator_loc
type: location
+ comment: |
+ Represents the location of the call operator.
+
+ foo.bar = 1
+ ^
- name: name
type: constant
+ comment: |
+ Represents the name of the method being called.
+
+ foo.bar = 1 # name `:bar=`
+ ^^^
- name: message_loc
type: location
+ comment: |
+ Represents the location of the message.
+
+ foo.bar = 1
+ ^^^
comment: |
Represents assigning to a method call.
@@ -890,10 +1731,27 @@ nodes:
fields:
- name: value
type: node
+ kind: pattern expression
+ comment: |
+ Represents the value to capture.
+
+ foo => bar
+ ^^^
- name: target
type: node
+ kind: LocalVariableTargetNode
+ comment: |
+ Represents the target of the capture.
+
+ foo => bar
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ Represents the location of the `=>` operator.
+
+ foo => bar
+ ^^
comment: |
Represents assigning to a local variable in pattern matching.
@@ -903,15 +1761,42 @@ nodes:
fields:
- name: predicate
type: node?
+ kind: non-void expression
+ comment: |
+ Represents the predicate of the case match. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ case true; in false; end
+ ^^^^
- name: conditions
type: node[]
- - name: consequent
+ kind: InNode
+ comment: |
+ Represents the conditions of the case match.
+
+ case true; in false; end
+ ^^^^^^^^
+ - name: else_clause
type: node?
kind: ElseNode
+ comment: |
+ Represents the else clause of the case match.
+
+ case true; in false; else; end
+ ^^^^
- name: case_keyword_loc
type: location
+ comment: |
+ Represents the location of the `case` keyword.
+
+ case true; in false; end
+ ^^^^
- name: end_keyword_loc
type: location
+ comment: |
+ Represents the location of the `end` keyword.
+
+ case true; in false; end
+ ^^^
comment: |
Represents the use of a case statement for pattern matching.
@@ -923,15 +1808,42 @@ nodes:
fields:
- name: predicate
type: node?
+ kind: non-void expression
+ comment: |
+ Represents the predicate of the case statement. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ case true; when false; end
+ ^^^^
- name: conditions
type: node[]
- - name: consequent
+ kind: WhenNode
+ comment: |
+ Represents the conditions of the case statement.
+
+ case true; when false; end
+ ^^^^^^^^^^
+ - name: else_clause
type: node?
kind: ElseNode
+ comment: |
+ Represents the else clause of the case statement.
+
+ case true; when false; else; end
+ ^^^^
- name: case_keyword_loc
type: location
+ comment: |
+ Represents the location of the `case` keyword.
+
+ case true; when false; end
+ ^^^^
- name: end_keyword_loc
type: location
+ comment: |
+ Represents the location of the `end` keyword.
+
+ case true; when false; end
+ ^^^
comment: |
Represents the use of a case statement.
@@ -945,18 +1857,56 @@ nodes:
type: constant[]
- name: class_keyword_loc
type: location
+ comment: |
+ Represents the location of the `class` keyword.
+
+ class Foo end
+ ^^^^^
- name: constant_path
type: node
+ kind:
+ - ConstantReadNode
+ - ConstantPathNode
+ - on error: CallNode # class 0.X end
- name: inheritance_operator_loc
type: location?
+ comment: |
+ Represents the location of the `<` operator.
+
+ class Foo < Bar
+ ^
- name: superclass
type: node?
+ kind: non-void expression
+ comment: |
+ Represents the superclass of the class.
+
+ class Foo < Bar
+ ^^^
- name: body
type: node?
+ kind:
+ - StatementsNode
+ - BeginNode
+ comment: |
+ Represents the body of the class.
+
+ class Foo
+ foo
+ ^^^
- name: end_keyword_loc
type: location
+ comment: |
+ Represents the location of the `end` keyword.
+
+ class Foo end
+ ^^^
- name: name
type: constant
+ comment: |
+ The name of the class.
+
+ class Foo end # name `:Foo`
comment: |
Represents a class declaration involving the `class` keyword.
@@ -966,12 +1916,33 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the class variable, which is a `@@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+ @@target &&= value # name `:@@target`
+ ^^^^^^^^
- name: name_loc
type: location
+ comment: |
+ Represents the location of the variable name.
+
+ @@target &&= value
+ ^^^^^^^^
- name: operator_loc
type: location
+ comment: |
+ Represents the location of the `&&=` operator.
+
+ @@target &&= value
+ ^^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ Represents the value being assigned. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ @@target &&= value
+ ^^^^^
comment: |
Represents the use of the `&&=` operator for assignment to a class variable.
@@ -983,11 +1954,12 @@ nodes:
type: constant
- name: name_loc
type: location
- - name: operator_loc
+ - name: binary_operator_loc
type: location
- name: value
type: node
- - name: operator
+ kind: non-void expression
+ - name: binary_operator
type: constant
comment: |
Represents assigning to a class variable using an operator that isn't `=`.
@@ -1004,6 +1976,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `||=` operator for assignment to a class variable.
@@ -1013,6 +1986,12 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the class variable, which is a `@@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+ @@abc # name `:@@abc`
+
+ @@_test # name `:@@_test`
comment: |
Represents referencing a class variable.
@@ -1031,12 +2010,37 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the class variable, which is a `@@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+ @@abc = 123 # name `:@@abc`
+
+ @@_test = :test # name `:@@_test`
- name: name_loc
type: location
+ comment: |
+ The location of the variable name.
+
+ @@foo = :bar
+ ^^^^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ The value to write to the class variable. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ @@foo = :bar
+ ^^^^
+
+ @@_xyz = 123
+ ^^^
- name: operator_loc
- type: location?
+ type: location
+ comment: |
+ The location of the `=` operator.
+
+ @@foo = :bar
+ ^
comment: |
Represents writing to a class variable.
@@ -1052,6 +2056,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `&&=` operator for assignment to a constant.
@@ -1063,11 +2068,12 @@ nodes:
type: constant
- name: name_loc
type: location
- - name: operator_loc
+ - name: binary_operator_loc
type: location
- name: value
type: node
- - name: operator
+ kind: non-void expression
+ - name: binary_operator
type: constant
comment: |
Represents assigning to a constant using an operator that isn't `=`.
@@ -1084,6 +2090,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `||=` operator for assignment to a constant.
@@ -1098,6 +2105,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `&&=` operator for assignment to a constant path.
@@ -1107,10 +2115,41 @@ nodes:
fields:
- name: parent
type: node?
- - name: child
- type: node
+ kind: non-void expression
+ comment: |
+ The left-hand node of the path, if present. It can be `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression). It will be `nil` when the constant lookup is at the root of the module tree.
+
+ Foo::Bar
+ ^^^
+
+ self::Test
+ ^^^^
+
+ a.b::C
+ ^^^
+ - name: name
+ type: constant?
+ comment: The name of the constant being accessed. This could be `nil` in the event of a syntax error.
- name: delimiter_loc
type: location
+ comment: |
+ The location of the `::` delimiter.
+
+ ::Foo
+ ^^
+
+ One::Two
+ ^^
+ - name: name_loc
+ type: location
+ comment: |
+ The location of the name of the constant.
+
+ ::Foo
+ ^^^
+
+ One::Two
+ ^^^
comment: |
Represents accessing a constant through a path of `::` operators.
@@ -1121,11 +2160,12 @@ nodes:
- name: target
type: node
kind: ConstantPathNode
- - name: operator_loc
+ - name: binary_operator_loc
type: location
- name: value
type: node
- - name: operator
+ kind: non-void expression
+ - name: binary_operator
type: constant
comment: |
Represents assigning to a constant path using an operator that isn't `=`.
@@ -1141,6 +2181,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `||=` operator for assignment to a constant path.
@@ -1150,10 +2191,13 @@ nodes:
fields:
- name: parent
type: node?
- - name: child
- type: node
+ kind: non-void expression
+ - name: name
+ type: constant?
- name: delimiter_loc
type: location
+ - name: name_loc
+ type: location
comment: |
Represents writing to a constant path in a context that doesn't have an explicit value.
@@ -1164,10 +2208,29 @@ nodes:
- name: target
type: node
kind: ConstantPathNode
+ comment: |
+ A node representing the constant path being written to.
+
+ Foo::Bar = 1
+ ^^^^^^^^
+
+ ::Foo = :abc
+ ^^^^^
- name: operator_loc
type: location
+ comment: |
+ The location of the `=` operator.
+
+ ::ABC = 123
+ ^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ The value to write to the constant path. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ FOO::BAR = :abc
+ ^^^^
comment: |
Represents writing to a constant path.
@@ -1183,6 +2246,12 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the [constant](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#constants).
+
+ X # name `:X`
+
+ SOME_CONSTANT # name `:SOME_CONSTANT`
comment: |
Represents referencing a constant.
@@ -1201,12 +2270,37 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the [constant](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#constants).
+
+ Foo = :bar # name `:Foo`
+
+ XYZ = 1 # name `:XYZ`
- name: name_loc
type: location
+ comment: |
+ The location of the constant name.
+
+ FOO = 1
+ ^^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ The value to write to the constant. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ FOO = :bar
+ ^^^^
+
+ MyClass = Class.new
+ ^^^^^^^^^
- name: operator_loc
type: location
+ comment: |
+ The location of the `=` operator.
+
+ FOO = :bar
+ ^
comment: |
Represents writing to a constant.
@@ -1220,15 +2314,17 @@ nodes:
type: location
- name: receiver
type: node?
+ kind: non-void expression
- name: parameters
type: node?
kind: ParametersNode
- name: body
type: node?
+ kind:
+ - StatementsNode
+ - BeginNode
- name: locals
type: constant[]
- - name: locals_body_index
- type: uint32
- name: def_keyword_loc
type: location
- name: operator_loc
@@ -1253,6 +2349,7 @@ nodes:
type: location?
- name: value
type: node
+ kind: Node # More than non-void expression as defined?(return) is allowed, yet defined?(BEGIN{}) is SyntaxError
- name: rparen_loc
type: location?
- name: keyword_loc
@@ -1296,6 +2393,12 @@ nodes:
type: location
- name: variable
type: node
+ kind:
+ - InstanceVariableReadNode
+ - ClassVariableReadNode
+ - GlobalVariableReadNode
+ - BackReferenceReadNode
+ - NumberedReferenceReadNode
comment: |
Represents an interpolated variable.
@@ -1329,16 +2432,69 @@ nodes:
fields:
- name: constant
type: node?
+ kind:
+ - ConstantPathNode
+ - ConstantReadNode
+ comment: |
+ Represents the optional constant preceding the pattern.
+
+ foo in Foo(*bar, baz, *qux)
+ ^^^
- name: left
type: node
+ kind: SplatNode
+ comment: |
+ Represents the first wildcard node in the pattern.
+
+ foo in *bar, baz, *qux
+ ^^^^
+
+ foo in Foo(*bar, baz, *qux)
+ ^^^^
- name: requireds
type: node[]
+ kind: pattern expression
+ comment: |
+ Represents the nodes in between the wildcards.
+
+ foo in *bar, baz, *qux
+ ^^^
+
+ foo in Foo(*bar, baz, 1, *qux)
+ ^^^^^^
- name: right
type: node
+ kind:
+ - SplatNode
+ - on error: MissingNode
+ comment: |
+ Represents the second wildcard node in the pattern.
+
+ foo in *bar, baz, *qux
+ ^^^^
+
+ foo in Foo(*bar, baz, *qux)
+ ^^^^
- name: opening_loc
type: location?
+ comment: |
+ The location of the opening brace.
+
+ foo in [*bar, baz, *qux]
+ ^
+
+ foo in Foo(*bar, baz, *qux)
+ ^
- name: closing_loc
type: location?
+ comment: |
+ The location of the closing brace.
+
+ foo in [*bar, baz, *qux]
+ ^
+
+ foo in Foo(*bar, baz, *qux)
+ ^
comment: |
Represents a find pattern in pattern matching.
@@ -1350,15 +2506,18 @@ nodes:
foo in Foo(*bar, baz, *qux)
^^^^^^^^^^^^^^^^^^^^
+
+ foo => *bar, baz, *qux
+ ^^^^^^^^^^^^^^^
- name: FlipFlopNode
+ flags: RangeFlags
fields:
- - name: flags
- type: flags
- kind: RangeFlags
- name: left
type: node?
+ kind: non-void expression
- name: right
type: node?
+ kind: non-void expression
- name: operator_loc
type: location
comment: |
@@ -1367,6 +2526,10 @@ nodes:
baz if foo .. bar
^^^^^^^^^^
- name: FloatNode
+ fields:
+ - name: value
+ type: double
+ comment: The value of the floating point number as a Float.
comment: |
Represents a floating point number literal.
@@ -1376,19 +2539,70 @@ nodes:
fields:
- name: index
type: node
+ kind:
+ - LocalVariableTargetNode
+ - InstanceVariableTargetNode
+ - ClassVariableTargetNode
+ - GlobalVariableTargetNode
+ - ConstantTargetNode
+ - ConstantPathTargetNode
+ - CallTargetNode
+ - IndexTargetNode
+ - MultiTargetNode
+ - on error: BackReferenceReadNode # for $& in a end
+ - on error: NumberedReferenceReadNode # for $1 in a end
+ - on error: MissingNode # for in 1..10; end
+ comment: |
+ The index expression for `for` loops.
+
+ for i in a end
+ ^
- name: collection
type: node
+ kind: non-void expression
+ comment: |
+ The collection to iterate over.
+
+ for i in a end
+ ^
- name: statements
type: node?
kind: StatementsNode
+ comment: |
+ Represents the body of statements to execute for each iteration of the loop.
+
+ for i in a
+ foo(i)
+ ^^^^^^
+ end
- name: for_keyword_loc
type: location
+ comment: |
+ The location of the `for` keyword.
+
+ for i in a end
+ ^^^
- name: in_keyword_loc
type: location
+ comment: |
+ The location of the `in` keyword.
+
+ for i in a end
+ ^^
- name: do_keyword_loc
type: location?
+ comment: |
+ The location of the `do` keyword, if present.
+
+ for i in a do end
+ ^^
- name: end_keyword_loc
type: location
+ comment: |
+ The location of the `end` keyword.
+
+ for i in a end
+ ^^^
comment: |
Represents the use of the `for` keyword.
@@ -1414,11 +2628,18 @@ nodes:
- name: block
type: node?
kind: BlockNode
+ comment: |
+ All other arguments are forwarded as normal, except the original block is replaced with the new block.
comment: |
- Represents the use of the `super` keyword without parentheses or arguments.
+ Represents the use of the `super` keyword without parentheses or arguments, but which might have a block.
super
^^^^^
+
+ super { 123 }
+ ^^^^^^^^^^^^^
+
+ If it has any other arguments, it would be a `SuperNode` instead.
- name: GlobalVariableAndWriteNode
fields:
- name: name
@@ -1429,6 +2650,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `&&=` operator for assignment to a global variable.
@@ -1440,11 +2662,12 @@ nodes:
type: constant
- name: name_loc
type: location
- - name: operator_loc
+ - name: binary_operator_loc
type: location
- name: value
type: node
- - name: operator
+ kind: non-void expression
+ - name: binary_operator
type: constant
comment: |
Represents assigning to a global variable using an operator that isn't `=`.
@@ -1461,6 +2684,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `||=` operator for assignment to a global variable.
@@ -1470,6 +2694,12 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the global variable, which is a `$` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers). Alternatively, it can be one of the special global variables designated by a symbol.
+
+ $foo # name `:$foo`
+
+ $_Test # name `:$_Test`
comment: |
Represents referencing a global variable.
@@ -1488,12 +2718,37 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the global variable, which is a `$` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers). Alternatively, it can be one of the special global variables designated by a symbol.
+
+ $foo = :bar # name `:$foo`
+
+ $_Test = 123 # name `:$_Test`
- name: name_loc
type: location
+ comment: |
+ The location of the global variable's name.
+
+ $foo = :bar
+ ^^^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ The value to write to the global variable. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ $foo = :bar
+ ^^^^
+
+ $-xyz = 123
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ The location of the `=` operator.
+
+ $foo = :bar
+ ^
comment: |
Represents writing to a global variable.
@@ -1510,6 +2765,9 @@ nodes:
^
- name: elements
type: node[]
+ kind:
+ - AssocNode
+ - AssocSplatNode
comment: |
The elements of the hash. These can be either `AssocNode`s or `AssocSplatNode`s.
@@ -1534,14 +2792,61 @@ nodes:
fields:
- name: constant
type: node?
+ kind:
+ - ConstantPathNode
+ - ConstantReadNode
+ comment: |
+ Represents the optional constant preceding the Hash.
+
+ foo => Bar[a: 1, b: 2]
+ ^^^
+
+ foo => Bar::Baz[a: 1, b: 2]
+ ^^^^^^^^
- name: elements
type: node[]
+ kind: AssocNode
+ comment: |
+ Represents the explicit named hash keys and values.
+
+ foo => { a: 1, b:, ** }
+ ^^^^^^^^
- name: rest
type: node?
+ kind:
+ - AssocSplatNode
+ - NoKeywordsParameterNode
+ comment: |
+ Represents the rest of the Hash keys and values. This can be named, unnamed, or explicitly forbidden via `**nil`; the last form results in a `NoKeywordsParameterNode`.
+
+ foo => { a: 1, b:, **c }
+ ^^^
+
+ foo => { a: 1, b:, ** }
+ ^^
+
+ foo => { a: 1, b:, **nil }
+ ^^^^^
- name: opening_loc
type: location?
+ comment: |
+ The location of the opening brace.
+
+ foo => { a: 1 }
+ ^
+
+ foo => Bar[a: 1]
+ ^
- name: closing_loc
type: location?
+ comment: |
+ The location of the closing brace.
+
+ foo => { a: 1 }
+ ^
+
+ foo => Bar[a: 1]
+ ^
comment: |
Represents a hash pattern in pattern matching.
@@ -1550,34 +2855,109 @@ nodes:
foo => { a: 1, b: 2, **c }
^^^^^^^^^^^^^^^^^^^
+
+ foo => Bar[a: 1, b: 2]
+ ^^^^^^^^^^^^^^^
+
+ foo in { a: 1, b: 2 }
+ ^^^^^^^^^^^^^^
- name: IfNode
fields:
- name: if_keyword_loc
type: location?
+ comment: |
+ The location of the `if` keyword if present.
+
+ bar if foo
+ ^^
+
+ The `if_keyword_loc` field will be `nil` when the `IfNode` represents a ternary expression.
- name: predicate
type: node
+ kind: non-void expression
+ comment: |
+ The node for the condition the `IfNode` is testing.
+
+ if foo
+ ^^^
+ bar
+ end
+
+ bar if foo
+ ^^^
+
+ foo ? bar : baz
+ ^^^
- name: then_keyword_loc
type: location?
+ comment: |
+ The location of the `then` keyword (if present) or the `?` in a ternary expression, `nil` otherwise.
+
+ if foo then bar end
+ ^^^^
+
+ a ? b : c
+ ^
- name: statements
type: node?
kind: StatementsNode
- - name: consequent
+ comment: |
+ Represents the body of statements that will be executed when the predicate is evaluated as truthy. Will be `nil` when no body is provided.
+
+ if foo
+ bar
+ ^^^
+ baz
+ ^^^
+ end
+ - name: subsequent
type: node?
+ kind:
+ - ElseNode
+ - IfNode
+ comment: |
+ Represents an `ElseNode` or an `IfNode` when there is an `else` or an `elsif` in the `if` statement.
+
+ if foo
+ bar
+ elsif baz
+ ^^^^^^^^^
+ qux
+ ^^^
+ end
+ ^^^
+
+ if foo then bar else baz end
+ ^^^^^^^^^^^^
- name: end_keyword_loc
type: location?
+ comment: |
+ The location of the `end` keyword if present, `nil` otherwise.
+
+ if foo
+ bar
+ end
+ ^^^
newline: predicate
comment: |
- Represents the use of the `if` keyword, either in the block form or the modifier form.
+ Represents the use of the `if` keyword, either in the block form or the modifier form, or a ternary expression.
bar if foo
^^^^^^^^^^
if foo then bar end
^^^^^^^^^^^^^^^^^^^
+
+ foo ? bar : baz
+ ^^^^^^^^^^^^^^^
- name: ImaginaryNode
fields:
- name: numeric
type: node
+ kind:
+ - FloatNode
+ - IntegerNode
+ - RationalNode
comment: |
Represents an imaginary number literal.
@@ -1587,15 +2967,22 @@ nodes:
fields:
- name: value
type: node
+ kind:
+ - LocalVariableReadNode
+ - CallNode
+ - ConstantReadNode
+ - LocalVariableTargetNode
comment: |
- Represents a node that is implicitly being added to the tree but doesn't
- correspond directly to a node in the source.
+ Represents a node that is implicitly being added to the tree but doesn't correspond directly to a node in the source.
{ foo: }
^^^^
{ Foo: }
^^^^
+
+ foo in { bar: }
+ ^^^^
- name: ImplicitRestNode
comment: |
Represents using a trailing comma to indicate an implicit rest parameter.
@@ -1615,6 +3002,7 @@ nodes:
fields:
- name: pattern
type: node
+ kind: pattern expression
- name: statements
type: node?
kind: StatementsNode
@@ -1628,12 +3016,11 @@ nodes:
case a; in b then c end
^^^^^^^^^^^
- name: IndexAndWriteNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node?
+ kind: non-void expression
- name: call_operator_loc
type: location?
- name: opening_loc
@@ -1645,22 +3032,23 @@ nodes:
type: location
- name: block
type: node?
+ kind: BlockArgumentNode # foo[&b] &&= value, only valid on Ruby < 3.4
- name: operator_loc
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `&&=` operator on a call to the `[]` method.
foo.bar[baz] &&= value
^^^^^^^^^^^^^^^^^^^^^^
- name: IndexOperatorWriteNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node?
+ kind: non-void expression
- name: call_operator_loc
type: location?
- name: opening_loc
@@ -1672,24 +3060,25 @@ nodes:
type: location
- name: block
type: node?
- - name: operator
+ kind: BlockArgumentNode # foo[&b] += value, only valid on Ruby < 3.4
+ - name: binary_operator
type: constant
- - name: operator_loc
+ - name: binary_operator_loc
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of an assignment operator on a call to `[]`.
foo.bar[baz] += value
^^^^^^^^^^^^^^^^^^^^^
- name: IndexOrWriteNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node?
+ kind: non-void expression
- name: call_operator_loc
type: location?
- name: opening_loc
@@ -1701,22 +3090,23 @@ nodes:
type: location
- name: block
type: node?
+ kind: BlockArgumentNode # foo[&b] ||= value, only valid on Ruby < 3.4
- name: operator_loc
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `||=` operator on a call to `[]`.
foo.bar[baz] ||= value
^^^^^^^^^^^^^^^^^^^^^^
- name: IndexTargetNode
+ flags: CallNodeFlags
fields:
- - name: flags
- type: flags
- kind: CallNodeFlags
- name: receiver
type: node
+ kind: non-void expression
- name: opening_loc
type: location
- name: arguments
@@ -1726,6 +3116,7 @@ nodes:
type: location
- name: block
type: node?
+ kind: BlockArgumentNode # foo[&b], = 1, only valid on Ruby < 3.4
comment: |
Represents assigning to an index.
@@ -1749,6 +3140,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `&&=` operator for assignment to an instance variable.
@@ -1760,11 +3152,12 @@ nodes:
type: constant
- name: name_loc
type: location
- - name: operator_loc
+ - name: binary_operator_loc
type: location
- name: value
type: node
- - name: operator
+ kind: non-void expression
+ - name: binary_operator
type: constant
comment: |
Represents assigning to an instance variable using an operator that isn't `=`.
@@ -1781,6 +3174,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents the use of the `||=` operator for assignment to an instance variable.
@@ -1790,6 +3184,12 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the instance variable, which is a `@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+ @x # name `:@x`
+
+ @_test # name `:@_test`
comment: |
Represents referencing an instance variable.
@@ -1808,68 +3208,83 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the instance variable, which is a `@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+ @x = :y # name `:@x`
+
+ @_foo = "bar" # name `:@_foo`
- name: name_loc
type: location
+ comment: |
+ The location of the variable name.
+
+ @_x = 1
+ ^^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ The value to write to the instance variable. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ @foo = :bar
+ ^^^^
+
+ @_x = 1234
+ ^^^^
- name: operator_loc
type: location
+ comment: |
+ The location of the `=` operator.
+
+ @x = y
+ ^
comment: |
Represents writing to an instance variable.
@foo = 1
^^^^^^^^
- name: IntegerNode
+ flags: IntegerBaseFlags
fields:
- - name: flags
- type: flags
- kind: IntegerBaseFlags
- comment: |
- Represents flag indicating the base of the integer
-
- 10 base decimal, value 10
- 0d10 base decimal, value 10
- 0b10 base binary, value 2
- 0o10 base octal, value 8
- 010 base octal, value 8
- 0x10 base hexidecimal, value 16
-
- A 0 prefix indicates the number has a different base.
- The d, b, o, and x prefixes indicate the base. If one of those
- four letters is omitted, the base is assumed to be octal.
+ - name: value
+ type: integer
+ comment: The value of the integer literal as a number.
comment: |
Represents an integer number literal.
1
^
- name: InterpolatedMatchLastLineNode
+ flags: RegularExpressionFlags
fields:
- - name: flags
- type: flags
- kind: RegularExpressionFlags
- name: opening_loc
type: location
- name: parts
type: node[]
+ kind:
+ - StringNode
+ - EmbeddedStatementsNode
+ - EmbeddedVariableNode
- name: closing_loc
type: location
newline: parts
comment: |
- Represents a regular expression literal that contains interpolation that
- is being used in the predicate of a conditional to implicitly match
- against the last line read by an IO object.
+ Represents a regular expression literal that contains interpolation that is being used in the predicate of a conditional to implicitly match against the last line read by an IO object.
if /foo #{bar} baz/ then end
^^^^^^^^^^^^^^^^
- name: InterpolatedRegularExpressionNode
+ flags: RegularExpressionFlags
fields:
- - name: flags
- type: flags
- kind: RegularExpressionFlags
- name: opening_loc
type: location
- name: parts
type: node[]
+ kind:
+ - StringNode
+ - EmbeddedStatementsNode
+ - EmbeddedVariableNode
- name: closing_loc
type: location
newline: parts
@@ -1879,11 +3294,21 @@ nodes:
/foo #{bar} baz/
^^^^^^^^^^^^^^^^
- name: InterpolatedStringNode
+ flags: InterpolatedStringNodeFlags
fields:
- name: opening_loc
type: location?
- name: parts
type: node[]
+ kind:
+ - StringNode
+ - EmbeddedStatementsNode
+ - EmbeddedVariableNode
+ - InterpolatedStringNode # `"a" "#{b}"`
+ - on error: XStringNode # `<<`FOO` "bar"
+ - on error: InterpolatedXStringNode
+ - on error: SymbolNode
+ - on error: InterpolatedSymbolNode
- name: closing_loc
type: location?
newline: parts
@@ -1898,6 +3323,10 @@ nodes:
type: location?
- name: parts
type: node[]
+ kind:
+ - StringNode
+ - EmbeddedStatementsNode
+ - EmbeddedVariableNode
- name: closing_loc
type: location?
newline: parts
@@ -1912,6 +3341,10 @@ nodes:
type: location
- name: parts
type: node[]
+ kind:
+ - StringNode
+ - EmbeddedStatementsNode
+ - EmbeddedVariableNode
- name: closing_loc
type: location
newline: parts
@@ -1920,23 +3353,34 @@ nodes:
`foo #{bar} baz`
^^^^^^^^^^^^^^^^
+ - name: ItLocalVariableReadNode
+ comment: |
+ Represents reading from the implicit `it` local variable.
+
+ -> { it }
+ ^^
+ - name: ItParametersNode
+ comment: |
+ Represents an implicit set of parameters through the use of the `it` keyword within a block or lambda.
+
+ -> { it + it }
+ ^^^^^^^^^^^^^^
- name: KeywordHashNode
+ flags: KeywordHashNodeFlags
fields:
- - name: flags
- type: flags
- kind: KeywordHashNodeFlags
- name: elements
type: node[]
+ kind:
+ - AssocNode
+ - AssocSplatNode
comment: |
Represents a hash literal without opening and closing braces.
foo(a: b)
^^^^
- name: KeywordRestParameterNode
+ flags: ParameterFlags
fields:
- - name: flags
- type: flags
- kind: ParameterFlags
- name: name
type: constant?
- name: name_loc
@@ -1953,8 +3397,6 @@ nodes:
fields:
- name: locals
type: constant[]
- - name: locals_body_index
- type: uint32
- name: operator_loc
type: location
- name: opening_loc
@@ -1963,8 +3405,15 @@ nodes:
type: location
- name: parameters
type: node?
+ kind:
+ - BlockParametersNode
+ - NumberedParametersNode
+ - ItParametersNode
- name: body
type: node?
+ kind:
+ - StatementsNode
+ - BeginNode
comment: |
Represents using a lambda literal (not the lambda method call).
@@ -1978,6 +3427,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
- name: name
type: constant
- name: depth
@@ -1991,13 +3441,14 @@ nodes:
fields:
- name: name_loc
type: location
- - name: operator_loc
+ - name: binary_operator_loc
type: location
- name: value
type: node
+ kind: non-void expression
- name: name
type: constant
- - name: operator
+ - name: binary_operator
type: constant
- name: depth
type: uint32
@@ -2014,6 +3465,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
- name: name
type: constant
- name: depth
@@ -2027,12 +3479,29 @@ nodes:
fields:
- name: name
type: constant
+ comment: |
+ The name of the local variable, which is an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+ x # name `:x`
+
+ _Test # name `:_Test`
+
+ Note that this can also be an underscore followed by a number for the default block parameters.
+
+ _1 # name `:_1`
+
- name: depth
type: uint32
+ comment: |
+ The number of visible scopes that should be searched to find the origin of this local variable.
+
+ foo = 1; foo # depth 0
+
+ bar = 2; tap { bar } # depth 1
+
+ The specific rules for calculating the depth may differ between individual Ruby implementations, as they are not specified by the language. For more information, see [the Prism documentation](https://github.com/ruby/prism/blob/main/docs/local_variable_depth.md).
comment: |
- Represents reading a local variable. Note that this requires that a local
- variable of the same name has already been written to in the same scope,
- otherwise it is parsed as a method call.
+ Represents reading a local variable. Note that this requires that a local variable of the same name has already been written to in the same scope, otherwise it is parsed as a method call.
foo
^^^
@@ -2047,28 +3516,66 @@ nodes:
foo, bar = baz
^^^ ^^^
+
+ foo => baz
+ ^^^
- name: LocalVariableWriteNode
fields:
- name: name
type: constant
+ comment: |
+ The name of the local variable, which is an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+ foo = :bar # name `:foo`
+
+ abc = 123 # name `:abc`
- name: depth
type: uint32
+ comment: |
+ The number of semantic scopes we have to traverse to find the declaration of this variable.
+
+ foo = 1 # depth 0
+
+ tap { foo = 1 } # depth 1
+
+ The specific rules for calculating the depth may differ between individual Ruby implementations, as they are not specified by the language. For more information, see [the Prism documentation](https://github.com/ruby/prism/blob/main/docs/local_variable_depth.md).
- name: name_loc
type: location
+ comment: |
+ The location of the variable name.
+
+ foo = :bar
+ ^^^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ The value to write to the local variable. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ foo = :bar
+ ^^^^
+
+ abc = 1234
+ ^^^^
+
+ Note that since the name of a local variable is known before the value is parsed, it is valid for a local variable to appear within the value of its own write.
+
+ foo = foo
- name: operator_loc
type: location
+ comment: |
+ The location of the `=` operator.
+
+ x = :y
+ ^
comment: |
Represents writing to a local variable.
foo = 1
^^^^^^^
- name: MatchLastLineNode
+ flags: RegularExpressionFlags
fields:
- - name: flags
- type: flags
- kind: RegularExpressionFlags
- name: opening_loc
type: location
- name: content_loc
@@ -2078,9 +3585,7 @@ nodes:
- name: unescaped
type: string
comment: |
- Represents a regular expression literal used in the predicate of a
- conditional to implicitly match against the last line read by an IO
- object.
+ Represents a regular expression literal used in the predicate of a conditional to implicitly match against the last line read by an IO object.
if /foo/i then end
^^^^^^
@@ -2088,8 +3593,10 @@ nodes:
fields:
- name: value
type: node
+ kind: non-void expression
- name: pattern
type: node
+ kind: pattern expression
- name: operator_loc
type: location
comment: |
@@ -2101,10 +3608,66 @@ nodes:
fields:
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ Represents the left-hand side of the operator.
+
+ foo => bar
+ ^^^
- name: pattern
type: node
+ kind: pattern expression
+ comment: |
+ Represents the right-hand side of the operator. The type of the node depends on the expression.
+
+ Anything that looks like a local variable name (including `_`) will result in a `LocalVariableTargetNode`.
+
+ foo => a # This is equivalent to writing `a = foo`
+ ^
+
+ Using an explicit `Array` or combining expressions with `,` will result in an `ArrayPatternNode`. This can be preceded by a constant.
+
+ foo => [a]
+ ^^^
+
+ foo => a, b
+ ^^^^
+
+ foo => Bar[a, b]
+ ^^^^^^^^^
+
+ If the array pattern contains at least two wildcard matches, a `FindPatternNode` is created instead.
+
+ foo => *, 1, *a
+ ^^^^^
+
+ Using an explicit `Hash` or a constant with square brackets and hash keys in the square brackets will result in a `HashPatternNode`.
+
+ foo => { a: 1, b: }
+
+ foo => Bar[a: 1, b:]
+
+ foo => Bar[**]
+
+ To use any variable that needs runtime evaluation, pinning is required. This results in a `PinnedVariableNode`.
+
+ foo => ^a
+ ^^
+
+ Similarly, any expression can be used with pinning. This results in a `PinnedExpressionNode`.
+
+ foo => ^(a + 1)
+
+ Anything else will result in the regular node for that expression, for example a `ConstantReadNode`.
+
+ foo => CONST
- name: operator_loc
type: location
+ comment: |
+ The location of the operator.
+
+ foo => bar
+ ^^
comment: |
Represents the use of the `=>` operator.
@@ -2117,16 +3680,15 @@ nodes:
kind: CallNode
- name: targets
type: node[]
+ kind: LocalVariableTargetNode
comment: |
- Represents writing local variables using a regular expression match with
- named capture groups.
+ Represents writing local variables using a regular expression match with named capture groups.
/(?<foo>bar)/ =~ baz
^^^^^^^^^^^^^^^^^^^^
- name: MissingNode
comment: |
- Represents a node that is missing from the source and results in a syntax
- error.
+ Represents a node that is missing from the source and results in a syntax error.
- name: ModuleNode
fields:
- name: locals
@@ -2135,8 +3697,15 @@ nodes:
type: location
- name: constant_path
type: node
+ kind:
+ - ConstantReadNode
+ - ConstantPathNode
+ - on error: MissingNode # module Parent module end
- name: body
type: node?
+ kind:
+ - StatementsNode
+ - BeginNode
- name: end_keyword_loc
type: location
- name: name
@@ -2150,35 +3719,187 @@ nodes:
fields:
- name: lefts
type: node[]
+ kind:
+ - LocalVariableTargetNode
+ - InstanceVariableTargetNode
+ - ClassVariableTargetNode
+ - GlobalVariableTargetNode
+ - ConstantTargetNode
+ - ConstantPathTargetNode
+ - CallTargetNode
+ - IndexTargetNode
+ - MultiTargetNode
+ - RequiredParameterNode # def m((a,b)); end
+ - on error: BackReferenceReadNode # a, (b, $&) = z
+ - on error: NumberedReferenceReadNode # a, (b, $1) = z
+ comment: |
+ Represents the target expressions before a splat node.
+
+ a, (b, c, *) = 1, 2, 3, 4, 5
+ ^^^^
+
+ The splat node can be absent; in that case, all target expressions are in the `lefts` field.
+
+ a, (b, c) = 1, 2, 3, 4, 5
+ ^^^^
- name: rest
type: node?
+ kind:
+ - ImplicitRestNode
+ - SplatNode
+ comment: |
+ Represents a splat node in the target expression.
+
+ a, (b, *c) = 1, 2, 3, 4
+ ^^
+
+ The variable can be empty; this results in a `SplatNode` with a `nil` expression field.
+
+ a, (b, *) = 1, 2, 3, 4
+ ^
+
+ If the `*` is omitted, this field will contain an `ImplicitRestNode`
+
+ a, (b,) = 1, 2, 3, 4
+ ^
- name: rights
type: node[]
+ kind:
+ - LocalVariableTargetNode
+ - InstanceVariableTargetNode
+ - ClassVariableTargetNode
+ - GlobalVariableTargetNode
+ - ConstantTargetNode
+ - ConstantPathTargetNode
+ - CallTargetNode
+ - IndexTargetNode
+ - MultiTargetNode
+ - RequiredParameterNode # def m((*,b)); end
+ - on error: BackReferenceReadNode # a, (*, $&) = z
+ - on error: NumberedReferenceReadNode # a, (*, $1) = z
+ comment: |
+ Represents the target expressions after a splat node.
+
+ a, (*, b, c) = 1, 2, 3, 4, 5
+ ^^^^
- name: lparen_loc
type: location?
+ comment: |
+ The location of the opening parenthesis.
+
+ a, (b, c) = 1, 2, 3
+ ^
- name: rparen_loc
type: location?
+ comment: |
+ The location of the closing parenthesis.
+
+ a, (b, c) = 1, 2, 3
+ ^
comment: |
Represents a multi-target expression.
a, (b, c) = 1, 2, 3
^^^^^^
+
+ This can be a part of `MultiWriteNode` as above, or the target of a `for` loop
+
+ for a, b in [[1, 2], [3, 4]]
+ ^^^^
- name: MultiWriteNode
fields:
- name: lefts
type: node[]
+ kind:
+ - LocalVariableTargetNode
+ - InstanceVariableTargetNode
+ - ClassVariableTargetNode
+ - GlobalVariableTargetNode
+ - ConstantTargetNode
+ - ConstantPathTargetNode
+ - CallTargetNode
+ - IndexTargetNode
+ - MultiTargetNode
+ - on error: BackReferenceReadNode # $&, = z
+ - on error: NumberedReferenceReadNode # $1, = z
+ comment: |
+ Represents the target expressions before a splat node.
+
+ a, b, * = 1, 2, 3, 4, 5
+ ^^^^
+
+ The splat node can be absent; in that case, all target expressions are in the `lefts` field.
+
+ a, b, c = 1, 2, 3, 4, 5
+ ^^^^^^^
- name: rest
type: node?
+ kind:
+ - ImplicitRestNode
+ - SplatNode
+ comment: |
+ Represents a splat node in the target expression.
+
+ a, b, *c = 1, 2, 3, 4
+ ^^
+
+ The variable can be empty; this results in a `SplatNode` with a `nil` expression field.
+
+ a, b, * = 1, 2, 3, 4
+ ^
+
+ If the `*` is omitted, this field will contain an `ImplicitRestNode`
+
+ a, b, = 1, 2, 3, 4
+ ^
- name: rights
type: node[]
+ kind:
+ - LocalVariableTargetNode
+ - InstanceVariableTargetNode
+ - ClassVariableTargetNode
+ - GlobalVariableTargetNode
+ - ConstantTargetNode
+ - ConstantPathTargetNode
+ - CallTargetNode
+ - IndexTargetNode
+ - MultiTargetNode
+ - on error: BackReferenceReadNode # *, $& = z
+ - on error: NumberedReferenceReadNode # *, $1 = z
+ comment: |
+ Represents the target expressions after a splat node.
+
+ a, *, b, c = 1, 2, 3, 4, 5
+ ^^^^
- name: lparen_loc
type: location?
+ comment: |
+ The location of the opening parenthesis.
+
+ (a, b, c) = 1, 2, 3
+ ^
- name: rparen_loc
type: location?
+ comment: |
+ The location of the closing parenthesis.
+
+ (a, b, c) = 1, 2, 3
+ ^
- name: operator_loc
type: location
+ comment: |
+ The location of the operator.
+
+ a, b, c = 1, 2, 3
+ ^
- name: value
type: node
+ kind: non-void expression
+ comment: |
+ The value to write to the targets. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ a, b, c = 1, 2, 3
+ ^^^^^^^
comment: |
Represents a write to a multi-target expression.
@@ -2219,8 +3940,7 @@ nodes:
- name: maximum
type: uint8
comment: |
- Represents an implicit set of parameters through the use of numbered
- parameters within a block or lambda.
+ Represents an implicit set of parameters through the use of numbered parameters within a block or lambda.
-> { _1 + _2 }
^^^^^^^^^^^^^^
@@ -2228,22 +3948,29 @@ nodes:
fields:
- name: number
type: uint32
+ comment: |
+ The (1-indexed, from the left) number of the capture group. Numbered references that are too large result in this value being `0`.
+
+ $1 # number `1`
+
+ $5432 # number `5432`
+
+ $4294967296 # number `0`
comment: |
Represents reading a numbered reference to a capture in the previous match.
$1
^^
- name: OptionalKeywordParameterNode
+ flags: ParameterFlags
fields:
- - name: flags
- type: flags
- kind: ParameterFlags
- name: name
type: constant
- name: name_loc
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents an optional keyword parameter to a method, block, or lambda definition.
@@ -2251,10 +3978,8 @@ nodes:
^^^^
end
- name: OptionalParameterNode
+ flags: ParameterFlags
fields:
- - name: flags
- type: flags
- kind: ParameterFlags
- name: name
type: constant
- name: name_loc
@@ -2263,6 +3988,7 @@ nodes:
type: location
- name: value
type: node
+ kind: non-void expression
comment: |
Represents an optional parameter to a method, block, or lambda definition.
@@ -2273,10 +3999,33 @@ nodes:
fields:
- name: left
type: node
+ kind: non-void expression
+ comment: |
+ Represents the left side of the expression. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ left or right
+ ^^^^
+
+ 1 || 2
+ ^
- name: right
type: node
+ kind: Node
+ comment: |
+ Represents the right side of the expression.
+
+ left || right
+ ^^^^^
+
+ 1 or 2
+ ^
- name: operator_loc
type: location
+ comment: |
+ The location of the `or` keyword or the `||` operator.
+
+ left or right
+ ^^
comment: |
Represents the use of the `||` operator or the `or` keyword.
@@ -2286,16 +4035,38 @@ nodes:
fields:
- name: requireds
type: node[]
+ kind:
+ - RequiredParameterNode
+ - MultiTargetNode
- name: optionals
type: node[]
+ kind: OptionalParameterNode
- name: rest
type: node?
+ kind:
+ - RestParameterNode
+ - ImplicitRestNode # Only in block parameters
- name: posts
type: node[]
+ kind:
+ - RequiredParameterNode
+ - MultiTargetNode
+ # On parsing error of `f(**kwargs, ...)` or `f(**nil, ...)`, the keyword_rest value is moved here:
+ - on error: KeywordRestParameterNode
+ - on error: NoKeywordsParameterNode
+ # On parsing error of `f(..., ...)`, the first forwarding parameter is moved here:
+ - on error: ForwardingParameterNode
- name: keywords
type: node[]
+ kind:
+ - RequiredKeywordParameterNode
+ - OptionalKeywordParameterNode
- name: keyword_rest
type: node?
+ kind:
+ - KeywordRestParameterNode
+ - ForwardingParameterNode
+ - NoKeywordsParameterNode
- name: block
type: node?
kind: BlockParameterNode
@@ -2306,9 +4077,11 @@ nodes:
^^^^^^^
end
- name: ParenthesesNode
+ flags: ParenthesesNodeFlags
fields:
- name: body
type: node?
+ kind: non-void expression # Usually a StatementsNode but not always e.g. `1 in (..10)`
- name: opening_loc
type: location
- name: closing_loc
@@ -2323,15 +4096,35 @@ nodes:
fields:
- name: expression
type: node
+ kind: non-void expression
+ comment: |
+ The expression used in the pinned expression
+
+ foo in ^(bar)
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ The location of the `^` operator
+
+ foo in ^(bar)
+ ^
- name: lparen_loc
type: location
+ comment: |
+ The location of the opening parenthesis.
+
+ foo in ^(bar)
+ ^
- name: rparen_loc
type: location
+ comment: |
+ The location of the closing parenthesis.
+
+ foo in ^(bar)
+ ^
comment: |
- Represents the use of the `^` operator for pinning an expression in a
- pattern matching expression.
+ Represents the use of the `^` operator for pinning an expression in a pattern matching expression.
foo in ^(bar)
^^^^^^
@@ -2339,11 +4132,29 @@ nodes:
fields:
- name: variable
type: node
+ kind:
+ - LocalVariableReadNode
+ - InstanceVariableReadNode
+ - ClassVariableReadNode
+ - GlobalVariableReadNode # foo in ^$a
+ - BackReferenceReadNode # foo in ^$&
+ - NumberedReferenceReadNode # foo in ^$1
+ - ItLocalVariableReadNode # proc { 1 in ^it }
+ - on error: MissingNode # foo in ^Bar
+ comment: |
+ The variable used in the pinned expression
+
+ foo in ^bar
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ The location of the `^` operator
+
+ foo in ^bar
+ ^
comment: |
- Represents the use of the `^` operator for pinning a variable in a pattern
- matching expression.
+ Represents the use of the `^` operator for pinning a variable in a pattern matching expression.
foo in ^bar
^^^^
@@ -2388,22 +4199,13 @@ nodes:
kind: StatementsNode
comment: The top level node of any parse tree.
- name: RangeNode
+ flags: RangeFlags
fields:
- - name: flags
- type: flags
- kind: RangeFlags
- comment: |
- A flag indicating whether the range excludes the end value.
-
- 1..3 # includes 3
-
- 1...3 # excludes 3
- name: left
type: node?
+ kind: non-void expression
comment: |
- The left-hand side of the range, if present. Can be either `nil` or
- a node representing any kind of expression that returns a non-void
- value.
+ The left-hand side of the range, if present. It can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
1...
^
@@ -2412,18 +4214,16 @@ nodes:
^^^^^
- name: right
type: node?
+ kind: non-void expression
comment: |
- The right-hand side of the range, if present. Can be either `nil` or
- a node representing any kind of expression that returns a non-void
- value.
+ The right-hand side of the range, if present. It can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
..5
^
1...foo
^^^
- If neither right-hand or left-hand side was included, this will be a
- MissingNode.
+ If neither right-hand nor left-hand side was included, this will be a MissingNode.
- name: operator_loc
type: location
comment: |
@@ -2437,9 +4237,20 @@ nodes:
c if a =~ /left/ ... b =~ /right/
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- name: RationalNode
+ flags: IntegerBaseFlags
fields:
- - name: numeric
- type: node
+ - name: numerator
+ type: integer
+ comment: |
+ The numerator of the rational number.
+
+ 1.5r # numerator 3
+ - name: denominator
+ type: integer
+ comment: |
+ The denominator of the rational number.
+
+ 1.5r # denominator 2
comment: |
Represents a rational number literal.
@@ -2452,10 +4263,8 @@ nodes:
redo
^^^^
- name: RegularExpressionNode
+ flags: RegularExpressionFlags
fields:
- - name: flags
- type: flags
- kind: RegularExpressionFlags
- name: opening_loc
type: location
- name: content_loc
@@ -2470,10 +4279,8 @@ nodes:
/foo/i
^^^^^^
- name: RequiredKeywordParameterNode
+ flags: ParameterFlags
fields:
- - name: flags
- type: flags
- kind: ParameterFlags
- name: name
type: constant
- name: name_loc
@@ -2485,10 +4292,8 @@ nodes:
^^
end
- name: RequiredParameterNode
+ flags: ParameterFlags
fields:
- - name: flags
- type: flags
- kind: ParameterFlags
- name: name
type: constant
comment: |
@@ -2501,10 +4306,12 @@ nodes:
fields:
- name: expression
type: node
+ kind: Node
- name: keyword_loc
type: location
- name: rescue_expression
type: node
+ kind: Node
newline: expression
comment: |
Represents an expression modified with a rescue.
@@ -2517,14 +4324,29 @@ nodes:
type: location
- name: exceptions
type: node[]
+ kind: non-void expression
- name: operator_loc
type: location?
- name: reference
type: node?
+ kind:
+ - LocalVariableTargetNode
+ - InstanceVariableTargetNode
+ - ClassVariableTargetNode
+ - GlobalVariableTargetNode
+ - ConstantTargetNode
+ - ConstantPathTargetNode
+ - CallTargetNode
+ - IndexTargetNode
+ - on error: BackReferenceReadNode # => begin; rescue => $&; end
+ - on error: NumberedReferenceReadNode # => begin; rescue => $1; end
+ - on error: MissingNode # begin; rescue =>; end
+ - name: then_keyword_loc
+ type: location?
- name: statements
type: node?
kind: StatementsNode
- - name: consequent
+ - name: subsequent
type: node?
kind: RescueNode
comment: |
@@ -2536,13 +4358,10 @@ nodes:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
end
- `Foo, *splat, Bar` are in the `exceptions` field.
- `ex` is in the `exception` field.
+ `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
- name: RestParameterNode
+ flags: ParameterFlags
fields:
- - name: flags
- type: flags
- kind: ParameterFlags
- name: name
type: constant?
- name: name_loc
@@ -2579,6 +4398,27 @@ nodes:
self
^^^^
+ - name: ShareableConstantNode
+ flags: ShareableConstantNodeFlags
+ fields:
+ - name: write
+ type: node
+ kind:
+ - ConstantWriteNode
+ - ConstantAndWriteNode
+ - ConstantOrWriteNode
+ - ConstantOperatorWriteNode
+ - ConstantPathWriteNode
+ - ConstantPathAndWriteNode
+ - ConstantPathOrWriteNode
+ - ConstantPathOperatorWriteNode
+ comment: The constant write that should be modified with the shareability state.
+ comment: |
+ This node wraps a constant write to indicate that when the value is written, it should have its shareability state modified.
+
+ # shareable_constant_value: literal
+ C = { a: 1 }
+ ^^^^^^^^^^^^
- name: SingletonClassNode
fields:
- name: locals
@@ -2589,8 +4429,12 @@ nodes:
type: location
- name: expression
type: node
+ kind: non-void expression
- name: body
type: node?
+ kind:
+ - StatementsNode
+ - BeginNode
- name: end_keyword_loc
type: location
comment: |
@@ -2605,9 +4449,11 @@ nodes:
__ENCODING__
^^^^^^^^^^^^
- name: SourceFileNode
+ flags: StringFlags
fields:
- name: filepath
type: string
+ comment: Represents the file path being parsed. This corresponds directly to the `filepath` option given to the various `Prism::parse*` APIs.
comment: |
Represents the use of the `__FILE__` keyword.
@@ -2625,6 +4471,7 @@ nodes:
type: location
- name: expression
type: node?
+ kind: non-void expression
comment: |
Represents the use of the splat operator.
@@ -2634,16 +4481,15 @@ nodes:
fields:
- name: body
type: node[]
+ kind: Node
comment: |
Represents a set of statements contained within some scope.
foo; bar; baz
^^^^^^^^^^^^^
- name: StringNode
+ flags: StringFlags
fields:
- - name: flags
- type: flags
- kind: StringFlags
- name: opening_loc
type: location?
- name: content_loc
@@ -2653,8 +4499,7 @@ nodes:
- name: unescaped
type: string
comment: |
- Represents a string literal, a string contained within a `%w` list, or
- plain string content within an interpolated string.
+ Represents a string literal, a string contained within a `%w` list, or plain string content within an interpolated string.
"foo"
^^^^^
@@ -2673,10 +4518,14 @@ nodes:
- name: arguments
type: node?
kind: ArgumentsNode
+ comment: "Can only be `nil` when there are empty parentheses, like `super()`."
- name: rparen_loc
type: location?
- name: block
type: node?
+ kind:
+ - BlockNode
+ - BlockArgumentNode
comment: |
Represents the use of the `super` keyword with parentheses or arguments.
@@ -2685,11 +4534,11 @@ nodes:
super foo, bar
^^^^^^^^^^^^^^
+
+ If no arguments are provided (except for a block), it would be a `ForwardingSuperNode` instead.
- name: SymbolNode
+ flags: SymbolFlags
fields:
- - name: flags
- type: flags
- kind: SymbolFlags
- name: opening_loc
type: location?
- name: value_loc
@@ -2716,6 +4565,9 @@ nodes:
fields:
- name: names
type: node[]
+ kind:
+ - SymbolNode
+ - InterpolatedSymbolNode
- name: keyword_loc
type: location
comment: |
@@ -2727,18 +4579,56 @@ nodes:
fields:
- name: keyword_loc
type: location
+ comment: |
+ The location of the `unless` keyword.
+
+ unless cond then bar end
+ ^^^^^^
+
+ bar unless cond
+ ^^^^^^
- name: predicate
type: node
+ kind: non-void expression
+ comment: |
+ The condition to be evaluated for the unless expression. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+ unless cond then bar end
+ ^^^^
+
+ bar unless cond
+ ^^^^
- name: then_keyword_loc
type: location?
+ comment: |
+ The location of the `then` keyword, if present.
+
+ unless cond then bar end
+ ^^^^
- name: statements
type: node?
kind: StatementsNode
- - name: consequent
+ comment: |
+ The body of statements that will be executed if the unless condition is
+ falsey. Will be `nil` if no body is provided.
+
+ unless cond then bar end
+ ^^^
+ - name: else_clause
type: node?
kind: ElseNode
+ comment: |
+ The else clause of the unless expression, if present.
+
+ unless cond then bar else baz end
+ ^^^^^^^^
- name: end_keyword_loc
type: location?
+ comment: |
+ The location of the `end` keyword, if present.
+
+ unless cond then bar end
+ ^^^
newline: predicate
comment: |
Represents the use of the `unless` keyword, either in the block form or the modifier form.
@@ -2749,16 +4639,17 @@ nodes:
unless foo then bar end
^^^^^^^^^^^^^^^^^^^^^^^
- name: UntilNode
+ flags: LoopFlags
fields:
- - name: flags
- type: flags
- kind: LoopFlags
- name: keyword_loc
type: location
+ - name: do_keyword_loc
+ type: location?
- name: closing_loc
type: location?
- name: predicate
type: node
+ kind: non-void expression
- name: statements
type: node?
kind: StatementsNode
@@ -2777,6 +4668,9 @@ nodes:
type: location
- name: conditions
type: node[]
+ kind: non-void expression
+ - name: then_keyword_loc
+ type: location?
- name: statements
type: node?
kind: StatementsNode
@@ -2788,16 +4682,17 @@ nodes:
^^^^^^^^^
end
- name: WhileNode
+ flags: LoopFlags
fields:
- - name: flags
- type: flags
- kind: LoopFlags
- name: keyword_loc
type: location
+ - name: do_keyword_loc
+ type: location?
- name: closing_loc
type: location?
- name: predicate
type: node
+ kind: non-void expression
- name: statements
type: node?
kind: StatementsNode
@@ -2811,10 +4706,8 @@ nodes:
while foo do bar end
^^^^^^^^^^^^^^^^^^^^
- name: XStringNode
+ flags: EncodingFlags
fields:
- - name: flags
- type: flags
- kind: EncodingFlags
- name: opening_loc
type: location
- name: content_loc
diff --git a/prism/defines.h b/prism/defines.h
index c9715c4eb0..e31429c789 100644
--- a/prism/defines.h
+++ b/prism/defines.h
@@ -10,6 +10,8 @@
#define PRISM_DEFINES_H
#include <ctype.h>
+#include <limits.h>
+#include <math.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
@@ -17,6 +19,25 @@
#include <string.h>
/**
+ * We want to be able to use the PRI* macros for printing out integers, but on
+ * some platforms they aren't included unless this is already defined.
+ */
+#define __STDC_FORMAT_MACROS
+// Include sys/types.h before inttypes.h to work around issue with
+// certain versions of GCC and newlib which causes omission of PRIx64
+#include <sys/types.h>
+#include <inttypes.h>
+
+/**
+ * When we are parsing using recursive descent, we want to protect against
+ * malicious payloads that could attempt to crash our parser. We do this by
+ * specifying a maximum depth to which we are allowed to recurse.
+ */
+#ifndef PRISM_DEPTH_MAXIMUM
+ #define PRISM_DEPTH_MAXIMUM 10000
+#endif
+
+/**
* By default, we compile with -fvisibility=hidden. When this is enabled, we
* need to mark certain functions as being publicly visible. This macro does
* that in a compiler-agnostic way.
@@ -40,7 +61,11 @@
* compiler-agnostic way.
*/
#if defined(__GNUC__)
-# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
+# if defined(__MINGW_PRINTF_FORMAT)
+# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index, argument_index)))
+# else
+# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
+# endif
#elif defined(__clang__)
# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
#else
@@ -91,4 +116,145 @@
# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
#endif
+/**
+ * In general, libc for embedded systems does not support memory-mapped files.
+ * If the target platform is POSIX or Windows, we can map a file in memory and
+ * read it in a more efficient manner.
+ */
+#ifdef _WIN32
+# define PRISM_HAS_MMAP
+#else
+# include <unistd.h>
+# ifdef _POSIX_MAPPED_FILES
+# define PRISM_HAS_MMAP
+# endif
+#endif
+
+/**
+ * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
+ * related code from the library. All filesystem related code should be guarded
+ * by PRISM_HAS_FILESYSTEM.
+ */
+#ifndef PRISM_HAS_NO_FILESYSTEM
+# define PRISM_HAS_FILESYSTEM
+#endif
+
+/**
+ * On POSIX systems, isinf accepts a float, a double, or a long double.
+ * But mingw didn't provide an isinf macro, only an isinf function that only
+ * accepts floats, so we need to use _finite instead.
+ */
+#ifdef __MINGW64__
+ #include <float.h>
+ #define PRISM_ISINF(x) (!_finite(x))
+#else
+ #define PRISM_ISINF(x) isinf(x)
+#endif
+
+/**
+ * If you build prism with a custom allocator, configure it with
+ * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc,
+ * xrealloc, xcalloc, and xfree.
+ *
+ * For example, your `prism_xallocator.h` file could look like this:
+ *
+ * ```
+ * #ifndef PRISM_XALLOCATOR_H
+ * #define PRISM_XALLOCATOR_H
+ * #define xmalloc my_malloc
+ * #define xrealloc my_realloc
+ * #define xcalloc my_calloc
+ * #define xfree my_free
+ * #endif
+ * ```
+ */
+#ifdef PRISM_XALLOCATOR
+ #include "prism_xallocator.h"
+#else
+ #ifndef xmalloc
+ /**
+ * The malloc function that should be used. This can be overridden with
+ * the PRISM_XALLOCATOR define.
+ */
+ #define xmalloc malloc
+ #endif
+
+ #ifndef xrealloc
+ /**
+ * The realloc function that should be used. This can be overridden with
+ * the PRISM_XALLOCATOR define.
+ */
+ #define xrealloc realloc
+ #endif
+
+ #ifndef xcalloc
+ /**
+ * The calloc function that should be used. This can be overridden with
+ * the PRISM_XALLOCATOR define.
+ */
+ #define xcalloc calloc
+ #endif
+
+ #ifndef xfree
+ /**
+ * The free function that should be used. This can be overridden with the
+ * PRISM_XALLOCATOR define.
+ */
+ #define xfree free
+ #endif
+#endif
+
+/**
+ * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible
+ * switch that will turn off certain features of prism.
+ */
+#ifdef PRISM_BUILD_MINIMAL
+ /** Exclude the serialization API. */
+ #define PRISM_EXCLUDE_SERIALIZATION
+
+ /** Exclude the JSON serialization API. */
+ #define PRISM_EXCLUDE_JSON
+
+ /** Exclude the Array#pack parser API. */
+ #define PRISM_EXCLUDE_PACK
+
+ /** Exclude the prettyprint API. */
+ #define PRISM_EXCLUDE_PRETTYPRINT
+
+ /** Exclude the full set of encodings, using the minimal only. */
+ #define PRISM_ENCODING_EXCLUDE_FULL
+#endif
+
+/**
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
+ * branch prediction.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+ /** The compiler should predict that this branch will be taken. */
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
+
+ /** The compiler should predict that this branch will not be taken. */
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+ /** Void because this platform does not support branch prediction hints. */
+ #define PRISM_LIKELY(x) (x)
+
+ /** Void because this platform does not support branch prediction hints. */
+ #define PRISM_UNLIKELY(x) (x)
+#endif
+
+/**
+ * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
+ * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
+ #define PRISM_FALLTHROUGH [[fallthrough]];
+#elif defined(__GNUC__) || defined(__clang__)
+ #define PRISM_FALLTHROUGH __attribute__((fallthrough));
+#elif defined(_MSC_VER)
+ #define PRISM_FALLTHROUGH __fallthrough;
+#else
+ #define PRISM_FALLTHROUGH
+#endif
+
#endif
diff --git a/prism/diagnostic.c b/prism/diagnostic.c
deleted file mode 100644
index c779955eb3..0000000000
--- a/prism/diagnostic.c
+++ /dev/null
@@ -1,358 +0,0 @@
-#include "prism/diagnostic.h"
-
-/**
- * ## Message composition
- *
- * When composing an error message, use sentence fragments.
- *
- * Try describing the property of the code that caused the error, rather than
- * the rule that is being violated. It may help to use a fragment that completes
- * a sentence beginning, "the parser encountered (a) ...". If appropriate, add a
- * description of the rule violation (or other helpful context) after a
- * semicolon.
- *
- * For example:, instead of "control escape sequence cannot be doubled", prefer:
- *
- * > "invalid control escape sequence; control cannot be repeated"
- *
- * In some cases, where the failure is more general or syntax expectations are
- * violated, it may make more sense to use a fragment that completes a sentence
- * beginning, "the parser ...".
- *
- * For example:
- *
- * > "expected an expression after `(`"
- * > "cannot parse the expression"
- *
- * ## Message style guide
- *
- * - Use articles like "a", "an", and "the" when appropriate.
- * - e.g., prefer "cannot parse the expression" to "cannot parse expression".
- * - Use the common name for tokens and nodes.
- * - e.g., prefer "keyword splat" to "assoc splat"
- * - e.g., prefer "embedded document" to "embdoc"
- * - Do not capitalize the initial word of the message.
- * - Use back ticks around token literals
- * - e.g., "Expected a `=>` between the hash key and value"
- * - Do not use `.` or other punctuation at the end of the message.
- * - Do not use contractions like "can't". Prefer "cannot" to "can not".
- * - For tokens that can have multiple meanings, reference the token and its meaning.
- * - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
- *
- * ## Error names (PM_ERR_*)
- *
- * - When appropriate, prefer node name to token name.
- * - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
- * - Prefer token name to common name.
- * - e.g., prefer "STAR" to "ASTERISK".
- * - Try to order the words in the name from more general to more specific,
- * - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
- * - When in doubt, look for similar patterns and name them so that they are grouped when lexically
- * sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
- */
-static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
- [PM_ERR_ALIAS_ARGUMENT] = "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
- [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "unexpected `&&=` in a multiple assignment",
- [PM_ERR_ARGUMENT_AFTER_BLOCK] = "unexpected argument after a block argument",
- [PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES] = "unexpected argument after `...`",
- [PM_ERR_ARGUMENT_BARE_HASH] = "unexpected bare hash argument",
- [PM_ERR_ARGUMENT_BLOCK_FORWARDING] = "both a block argument and a forwarding argument; only one block is allowed",
- [PM_ERR_ARGUMENT_BLOCK_MULTI] = "multiple block arguments; only one block is allowed",
- [PM_ERR_ARGUMENT_FORMAL_CLASS] = "invalid formal argument; formal argument cannot be a class variable",
- [PM_ERR_ARGUMENT_FORMAL_CONSTANT] = "invalid formal argument; formal argument cannot be a constant",
- [PM_ERR_ARGUMENT_FORMAL_GLOBAL] = "invalid formal argument; formal argument cannot be a global variable",
- [PM_ERR_ARGUMENT_FORMAL_IVAR] = "invalid formal argument; formal argument cannot be an instance variable",
- [PM_ERR_ARGUMENT_FORWARDING_UNBOUND] = "unexpected `...` in an non-parenthesized call",
- [PM_ERR_ARGUMENT_IN] = "unexpected `in` keyword in arguments",
- [PM_ERR_ARGUMENT_NO_FORWARDING_AMP] = "unexpected `&` when the parent method is not forwarding",
- [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES] = "unexpected `...` when the parent method is not forwarding",
- [PM_ERR_ARGUMENT_NO_FORWARDING_STAR] = "unexpected `*` when the parent method is not forwarding",
- [PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT] = "unexpected `*` splat argument after a `**` keyword splat argument",
- [PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT] = "unexpected `*` splat argument after a `*` splat argument",
- [PM_ERR_ARGUMENT_TERM_PAREN] = "expected a `)` to close the arguments",
- [PM_ERR_ARGUMENT_UNEXPECTED_BLOCK] = "unexpected `{` after a method call without parenthesis",
- [PM_ERR_ARRAY_ELEMENT] = "expected an element for the array",
- [PM_ERR_ARRAY_EXPRESSION] = "expected an expression for the array element",
- [PM_ERR_ARRAY_EXPRESSION_AFTER_STAR] = "expected an expression after `*` in the array",
- [PM_ERR_ARRAY_SEPARATOR] = "expected a `,` separator for the array elements",
- [PM_ERR_ARRAY_TERM] = "expected a `]` to close the array",
- [PM_ERR_BEGIN_LONELY_ELSE] = "unexpected `else` in `begin` block; a `rescue` clause must precede `else`",
- [PM_ERR_BEGIN_TERM] = "expected an `end` to close the `begin` statement",
- [PM_ERR_BEGIN_UPCASE_BRACE] = "expected a `{` after `BEGIN`",
- [PM_ERR_BEGIN_UPCASE_TERM] = "expected a `}` to close the `BEGIN` statement",
- [PM_ERR_BEGIN_UPCASE_TOPLEVEL] = "BEGIN is permitted only at toplevel",
- [PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE] = "expected a local variable name in the block parameters",
- [PM_ERR_BLOCK_PARAM_PIPE_TERM] = "expected the block parameters to end with `|`",
- [PM_ERR_BLOCK_TERM_BRACE] = "expected a block beginning with `{` to end with `}`",
- [PM_ERR_BLOCK_TERM_END] = "expected a block beginning with `do` to end with `end`",
- [PM_ERR_CANNOT_PARSE_EXPRESSION] = "cannot parse the expression",
- [PM_ERR_CANNOT_PARSE_STRING_PART] = "cannot parse the string part",
- [PM_ERR_CASE_EXPRESSION_AFTER_CASE] = "expected an expression after `case`",
- [PM_ERR_CASE_EXPRESSION_AFTER_WHEN] = "expected an expression after `when`",
- [PM_ERR_CASE_MATCH_MISSING_PREDICATE] = "expected a predicate for a case matching statement",
- [PM_ERR_CASE_MISSING_CONDITIONS] = "expected a `when` or `in` clause after `case`",
- [PM_ERR_CASE_TERM] = "expected an `end` to close the `case` statement",
- [PM_ERR_CLASS_IN_METHOD] = "unexpected class definition in a method definition",
- [PM_ERR_CLASS_NAME] = "expected a constant name after `class`",
- [PM_ERR_CLASS_SUPERCLASS] = "expected a superclass after `<`",
- [PM_ERR_CLASS_TERM] = "expected an `end` to close the `class` statement",
- [PM_ERR_CLASS_UNEXPECTED_END] = "unexpected `end`, expecting ';' or '\\n'",
- [PM_ERR_CONDITIONAL_ELSIF_PREDICATE] = "expected a predicate expression for the `elsif` statement",
- [PM_ERR_CONDITIONAL_IF_PREDICATE] = "expected a predicate expression for the `if` statement",
- [PM_ERR_CONDITIONAL_PREDICATE_TERM] = "expected `then` or `;` or '\\n'",
- [PM_ERR_CONDITIONAL_TERM] = "expected an `end` to close the conditional clause",
- [PM_ERR_CONDITIONAL_TERM_ELSE] = "expected an `end` to close the `else` clause",
- [PM_ERR_CONDITIONAL_UNLESS_PREDICATE] = "expected a predicate expression for the `unless` statement",
- [PM_ERR_CONDITIONAL_UNTIL_PREDICATE] = "expected a predicate expression for the `until` statement",
- [PM_ERR_CONDITIONAL_WHILE_PREDICATE] = "expected a predicate expression for the `while` statement",
- [PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = "expected a constant after the `::` operator",
- [PM_ERR_DEF_ENDLESS] = "could not parse the endless method body",
- [PM_ERR_DEF_ENDLESS_SETTER] = "invalid method name; a setter method cannot be defined in an endless method definition",
- [PM_ERR_DEF_NAME] = "expected a method name",
- [PM_ERR_DEF_NAME_AFTER_RECEIVER] = "expected a method name after the receiver",
- [PM_ERR_DEF_PARAMS_TERM] = "expected a delimiter to close the parameters",
- [PM_ERR_DEF_PARAMS_TERM_PAREN] = "expected a `)` to close the parameters",
- [PM_ERR_DEF_RECEIVER] = "expected a receiver for the method definition",
- [PM_ERR_DEF_RECEIVER_TERM] = "expected a `.` or `::` after the receiver in a method definition",
- [PM_ERR_DEF_TERM] = "expected an `end` to close the `def` statement",
- [PM_ERR_DEFINED_EXPRESSION] = "expected an expression after `defined?`",
- [PM_ERR_EMBDOC_TERM] = "could not find a terminator for the embedded document",
- [PM_ERR_EMBEXPR_END] = "expected a `}` to close the embedded expression",
- [PM_ERR_EMBVAR_INVALID] = "invalid embedded variable",
- [PM_ERR_END_UPCASE_BRACE] = "expected a `{` after `END`",
- [PM_ERR_END_UPCASE_TERM] = "expected a `}` to close the `END` statement",
- [PM_ERR_ESCAPE_INVALID_CONTROL] = "invalid control escape sequence",
- [PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT] = "invalid control escape sequence; control cannot be repeated",
- [PM_ERR_ESCAPE_INVALID_HEXADECIMAL] = "invalid hexadecimal escape sequence",
- [PM_ERR_ESCAPE_INVALID_META] = "invalid meta escape sequence",
- [PM_ERR_ESCAPE_INVALID_META_REPEAT] = "invalid meta escape sequence; meta cannot be repeated",
- [PM_ERR_ESCAPE_INVALID_UNICODE] = "invalid Unicode escape sequence",
- [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags",
- [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = "invalid Unicode escape sequence; multiple codepoints are not allowed in a character literal",
- [PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = "invalid Unicode escape sequence; maximum length is 6 digits",
- [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = "invalid Unicode escape sequence; needs closing `}`",
- [PM_ERR_EXPECT_ARGUMENT] = "expected an argument",
- [PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = "expected a newline or semicolon after the statement",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = "expected an expression after `&&=`",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = "expected an expression after `||=`",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = "expected an expression after `,`",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL] = "expected an expression after `=`",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS] = "expected an expression after `<<`",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN] = "expected an expression after `(`",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR] = "expected an expression after the operator",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT] = "expected an expression after `*` splat in an argument",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH] = "expected an expression after `**` in a hash",
- [PM_ERR_EXPECT_EXPRESSION_AFTER_STAR] = "expected an expression after `*`",
- [PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = "expected an identifier for the required parameter",
- [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = "expected a `(` to start a required parameter",
- [PM_ERR_EXPECT_RBRACKET] = "expected a matching `]`",
- [PM_ERR_EXPECT_RPAREN] = "expected a matching `)`",
- [PM_ERR_EXPECT_RPAREN_AFTER_MULTI] = "expected a `)` after multiple assignment",
- [PM_ERR_EXPECT_RPAREN_REQ_PARAMETER] = "expected a `)` to end a required parameter",
- [PM_ERR_EXPECT_STRING_CONTENT] = "expected string content after opening string delimiter",
- [PM_ERR_EXPECT_WHEN_DELIMITER] = "expected a delimiter after the predicates of a `when` clause",
- [PM_ERR_EXPRESSION_BARE_HASH] = "unexpected bare hash in expression",
- [PM_ERR_FOR_COLLECTION] = "expected a collection after the `in` in a `for` statement",
- [PM_ERR_FOR_INDEX] = "expected an index after `for`",
- [PM_ERR_FOR_IN] = "expected an `in` after the index in a `for` statement",
- [PM_ERR_FOR_TERM] = "expected an `end` to close the `for` loop",
- [PM_ERR_HASH_EXPRESSION_AFTER_LABEL] = "expected an expression after the label in a hash",
- [PM_ERR_HASH_KEY] = "expected a key in the hash literal",
- [PM_ERR_HASH_ROCKET] = "expected a `=>` between the hash key and value",
- [PM_ERR_HASH_TERM] = "expected a `}` to close the hash literal",
- [PM_ERR_HASH_VALUE] = "expected a value in the hash literal",
- [PM_ERR_HEREDOC_TERM] = "could not find a terminator for the heredoc",
- [PM_ERR_INCOMPLETE_QUESTION_MARK] = "incomplete expression at `?`",
- [PM_ERR_INCOMPLETE_VARIABLE_CLASS] = "incomplete class variable",
- [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = "incomplete instance variable",
- [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = "unknown or invalid encoding in the magic comment",
- [PM_ERR_INVALID_FLOAT_EXPONENT] = "invalid exponent",
- [PM_ERR_INVALID_NUMBER_BINARY] = "invalid binary number",
- [PM_ERR_INVALID_NUMBER_DECIMAL] = "invalid decimal number",
- [PM_ERR_INVALID_NUMBER_HEXADECIMAL] = "invalid hexadecimal number",
- [PM_ERR_INVALID_NUMBER_OCTAL] = "invalid octal number",
- [PM_ERR_INVALID_NUMBER_UNDERSCORE] = "invalid underscore placement in number",
- [PM_ERR_INVALID_PERCENT] = "invalid `%` token", // TODO WHAT?
- [PM_ERR_INVALID_TOKEN] = "invalid token", // TODO WHAT?
- [PM_ERR_INVALID_VARIABLE_GLOBAL] = "invalid global variable",
- [PM_ERR_LAMBDA_OPEN] = "expected a `do` keyword or a `{` to open the lambda block",
- [PM_ERR_LAMBDA_TERM_BRACE] = "expected a lambda block beginning with `{` to end with `}`",
- [PM_ERR_LAMBDA_TERM_END] = "expected a lambda block beginning with `do` to end with `end`",
- [PM_ERR_LIST_I_LOWER_ELEMENT] = "expected a symbol in a `%i` list",
- [PM_ERR_LIST_I_LOWER_TERM] = "expected a closing delimiter for the `%i` list",
- [PM_ERR_LIST_I_UPPER_ELEMENT] = "expected a symbol in a `%I` list",
- [PM_ERR_LIST_I_UPPER_TERM] = "expected a closing delimiter for the `%I` list",
- [PM_ERR_LIST_W_LOWER_ELEMENT] = "expected a string in a `%w` list",
- [PM_ERR_LIST_W_LOWER_TERM] = "expected a closing delimiter for the `%w` list",
- [PM_ERR_LIST_W_UPPER_ELEMENT] = "expected a string in a `%W` list",
- [PM_ERR_LIST_W_UPPER_TERM] = "expected a closing delimiter for the `%W` list",
- [PM_ERR_MALLOC_FAILED] = "failed to allocate memory",
- [PM_ERR_MIXED_ENCODING] = "UTF-8 mixed within %s source",
- [PM_ERR_MODULE_IN_METHOD] = "unexpected module definition in a method definition",
- [PM_ERR_MODULE_NAME] = "expected a constant name after `module`",
- [PM_ERR_MODULE_TERM] = "expected an `end` to close the `module` statement",
- [PM_ERR_MULTI_ASSIGN_MULTI_SPLATS] = "multiple splats in multiple assignment",
- [PM_ERR_NOT_EXPRESSION] = "expected an expression after `not`",
- [PM_ERR_NO_LOCAL_VARIABLE] = "%.*s: no such local variable",
- [PM_ERR_NUMBER_LITERAL_UNDERSCORE] = "number literal ending with a `_`",
- [PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED] = "numbered parameters are not allowed when an ordinary parameter is defined",
- [PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE] = "numbered parameter is already used in outer scope",
- [PM_ERR_OPERATOR_MULTI_ASSIGN] = "unexpected operator for a multiple assignment",
- [PM_ERR_OPERATOR_WRITE_ARGUMENTS] = "unexpected operator after a call with arguments",
- [PM_ERR_OPERATOR_WRITE_BLOCK] = "unexpected operator after a call with a block",
- [PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI] = "unexpected multiple `**` splat parameters",
- [PM_ERR_PARAMETER_BLOCK_MULTI] = "multiple block parameters; only one block is allowed",
- [PM_ERR_PARAMETER_CIRCULAR] = "parameter default value references itself",
- [PM_ERR_PARAMETER_METHOD_NAME] = "unexpected name for a parameter",
- [PM_ERR_PARAMETER_NAME_REPEAT] = "repeated parameter name",
- [PM_ERR_PARAMETER_NO_DEFAULT] = "expected a default value for the parameter",
- [PM_ERR_PARAMETER_NO_DEFAULT_KW] = "expected a default value for the keyword parameter",
- [PM_ERR_PARAMETER_NUMBERED_RESERVED] = "%.2s is reserved for numbered parameters",
- [PM_ERR_PARAMETER_ORDER] = "unexpected parameter order",
- [PM_ERR_PARAMETER_SPLAT_MULTI] = "unexpected multiple `*` splat parameters",
- [PM_ERR_PARAMETER_STAR] = "unexpected parameter `*`",
- [PM_ERR_PARAMETER_UNEXPECTED_FWD] = "unexpected `...` in parameters",
- [PM_ERR_PARAMETER_WILD_LOOSE_COMMA] = "unexpected `,` in parameters",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = "expected a pattern expression after the `[` operator",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = "expected a pattern expression after `,`",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = "expected a pattern expression after `=>`",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_IN] = "expected a pattern expression after the `in` keyword",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_KEY] = "expected a pattern expression after the key",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN] = "expected a pattern expression after the `(` operator",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_PIN] = "expected a pattern expression after the `^` pin operator",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE] = "expected a pattern expression after the `|` operator",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE] = "expected a pattern expression after the range operator",
- [PM_ERR_PATTERN_EXPRESSION_AFTER_REST] = "unexpected pattern expression after the `**` expression",
- [PM_ERR_PATTERN_HASH_KEY] = "expected a key in the hash pattern",
- [PM_ERR_PATTERN_HASH_KEY_LABEL] = "expected a label as the key in the hash pattern", // TODO // THIS // AND // ABOVE // IS WEIRD
- [PM_ERR_PATTERN_IDENT_AFTER_HROCKET] = "expected an identifier after the `=>` operator",
- [PM_ERR_PATTERN_LABEL_AFTER_COMMA] = "expected a label after the `,` in the hash pattern",
- [PM_ERR_PATTERN_REST] = "unexpected rest pattern",
- [PM_ERR_PATTERN_TERM_BRACE] = "expected a `}` to close the pattern expression",
- [PM_ERR_PATTERN_TERM_BRACKET] = "expected a `]` to close the pattern expression",
- [PM_ERR_PATTERN_TERM_PAREN] = "expected a `)` to close the pattern expression",
- [PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN] = "unexpected `||=` in a multiple assignment",
- [PM_ERR_REGEXP_TERM] = "expected a closing delimiter for the regular expression",
- [PM_ERR_RESCUE_EXPRESSION] = "expected a rescued expression",
- [PM_ERR_RESCUE_MODIFIER_VALUE] = "expected a value after the `rescue` modifier",
- [PM_ERR_RESCUE_TERM] = "expected a closing delimiter for the `rescue` clause",
- [PM_ERR_RESCUE_VARIABLE] = "expected an exception variable after `=>` in a rescue statement",
- [PM_ERR_RETURN_INVALID] = "invalid `return` in a class or module body",
- [PM_ERR_STATEMENT_ALIAS] = "unexpected an `alias` at a non-statement position",
- [PM_ERR_STATEMENT_POSTEXE_END] = "unexpected an `END` at a non-statement position",
- [PM_ERR_STATEMENT_PREEXE_BEGIN] = "unexpected a `BEGIN` at a non-statement position",
- [PM_ERR_STATEMENT_UNDEF] = "unexpected an `undef` at a non-statement position",
- [PM_ERR_STRING_CONCATENATION] = "expected a string for concatenation",
- [PM_ERR_STRING_INTERPOLATED_TERM] = "expected a closing delimiter for the interpolated string",
- [PM_ERR_STRING_LITERAL_TERM] = "expected a closing delimiter for the string literal",
- [PM_ERR_SYMBOL_INVALID] = "invalid symbol", // TODO expected symbol? prism.c ~9719
- [PM_ERR_SYMBOL_TERM_DYNAMIC] = "expected a closing delimiter for the dynamic symbol",
- [PM_ERR_SYMBOL_TERM_INTERPOLATED] = "expected a closing delimiter for the interpolated symbol",
- [PM_ERR_TERNARY_COLON] = "expected a `:` after the true expression of a ternary operator",
- [PM_ERR_TERNARY_EXPRESSION_FALSE] = "expected an expression after `:` in the ternary operator",
- [PM_ERR_TERNARY_EXPRESSION_TRUE] = "expected an expression after `?` in the ternary operator",
- [PM_ERR_UNDEF_ARGUMENT] = "invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument",
- [PM_ERR_UNARY_RECEIVER_BANG] = "expected a receiver for unary `!`",
- [PM_ERR_UNARY_RECEIVER_MINUS] = "expected a receiver for unary `-`",
- [PM_ERR_UNARY_RECEIVER_PLUS] = "expected a receiver for unary `+`",
- [PM_ERR_UNARY_RECEIVER_TILDE] = "expected a receiver for unary `~`",
- [PM_ERR_UNTIL_TERM] = "expected an `end` to close the `until` statement",
- [PM_ERR_VOID_EXPRESSION] = "unexpected void value expression",
- [PM_ERR_WHILE_TERM] = "expected an `end` to close the `while` statement",
- [PM_ERR_WRITE_TARGET_IN_METHOD] = "dynamic constant assignment",
- [PM_ERR_WRITE_TARGET_READONLY] = "immutable variable as a write target",
- [PM_ERR_WRITE_TARGET_UNEXPECTED] = "unexpected write target",
- [PM_ERR_XSTRING_TERM] = "expected a closing delimiter for the `%x` or backtick string",
- [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = "ambiguous first argument; put parentheses or a space even after `-` operator",
- [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = "ambiguous first argument; put parentheses or a space even after `+` operator",
- [PM_WARN_AMBIGUOUS_PREFIX_STAR] = "ambiguous `*` has been interpreted as an argument prefix",
- [PM_WARN_AMBIGUOUS_SLASH] = "ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator",
- [PM_WARN_END_IN_METHOD] = "END in method; use at_exit",
-};
-
-static const char*
-pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
- assert(diag_id < PM_DIAGNOSTIC_ID_LEN);
-
- const char *message = diagnostic_messages[diag_id];
- assert(message);
-
- return message;
-}
-
-/**
- * Append an error to the given list of diagnostic.
- */
-bool
-pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
- pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
- if (diagnostic == NULL) return false;
-
- *diagnostic = (pm_diagnostic_t) {
- .location = { start, end },
- .message = pm_diagnostic_message(diag_id),
- .owned = false
- };
-
- pm_list_append(list, (pm_list_node_t *) diagnostic);
- return true;
-}
-
-/**
- * Append a diagnostic to the given list of diagnostics that is using a format
- * string for its message.
- */
-bool
-pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...) {
- va_list arguments;
- va_start(arguments, diag_id);
-
- const char *format = pm_diagnostic_message(diag_id);
- int result = vsnprintf(NULL, 0, format, arguments);
- va_end(arguments);
-
- if (result < 0) {
- return false;
- }
-
- pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
- if (diagnostic == NULL) {
- return false;
- }
-
- size_t length = (size_t) (result + 1);
- char *message = (char *) malloc(length);
- if (message == NULL) {
- free(diagnostic);
- return false;
- }
-
- va_start(arguments, diag_id);
- vsnprintf(message, length, format, arguments);
- va_end(arguments);
-
- *diagnostic = (pm_diagnostic_t) {
- .location = { start, end },
- .message = message,
- .owned = true
- };
-
- pm_list_append(list, (pm_list_node_t *) diagnostic);
- return true;
-}
-
-/**
- * Deallocate the internal state of the given diagnostic list.
- */
-void
-pm_diagnostic_list_free(pm_list_t *list) {
- pm_list_node_t *node, *next;
-
- for (node = list->head; node != NULL; node = next) {
- next = node->next;
- pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) node;
-
- if (diagnostic->owned) free((void *) diagnostic->message);
- free(diagnostic);
- }
-}
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
deleted file mode 100644
index da430b5438..0000000000
--- a/prism/diagnostic.h
+++ /dev/null
@@ -1,300 +0,0 @@
-/**
- * @file diagnostic.h
- *
- * A list of diagnostics generated during parsing.
- */
-#ifndef PRISM_DIAGNOSTIC_H
-#define PRISM_DIAGNOSTIC_H
-
-#include "prism/ast.h"
-#include "prism/defines.h"
-#include "prism/util/pm_list.h"
-
-#include <stdbool.h>
-#include <stdlib.h>
-#include <assert.h>
-
-/**
- * This struct represents a diagnostic generated during parsing.
- *
- * @extends pm_list_node_t
- */
-typedef struct {
- /** The embedded base node. */
- pm_list_node_t node;
-
- /** The location of the diagnostic in the source. */
- pm_location_t location;
-
- /** The message associated with the diagnostic. */
- const char *message;
-
- /**
- * Whether or not the memory related to the message of this diagnostic is
- * owned by this diagnostic. If it is, it needs to be freed when the
- * diagnostic is freed.
- */
- bool owned;
-} pm_diagnostic_t;
-
-/**
- * The diagnostic IDs of all of the diagnostics, used to communicate the types
- * of errors between the parser and the user.
- */
-typedef enum {
- PM_ERR_ALIAS_ARGUMENT,
- PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
- PM_ERR_ARGUMENT_AFTER_BLOCK,
- PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
- PM_ERR_ARGUMENT_BARE_HASH,
- PM_ERR_ARGUMENT_BLOCK_FORWARDING,
- PM_ERR_ARGUMENT_BLOCK_MULTI,
- PM_ERR_ARGUMENT_FORMAL_CLASS,
- PM_ERR_ARGUMENT_FORMAL_CONSTANT,
- PM_ERR_ARGUMENT_FORMAL_GLOBAL,
- PM_ERR_ARGUMENT_FORMAL_IVAR,
- PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
- PM_ERR_ARGUMENT_IN,
- PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
- PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
- PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
- PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
- PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
- PM_ERR_ARGUMENT_TERM_PAREN,
- PM_ERR_ARGUMENT_UNEXPECTED_BLOCK,
- PM_ERR_ARRAY_ELEMENT,
- PM_ERR_ARRAY_EXPRESSION,
- PM_ERR_ARRAY_EXPRESSION_AFTER_STAR,
- PM_ERR_ARRAY_SEPARATOR,
- PM_ERR_ARRAY_TERM,
- PM_ERR_BEGIN_LONELY_ELSE,
- PM_ERR_BEGIN_TERM,
- PM_ERR_BEGIN_UPCASE_BRACE,
- PM_ERR_BEGIN_UPCASE_TERM,
- PM_ERR_BEGIN_UPCASE_TOPLEVEL,
- PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
- PM_ERR_BLOCK_PARAM_PIPE_TERM,
- PM_ERR_BLOCK_TERM_BRACE,
- PM_ERR_BLOCK_TERM_END,
- PM_ERR_CANNOT_PARSE_EXPRESSION,
- PM_ERR_CANNOT_PARSE_STRING_PART,
- PM_ERR_CASE_EXPRESSION_AFTER_CASE,
- PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
- PM_ERR_CASE_MATCH_MISSING_PREDICATE,
- PM_ERR_CASE_MISSING_CONDITIONS,
- PM_ERR_CASE_TERM,
- PM_ERR_CLASS_IN_METHOD,
- PM_ERR_CLASS_NAME,
- PM_ERR_CLASS_SUPERCLASS,
- PM_ERR_CLASS_TERM,
- PM_ERR_CLASS_UNEXPECTED_END,
- PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
- PM_ERR_CONDITIONAL_IF_PREDICATE,
- PM_ERR_CONDITIONAL_PREDICATE_TERM,
- PM_ERR_CONDITIONAL_TERM,
- PM_ERR_CONDITIONAL_TERM_ELSE,
- PM_ERR_CONDITIONAL_UNLESS_PREDICATE,
- PM_ERR_CONDITIONAL_UNTIL_PREDICATE,
- PM_ERR_CONDITIONAL_WHILE_PREDICATE,
- PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
- PM_ERR_DEF_ENDLESS,
- PM_ERR_DEF_ENDLESS_SETTER,
- PM_ERR_DEF_NAME,
- PM_ERR_DEF_NAME_AFTER_RECEIVER,
- PM_ERR_DEF_PARAMS_TERM,
- PM_ERR_DEF_PARAMS_TERM_PAREN,
- PM_ERR_DEF_RECEIVER,
- PM_ERR_DEF_RECEIVER_TERM,
- PM_ERR_DEF_TERM,
- PM_ERR_DEFINED_EXPRESSION,
- PM_ERR_EMBDOC_TERM,
- PM_ERR_EMBEXPR_END,
- PM_ERR_EMBVAR_INVALID,
- PM_ERR_END_UPCASE_BRACE,
- PM_ERR_END_UPCASE_TERM,
- PM_ERR_ESCAPE_INVALID_CONTROL,
- PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
- PM_ERR_ESCAPE_INVALID_HEXADECIMAL,
- PM_ERR_ESCAPE_INVALID_META,
- PM_ERR_ESCAPE_INVALID_META_REPEAT,
- PM_ERR_ESCAPE_INVALID_UNICODE,
- PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
- PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
- PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
- PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
- PM_ERR_EXPECT_ARGUMENT,
- PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
- PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
- PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
- PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
- PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
- PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
- PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
- PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
- PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
- PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
- PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
- PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
- PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
- PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
- PM_ERR_EXPECT_RBRACKET,
- PM_ERR_EXPECT_RPAREN,
- PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
- PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
- PM_ERR_EXPECT_STRING_CONTENT,
- PM_ERR_EXPECT_WHEN_DELIMITER,
- PM_ERR_EXPRESSION_BARE_HASH,
- PM_ERR_FOR_COLLECTION,
- PM_ERR_FOR_IN,
- PM_ERR_FOR_INDEX,
- PM_ERR_FOR_TERM,
- PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
- PM_ERR_HASH_KEY,
- PM_ERR_HASH_ROCKET,
- PM_ERR_HASH_TERM,
- PM_ERR_HASH_VALUE,
- PM_ERR_HEREDOC_TERM,
- PM_ERR_INCOMPLETE_QUESTION_MARK,
- PM_ERR_INCOMPLETE_VARIABLE_CLASS,
- PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
- PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
- PM_ERR_INVALID_FLOAT_EXPONENT,
- PM_ERR_INVALID_NUMBER_BINARY,
- PM_ERR_INVALID_NUMBER_DECIMAL,
- PM_ERR_INVALID_NUMBER_HEXADECIMAL,
- PM_ERR_INVALID_NUMBER_OCTAL,
- PM_ERR_INVALID_NUMBER_UNDERSCORE,
- PM_ERR_INVALID_PERCENT,
- PM_ERR_INVALID_TOKEN,
- PM_ERR_INVALID_VARIABLE_GLOBAL,
- PM_ERR_LAMBDA_OPEN,
- PM_ERR_LAMBDA_TERM_BRACE,
- PM_ERR_LAMBDA_TERM_END,
- PM_ERR_LIST_I_LOWER_ELEMENT,
- PM_ERR_LIST_I_LOWER_TERM,
- PM_ERR_LIST_I_UPPER_ELEMENT,
- PM_ERR_LIST_I_UPPER_TERM,
- PM_ERR_LIST_W_LOWER_ELEMENT,
- PM_ERR_LIST_W_LOWER_TERM,
- PM_ERR_LIST_W_UPPER_ELEMENT,
- PM_ERR_LIST_W_UPPER_TERM,
- PM_ERR_MALLOC_FAILED,
- PM_ERR_MIXED_ENCODING,
- PM_ERR_MODULE_IN_METHOD,
- PM_ERR_MODULE_NAME,
- PM_ERR_MODULE_TERM,
- PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
- PM_ERR_NOT_EXPRESSION,
- PM_ERR_NO_LOCAL_VARIABLE,
- PM_ERR_NUMBER_LITERAL_UNDERSCORE,
- PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
- PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
- PM_ERR_OPERATOR_MULTI_ASSIGN,
- PM_ERR_OPERATOR_WRITE_ARGUMENTS,
- PM_ERR_OPERATOR_WRITE_BLOCK,
- PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
- PM_ERR_PARAMETER_BLOCK_MULTI,
- PM_ERR_PARAMETER_CIRCULAR,
- PM_ERR_PARAMETER_METHOD_NAME,
- PM_ERR_PARAMETER_NAME_REPEAT,
- PM_ERR_PARAMETER_NO_DEFAULT,
- PM_ERR_PARAMETER_NO_DEFAULT_KW,
- PM_ERR_PARAMETER_NUMBERED_RESERVED,
- PM_ERR_PARAMETER_ORDER,
- PM_ERR_PARAMETER_SPLAT_MULTI,
- PM_ERR_PARAMETER_STAR,
- PM_ERR_PARAMETER_UNEXPECTED_FWD,
- PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
- PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
- PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
- PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
- PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
- PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
- PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
- PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
- PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
- PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
- PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
- PM_ERR_PATTERN_HASH_KEY,
- PM_ERR_PATTERN_HASH_KEY_LABEL,
- PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
- PM_ERR_PATTERN_LABEL_AFTER_COMMA,
- PM_ERR_PATTERN_REST,
- PM_ERR_PATTERN_TERM_BRACE,
- PM_ERR_PATTERN_TERM_BRACKET,
- PM_ERR_PATTERN_TERM_PAREN,
- PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
- PM_ERR_REGEXP_TERM,
- PM_ERR_RESCUE_EXPRESSION,
- PM_ERR_RESCUE_MODIFIER_VALUE,
- PM_ERR_RESCUE_TERM,
- PM_ERR_RESCUE_VARIABLE,
- PM_ERR_RETURN_INVALID,
- PM_ERR_STATEMENT_ALIAS,
- PM_ERR_STATEMENT_POSTEXE_END,
- PM_ERR_STATEMENT_PREEXE_BEGIN,
- PM_ERR_STATEMENT_UNDEF,
- PM_ERR_STRING_CONCATENATION,
- PM_ERR_STRING_INTERPOLATED_TERM,
- PM_ERR_STRING_LITERAL_TERM,
- PM_ERR_SYMBOL_INVALID,
- PM_ERR_SYMBOL_TERM_DYNAMIC,
- PM_ERR_SYMBOL_TERM_INTERPOLATED,
- PM_ERR_TERNARY_COLON,
- PM_ERR_TERNARY_EXPRESSION_FALSE,
- PM_ERR_TERNARY_EXPRESSION_TRUE,
- PM_ERR_UNARY_RECEIVER_BANG,
- PM_ERR_UNARY_RECEIVER_MINUS,
- PM_ERR_UNARY_RECEIVER_PLUS,
- PM_ERR_UNARY_RECEIVER_TILDE,
- PM_ERR_UNDEF_ARGUMENT,
- PM_ERR_UNTIL_TERM,
- PM_ERR_VOID_EXPRESSION,
- PM_ERR_WHILE_TERM,
- PM_ERR_WRITE_TARGET_IN_METHOD,
- PM_ERR_WRITE_TARGET_READONLY,
- PM_ERR_WRITE_TARGET_UNEXPECTED,
- PM_ERR_XSTRING_TERM,
- PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
- PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
- PM_WARN_AMBIGUOUS_PREFIX_STAR,
- PM_WARN_AMBIGUOUS_SLASH,
- PM_WARN_END_IN_METHOD,
-
- /* This must be the last member. */
- PM_DIAGNOSTIC_ID_LEN,
-} pm_diagnostic_id_t;
-
-/**
- * Append a diagnostic to the given list of diagnostics that is using shared
- * memory for its message.
- *
- * @param list The list to append to.
- * @param start The start of the diagnostic.
- * @param end The end of the diagnostic.
- * @param diag_id The diagnostic ID.
- * @return Whether the diagnostic was successfully appended.
- */
-bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
-
-/**
- * Append a diagnostic to the given list of diagnostics that is using a format
- * string for its message.
- *
- * @param list The list to append to.
- * @param start The start of the diagnostic.
- * @param end The end of the diagnostic.
- * @param diag_id The diagnostic ID.
- * @param ... The arguments to the format string for the message.
- * @return Whether the diagnostic was successfully appended.
- */
-bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
-
-/**
- * Deallocate the internal state of the given diagnostic list.
- *
- * @param list The list to deallocate.
- */
-void pm_diagnostic_list_free(pm_list_t *list);
-
-#endif
diff --git a/prism/encoding.c b/prism/encoding.c
index 2210d71411..d7e5616840 100644
--- a/prism/encoding.c
+++ b/prism/encoding.c
@@ -2,7 +2,7 @@
typedef uint32_t pm_unicode_codepoint_t;
-#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
+#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1508
static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
0x100, 0x2C1,
0x2C6, 0x2D1,
@@ -10,7 +10,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x2EC, 0x2EC,
0x2EE, 0x2EE,
0x345, 0x345,
- 0x370, 0x374,
+ 0x363, 0x374,
0x376, 0x377,
0x37A, 0x37D,
0x37F, 0x37F,
@@ -50,7 +50,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x840, 0x858,
0x860, 0x86A,
0x870, 0x887,
- 0x889, 0x88E,
+ 0x889, 0x88F,
+ 0x897, 0x897,
0x8A0, 0x8C9,
0x8D4, 0x8DF,
0x8E3, 0x8E9,
@@ -140,7 +141,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0xC4A, 0xC4C,
0xC55, 0xC56,
0xC58, 0xC5A,
- 0xC5D, 0xC5D,
+ 0xC5C, 0xC5D,
0xC60, 0xC63,
0xC80, 0xC83,
0xC85, 0xC8C,
@@ -152,7 +153,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0xCC6, 0xCC8,
0xCCA, 0xCCC,
0xCD5, 0xCD6,
- 0xCDD, 0xCDE,
+ 0xCDC, 0xCDE,
0xCE0, 0xCE3,
0xCF1, 0xCF3,
0xD00, 0xD0C,
@@ -264,7 +265,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x1C00, 0x1C36,
0x1C4D, 0x1C4F,
0x1C5A, 0x1C7D,
- 0x1C80, 0x1C88,
+ 0x1C80, 0x1C8A,
0x1C90, 0x1CBA,
0x1CBD, 0x1CBF,
0x1CE9, 0x1CEC,
@@ -272,7 +273,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x1CF5, 0x1CF6,
0x1CFA, 0x1CFA,
0x1D00, 0x1DBF,
- 0x1DE7, 0x1DF4,
+ 0x1DD3, 0x1DF4,
0x1E00, 0x1F15,
0x1F18, 0x1F1D,
0x1F20, 0x1F45,
@@ -352,11 +353,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0xA67F, 0xA6EF,
0xA717, 0xA71F,
0xA722, 0xA788,
- 0xA78B, 0xA7CA,
- 0xA7D0, 0xA7D1,
- 0xA7D3, 0xA7D3,
- 0xA7D5, 0xA7D9,
- 0xA7F2, 0xA805,
+ 0xA78B, 0xA7DC,
+ 0xA7F1, 0xA805,
0xA807, 0xA827,
0xA840, 0xA873,
0xA880, 0xA8C3,
@@ -446,6 +444,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x105A3, 0x105B1,
0x105B3, 0x105B9,
0x105BB, 0x105BC,
+ 0x105C0, 0x105F3,
0x10600, 0x10736,
0x10740, 0x10755,
0x10760, 0x10767,
@@ -464,6 +463,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x108F4, 0x108F5,
0x10900, 0x10915,
0x10920, 0x10939,
+ 0x10940, 0x10959,
0x10980, 0x109B7,
0x109BE, 0x109BF,
0x10A00, 0x10A03,
@@ -483,9 +483,14 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x10C80, 0x10CB2,
0x10CC0, 0x10CF2,
0x10D00, 0x10D27,
+ 0x10D4A, 0x10D65,
+ 0x10D69, 0x10D69,
+ 0x10D6F, 0x10D85,
0x10E80, 0x10EA9,
0x10EAB, 0x10EAC,
0x10EB0, 0x10EB1,
+ 0x10EC2, 0x10EC7,
+ 0x10EFA, 0x10EFC,
0x10F00, 0x10F1C,
0x10F27, 0x10F27,
0x10F30, 0x10F45,
@@ -529,6 +534,17 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x11350, 0x11350,
0x11357, 0x11357,
0x1135D, 0x11363,
+ 0x11380, 0x11389,
+ 0x1138B, 0x1138B,
+ 0x1138E, 0x1138E,
+ 0x11390, 0x113B5,
+ 0x113B7, 0x113C0,
+ 0x113C2, 0x113C2,
+ 0x113C5, 0x113C5,
+ 0x113C7, 0x113CA,
+ 0x113CC, 0x113CD,
+ 0x113D1, 0x113D1,
+ 0x113D3, 0x113D3,
0x11400, 0x11441,
0x11443, 0x11445,
0x11447, 0x1144A,
@@ -567,6 +583,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x11A50, 0x11A97,
0x11A9D, 0x11A9D,
0x11AB0, 0x11AF8,
+ 0x11B60, 0x11B67,
+ 0x11BC0, 0x11BE0,
0x11C00, 0x11C08,
0x11C0A, 0x11C36,
0x11C38, 0x11C3E,
@@ -588,6 +606,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x11D90, 0x11D91,
0x11D93, 0x11D96,
0x11D98, 0x11D98,
+ 0x11DB0, 0x11DDB,
0x11EE0, 0x11EF6,
0x11F00, 0x11F10,
0x11F12, 0x11F3A,
@@ -599,7 +618,9 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x12F90, 0x12FF0,
0x13000, 0x1342F,
0x13441, 0x13446,
+ 0x13460, 0x143FA,
0x14400, 0x14646,
+ 0x16100, 0x1612E,
0x16800, 0x16A38,
0x16A40, 0x16A5E,
0x16A70, 0x16ABE,
@@ -608,16 +629,19 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x16B40, 0x16B43,
0x16B63, 0x16B77,
0x16B7D, 0x16B8F,
+ 0x16D40, 0x16D6C,
0x16E40, 0x16E7F,
+ 0x16EA0, 0x16EB8,
+ 0x16EBB, 0x16ED3,
0x16F00, 0x16F4A,
0x16F4F, 0x16F87,
0x16F8F, 0x16F9F,
0x16FE0, 0x16FE1,
0x16FE3, 0x16FE3,
- 0x16FF0, 0x16FF1,
- 0x17000, 0x187F7,
- 0x18800, 0x18CD5,
- 0x18D00, 0x18D08,
+ 0x16FF0, 0x16FF6,
+ 0x17000, 0x18CD5,
+ 0x18CFF, 0x18D1E,
+ 0x18D80, 0x18DF2,
0x1AFF0, 0x1AFF3,
0x1AFF5, 0x1AFFB,
0x1AFFD, 0x1AFFE,
@@ -677,6 +701,11 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x1E290, 0x1E2AD,
0x1E2C0, 0x1E2EB,
0x1E4D0, 0x1E4EB,
+ 0x1E5D0, 0x1E5ED,
+ 0x1E5F0, 0x1E5F0,
+ 0x1E6C0, 0x1E6DE,
+ 0x1E6E0, 0x1E6F5,
+ 0x1E6FE, 0x1E6FF,
0x1E7E0, 0x1E7E6,
0x1E7E8, 0x1E7EB,
0x1E7ED, 0x1E7EE,
@@ -722,16 +751,16 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x1F150, 0x1F169,
0x1F170, 0x1F189,
0x20000, 0x2A6DF,
- 0x2A700, 0x2B739,
- 0x2B740, 0x2B81D,
- 0x2B820, 0x2CEA1,
+ 0x2A700, 0x2B81D,
+ 0x2B820, 0x2CEAD,
0x2CEB0, 0x2EBE0,
+ 0x2EBF0, 0x2EE5D,
0x2F800, 0x2FA1D,
0x30000, 0x3134A,
- 0x31350, 0x323AF,
+ 0x31350, 0x33479,
};
-#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528
+#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1598
static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
0x100, 0x2C1,
0x2C6, 0x2D1,
@@ -739,7 +768,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x2EC, 0x2EC,
0x2EE, 0x2EE,
0x345, 0x345,
- 0x370, 0x374,
+ 0x363, 0x374,
0x376, 0x377,
0x37A, 0x37D,
0x37F, 0x37F,
@@ -778,7 +807,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x840, 0x858,
0x860, 0x86A,
0x870, 0x887,
- 0x889, 0x88E,
+ 0x889, 0x88F,
+ 0x897, 0x897,
0x8A0, 0x8C9,
0x8D4, 0x8DF,
0x8E3, 0x8E9,
@@ -872,7 +902,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0xC4A, 0xC4C,
0xC55, 0xC56,
0xC58, 0xC5A,
- 0xC5D, 0xC5D,
+ 0xC5C, 0xC5D,
0xC60, 0xC63,
0xC66, 0xC6F,
0xC80, 0xC83,
@@ -885,7 +915,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0xCC6, 0xCC8,
0xCCA, 0xCCC,
0xCD5, 0xCD6,
- 0xCDD, 0xCDE,
+ 0xCDC, 0xCDE,
0xCE0, 0xCE3,
0xCE6, 0xCEF,
0xCF1, 0xCF3,
@@ -1007,7 +1037,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1C00, 0x1C36,
0x1C40, 0x1C49,
0x1C4D, 0x1C7D,
- 0x1C80, 0x1C88,
+ 0x1C80, 0x1C8A,
0x1C90, 0x1CBA,
0x1CBD, 0x1CBF,
0x1CE9, 0x1CEC,
@@ -1015,7 +1045,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1CF5, 0x1CF6,
0x1CFA, 0x1CFA,
0x1D00, 0x1DBF,
- 0x1DE7, 0x1DF4,
+ 0x1DD3, 0x1DF4,
0x1E00, 0x1F15,
0x1F18, 0x1F1D,
0x1F20, 0x1F45,
@@ -1094,11 +1124,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0xA67F, 0xA6EF,
0xA717, 0xA71F,
0xA722, 0xA788,
- 0xA78B, 0xA7CA,
- 0xA7D0, 0xA7D1,
- 0xA7D3, 0xA7D3,
- 0xA7D5, 0xA7D9,
- 0xA7F2, 0xA805,
+ 0xA78B, 0xA7DC,
+ 0xA7F1, 0xA805,
0xA807, 0xA827,
0xA840, 0xA873,
0xA880, 0xA8C3,
@@ -1191,6 +1218,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x105A3, 0x105B1,
0x105B3, 0x105B9,
0x105BB, 0x105BC,
+ 0x105C0, 0x105F3,
0x10600, 0x10736,
0x10740, 0x10755,
0x10760, 0x10767,
@@ -1209,6 +1237,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x108F4, 0x108F5,
0x10900, 0x10915,
0x10920, 0x10939,
+ 0x10940, 0x10959,
0x10980, 0x109B7,
0x109BE, 0x109BF,
0x10A00, 0x10A03,
@@ -1229,9 +1258,14 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x10CC0, 0x10CF2,
0x10D00, 0x10D27,
0x10D30, 0x10D39,
+ 0x10D40, 0x10D65,
+ 0x10D69, 0x10D69,
+ 0x10D6F, 0x10D85,
0x10E80, 0x10EA9,
0x10EAB, 0x10EAC,
0x10EB0, 0x10EB1,
+ 0x10EC2, 0x10EC7,
+ 0x10EFA, 0x10EFC,
0x10F00, 0x10F1C,
0x10F27, 0x10F27,
0x10F30, 0x10F45,
@@ -1278,6 +1312,17 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x11350, 0x11350,
0x11357, 0x11357,
0x1135D, 0x11363,
+ 0x11380, 0x11389,
+ 0x1138B, 0x1138B,
+ 0x1138E, 0x1138E,
+ 0x11390, 0x113B5,
+ 0x113B7, 0x113C0,
+ 0x113C2, 0x113C2,
+ 0x113C5, 0x113C5,
+ 0x113C7, 0x113CA,
+ 0x113CC, 0x113CD,
+ 0x113D1, 0x113D1,
+ 0x113D3, 0x113D3,
0x11400, 0x11441,
0x11443, 0x11445,
0x11447, 0x1144A,
@@ -1297,6 +1342,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x11680, 0x116B5,
0x116B8, 0x116B8,
0x116C0, 0x116C9,
+ 0x116D0, 0x116E3,
0x11700, 0x1171A,
0x1171D, 0x1172A,
0x11730, 0x11739,
@@ -1322,6 +1368,9 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x11A50, 0x11A97,
0x11A9D, 0x11A9D,
0x11AB0, 0x11AF8,
+ 0x11B60, 0x11B67,
+ 0x11BC0, 0x11BE0,
+ 0x11BF0, 0x11BF9,
0x11C00, 0x11C08,
0x11C0A, 0x11C36,
0x11C38, 0x11C3E,
@@ -1346,6 +1395,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x11D93, 0x11D96,
0x11D98, 0x11D98,
0x11DA0, 0x11DA9,
+ 0x11DB0, 0x11DDB,
+ 0x11DE0, 0x11DE9,
0x11EE0, 0x11EF6,
0x11F00, 0x11F10,
0x11F12, 0x11F3A,
@@ -1358,7 +1409,10 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x12F90, 0x12FF0,
0x13000, 0x1342F,
0x13441, 0x13446,
+ 0x13460, 0x143FA,
0x14400, 0x14646,
+ 0x16100, 0x1612E,
+ 0x16130, 0x16139,
0x16800, 0x16A38,
0x16A40, 0x16A5E,
0x16A60, 0x16A69,
@@ -1370,16 +1424,20 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x16B50, 0x16B59,
0x16B63, 0x16B77,
0x16B7D, 0x16B8F,
+ 0x16D40, 0x16D6C,
+ 0x16D70, 0x16D79,
0x16E40, 0x16E7F,
+ 0x16EA0, 0x16EB8,
+ 0x16EBB, 0x16ED3,
0x16F00, 0x16F4A,
0x16F4F, 0x16F87,
0x16F8F, 0x16F9F,
0x16FE0, 0x16FE1,
0x16FE3, 0x16FE3,
- 0x16FF0, 0x16FF1,
- 0x17000, 0x187F7,
- 0x18800, 0x18CD5,
- 0x18D00, 0x18D08,
+ 0x16FF0, 0x16FF6,
+ 0x17000, 0x18CD5,
+ 0x18CFF, 0x18D1E,
+ 0x18D80, 0x18DF2,
0x1AFF0, 0x1AFF3,
0x1AFF5, 0x1AFFB,
0x1AFFD, 0x1AFFE,
@@ -1394,6 +1452,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1BC80, 0x1BC88,
0x1BC90, 0x1BC99,
0x1BC9E, 0x1BC9E,
+ 0x1CCF0, 0x1CCF9,
0x1D400, 0x1D454,
0x1D456, 0x1D49C,
0x1D49E, 0x1D49F,
@@ -1443,6 +1502,11 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1E2F0, 0x1E2F9,
0x1E4D0, 0x1E4EB,
0x1E4F0, 0x1E4F9,
+ 0x1E5D0, 0x1E5ED,
+ 0x1E5F0, 0x1E5FA,
+ 0x1E6C0, 0x1E6DE,
+ 0x1E6E0, 0x1E6F5,
+ 0x1E6FE, 0x1E6FF,
0x1E7E0, 0x1E7E6,
0x1E7E8, 0x1E7EB,
0x1E7ED, 0x1E7EE,
@@ -1490,16 +1554,16 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1F170, 0x1F189,
0x1FBF0, 0x1FBF9,
0x20000, 0x2A6DF,
- 0x2A700, 0x2B739,
- 0x2B740, 0x2B81D,
- 0x2B820, 0x2CEA1,
+ 0x2A700, 0x2B81D,
+ 0x2B820, 0x2CEAD,
0x2CEB0, 0x2EBE0,
+ 0x2EBF0, 0x2EE5D,
0x2F800, 0x2FA1D,
0x30000, 0x3134A,
- 0x31350, 0x323AF,
+ 0x31350, 0x33479,
};
-#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1296
+#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1320
static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
0x100, 0x100,
0x102, 0x102,
@@ -1582,9 +1646,9 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0x1B5, 0x1B5,
0x1B7, 0x1B8,
0x1BC, 0x1BC,
- 0x1C4, 0x1C4,
- 0x1C7, 0x1C7,
- 0x1CA, 0x1CA,
+ 0x1C4, 0x1C5,
+ 0x1C7, 0x1C8,
+ 0x1CA, 0x1CB,
0x1CD, 0x1CD,
0x1CF, 0x1CF,
0x1D1, 0x1D1,
@@ -1602,7 +1666,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0x1EA, 0x1EA,
0x1EC, 0x1EC,
0x1EE, 0x1EE,
- 0x1F1, 0x1F1,
+ 0x1F1, 0x1F2,
0x1F4, 0x1F4,
0x1F6, 0x1F8,
0x1FA, 0x1FA,
@@ -1774,6 +1838,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0x10C7, 0x10C7,
0x10CD, 0x10CD,
0x13A0, 0x13F5,
+ 0x1C89, 0x1C89,
0x1C90, 0x1CBA,
0x1CBD, 0x1CBF,
0x1E00, 0x1E00,
@@ -1910,11 +1975,14 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0x1F5D, 0x1F5D,
0x1F5F, 0x1F5F,
0x1F68, 0x1F6F,
- 0x1FB8, 0x1FBB,
- 0x1FC8, 0x1FCB,
+ 0x1F88, 0x1F8F,
+ 0x1F98, 0x1F9F,
+ 0x1FA8, 0x1FAF,
+ 0x1FB8, 0x1FBC,
+ 0x1FC8, 0x1FCC,
0x1FD8, 0x1FDB,
0x1FE8, 0x1FEC,
- 0x1FF8, 0x1FFB,
+ 0x1FF8, 0x1FFC,
0x2102, 0x2102,
0x2107, 0x2107,
0x210B, 0x210D,
@@ -2100,9 +2168,15 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0xA7C2, 0xA7C2,
0xA7C4, 0xA7C7,
0xA7C9, 0xA7C9,
+ 0xA7CB, 0xA7CC,
+ 0xA7CE, 0xA7CE,
0xA7D0, 0xA7D0,
+ 0xA7D2, 0xA7D2,
+ 0xA7D4, 0xA7D4,
0xA7D6, 0xA7D6,
0xA7D8, 0xA7D8,
+ 0xA7DA, 0xA7DA,
+ 0xA7DC, 0xA7DC,
0xA7F5, 0xA7F5,
0xFF21, 0xFF3A,
0x10400, 0x10427,
@@ -2112,8 +2186,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0x1058C, 0x10592,
0x10594, 0x10595,
0x10C80, 0x10CB2,
+ 0x10D50, 0x10D65,
0x118A0, 0x118BF,
0x16E40, 0x16E5F,
+ 0x16EA0, 0x16EB8,
0x1D400, 0x1D419,
0x1D434, 0x1D44D,
0x1D468, 0x1D481,
@@ -2252,13 +2328,13 @@ static const uint8_t pm_utf_8_dfa[] = {
*/
static pm_unicode_codepoint_t
pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
- assert(n >= 1);
- size_t maximum = (size_t) n;
+ assert(n >= 0);
+ size_t maximum = (n > 4) ? 4 : ((size_t) n);
uint32_t codepoint;
uint32_t state = 0;
- for (size_t index = 0; index < 4 && index < maximum; index++) {
+ for (size_t index = 0; index < maximum; index++) {
uint32_t byte = b[index];
uint32_t type = pm_utf_8_dfa[byte];
@@ -2267,7 +2343,7 @@ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
(0xffu >> type) & (byte);
state = pm_utf_8_dfa[256 + (state * 16) + type];
- if (!state) {
+ if (state == 0) {
*width = index + 1;
return (pm_unicode_codepoint_t) codepoint;
}
@@ -2277,11 +2353,22 @@ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
return 0;
}
-static size_t
+/**
+ * Return the size of the next character in the UTF-8 encoding.
+ */
+size_t
pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
- size_t width;
- pm_utf_8_codepoint(b, n, &width);
- return width;
+ assert(n >= 0);
+
+ size_t maximum = (n > 4) ? 4 : ((size_t) n);
+ uint32_t state = 0;
+
+ for (size_t index = 0; index < maximum; index++) {
+ state = pm_utf_8_dfa[256 + (state * 16) + pm_utf_8_dfa[b[index]]];
+ if (state == 0) return index + 1;
+ }
+
+ return 0;
}
/**
@@ -2290,6 +2377,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
*/
size_t
pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
@@ -2310,6 +2401,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
*/
size_t
pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
@@ -2330,6 +2425,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
*/
bool
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
}
@@ -2344,9 +2443,12 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
}
}
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+
static pm_unicode_codepoint_t
pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
- if (b[0] < 0x80) {
+
+ if ((n > 0) && (b[0] < 0x80)) {
*width = 1;
return (pm_unicode_codepoint_t) b[0];
}
@@ -2385,6 +2487,10 @@ pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
static size_t
pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
size_t width;
pm_cesu_8_codepoint(b, n, &width);
return width;
@@ -2392,6 +2498,10 @@ pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
@@ -2408,6 +2518,10 @@ pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
@@ -2424,6 +2538,10 @@ pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
static bool
pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
}
@@ -2438,13 +2556,15 @@ pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
}
}
+#endif
+
#undef UNICODE_ALPHA_CODEPOINTS_LENGTH
#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
/**
* Each element of the following table contains a bitfield that indicates a
- * piece of information about the corresponding ASCII character.
+ * piece of information about the corresponding US-ASCII character.
*/
static const uint8_t pm_encoding_ascii_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
@@ -2466,6 +2586,8 @@ static const uint8_t pm_encoding_ascii_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
};
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+
/**
* Each element of the following table contains a bitfield that indicates a
* piece of information about the corresponding CP850 character.
@@ -3613,7 +3735,7 @@ static const uint8_t pm_encoding_windows_1250_table[256] = {
0, 0, 0, 7, 0, 7, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7, // Ax
0, 0, 0, 3, 0, 3, 0, 0, 0, 3, 3, 0, 7, 0, 3, 3, // Bx
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
- 7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
};
@@ -3661,7 +3783,7 @@ static const uint8_t pm_encoding_windows_1252_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Ax
0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Bx
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
- 7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
};
@@ -3835,14 +3957,14 @@ static const uint8_t pm_encoding_windows_874_table[256] = {
};
#define PRISM_ENCODING_TABLE(name) \
- static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
+ static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, ptrdiff_t n) { \
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT)); \
} \
- static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
+ static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, ptrdiff_t n) { \
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0; \
} \
- static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
+ static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, ptrdiff_t n) { \
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT)); \
}
PRISM_ENCODING_TABLE(cp850)
@@ -3904,14 +4026,15 @@ PRISM_ENCODING_TABLE(windows_1258)
PRISM_ENCODING_TABLE(windows_874)
#undef PRISM_ENCODING_TABLE
+#endif
/**
* Returns the size of the next character in the ASCII encoding. This basically
* means that if the top bit is not set, the character is 1 byte long.
*/
static size_t
-pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return *b < 0x80 ? 1 : 0;
+pm_encoding_ascii_char_width(const uint8_t *b, ptrdiff_t n) {
+ return ((n > 0) && (*b < 0x80)) ? 1 : 0;
}
/**
@@ -3919,8 +4042,8 @@ pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
* alphabetical character.
*/
static size_t
-pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
+pm_encoding_ascii_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ return (n > 0) ? (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) : 0;
}
/**
@@ -3930,7 +4053,7 @@ pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
*/
static size_t
pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
- return (*b < 0x80) ? pm_encoding_ascii_alpha_char(b, n) : 0;
+ return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alpha_char(b, n) : 0;
}
/**
@@ -3938,8 +4061,8 @@ pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
* alphanumeric character.
*/
static size_t
-pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
+pm_encoding_ascii_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ return ((n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
/**
@@ -3949,7 +4072,7 @@ pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
*/
static size_t
pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
- return (*b < 0x80) ? pm_encoding_ascii_alnum_char(b, n) : 0;
+ return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alnum_char(b, n) : 0;
}
/**
@@ -3957,27 +4080,137 @@ pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
* character.
*/
static bool
-pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
+pm_encoding_ascii_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ return (n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
}
/**
- * Certain encodings are equivalent to ASCII below 0x80, so it works for our
- * purposes to have a function here that first checks the bounds and then falls
- * back to checking the ASCII lookup table.
+ * For a lot of encodings the default is that they are a single byte long no
+ * matter what the codepoint, so this function is shared between them.
+ */
+static size_t
+pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
+ return 1;
+}
+
+/**
+ * Returns the size of the next character in the EUC-JP encoding, or 0 if a
+ * character cannot be decoded from the given bytes.
+ */
+static size_t
+pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
+ // These are the single byte characters.
+ if ((n > 0) && (*b < 0x80)) {
+ return 1;
+ }
+
+ // These are the double byte characters.
+ if ((n > 1) && ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) && (b[1] >= 0xA1 && b[1] <= 0xFE)) {
+ return 2;
+ }
+
+ // These are the triple byte characters.
+ if ((n > 2) && (b[0] == 0x8F) && (b[1] >= 0xA1 && b[2] <= 0xFE) && (b[2] >= 0xA1 && b[2] <= 0xFE)) {
+ return 3;
+ }
+
+ return 0;
+}
+
+/**
+ * Returns the size of the next character in the EUC-JP encoding if it is an
+ * uppercase character.
*/
static bool
-pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
- return (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
+pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ size_t width = pm_encoding_euc_jp_char_width(b, n);
+
+ if (width == 1) {
+ return pm_encoding_ascii_isupper_char(b, n);
+ } else if (width == 2) {
+ return (
+ (b[0] == 0xA3 && b[1] >= 0xC1 && b[1] <= 0xDA) ||
+ (b[0] == 0xA6 && b[1] >= 0xA1 && b[1] <= 0xB8) ||
+ (b[0] == 0xA7 && b[1] >= 0xA1 && b[1] <= 0xC1)
+ );
+ } else {
+ return false;
+ }
}
/**
- * For a lot of encodings the default is that they are a single byte long no
- * matter what the codepoint, so this function is shared between them.
+ * Returns the size of the next character in the Shift_JIS encoding, or 0 if a
+ * character cannot be decoded from the given bytes.
*/
static size_t
-pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return 1;
+pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+ // These are the single byte characters.
+ if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
+ return 1;
+ }
+
+ // These are the double byte characters.
+ if ((n > 1) && ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) && (b[1] >= 0x40 && b[1] <= 0xFC && b[1] != 0x7F)) {
+ return 2;
+ }
+
+ return 0;
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding if it is an
+ * alphanumeric character.
+ */
+static size_t
+pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ size_t width = pm_encoding_shift_jis_char_width(b, n);
+ return width == 1 ? ((b[0] >= 0x80) || pm_encoding_ascii_alnum_char(b, n)) : width;
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding if it is an
+ * alphabetical character.
+ */
+static size_t
+pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ size_t width = pm_encoding_shift_jis_char_width(b, n);
+ return width == 1 ? ((b[0] >= 0x80) || pm_encoding_ascii_alpha_char(b, n)) : width;
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding if it is an
+ * uppercase character.
+ */
+static bool
+pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ size_t width = pm_encoding_shift_jis_char_width(b, n);
+
+ if (width == 1) {
+ return pm_encoding_ascii_isupper_char(b, n);
+ } else if (width == 2) {
+ return (
+ ((b[0] == 0x82) && (b[1] >= 0x60 && b[1] <= 0x79)) ||
+ ((b[0] == 0x83) && (b[1] >= 0x9F && b[1] <= 0xB6)) ||
+ ((b[0] == 0x84) && (b[1] >= 0x40 && b[1] <= 0x60))
+ );
+ } else {
+ return width;
+ }
+}
+
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+
+/**
+ * Certain encodings are equivalent to ASCII below 0x80, so it works for our
+ * purposes to have a function here that first checks the bounds and then falls
+ * back to checking the ASCII lookup table.
+ */
+static bool
+pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
+ return (n > 0) && (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
}
/**
@@ -3987,7 +4220,7 @@ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATT
static size_t
pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4006,12 +4239,12 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters
- if (*b <= 0x80) {
+ if ((n > 0) && (*b <= 0x80)) {
return 1;
}
// These are the double byte characters
- if ((n > 1) && (b[0] >= 0x81 && b[0] <= 0xfe) && (b[1] >= 0x41 && b[1] <= 0xfe)) {
+ if ((n > 1) && (b[0] >= 0x81 && b[0] <= 0xFE) && ((b[1] >= 0x41 && b[1] <= 0x5A) || (b[1] >= 0x61 && b[1] <= 0x7A) || (b[1] >= 0x81 && b[1] <= 0xFE))) {
return 2;
}
@@ -4025,7 +4258,7 @@ pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the 1 byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4062,37 +4295,13 @@ pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
}
/**
- * Returns the size of the next character in the EUC-JP encoding, or 0 if a
- * character cannot be decoded from the given bytes.
- */
-static size_t
-pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
- // These are the single byte characters.
- if (*b < 0x80) {
- return 1;
- }
-
- // These are the double byte characters.
- if ((n > 1) && ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) && (b[1] >= 0xA1 && b[1] <= 0xFE)) {
- return 2;
- }
-
- // These are the triple byte characters.
- if ((n > 2) && (b[0] == 0x8F) && (b[1] >= 0xA1 && b[2] <= 0xFE) && (b[2] >= 0xA1 && b[2] <= 0xFE)) {
- return 3;
- }
-
- return 0;
-}
-
-/**
* Returns the size of the next character in the EUC-KR encoding, or 0 if a
* character cannot be decoded from the given bytes.
*/
static size_t
pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4111,7 +4320,7 @@ pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4135,7 +4344,7 @@ pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the 1 byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4159,7 +4368,7 @@ pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b <= 0x80) {
+ if ((n > 0) && (*b <= 0x80)) {
return 1;
}
@@ -4183,33 +4392,7 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
return 0;
}
-/**
- * Returns the size of the next character in the KOI-8 encoding. This means
- * checking if it's a valid codepoint in KOI-8 and if it is returning 1.
- */
-static size_t
-pm_encoding_koi8_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
-}
-
-/**
- * Returns the size of the next character in the Shift_JIS encoding, or 0 if a
- * character cannot be decoded from the given bytes.
- */
-static size_t
-pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
- // These are the single byte characters.
- if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
- return 1;
- }
-
- // These are the double byte characters.
- if ((n > 1) && ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) && (b[1] >= 0x40 && b[1] <= 0xFC)) {
- return 2;
- }
-
- return 0;
-}
+#endif
/**
* This is the table of all of the encodings that prism supports.
@@ -4223,6 +4406,14 @@ const pm_encoding_t pm_encodings[] = {
.isupper_char = pm_encoding_utf_8_isupper_char,
.multibyte = true
},
+ [PM_ENCODING_US_ASCII] = {
+ .name = "US-ASCII",
+ .char_width = pm_encoding_ascii_char_width,
+ .alnum_char = pm_encoding_ascii_alnum_char,
+ .alpha_char = pm_encoding_ascii_alpha_char,
+ .isupper_char = pm_encoding_ascii_isupper_char,
+ .multibyte = false
+ },
[PM_ENCODING_ASCII_8BIT] = {
.name = "ASCII-8BIT",
.char_width = pm_encoding_single_char_width,
@@ -4231,6 +4422,24 @@ const pm_encoding_t pm_encodings[] = {
.isupper_char = pm_encoding_ascii_isupper_char,
.multibyte = false
},
+ [PM_ENCODING_EUC_JP] = {
+ .name = "EUC-JP",
+ .char_width = pm_encoding_euc_jp_char_width,
+ .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+ .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+ .isupper_char = pm_encoding_euc_jp_isupper_char,
+ .multibyte = true
+ },
+ [PM_ENCODING_WINDOWS_31J] = {
+ .name = "Windows-31J",
+ .char_width = pm_encoding_shift_jis_char_width,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
+ .multibyte = true
+ },
+
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
[PM_ENCODING_BIG5] = {
.name = "Big5",
.char_width = pm_encoding_big5_char_width,
@@ -4268,7 +4477,7 @@ const pm_encoding_t pm_encodings[] = {
.char_width = pm_encoding_euc_jp_char_width,
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+ .isupper_char = pm_encoding_euc_jp_isupper_char,
.multibyte = true
},
[PM_ENCODING_CP850] = {
@@ -4327,20 +4536,12 @@ const pm_encoding_t pm_encodings[] = {
.isupper_char = pm_encoding_ascii_isupper_char_7bit,
.multibyte = true
},
- [PM_ENCODING_EUC_JP] = {
- .name = "EUC-JP",
- .char_width = pm_encoding_euc_jp_char_width,
- .alnum_char = pm_encoding_ascii_alnum_char_7bit,
- .alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
- .multibyte = true
- },
[PM_ENCODING_EUC_JP_MS] = {
.name = "eucJP-ms",
.char_width = pm_encoding_euc_jp_char_width,
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+ .isupper_char = pm_encoding_euc_jp_isupper_char,
.multibyte = true
},
[PM_ENCODING_EUC_JIS_2004] = {
@@ -4348,7 +4549,7 @@ const pm_encoding_t pm_encodings[] = {
.char_width = pm_encoding_euc_jp_char_width,
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+ .isupper_char = pm_encoding_euc_jp_isupper_char,
.multibyte = true
},
[PM_ENCODING_EUC_KR] = {
@@ -4649,7 +4850,7 @@ const pm_encoding_t pm_encodings[] = {
},
[PM_ENCODING_KOI8_R] = {
.name = "KOI8-R",
- .char_width = pm_encoding_koi8_char_width,
+ .char_width = pm_encoding_single_char_width,
.alnum_char = pm_encoding_koi8_r_alnum_char,
.alpha_char = pm_encoding_koi8_r_alpha_char,
.isupper_char = pm_encoding_koi8_r_isupper_char,
@@ -4657,7 +4858,7 @@ const pm_encoding_t pm_encodings[] = {
},
[PM_ENCODING_KOI8_U] = {
.name = "KOI8-U",
- .char_width = pm_encoding_koi8_char_width,
+ .char_width = pm_encoding_single_char_width,
.alnum_char = pm_encoding_koi8_u_alnum_char,
.alpha_char = pm_encoding_koi8_u_alpha_char,
.isupper_char = pm_encoding_koi8_u_isupper_char,
@@ -4706,9 +4907,9 @@ const pm_encoding_t pm_encodings[] = {
[PM_ENCODING_MAC_JAPANESE] = {
.name = "MacJapanese",
.char_width = pm_encoding_shift_jis_char_width,
- .alnum_char = pm_encoding_ascii_alnum_char_7bit,
- .alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
.multibyte = true
},
[PM_ENCODING_MAC_ROMAN] = {
@@ -4754,33 +4955,33 @@ const pm_encoding_t pm_encodings[] = {
[PM_ENCODING_SHIFT_JIS] = {
.name = "Shift_JIS",
.char_width = pm_encoding_shift_jis_char_width,
- .alnum_char = pm_encoding_ascii_alnum_char_7bit,
- .alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
.multibyte = true
},
[PM_ENCODING_SJIS_DOCOMO] = {
.name = "SJIS-DoCoMo",
.char_width = pm_encoding_shift_jis_char_width,
- .alnum_char = pm_encoding_ascii_alnum_char_7bit,
- .alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
.multibyte = true
},
[PM_ENCODING_SJIS_KDDI] = {
.name = "SJIS-KDDI",
.char_width = pm_encoding_shift_jis_char_width,
- .alnum_char = pm_encoding_ascii_alnum_char_7bit,
- .alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
.multibyte = true
},
[PM_ENCODING_SJIS_SOFTBANK] = {
.name = "SJIS-SoftBank",
.char_width = pm_encoding_shift_jis_char_width,
- .alnum_char = pm_encoding_ascii_alnum_char_7bit,
- .alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
.multibyte = true
},
[PM_ENCODING_STATELESS_ISO_2022_JP] = {
@@ -4807,14 +5008,6 @@ const pm_encoding_t pm_encodings[] = {
.isupper_char = pm_encoding_tis_620_isupper_char,
.multibyte = false
},
- [PM_ENCODING_US_ASCII] = {
- .name = "US-ASCII",
- .char_width = pm_encoding_ascii_char_width,
- .alnum_char = pm_encoding_ascii_alnum_char,
- .alpha_char = pm_encoding_ascii_alpha_char,
- .isupper_char = pm_encoding_ascii_isupper_char,
- .multibyte = false
- },
[PM_ENCODING_UTF8_MAC] = {
.name = "UTF8-MAC",
.char_width = pm_encoding_utf_8_char_width,
@@ -4919,14 +5112,6 @@ const pm_encoding_t pm_encodings[] = {
.isupper_char = pm_encoding_windows_1258_isupper_char,
.multibyte = false
},
- [PM_ENCODING_WINDOWS_31J] = {
- .name = "Windows-31J",
- .char_width = pm_encoding_shift_jis_char_width,
- .alnum_char = pm_encoding_ascii_alnum_char_7bit,
- .alpha_char = pm_encoding_ascii_alpha_char_7bit,
- .isupper_char = pm_encoding_ascii_isupper_char_7bit,
- .multibyte = true
- },
[PM_ENCODING_WINDOWS_874] = {
.name = "Windows-874",
.char_width = pm_encoding_single_char_width,
@@ -4935,6 +5120,7 @@ const pm_encoding_t pm_encodings[] = {
.isupper_char = pm_encoding_windows_874_isupper_char,
.multibyte = false
}
+#endif
};
/**
@@ -4949,11 +5135,13 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
// UTF-8 can contain extra information at the end about the platform it is
// encoded on, such as UTF-8-MAC or UTF-8-UNIX. We'll ignore those suffixes.
if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "UTF-8", 5) == 0)) {
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
// We need to explicitly handle UTF-8-HFS, as that one needs to switch
// over to being UTF8-MAC.
if (width == 9 && (pm_strncasecmp(start + 5, (const uint8_t *) "-HFS", 4) == 0)) {
return &pm_encodings[PM_ENCODING_UTF8_MAC];
}
+#endif
// Otherwise we'll return the default UTF-8 encoding.
return PM_ENCODING_UTF_8_ENTRY;
@@ -4973,11 +5161,16 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
break;
case 'B': case 'b':
ENCODING1("BINARY", PM_ENCODING_ASCII_8BIT);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING1("Big5", PM_ENCODING_BIG5);
ENCODING2("Big5-HKSCS", "Big5-HKSCS:2008", PM_ENCODING_BIG5_HKSCS);
ENCODING1("Big5-UAO", PM_ENCODING_BIG5_UAO);
+#endif
break;
case 'C': case 'c':
+ ENCODING1("CP65001", PM_ENCODING_UTF_8);
+ ENCODING2("CP932", "csWindows31J", PM_ENCODING_WINDOWS_31J);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING1("CESU-8", PM_ENCODING_CESU_8);
ENCODING1("CP437", PM_ENCODING_IBM437);
ENCODING1("CP720", PM_ENCODING_IBM720);
@@ -4997,7 +5190,6 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
ENCODING1("CP874", PM_ENCODING_WINDOWS_874);
ENCODING1("CP878", PM_ENCODING_KOI8_R);
ENCODING1("CP863", PM_ENCODING_IBM863);
- ENCODING2("CP932", "csWindows31J", PM_ENCODING_WINDOWS_31J);
ENCODING1("CP936", PM_ENCODING_GBK);
ENCODING1("CP949", PM_ENCODING_CP949);
ENCODING1("CP950", PM_ENCODING_CP950);
@@ -5012,25 +5204,30 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
ENCODING1("CP1257", PM_ENCODING_WINDOWS_1257);
ENCODING1("CP1258", PM_ENCODING_WINDOWS_1258);
ENCODING1("CP51932", PM_ENCODING_CP51932);
- ENCODING1("CP65001", PM_ENCODING_UTF_8);
+#endif
break;
case 'E': case 'e':
ENCODING2("EUC-JP", "eucJP", PM_ENCODING_EUC_JP);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING2("eucJP-ms", "euc-jp-ms", PM_ENCODING_EUC_JP_MS);
ENCODING2("EUC-JIS-2004", "EUC-JISX0213", PM_ENCODING_EUC_JIS_2004);
ENCODING2("EUC-KR", "eucKR", PM_ENCODING_EUC_KR);
ENCODING2("EUC-CN", "eucCN", PM_ENCODING_GB2312);
ENCODING2("EUC-TW", "eucTW", PM_ENCODING_EUC_TW);
ENCODING1("Emacs-Mule", PM_ENCODING_EMACS_MULE);
+#endif
break;
case 'G': case 'g':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING1("GBK", PM_ENCODING_GBK);
ENCODING1("GB12345", PM_ENCODING_GB12345);
ENCODING1("GB18030", PM_ENCODING_GB18030);
ENCODING1("GB1988", PM_ENCODING_GB1988);
ENCODING1("GB2312", PM_ENCODING_GB2312);
+#endif
break;
case 'I': case 'i':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING1("IBM437", PM_ENCODING_IBM437);
ENCODING1("IBM720", PM_ENCODING_IBM720);
ENCODING1("IBM737", PM_ENCODING_IBM737);
@@ -5062,12 +5259,16 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
ENCODING2("ISO-8859-14", "ISO8859-14", PM_ENCODING_ISO_8859_14);
ENCODING2("ISO-8859-15", "ISO8859-15", PM_ENCODING_ISO_8859_15);
ENCODING2("ISO-8859-16", "ISO8859-16", PM_ENCODING_ISO_8859_16);
+#endif
break;
case 'K': case 'k':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING1("KOI8-R", PM_ENCODING_KOI8_R);
ENCODING1("KOI8-U", PM_ENCODING_KOI8_U);
+#endif
break;
case 'M': case 'm':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING1("macCentEuro", PM_ENCODING_MAC_CENT_EURO);
ENCODING1("macCroatian", PM_ENCODING_MAC_CROATIAN);
ENCODING1("macCyrillic", PM_ENCODING_MAC_CYRILLIC);
@@ -5080,31 +5281,39 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
ENCODING1("macThai", PM_ENCODING_MAC_THAI);
ENCODING1("macTurkish", PM_ENCODING_MAC_TURKISH);
ENCODING1("macUkraine", PM_ENCODING_MAC_UKRAINE);
+#endif
break;
case 'P': case 'p':
ENCODING1("PCK", PM_ENCODING_WINDOWS_31J);
break;
case 'S': case 's':
- ENCODING1("Shift_JIS", PM_ENCODING_SHIFT_JIS);
ENCODING1("SJIS", PM_ENCODING_WINDOWS_31J);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+ ENCODING1("Shift_JIS", PM_ENCODING_SHIFT_JIS);
ENCODING1("SJIS-DoCoMo", PM_ENCODING_SJIS_DOCOMO);
ENCODING1("SJIS-KDDI", PM_ENCODING_SJIS_KDDI);
ENCODING1("SJIS-SoftBank", PM_ENCODING_SJIS_SOFTBANK);
ENCODING1("stateless-ISO-2022-JP", PM_ENCODING_STATELESS_ISO_2022_JP);
ENCODING1("stateless-ISO-2022-JP-KDDI", PM_ENCODING_STATELESS_ISO_2022_JP_KDDI);
+#endif
break;
case 'T': case 't':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING1("TIS-620", PM_ENCODING_TIS_620);
+#endif
break;
case 'U': case 'u':
ENCODING1("US-ASCII", PM_ENCODING_US_ASCII);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING2("UTF8-MAC", "UTF-8-HFS", PM_ENCODING_UTF8_MAC);
ENCODING1("UTF8-DoCoMo", PM_ENCODING_UTF8_DOCOMO);
ENCODING1("UTF8-KDDI", PM_ENCODING_UTF8_KDDI);
ENCODING1("UTF8-SoftBank", PM_ENCODING_UTF8_SOFTBANK);
+#endif
break;
case 'W': case 'w':
ENCODING1("Windows-31J", PM_ENCODING_WINDOWS_31J);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
ENCODING1("Windows-874", PM_ENCODING_WINDOWS_874);
ENCODING1("Windows-1250", PM_ENCODING_WINDOWS_1250);
ENCODING1("Windows-1251", PM_ENCODING_WINDOWS_1251);
@@ -5115,6 +5324,7 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
ENCODING1("Windows-1256", PM_ENCODING_WINDOWS_1256);
ENCODING1("Windows-1257", PM_ENCODING_WINDOWS_1257);
ENCODING1("Windows-1258", PM_ENCODING_WINDOWS_1258);
+#endif
break;
case '6':
ENCODING1("646", PM_ENCODING_US_ASCII);
diff --git a/prism/encoding.h b/prism/encoding.h
index 8fe01aea69..5f7724821f 100644
--- a/prism/encoding.h
+++ b/prism/encoding.h
@@ -80,6 +80,16 @@ typedef struct {
#define PRISM_ENCODING_UPPERCASE_BIT 1 << 2
/**
+ * Return the size of the next character in the UTF-8 encoding.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ * the encoding, or 0 if it is not.
+ */
+size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
+
+/**
* Return the size of the next character in the UTF-8 encoding if it is an
* alphabetical character.
*
@@ -125,7 +135,14 @@ extern const uint8_t pm_encoding_unicode_table[256];
*/
typedef enum {
PM_ENCODING_UTF_8 = 0,
+ PM_ENCODING_US_ASCII,
PM_ENCODING_ASCII_8BIT,
+ PM_ENCODING_EUC_JP,
+ PM_ENCODING_WINDOWS_31J,
+
+// We optionally support excluding the full set of encodings to only support the
+// minimum necessary to process Ruby code without encoding comments.
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
PM_ENCODING_BIG5,
PM_ENCODING_BIG5_HKSCS,
PM_ENCODING_BIG5_UAO,
@@ -138,7 +155,6 @@ typedef enum {
PM_ENCODING_CP950,
PM_ENCODING_CP951,
PM_ENCODING_EMACS_MULE,
- PM_ENCODING_EUC_JP,
PM_ENCODING_EUC_JP_MS,
PM_ENCODING_EUC_JIS_2004,
PM_ENCODING_EUC_KR,
@@ -198,7 +214,6 @@ typedef enum {
PM_ENCODING_STATELESS_ISO_2022_JP,
PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
PM_ENCODING_TIS_620,
- PM_ENCODING_US_ASCII,
PM_ENCODING_UTF8_MAC,
PM_ENCODING_UTF8_DOCOMO,
PM_ENCODING_UTF8_KDDI,
@@ -212,8 +227,9 @@ typedef enum {
PM_ENCODING_WINDOWS_1256,
PM_ENCODING_WINDOWS_1257,
PM_ENCODING_WINDOWS_1258,
- PM_ENCODING_WINDOWS_31J,
PM_ENCODING_WINDOWS_874,
+#endif
+
PM_ENCODING_MAXIMUM
} pm_encoding_type_t;
@@ -236,6 +252,25 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
/**
+ * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
+ * can compare against it because invalid multibyte characters are not a thing
+ * in this encoding. It is also needed for handling Regexp encoding flags.
+ */
+#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
+
+/**
+ * This is the EUC-JP encoding. We need a reference to it to quickly process
+ * regular expression modifiers.
+ */
+#define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
+
+/**
+ * This is the Windows-31J encoding. We need a reference to it to quickly
+ * process regular expression modifiers.
+ */
+#define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
+
+/**
* Parse the given name of an encoding and return a pointer to the corresponding
* encoding struct if one can be found, otherwise return NULL.
*
diff --git a/prism/extension.c b/prism/extension.c
index 28bea39c7f..71c2d91b98 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -1,5 +1,9 @@
#include "prism/extension.h"
+#ifdef _WIN32
+#include <ruby/win32.h>
+#endif
+
// NOTE: this file should contain only bindings. All non-trivial logic should be
// in libprism so it can be shared with its various callers.
@@ -15,35 +19,45 @@ VALUE rb_cPrismEmbDocComment;
VALUE rb_cPrismMagicComment;
VALUE rb_cPrismParseError;
VALUE rb_cPrismParseWarning;
+VALUE rb_cPrismResult;
VALUE rb_cPrismParseResult;
-
-ID rb_option_id_filepath;
-ID rb_option_id_encoding;
-ID rb_option_id_line;
-ID rb_option_id_frozen_string_literal;
-ID rb_option_id_verbose;
-ID rb_option_id_version;
-ID rb_option_id_scopes;
+VALUE rb_cPrismLexResult;
+VALUE rb_cPrismParseLexResult;
+VALUE rb_cPrismStringQuery;
+VALUE rb_cPrismScope;
+VALUE rb_cPrismCurrentVersionError;
+
+VALUE rb_cPrismDebugEncoding;
+
+ID rb_id_option_command_line;
+ID rb_id_option_encoding;
+ID rb_id_option_filepath;
+ID rb_id_option_freeze;
+ID rb_id_option_frozen_string_literal;
+ID rb_id_option_line;
+ID rb_id_option_main_script;
+ID rb_id_option_partial_script;
+ID rb_id_option_scopes;
+ID rb_id_option_version;
+ID rb_id_source_for;
+ID rb_id_forwarding_positionals;
+ID rb_id_forwarding_keywords;
+ID rb_id_forwarding_block;
+ID rb_id_forwarding_all;
/******************************************************************************/
/* IO of Ruby code */
/******************************************************************************/
/**
- * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
- * not a string, then raise a type error. Otherwise return the VALUE as a C
- * string.
+ * Check if the given VALUE is a string. If it's not a string, then raise a
+ * TypeError. Otherwise return the VALUE as a C string.
*/
static const char *
check_string(VALUE value) {
- // If the value is nil, then we don't need to do anything.
- if (NIL_P(value)) {
- return NULL;
- }
-
// Check if the value is a string. If it's not, then raise a type error.
if (!RB_TYPE_P(value, T_STRING)) {
- rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(value));
+ rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(value));
}
// Otherwise, return the value as a C string.
@@ -57,7 +71,7 @@ static void
input_load_string(pm_string_t *input, VALUE string) {
// Check if the string is a string. If it's not, then raise a type error.
if (!RB_TYPE_P(string, T_STRING)) {
- rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
+ rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(string));
}
pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
@@ -79,26 +93,69 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
// Initialize the scopes array.
size_t scopes_count = RARRAY_LEN(scopes);
- pm_options_scopes_init(options, scopes_count);
+ if (!pm_options_scopes_init(options, scopes_count)) {
+ rb_raise(rb_eNoMemError, "failed to allocate memory");
+ }
// Iterate over the scopes and add them to the options.
for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
VALUE scope = rb_ary_entry(scopes, scope_index);
- // Check that the scope is an array. If it's not, then raise a type
- // error.
- if (!RB_TYPE_P(scope, T_ARRAY)) {
- rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
+ // The scope can be either an array or it can be a Prism::Scope object.
+ // Parse out the correct values here from either.
+ VALUE locals;
+ uint8_t forwarding = PM_OPTIONS_SCOPE_FORWARDING_NONE;
+
+ if (RB_TYPE_P(scope, T_ARRAY)) {
+ locals = scope;
+ } else if (rb_obj_is_kind_of(scope, rb_cPrismScope)) {
+ locals = rb_ivar_get(scope, rb_intern("@locals"));
+ if (!RB_TYPE_P(locals, T_ARRAY)) {
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(locals));
+ }
+
+ VALUE names = rb_ivar_get(scope, rb_intern("@forwarding"));
+ if (!RB_TYPE_P(names, T_ARRAY)) {
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(names));
+ }
+
+ size_t names_count = RARRAY_LEN(names);
+ for (size_t name_index = 0; name_index < names_count; name_index++) {
+ VALUE name = rb_ary_entry(names, name_index);
+
+ // Check that the name is a symbol. If it's not, then raise
+ // a type error.
+ if (!RB_TYPE_P(name, T_SYMBOL)) {
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(name));
+ }
+
+ ID id = SYM2ID(name);
+ if (id == rb_id_forwarding_positionals) {
+ forwarding |= PM_OPTIONS_SCOPE_FORWARDING_POSITIONALS;
+ } else if (id == rb_id_forwarding_keywords) {
+ forwarding |= PM_OPTIONS_SCOPE_FORWARDING_KEYWORDS;
+ } else if (id == rb_id_forwarding_block) {
+ forwarding |= PM_OPTIONS_SCOPE_FORWARDING_BLOCK;
+ } else if (id == rb_id_forwarding_all) {
+ forwarding |= PM_OPTIONS_SCOPE_FORWARDING_ALL;
+ } else {
+ rb_raise(rb_eArgError, "invalid forwarding value: %" PRIsVALUE, name);
+ }
+ }
+ } else {
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array or Prism::Scope)", rb_obj_class(scope));
}
// Initialize the scope array.
- size_t locals_count = RARRAY_LEN(scope);
+ size_t locals_count = RARRAY_LEN(locals);
pm_options_scope_t *options_scope = &options->scopes[scope_index];
- pm_options_scope_init(options_scope, locals_count);
+ if (!pm_options_scope_init(options_scope, locals_count)) {
+ rb_raise(rb_eNoMemError, "failed to allocate memory");
+ }
// Iterate over the locals and add them to the scope.
for (size_t local_index = 0; local_index < locals_count; local_index++) {
- VALUE local = rb_ary_entry(scope, local_index);
+ VALUE local = rb_ary_entry(locals, local_index);
// Check that the local is a symbol. If it's not, then raise a
// type error.
@@ -111,6 +168,9 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
const char *name = rb_id2name(SYM2ID(local));
pm_string_constant_init(scope_local, name, strlen(name));
}
+
+ // Now set the forwarding options.
+ pm_options_scope_forwarding_set(options_scope, forwarding);
}
}
@@ -122,28 +182,62 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
pm_options_t *options = (pm_options_t *) argument;
ID key_id = SYM2ID(key);
- if (key_id == rb_option_id_filepath) {
+ if (key_id == rb_id_option_filepath) {
if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
- } else if (key_id == rb_option_id_encoding) {
- if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
- } else if (key_id == rb_option_id_line) {
+ } else if (key_id == rb_id_option_encoding) {
+ if (!NIL_P(value)) {
+ if (value == Qfalse) {
+ pm_options_encoding_locked_set(options, true);
+ } else {
+ pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
+ }
+ }
+ } else if (key_id == rb_id_option_line) {
if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
- } else if (key_id == rb_option_id_frozen_string_literal) {
- if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
- } else if (key_id == rb_option_id_verbose) {
- pm_options_suppress_warnings_set(options, value != Qtrue);
- } else if (key_id == rb_option_id_version) {
+ } else if (key_id == rb_id_option_frozen_string_literal) {
+ if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, RTEST(value));
+ } else if (key_id == rb_id_option_version) {
if (!NIL_P(value)) {
const char *version = check_string(value);
- if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
- rb_raise(rb_eArgError, "invalid version: %"PRIsVALUE, value);
+ if (RSTRING_LEN(value) == 7 && strncmp(version, "current", 7) == 0) {
+ const char *current_version = RSTRING_PTR(rb_const_get(rb_cObject, rb_intern("RUBY_VERSION")));
+ if (!pm_options_version_set(options, current_version, 3)) {
+ rb_exc_raise(rb_exc_new_cstr(rb_cPrismCurrentVersionError, current_version));
+ }
+ } else if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
+ rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value);
}
}
- } else if (key_id == rb_option_id_scopes) {
+ } else if (key_id == rb_id_option_scopes) {
if (!NIL_P(value)) build_options_scopes(options, value);
+ } else if (key_id == rb_id_option_command_line) {
+ if (!NIL_P(value)) {
+ const char *string = check_string(value);
+ uint8_t command_line = 0;
+
+ for (size_t index = 0; index < strlen(string); index++) {
+ switch (string[index]) {
+ case 'a': command_line |= PM_OPTIONS_COMMAND_LINE_A; break;
+ case 'e': command_line |= PM_OPTIONS_COMMAND_LINE_E; break;
+ case 'l': command_line |= PM_OPTIONS_COMMAND_LINE_L; break;
+ case 'n': command_line |= PM_OPTIONS_COMMAND_LINE_N; break;
+ case 'p': command_line |= PM_OPTIONS_COMMAND_LINE_P; break;
+ case 'x': command_line |= PM_OPTIONS_COMMAND_LINE_X; break;
+ default: rb_raise(rb_eArgError, "invalid command line flag: '%c'", string[index]); break;
+ }
+ }
+
+ pm_options_command_line_set(options, command_line);
+ }
+ } else if (key_id == rb_id_option_main_script) {
+ if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value));
+ } else if (key_id == rb_id_option_partial_script) {
+ if (!NIL_P(value)) pm_options_partial_script_set(options, RTEST(value));
+ } else if (key_id == rb_id_option_freeze) {
+ if (!NIL_P(value)) pm_options_freeze_set(options, RTEST(value));
} else {
- rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
+ rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
}
return ST_CONTINUE;
@@ -176,6 +270,7 @@ build_options(VALUE argument) {
static void
extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
options->line = 1; // default
+
if (!NIL_P(keywords)) {
struct build_options_data data = { .options = options, .keywords = keywords };
struct build_options_data *argument = &data;
@@ -215,22 +310,47 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
/**
* Read options for methods that look like (filepath, **options).
*/
-static bool
-file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
+static void
+file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, VALUE *encoded_filepath) {
VALUE filepath;
VALUE keywords;
rb_scan_args(argc, argv, "1:", &filepath, &keywords);
- extract_options(options, filepath, keywords);
+ Check_Type(filepath, T_STRING);
+ *encoded_filepath = rb_str_encode_ospath(filepath);
+ extract_options(options, *encoded_filepath, keywords);
- if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
- pm_options_free(options);
- return false;
- }
+ const char *source = (const char *) pm_string_source(&options->filepath);
+ pm_string_init_result_t result;
+
+ switch (result = pm_string_file_init(input, source)) {
+ case PM_STRING_INIT_SUCCESS:
+ break;
+ case PM_STRING_INIT_ERROR_GENERIC: {
+ pm_options_free(options);
+
+#ifdef _WIN32
+ int e = rb_w32_map_errno(GetLastError());
+#else
+ int e = errno;
+#endif
- return true;
+ rb_syserr_fail(e, source);
+ break;
+ }
+ case PM_STRING_INIT_ERROR_DIRECTORY:
+ pm_options_free(options);
+ rb_syserr_fail(EISDIR, source);
+ break;
+ default:
+ pm_options_free(options);
+ rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source);
+ break;
+ }
}
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
/******************************************************************************/
/* Serializing the AST */
/******************************************************************************/
@@ -272,17 +392,18 @@ dump(int argc, VALUE *argv, VALUE self) {
pm_options_t options = { 0 };
string_options(argc, argv, &input, &options);
-#ifdef PRISM_DEBUG_MODE_BUILD
+#ifdef PRISM_BUILD_DEBUG
size_t length = pm_string_length(&input);
- char* dup = malloc(length);
+ char* dup = xmalloc(length);
memcpy(dup, pm_string_source(&input), length);
pm_string_constant_init(&input, dup, length);
#endif
VALUE value = dump_input(&input, &options);
+ if (options.freeze) rb_obj_freeze(value);
-#ifdef PRISM_DEBUG_MODE_BUILD
- free(dup);
+#ifdef PRISM_BUILD_DEBUG
+ xfree(dup);
#endif
pm_string_free(&input);
@@ -302,7 +423,9 @@ static VALUE
dump_file(int argc, VALUE *argv, VALUE self) {
pm_string_t input;
pm_options_t options = { 0 };
- if (!file_options(argc, argv, &input, &options)) return Qnil;
+
+ VALUE encoded_filepath;
+ file_options(argc, argv, &input, &options, &encoded_filepath);
VALUE value = dump_input(&input, &options);
pm_string_free(&input);
@@ -311,60 +434,96 @@ dump_file(int argc, VALUE *argv, VALUE self) {
return value;
}
+#endif
+
/******************************************************************************/
/* Extracting values for the parse result */
/******************************************************************************/
/**
+ * The same as rb_class_new_instance, but accepts an additional boolean to
+ * indicate whether or not the resulting class instance should be frozen.
+ */
+static inline VALUE
+rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool freeze) {
+ VALUE value = rb_class_new_instance(argc, argv, klass);
+ if (freeze) rb_obj_freeze(value);
+ return value;
+}
+
+/**
+ * Create a new Location instance from the given parser and bounds.
+ */
+static inline VALUE
+parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) {
+ VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) };
+ return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze);
+}
+
+/**
+ * Create a new Location instance from the given parser and location.
+ */
+#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \
+ parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start))
+
+/**
+ * Build a new Comment instance from the given parser and comment.
+ */
+static inline VALUE
+parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) {
+ VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) };
+ VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
+ return rb_class_new_instance_freeze(1, argv, type, freeze);
+}
+
+/**
* Extract the comments out of the parser into an array.
*/
static VALUE
-parser_comments(pm_parser_t *parser, VALUE source) {
- VALUE comments = rb_ary_new();
-
- for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
- VALUE location_argv[] = {
- source,
- LONG2FIX(comment->location.start - parser->start),
- LONG2FIX(comment->location.end - comment->location.start)
- };
-
- VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
- VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
- rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
+parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
+ VALUE comments = rb_ary_new_capa(parser->comment_list.size);
+
+ for (
+ const pm_comment_t *comment = (const pm_comment_t *) parser->comment_list.head;
+ comment != NULL;
+ comment = (const pm_comment_t *) comment->node.next
+ ) {
+ VALUE value = parser_comment(parser, source, freeze, comment);
+ rb_ary_push(comments, value);
}
+ if (freeze) rb_obj_freeze(comments);
return comments;
}
/**
+ * Build a new MagicComment instance from the given parser and magic comment.
+ */
+static inline VALUE
+parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
+ VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length);
+ VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length);
+ VALUE argv[] = { key_loc, value_loc };
+ return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze);
+}
+
+/**
* Extract the magic comments out of the parser into an array.
*/
static VALUE
-parser_magic_comments(pm_parser_t *parser, VALUE source) {
- VALUE magic_comments = rb_ary_new();
-
- for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
- VALUE key_loc_argv[] = {
- source,
- LONG2FIX(magic_comment->key_start - parser->start),
- LONG2FIX(magic_comment->key_length)
- };
-
- VALUE value_loc_argv[] = {
- source,
- LONG2FIX(magic_comment->value_start - parser->start),
- LONG2FIX(magic_comment->value_length)
- };
-
- VALUE magic_comment_argv[] = {
- rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
- rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
- };
-
- rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
+parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
+ VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size);
+
+ for (
+ const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) parser->magic_comment_list.head;
+ magic_comment != NULL;
+ magic_comment = (const pm_magic_comment_t *) magic_comment->node.next
+ ) {
+ VALUE value = parser_magic_comment(parser, source, freeze, magic_comment);
+ rb_ary_push(magic_comments, value);
}
+ if (freeze) rb_obj_freeze(magic_comments);
return magic_comments;
}
@@ -373,17 +532,11 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
* exists.
*/
static VALUE
-parser_data_loc(const pm_parser_t *parser, VALUE source) {
+parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
if (parser->data_loc.end == NULL) {
return Qnil;
} else {
- VALUE argv[] = {
- source,
- LONG2FIX(parser->data_loc.start - parser->start),
- LONG2FIX(parser->data_loc.end - parser->data_loc.start)
- };
-
- return rb_class_new_instance(3, argv, rb_cPrismLocation);
+ return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc);
}
}
@@ -391,25 +544,39 @@ parser_data_loc(const pm_parser_t *parser, VALUE source) {
* Extract the errors out of the parser into an array.
*/
static VALUE
-parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
- VALUE errors = rb_ary_new();
- pm_diagnostic_t *error;
-
- for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
- VALUE location_argv[] = {
- source,
- LONG2FIX(error->location.start - parser->start),
- LONG2FIX(error->location.end - error->location.start)
- };
-
- VALUE error_argv[] = {
- rb_enc_str_new_cstr(error->message, encoding),
- rb_class_new_instance(3, location_argv, rb_cPrismLocation)
- };
+parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
+ VALUE errors = rb_ary_new_capa(parser->error_list.size);
+
+ for (
+ const pm_diagnostic_t *error = (const pm_diagnostic_t *) parser->error_list.head;
+ error != NULL;
+ error = (const pm_diagnostic_t *) error->node.next
+ ) {
+ VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
+ VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding));
+ VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location);
+
+ VALUE level = Qnil;
+ switch (error->level) {
+ case PM_ERROR_LEVEL_SYNTAX:
+ level = ID2SYM(rb_intern("syntax"));
+ break;
+ case PM_ERROR_LEVEL_ARGUMENT:
+ level = ID2SYM(rb_intern("argument"));
+ break;
+ case PM_ERROR_LEVEL_LOAD:
+ level = ID2SYM(rb_intern("load"));
+ break;
+ default:
+ rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
+ }
- rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cPrismParseError));
+ VALUE argv[] = { type, message, location, level };
+ VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, freeze);
+ rb_ary_push(errors, value);
}
+ if (freeze) rb_obj_freeze(errors);
return errors;
}
@@ -417,28 +584,57 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
* Extract the warnings out of the parser into an array.
*/
static VALUE
-parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
- VALUE warnings = rb_ary_new();
- pm_diagnostic_t *warning;
-
- for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
- VALUE location_argv[] = {
- source,
- LONG2FIX(warning->location.start - parser->start),
- LONG2FIX(warning->location.end - warning->location.start)
- };
-
- VALUE warning_argv[] = {
- rb_enc_str_new_cstr(warning->message, encoding),
- rb_class_new_instance(3, location_argv, rb_cPrismLocation)
- };
+parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
+ VALUE warnings = rb_ary_new_capa(parser->warning_list.size);
+
+ for (
+ const pm_diagnostic_t *warning = (const pm_diagnostic_t *) parser->warning_list.head;
+ warning != NULL;
+ warning = (const pm_diagnostic_t *) warning->node.next
+ ) {
+ VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
+ VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding));
+ VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location);
+
+ VALUE level = Qnil;
+ switch (warning->level) {
+ case PM_WARNING_LEVEL_DEFAULT:
+ level = ID2SYM(rb_intern("default"));
+ break;
+ case PM_WARNING_LEVEL_VERBOSE:
+ level = ID2SYM(rb_intern("verbose"));
+ break;
+ default:
+ rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
+ }
- rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cPrismParseWarning));
+ VALUE argv[] = { type, message, location, level };
+ VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, freeze);
+ rb_ary_push(warnings, value);
}
+ if (freeze) rb_obj_freeze(warnings);
return warnings;
}
+/**
+ * Create a new parse result from the given parser, value, encoding, and source.
+ *
+ * The result is an instance of `class` built from seven arguments, in order:
+ * the given value, the comments, the magic comments, the data location, the
+ * errors, the warnings, and the source. The `freeze` flag is forwarded to every
+ * extraction helper and to the instantiation of the result itself.
+ */
+static VALUE
+parse_result_create(VALUE class, const pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source, bool freeze) {
+ VALUE result_argv[] = {
+ value,
+ parser_comments(parser, source, freeze),
+ parser_magic_comments(parser, source, freeze),
+ parser_data_loc(parser, source, freeze),
+ parser_errors(parser, encoding, source, freeze),
+ parser_warnings(parser, encoding, source, freeze),
+ source
+ };
+
+ return rb_class_new_instance_freeze(7, result_argv, class, freeze);
+}
+
/******************************************************************************/
/* Lexing Ruby code */
/******************************************************************************/
@@ -452,6 +648,7 @@ typedef struct {
VALUE source;
VALUE tokens;
rb_encoding *encoding;
+ bool freeze;
} parse_lex_data_t;
/**
@@ -463,9 +660,13 @@ static void
parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
- VALUE yields = rb_ary_new_capa(2);
- rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
- rb_ary_push(yields, INT2FIX(parser->lex_state));
+ VALUE value = pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source, parse_lex_data->freeze);
+ VALUE yields = rb_assoc_new(value, INT2FIX(parser->lex_state));
+
+ if (parse_lex_data->freeze) {
+ rb_obj_freeze(value);
+ rb_obj_freeze(yields);
+ }
rb_ary_push(parse_lex_data->tokens, yields);
}
@@ -485,14 +686,37 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
// one or two tokens, since the encoding can only change at the top of the
// file.
VALUE tokens = parse_lex_data->tokens;
+ VALUE next_tokens = rb_ary_new();
+
for (long index = 0; index < RARRAY_LEN(tokens); index++) {
VALUE yields = rb_ary_entry(tokens, index);
VALUE token = rb_ary_entry(yields, 0);
VALUE value = rb_ivar_get(token, rb_intern("@value"));
- rb_enc_associate(value, parse_lex_data->encoding);
- ENC_CODERANGE_CLEAR(value);
+ VALUE next_value = rb_str_dup(value);
+
+ rb_enc_associate(next_value, parse_lex_data->encoding);
+ if (parse_lex_data->freeze) rb_obj_freeze(next_value);
+
+ VALUE next_token_argv[] = {
+ parse_lex_data->source,
+ rb_ivar_get(token, rb_intern("@type")),
+ next_value,
+ rb_ivar_get(token, rb_intern("@location"))
+ };
+
+ VALUE next_token = rb_class_new_instance(4, next_token_argv, rb_cPrismToken);
+ VALUE next_yields = rb_assoc_new(next_token, rb_ary_entry(yields, 1));
+
+ if (parse_lex_data->freeze) {
+ rb_obj_freeze(next_token);
+ rb_obj_freeze(next_yields);
+ }
+
+ rb_ary_push(next_tokens, next_yields);
}
+
+ rb_ary_replace(parse_lex_data->tokens, next_tokens);
}
/**
@@ -505,14 +729,15 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
- VALUE offsets = rb_ary_new();
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
- VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
+ VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
+ VALUE offsets = rb_ary_new_capa(parser.newline_list.size);
+ VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(parser.start_line), offsets);
parse_lex_data_t parse_lex_data = {
.source = source,
.tokens = rb_ary_new(),
- .encoding = rb_utf8_encoding()
+ .encoding = rb_utf8_encoding(),
+ .freeze = options->freeze,
};
parse_lex_data_t *data = &parse_lex_data;
@@ -524,43 +749,47 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
parser.lex_callback = &lex_callback;
pm_node_t *node = pm_parse(&parser);
- // Here we need to update the source range to have the correct newline
- // offsets. We do it here because we've already created the object and given
- // it over to all of the tokens.
+ // Here we need to update the Source object to have the correct
+ // encoding for the source string and the correct newline offsets.
+ // We do it here because we've already created the Source object and given
+ // it over to all of the tokens, and both of these are only set after pm_parse().
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+ rb_enc_associate(source_string, encoding);
+
for (size_t index = 0; index < parser.newline_list.size; index++) {
- rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
+ rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
+ }
+
+ if (options->freeze) {
+ rb_obj_freeze(source_string);
+ rb_obj_freeze(offsets);
+ rb_obj_freeze(source);
+ rb_obj_freeze(parse_lex_data.tokens);
}
- VALUE value;
+ VALUE result;
if (return_nodes) {
- value = rb_ary_new_capa(2);
- rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
+ VALUE value = rb_ary_new_capa(2);
+ rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, options->freeze));
rb_ary_push(value, parse_lex_data.tokens);
+ if (options->freeze) rb_obj_freeze(value);
+ result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, options->freeze);
} else {
- value = parse_lex_data.tokens;
+ result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze);
}
- VALUE result_argv[] = {
- value,
- parser_comments(&parser, source),
- parser_magic_comments(&parser, source),
- parser_data_loc(&parser, source),
- parser_errors(&parser, parse_lex_data.encoding, source),
- parser_warnings(&parser, parse_lex_data.encoding, source),
- source
- };
-
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
- return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
+
+ return result;
}
/**
* call-seq:
- * Prism::lex(source, **options) -> Array
+ * Prism::lex(source, **options) -> LexResult
*
- * Return an array of Token instances corresponding to the given string. For
- * supported options, see Prism::parse.
+ * Return a LexResult instance that contains an array of Token instances
+ * corresponding to the given string. For supported options, see Prism::parse.
*/
static VALUE
lex(int argc, VALUE *argv, VALUE self) {
@@ -577,16 +806,18 @@ lex(int argc, VALUE *argv, VALUE self) {
/**
* call-seq:
- * Prism::lex_file(filepath, **options) -> Array
+ * Prism::lex_file(filepath, **options) -> LexResult
*
- * Return an array of Token instances corresponding to the given file. For
- * supported options, see Prism::parse.
+ * Return a LexResult instance that contains an array of Token instances
+ * corresponding to the given file. For supported options, see Prism::parse.
*/
static VALUE
lex_file(int argc, VALUE *argv, VALUE self) {
pm_string_t input;
pm_options_t options = { 0 };
- if (!file_options(argc, argv, &input, &options)) return Qnil;
+
+ VALUE encoded_filepath;
+ file_options(argc, argv, &input, &options, &encoded_filepath);
VALUE value = parse_lex_input(&input, &options, false);
pm_string_free(&input);
@@ -610,18 +841,13 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
pm_node_t *node = pm_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
- VALUE source = pm_source_new(&parser, encoding);
- VALUE result_argv[] = {
- pm_ast_new(&parser, node, encoding),
- parser_comments(&parser, source),
- parser_magic_comments(&parser, source),
- parser_data_loc(&parser, source),
- parser_errors(&parser, encoding, source),
- parser_warnings(&parser, encoding, source),
- source
- };
+ VALUE source = pm_source_new(&parser, encoding, options->freeze);
+ VALUE value = pm_ast_new(&parser, node, encoding, source, options->freeze);
+ VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options->freeze);
- VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
+ if (options->freeze) {
+ rb_obj_freeze(source);
+ }
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@@ -636,22 +862,40 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
* Parse the given string and return a ParseResult instance. The options that
* are supported are:
*
- * * `filepath` - the filepath of the source being parsed. This should be a
- * string or nil
+ * * `command_line` - either nil or a string of the various options that were
+ * set on the command line. Valid values are combinations of "a", "l",
+ * "n", "p", and "x".
* * `encoding` - the encoding of the source being parsed. This should be an
- * encoding or nil
- * * `line` - the line number that the parse starts on. This should be an
- * integer or nil. Note that this is 1-indexed.
+ * encoding or nil.
+ * * `filepath` - the filepath of the source being parsed. This should be a
+ * string or nil.
+ * * `freeze` - whether or not to deeply freeze the AST. This should be a
+ * boolean or nil.
* * `frozen_string_literal` - whether or not the frozen string literal pragma
* has been set. This should be a boolean or nil.
- * * `verbose` - the current level of verbosity. This controls whether or not
- * the parser emits warnings. This should be a boolean or nil.
- * * `version` - the version of prism that should be used to parse Ruby code. By
- * default prism assumes you want to parse with the latest vesion of
- * prism (which you can trigger with `nil` or `"latest"`). If you want to
- * parse exactly as CRuby 3.3.0 would, then you can pass `"3.3.0"`.
+ * * `line` - the line number that the parse starts on. This should be an
+ * integer or nil. Note that this is 1-indexed.
+ * * `main_script` - a boolean indicating whether or not the source being parsed
+ * is the main script being run by the interpreter. This controls whether
+ * or not shebangs are parsed for additional flags and whether or not the
+ * parser will attempt to find a matching shebang if the first one does
+ * not contain the word "ruby".
+ * * `partial_script` - when the file being parsed is considered a "partial"
+ * script, jumps will not be marked as errors if they are not contained
+ * within loops/blocks. This is used in the case that you're parsing a
+ * script that you know will be embedded inside another script later, but
+ * you do not have that context yet. For example, when parsing an ERB
+ * template that will be evaluated inside another script.
* * `scopes` - the locals that are in scope surrounding the code that is being
- * parsed. This should be an array of arrays of symbols or nil.
+ * parsed. This should be an array of arrays of symbols or nil. Scopes are
+ * ordered from the outermost scope to the innermost one.
+ * * `version` - the version of Ruby syntax that prism should use to parse Ruby
+ * code. By default prism assumes you want to parse with the latest
+ * version of Ruby syntax (which you can trigger with `nil` or
+ * `"latest"`). You may also restrict the syntax to a specific version of
+ * Ruby, e.g., with `"3.3.0"`. To parse with the same syntax version that
+ * the current Ruby is running use `version: "current"`. Raises
+ * ArgumentError if the version is not currently supported by Prism.
*/
static VALUE
parse(int argc, VALUE *argv, VALUE self) {
@@ -659,17 +903,17 @@ parse(int argc, VALUE *argv, VALUE self) {
pm_options_t options = { 0 };
string_options(argc, argv, &input, &options);
-#ifdef PRISM_DEBUG_MODE_BUILD
+#ifdef PRISM_BUILD_DEBUG
size_t length = pm_string_length(&input);
- char* dup = malloc(length);
+ char* dup = xmalloc(length);
memcpy(dup, pm_string_source(&input), length);
pm_string_constant_init(&input, dup, length);
#endif
VALUE value = parse_input(&input, &options);
-#ifdef PRISM_DEBUG_MODE_BUILD
- free(dup);
+#ifdef PRISM_BUILD_DEBUG
+ xfree(dup);
#endif
pm_string_free(&input);
@@ -688,7 +932,9 @@ static VALUE
parse_file(int argc, VALUE *argv, VALUE self) {
pm_string_t input;
pm_options_t options = { 0 };
- if (!file_options(argc, argv, &input, &options)) return Qnil;
+
+ VALUE encoded_filepath;
+ file_options(argc, argv, &input, &options, &encoded_filepath);
VALUE value = parse_input(&input, &options);
pm_string_free(&input);
@@ -698,6 +944,125 @@ parse_file(int argc, VALUE *argv, VALUE self) {
}
/**
+ * Parse the given input and return nothing.
+ *
+ * A parser is initialized over the bytes of the input with the given options,
+ * the input is parsed, and then the resulting node tree and the parser are
+ * released immediately. The input and the options themselves are not freed
+ * here.
+ */
+static void
+profile_input(pm_string_t *input, const pm_options_t *options) {
+ pm_parser_t parser;
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+
+ pm_node_t *node = pm_parse(&parser);
+ pm_node_destroy(&parser, node);
+ pm_parser_free(&parser);
+}
+
+/**
+ * call-seq:
+ * Prism::profile(source, **options) -> nil
+ *
+ * Parse the given string and return nothing. This method is meant to allow
+ * profilers to avoid the overhead of reifying the AST to Ruby. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+profile(int argc, VALUE *argv, VALUE self) {
+ pm_string_t input;
+ pm_options_t options = { 0 };
+
+ string_options(argc, argv, &input, &options);
+ profile_input(&input, &options);
+ pm_string_free(&input); // release the input and the options once parsing is done
+ pm_options_free(&options);
+
+ return Qnil; // always nil: nothing from the parse is handed back to Ruby
+}
+
+/**
+ * call-seq:
+ * Prism::profile_file(filepath, **options) -> nil
+ *
+ * Parse the given file and return nothing. This method is meant to allow
+ * profilers to avoid the overhead of reifying the AST to Ruby. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+profile_file(int argc, VALUE *argv, VALUE self) {
+ pm_string_t input;
+ pm_options_t options = { 0 };
+
+ VALUE encoded_filepath; // NOTE(review): only written by file_options and never read here; presumably keeps the encoded path alive while options refers to it - confirm
+ file_options(argc, argv, &input, &options, &encoded_filepath);
+
+ profile_input(&input, &options);
+ pm_string_free(&input); // release the input and the options once parsing is done
+ pm_options_free(&options);
+
+ return Qnil;
+}
+
+/**
+ * An end-of-file check that is suitable for use with Ruby IO objects. Calls
+ * `eof?` on the stream and returns 1 when the result is truthy and 0 otherwise.
+ */
+static int
+parse_stream_eof(void *stream) {
+    // rb_funcall returns a VALUE, so Ruby truthiness has to be checked with
+    // RTEST. Testing the VALUE directly would treat nil (a non-zero VALUE) as
+    // true and report end-of-file for a stream whose eof? returns nil.
+    return RTEST(rb_funcall((VALUE) stream, rb_intern("eof?"), 0)) ? 1 : 0;
+}
+
+/**
+ * An implementation of fgets that is suitable for use with Ruby IO objects.
+ *
+ * Calls `gets` on the stream with a limit of `size - 1`, copies the bytes of
+ * the returned line into `string`, and appends a NUL terminator. Returns
+ * `string`, or NULL when `gets` returns nil.
+ *
+ * NOTE(review): the copy trusts that `gets` never returns more than
+ * `size - 1` bytes. Ruby's IO documentation says a line limit may be slightly
+ * exceeded so that a multi-byte character is not split; confirm whether that
+ * can overflow `string` here (for example when the stream is not in binary
+ * mode).
+ */
+static char *
+parse_stream_fgets(char *string, int size, void *stream) {
+ RUBY_ASSERT(size > 0);
+
+ VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
+ if (NIL_P(line)) {
+ return NULL;
+ }
+
+ const char *cstr = RSTRING_PTR(line);
+ long length = RSTRING_LEN(line);
+
+ memcpy(string, cstr, length);
+ string[length] = '\0';
+
+ return string;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_stream(stream, **options) -> ParseResult
+ *
+ * Parse the given object that responds to `gets` and `eof?` and return a
+ * ParseResult instance. The options that are supported are the same as
+ * Prism::parse.
+ */
+static VALUE
+parse_stream(int argc, VALUE *argv, VALUE self) {
+    VALUE stream;
+    VALUE keywords;
+    rb_scan_args(argc, argv, "1:", &stream, &keywords);
+
+    pm_options_t options = { 0 };
+    extract_options(&options, Qnil, keywords);
+
+    pm_parser_t parser;
+    pm_buffer_t buffer;
+
+    // The stream is read through the two callbacks, which call `gets` and
+    // `eof?` on the Ruby object respectively.
+    pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options);
+    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+
+    VALUE source = pm_source_new(&parser, encoding, options.freeze);
+    VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze);
+    VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze);
+
+    // Freeze the source once the result has been built, as parse_input does,
+    // so that a frozen result does not hold on to a mutable source.
+    if (options.freeze) {
+        rb_obj_freeze(source);
+    }
+
+    pm_node_destroy(&parser, node);
+    pm_buffer_free(&buffer);
+    pm_parser_free(&parser);
+
+    // Release whatever extract_options allocated, as the other entry points
+    // (e.g. profile and profile_file) do once they are done with the options.
+    pm_options_free(&options);
+
+    return result;
+}
+
+/**
* Parse the given input and return an array of Comment objects.
*/
static VALUE
@@ -708,8 +1073,8 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
pm_node_t *node = pm_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
- VALUE source = pm_source_new(&parser, encoding);
- VALUE comments = parser_comments(&parser, source);
+ VALUE source = pm_source_new(&parser, encoding, options->freeze);
+ VALUE comments = parser_comments(&parser, source, options->freeze);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@@ -748,7 +1113,9 @@ static VALUE
parse_file_comments(int argc, VALUE *argv, VALUE self) {
pm_string_t input;
pm_options_t options = { 0 };
- if (!file_options(argc, argv, &input, &options)) return Qnil;
+
+ VALUE encoded_filepath;
+ file_options(argc, argv, &input, &options, &encoded_filepath);
VALUE value = parse_input_comments(&input, &options);
pm_string_free(&input);
@@ -759,9 +1126,9 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) {
/**
* call-seq:
- * Prism::parse_lex(source, **options) -> ParseResult
+ * Prism::parse_lex(source, **options) -> ParseLexResult
*
- * Parse the given string and return a ParseResult instance that contains a
+ * Parse the given string and return a ParseLexResult instance that contains a
* 2-element array, where the first element is the AST and the second element is
* an array of Token instances.
*
@@ -786,9 +1153,9 @@ parse_lex(int argc, VALUE *argv, VALUE self) {
/**
* call-seq:
- * Prism::parse_lex_file(filepath, **options) -> ParseResult
+ * Prism::parse_lex_file(filepath, **options) -> ParseLexResult
*
- * Parse the given file and return a ParseResult instance that contains a
+ * Parse the given file and return a ParseLexResult instance that contains a
* 2-element array, where the first element is the AST and the second element is
* an array of Token instances.
*
@@ -802,7 +1169,9 @@ static VALUE
parse_lex_file(int argc, VALUE *argv, VALUE self) {
pm_string_t input;
pm_options_t options = { 0 };
- if (!file_options(argc, argv, &input, &options)) return Qnil;
+
+ VALUE encoded_filepath;
+ file_options(argc, argv, &input, &options, &encoded_filepath);
VALUE value = parse_lex_input(&input, &options, true);
pm_string_free(&input);
@@ -830,7 +1199,7 @@ parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
/**
* call-seq:
- * Prism::parse_success?(source, **options) -> Array
+ * Prism::parse_success?(source, **options) -> bool
*
* Parse the given string and return true if it parses without errors. For
* supported options, see Prism::parse.
@@ -850,7 +1219,19 @@ parse_success_p(int argc, VALUE *argv, VALUE self) {
/**
* call-seq:
- * Prism::parse_file_success?(filepath, **options) -> Array
+ * Prism::parse_failure?(source, **options) -> bool
+ *
+ * Parse the given string and return true if it parses with errors. For
+ * supported options, see Prism::parse.
+ */
+static VALUE
+parse_failure_p(int argc, VALUE *argv, VALUE self) {
+ return RTEST(parse_success_p(argc, argv, self)) ? Qfalse : Qtrue;
+}
+
+/**
+ * call-seq:
+ * Prism::parse_file_success?(filepath, **options) -> bool
*
* Parse the given file and return true if it parses without errors. For
* supported options, see Prism::parse.
@@ -859,7 +1240,9 @@ static VALUE
parse_file_success_p(int argc, VALUE *argv, VALUE self) {
pm_string_t input;
pm_options_t options = { 0 };
- if (!file_options(argc, argv, &input, &options)) return Qnil;
+
+ VALUE encoded_filepath;
+ file_options(argc, argv, &input, &options, &encoded_filepath);
VALUE result = parse_input_success_p(&input, &options);
pm_string_free(&input);
@@ -868,150 +1251,78 @@ parse_file_success_p(int argc, VALUE *argv, VALUE self) {
return result;
}
-/******************************************************************************/
-/* Utility functions exposed to make testing easier */
-/******************************************************************************/
-
/**
* call-seq:
- * Debug::named_captures(source) -> Array
+ * Prism::parse_file_failure?(filepath, **options) -> bool
*
- * Returns an array of strings corresponding to the named capture groups in the
- * given source string. If prism was unable to parse the regular expression,
- * this function returns nil.
+ * Parse the given file and return true if it parses with errors. For
+ * supported options, see Prism::parse.
*/
static VALUE
-named_captures(VALUE self, VALUE source) {
- pm_string_list_t string_list = { 0 };
-
- if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, PM_ENCODING_UTF_8_ENTRY)) {
- pm_string_list_free(&string_list);
- return Qnil;
- }
-
- VALUE names = rb_ary_new();
- for (size_t index = 0; index < string_list.length; index++) {
- const pm_string_t *string = &string_list.strings[index];
- rb_ary_push(names, rb_str_new((const char *) pm_string_source(string), pm_string_length(string)));
- }
-
- pm_string_list_free(&string_list);
- return names;
+parse_file_failure_p(int argc, VALUE *argv, VALUE self) {
+ return RTEST(parse_file_success_p(argc, argv, self)) ? Qfalse : Qtrue;
}
+/******************************************************************************/
+/* String query methods */
+/******************************************************************************/
+
/**
- * call-seq:
- * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
- *
- * Return a hash of information about the given source string's memory usage.
+ * Process the result of a call to a string query method and return an
+ * appropriate value.
*/
static VALUE
-memsize(VALUE self, VALUE string) {
- pm_parser_t parser;
- size_t length = RSTRING_LEN(string);
- pm_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
-
- pm_node_t *node = pm_parse(&parser);
- pm_memsize_t memsize;
- pm_node_memsize(node, &memsize);
-
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
-
- VALUE result = rb_hash_new();
- rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length));
- rb_hash_aset(result, ID2SYM(rb_intern("memsize")), INT2FIX(memsize.memsize));
- rb_hash_aset(result, ID2SYM(rb_intern("node_count")), INT2FIX(memsize.node_count));
- return result;
+string_query(pm_string_query_t result) {
+ switch (result) {
+ case PM_STRING_QUERY_ERROR:
+ rb_raise(rb_eArgError, "Invalid or non ascii-compatible encoding");
+ return Qfalse;
+ case PM_STRING_QUERY_FALSE:
+ return Qfalse;
+ case PM_STRING_QUERY_TRUE:
+ return Qtrue;
+ }
+ return Qfalse;
}
/**
* call-seq:
- * Debug::profile_file(filepath) -> nil
+ * Prism::StringQuery::local?(string) -> bool
*
- * Parse the file, but do nothing with the result. This is used to profile the
- * parser for memory and speed.
+ * Returns true if the string constitutes a valid local variable name. Note that
+ * this means the names that can be set through Binding#local_variable_set, not
+ * necessarily the ones that can be set through a local variable assignment.
*/
static VALUE
-profile_file(VALUE self, VALUE filepath) {
- pm_string_t input;
-
- const char *checked = check_string(filepath);
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
-
- pm_options_t options = { 0 };
- pm_options_filepath_set(&options, checked);
-
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
-
- pm_node_t *node = pm_parse(&parser);
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
- pm_options_free(&options);
- pm_string_free(&input);
-
- return Qnil;
+string_query_local_p(VALUE self, VALUE string) {
+ const uint8_t *source = (const uint8_t *) check_string(string);
+ return string_query(pm_string_query_local(source, RSTRING_LEN(string), rb_enc_get(string)->name));
}
/**
* call-seq:
- * Debug::inspect_node(source) -> inspected
+ * Prism::StringQuery::constant?(string) -> bool
*
- * Inspect the AST that represents the given source using the prism pretty print
- * as opposed to the Ruby implementation.
+ * Returns true if the string constitutes a valid constant name. Note that this
+ * means the names that can be set through Module#const_set, not necessarily the
+ * ones that can be set through a constant assignment.
*/
static VALUE
-inspect_node(VALUE self, VALUE source) {
- pm_string_t input;
- input_load_string(&input, source);
-
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
-
- pm_node_t *node = pm_parse(&parser);
- pm_buffer_t buffer = { 0 };
-
- pm_prettyprint(&buffer, &parser, node);
-
- rb_encoding *encoding = rb_enc_find(parser.encoding->name);
- VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
-
- pm_buffer_free(&buffer);
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
-
- return string;
+string_query_constant_p(VALUE self, VALUE string) {
+ const uint8_t *source = (const uint8_t *) check_string(string);
+ return string_query(pm_string_query_constant(source, RSTRING_LEN(string), rb_enc_get(string)->name));
}
/**
* call-seq:
- * Debug::format_errors(source) -> String
+ * Prism::StringQuery::method_name?(string) -> bool
*
- * Format the errors that are found when parsing the given source string.
+ * Returns true if the string constitutes a valid method name.
*/
static VALUE
-format_errors(VALUE self, VALUE source) {
- pm_string_t input;
- input_load_string(&input, source);
-
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
-
- pm_node_t *node = pm_parse(&parser);
- pm_buffer_t buffer = { 0 };
-
- pm_parser_errors_format(&parser, &buffer, true);
-
- rb_encoding *encoding = rb_enc_find(parser.encoding->name);
- VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
-
- pm_buffer_free(&buffer);
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
- pm_string_free(&input);
-
- return result;
+string_query_method_name_p(VALUE self, VALUE string) {
+ const uint8_t *source = (const uint8_t *) check_string(string);
+ return string_query(pm_string_query_method_name(source, RSTRING_LEN(string), rb_enc_get(string)->name));
}
/******************************************************************************/
@@ -1034,6 +1345,11 @@ Init_prism(void) {
);
}
+#ifdef HAVE_RB_EXT_RACTOR_SAFE
+ // Mark this extension as Ractor-safe.
+ rb_ext_ractor_safe(true);
+#endif
+
// Grab up references to all of the constants that we're going to need to
// reference throughout this extension.
rb_cPrism = rb_define_module("Prism");
@@ -1047,52 +1363,63 @@ Init_prism(void) {
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
- rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
-
- // Intern all of the options that we support so that we don't have to do it
- // every time we parse.
- rb_option_id_filepath = rb_intern_const("filepath");
- rb_option_id_encoding = rb_intern_const("encoding");
- rb_option_id_line = rb_intern_const("line");
- rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
- rb_option_id_verbose = rb_intern_const("verbose");
- rb_option_id_version = rb_intern_const("version");
- rb_option_id_scopes = rb_intern_const("scopes");
+ rb_cPrismResult = rb_define_class_under(rb_cPrism, "Result", rb_cObject);
+ rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cPrismResult);
+ rb_cPrismLexResult = rb_define_class_under(rb_cPrism, "LexResult", rb_cPrismResult);
+ rb_cPrismParseLexResult = rb_define_class_under(rb_cPrism, "ParseLexResult", rb_cPrismResult);
+ rb_cPrismStringQuery = rb_define_class_under(rb_cPrism, "StringQuery", rb_cObject);
+ rb_cPrismScope = rb_define_class_under(rb_cPrism, "Scope", rb_cObject);
+
+ rb_cPrismCurrentVersionError = rb_const_get(rb_cPrism, rb_intern("CurrentVersionError"));
+
+ // Intern all of the IDs eagerly that we support so that we don't have to do
+ // it every time we parse.
+ rb_id_option_command_line = rb_intern_const("command_line");
+ rb_id_option_encoding = rb_intern_const("encoding");
+ rb_id_option_filepath = rb_intern_const("filepath");
+ rb_id_option_freeze = rb_intern_const("freeze");
+ rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal");
+ rb_id_option_line = rb_intern_const("line");
+ rb_id_option_main_script = rb_intern_const("main_script");
+ rb_id_option_partial_script = rb_intern_const("partial_script");
+ rb_id_option_scopes = rb_intern_const("scopes");
+ rb_id_option_version = rb_intern_const("version");
+ rb_id_source_for = rb_intern("for");
+ rb_id_forwarding_positionals = rb_intern("*");
+ rb_id_forwarding_keywords = rb_intern("**");
+ rb_id_forwarding_block = rb_intern("&");
+ rb_id_forwarding_all = rb_intern("...");
/**
* The version of the prism library.
*/
- rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
-
- /**
- * The backend of the parser that prism is using to parse Ruby code. This
- * can be either :CEXT or :FFI. On runtimes that support C extensions, we
- * default to :CEXT. Otherwise we use :FFI.
- */
- rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
+ rb_define_const(rb_cPrism, "VERSION", rb_str_freeze(rb_str_new_cstr(EXPECTED_PRISM_VERSION)));
// First, the functions that have to do with lexing and parsing.
- rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
- rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
+ rb_define_singleton_method(rb_cPrism, "profile", profile, -1);
+ rb_define_singleton_method(rb_cPrism, "profile_file", profile_file, -1);
+ rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1);
+ rb_define_singleton_method(rb_cPrism, "parse_failure?", parse_failure_p, -1);
rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1);
+ rb_define_singleton_method(rb_cPrism, "parse_file_failure?", parse_file_failure_p, -1);
+
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+ rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
+#endif
- // Next, the functions that will be called by the parser to perform various
- // internal tasks. We expose these to make them easier to test.
- VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
- rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
- rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
- rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
- rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
- rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 1);
+ rb_define_singleton_method(rb_cPrismStringQuery, "local?", string_query_local_p, 1);
+ rb_define_singleton_method(rb_cPrismStringQuery, "constant?", string_query_constant_p, 1);
+ rb_define_singleton_method(rb_cPrismStringQuery, "method_name?", string_query_method_name_p, 1);
// Next, initialize the other APIs.
Init_prism_api_node();
diff --git a/prism/extension.h b/prism/extension.h
index a21370cfa4..510faa48e8 100644
--- a/prism/extension.h
+++ b/prism/extension.h
@@ -1,18 +1,19 @@
#ifndef PRISM_EXT_NODE_H
#define PRISM_EXT_NODE_H
-#define EXPECTED_PRISM_VERSION "0.19.0"
+#define EXPECTED_PRISM_VERSION "1.8.0"
#include <ruby.h>
#include <ruby/encoding.h>
#include "prism.h"
-VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
-VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
-VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
+VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze);
+VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze);
+VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze);
+VALUE pm_integer_new(const pm_integer_t *integer);
void Init_prism_api_node(void);
void Init_prism_pack(void);
-PRISM_EXPORTED_FUNCTION void Init_prism(void);
+RUBY_FUNC_EXPORTED void Init_prism(void);
#endif
diff --git a/prism/node.h b/prism/node.h
index 3e15d18552..e8686a327c 100644
--- a/prism/node.h
+++ b/prism/node.h
@@ -8,6 +8,14 @@
#include "prism/defines.h"
#include "prism/parser.h"
+#include "prism/util/pm_buffer.h"
+
+/**
+ * Loop through each node in the node list, writing each node to the given
+ * pm_node_t pointer.
+ */
+#define PM_NODE_LIST_FOREACH(list, index, node) \
+ for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++)
/**
* Append a new node onto the end of the node list.
@@ -18,33 +26,35 @@
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
/**
- * Deallocate a node and all of its children.
+ * Prepend a new node onto the beginning of the node list.
*
- * @param parser The parser that owns the node.
- * @param node The node to deallocate.
+ * @param list The list to prepend to.
+ * @param node The node to prepend.
*/
-PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
+void pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node);
/**
- * This struct stores the information gathered by the pm_node_memsize function.
- * It contains both the memory footprint and additionally metadata about the
- * shape of the tree.
+ * Concatenate the given node list onto the end of the other node list.
+ *
+ * @param list The list to concatenate onto.
+ * @param other The list to concatenate.
*/
-typedef struct {
- /** The total memory footprint of the node and all of its children. */
- size_t memsize;
+void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other);
- /** The number of children the node has. */
- size_t node_count;
-} pm_memsize_t;
+/**
+ * Free the internal memory associated with the given node list.
+ *
+ * @param list The list to free.
+ */
+void pm_node_list_free(pm_node_list_t *list);
/**
- * Calculates the memory footprint of a given node.
+ * Deallocate a node and all of its children.
*
- * @param node The node to calculate the memory footprint of.
- * @param memsize The memory footprint of the node and all of its children.
+ * @param parser The parser that owns the node.
+ * @param node The node to deallocate.
*/
-PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
+PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
/**
* Returns a string representation of the given node type.
@@ -54,4 +64,66 @@ PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *mems
*/
PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
+/**
+ * Visit each of the nodes in this subtree using the given visitor callback. The
+ * callback function will be called for each node in the subtree. If it returns
+ * false, then that node's children will not be visited. If it returns true,
+ * then the children will be visited. The data parameter is treated as an opaque
+ * pointer and is passed to the visitor callback for consumers to use as they
+ * see fit.
+ *
+ * As an example:
+ *
+ * ```c
+ * #include "prism.h"
+ *
+ * bool visit(const pm_node_t *node, void *data) {
+ * size_t *indent = (size_t *) data;
+ * for (size_t i = 0; i < *indent * 2; i++) putc(' ', stdout);
+ * printf("%s\n", pm_node_type_to_str(node->type));
+ *
+ * size_t next_indent = *indent + 1;
+ * size_t *next_data = &next_indent;
+ * pm_visit_child_nodes(node, visit, next_data);
+ *
+ * return false;
+ * }
+ *
+ * int main(void) {
+ * const char *source = "1 + 2; 3 + 4";
+ * size_t size = strlen(source);
+ *
+ * pm_parser_t parser;
+ * pm_options_t options = { 0 };
+ * pm_parser_init(&parser, (const uint8_t *) source, size, &options);
+ *
+ * size_t indent = 0;
+ * pm_node_t *node = pm_parse(&parser);
+ *
+ * size_t *data = &indent;
+ * pm_visit_node(node, visit, data);
+ *
+ * pm_node_destroy(&parser, node);
+ * pm_parser_free(&parser);
+ * return EXIT_SUCCESS;
+ * }
+ * ```
+ *
+ * @param node The root node to start visiting from.
+ * @param visitor The callback to call for each node in the subtree.
+ * @param data An opaque pointer that is passed to the visitor callback.
+ */
+PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+
+/**
+ * Visit the children of the given node with the given callback. This is the
+ * default behavior for walking the tree that is called from pm_visit_node if
+ * the callback returns true.
+ *
+ * @param node The node to visit the children of.
+ * @param visitor The callback to call for each child node.
+ * @param data An opaque pointer that is passed to the visitor callback.
+ */
+PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+
#endif
diff --git a/prism/options.c b/prism/options.c
index 0dcae0d16f..09d2a65a6c 100644
--- a/prism/options.c
+++ b/prism/options.c
@@ -1,6 +1,15 @@
#include "prism/options.h"
/**
+ * Set the shebang callback option on the given options struct.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) {
+ options->shebang_callback = shebang_callback;
+ options->shebang_callback_data = shebang_callback_data;
+}
+
+/**
* Set the filepath option on the given options struct.
*/
PRISM_EXPORTED_FUNCTION void
@@ -17,6 +26,14 @@ pm_options_encoding_set(pm_options_t *options, const char *encoding) {
}
/**
+ * Set the encoding_locked option on the given options struct.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) {
+ options->encoding_locked = encoding_locked;
+}
+
+/**
* Set the line option on the given options struct.
*/
PRISM_EXPORTED_FUNCTION void
@@ -29,15 +46,23 @@ pm_options_line_set(pm_options_t *options, int32_t line) {
*/
PRISM_EXPORTED_FUNCTION void
pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) {
- options->frozen_string_literal = frozen_string_literal;
+ options->frozen_string_literal = frozen_string_literal ? PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED : PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
}
/**
- * Set the suppress warnings option on the given options struct.
+ * Sets the command line option on the given options struct.
*/
PRISM_EXPORTED_FUNCTION void
-pm_options_suppress_warnings_set(pm_options_t *options, bool suppress_warnings) {
- options->suppress_warnings = suppress_warnings;
+pm_options_command_line_set(pm_options_t *options, uint8_t command_line) {
+ options->command_line = command_line;
+}
+
+/**
+ * Checks if the given slice represents a number.
+ */
+static inline bool
+is_number(const char *string, size_t length) {
+ return pm_strspn_decimal_digit((const uint8_t *) string, (ptrdiff_t) length) == length;
}
/**
@@ -47,18 +72,59 @@ pm_options_suppress_warnings_set(pm_options_t *options, bool suppress_warnings)
*/
PRISM_EXPORTED_FUNCTION bool
pm_options_version_set(pm_options_t *options, const char *version, size_t length) {
- if (version == NULL && length == 0) {
+ if (version == NULL) {
options->version = PM_OPTIONS_VERSION_LATEST;
return true;
}
- if (length == 5) {
- if (strncmp(version, "3.3.0", 5) == 0) {
- options->version = PM_OPTIONS_VERSION_CRUBY_3_3_0;
+ if (length == 3) {
+ if (strncmp(version, "3.3", 3) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_3_3;
+ return true;
+ }
+
+ if (strncmp(version, "3.4", 3) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_3_4;
+ return true;
+ }
+
+ if (strncmp(version, "3.5", 3) == 0 || strncmp(version, "4.0", 3) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_4_0;
+ return true;
+ }
+
+ if (strncmp(version, "4.1", 3) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_4_1;
+ return true;
+ }
+
+ return false;
+ }
+
+ if (length >= 4 && is_number(version + 4, length - 4)) {
+ if (strncmp(version, "3.3.", 4) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_3_3;
+ return true;
+ }
+
+ if (strncmp(version, "3.4.", 4) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_3_4;
+ return true;
+ }
+
+ if (strncmp(version, "3.5.", 4) == 0 || strncmp(version, "4.0.", 4) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_4_0;
return true;
}
- if (strncmp(version, "latest", 6) == 0) {
+ if (strncmp(version, "4.1.", 4) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_4_1;
+ return true;
+ }
+ }
+
+ if (length >= 6) {
+ if (strncmp(version, "latest", 7) == 0) { // 7 to compare the \0 as well
options->version = PM_OPTIONS_VERSION_LATEST;
return true;
}
@@ -68,13 +134,45 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
}
/**
- * Allocate and zero out the scopes array on the given options struct.
+ * Set the main script option on the given options struct.
*/
PRISM_EXPORTED_FUNCTION void
+pm_options_main_script_set(pm_options_t *options, bool main_script) {
+ options->main_script = main_script;
+}
+
+/**
+ * Set the partial script option on the given options struct.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_partial_script_set(pm_options_t *options, bool partial_script) {
+ options->partial_script = partial_script;
+}
+
+/**
+ * Set the freeze option on the given options struct.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_freeze_set(pm_options_t *options, bool freeze) {
+ options->freeze = freeze;
+}
+
+// For some reason, GCC analyzer thinks we're leaking allocated scopes and
+// locals here, even though we definitely aren't. This is a false positive.
+// Ideally we wouldn't need to suppress this.
+#if defined(__GNUC__) && (__GNUC__ >= 10)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wanalyzer-malloc-leak"
+#endif
+
+/**
+ * Allocate and zero out the scopes array on the given options struct.
+ */
+PRISM_EXPORTED_FUNCTION bool
pm_options_scopes_init(pm_options_t *options, size_t scopes_count) {
options->scopes_count = scopes_count;
- options->scopes = calloc(scopes_count, sizeof(pm_options_scope_t));
- if (options->scopes == NULL) abort();
+ options->scopes = xcalloc(scopes_count, sizeof(pm_options_scope_t));
+ return options->scopes != NULL;
}
/**
@@ -89,11 +187,12 @@ pm_options_scope_get(const pm_options_t *options, size_t index) {
* Create a new options scope struct. This will hold a set of locals that are in
* scope surrounding the code that is being parsed.
*/
-PRISM_EXPORTED_FUNCTION void
+PRISM_EXPORTED_FUNCTION bool
pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) {
scope->locals_count = locals_count;
- scope->locals = calloc(locals_count, sizeof(pm_string_t));
- if (scope->locals == NULL) abort();
+ scope->locals = xcalloc(locals_count, sizeof(pm_string_t));
+ scope->forwarding = PM_OPTIONS_SCOPE_FORWARDING_NONE;
+ return scope->locals != NULL;
}
/**
@@ -105,6 +204,14 @@ pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) {
}
/**
+ * Set the forwarding option on the given scope struct.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) {
+ scope->forwarding = forwarding;
+}
+
+/**
* Free the internal memory associated with the options.
*/
PRISM_EXPORTED_FUNCTION void
@@ -119,10 +226,10 @@ pm_options_free(pm_options_t *options) {
pm_string_free(&scope->locals[local_index]);
}
- free(scope->locals);
+ xfree(scope->locals);
}
- free(options->scopes);
+ xfree(options->scopes);
}
/**
@@ -188,22 +295,32 @@ pm_options_read(pm_options_t *options, const char *data) {
data += encoding_length;
}
- options->frozen_string_literal = *data++;
- options->suppress_warnings = *data++;
+ options->frozen_string_literal = (int8_t) *data++;
+ options->command_line = (uint8_t) *data++;
options->version = (pm_options_version_t) *data++;
+ options->encoding_locked = ((uint8_t) *data++) > 0;
+ options->main_script = ((uint8_t) *data++) > 0;
+ options->partial_script = ((uint8_t) *data++) > 0;
+ options->freeze = ((uint8_t) *data++) > 0;
uint32_t scopes_count = pm_options_read_u32(data);
data += 4;
if (scopes_count > 0) {
- pm_options_scopes_init(options, scopes_count);
+ if (!pm_options_scopes_init(options, scopes_count)) return;
for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
uint32_t locals_count = pm_options_read_u32(data);
data += 4;
pm_options_scope_t *scope = &options->scopes[scope_index];
- pm_options_scope_init(scope, locals_count);
+ if (!pm_options_scope_init(scope, locals_count)) {
+ pm_options_free(options);
+ return;
+ }
+
+ uint8_t forwarding = (uint8_t) *data++;
+ pm_options_scope_forwarding_set(&options->scopes[scope_index], forwarding);
for (size_t local_index = 0; local_index < locals_count; local_index++) {
uint32_t local_length = pm_options_read_u32(data);
@@ -215,3 +332,7 @@ pm_options_read(pm_options_t *options, const char *data) {
}
}
}
+
+#if defined(__GNUC__) && (__GNUC__ >= 10)
+#pragma GCC diagnostic pop
+#endif
diff --git a/prism/options.h b/prism/options.h
index 130d635b98..c00c7bf755 100644
--- a/prism/options.h
+++ b/prism/options.h
@@ -7,6 +7,7 @@
#define PRISM_OPTIONS_H
#include "prism/defines.h"
+#include "prism/util/pm_char.h"
#include "prism/util/pm_string.h"
#include <stdbool.h>
@@ -14,6 +15,22 @@
#include <stdint.h>
/**
+ * String literals should be made mutable.
+ */
+#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED ((int8_t) -1)
+
+/**
+ * String literals may be frozen or mutable depending on the implementation
+ * default.
+ */
+#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET ((int8_t) 0)
+
+/**
+ * String literals should be made frozen.
+ */
+#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED ((int8_t) 1)
+
+/**
* A scope of locals surrounding the code that is being parsed.
*/
typedef struct pm_options_scope {
@@ -22,31 +39,93 @@ typedef struct pm_options_scope {
/** The names of the locals in the scope. */
pm_string_t *locals;
+
+ /** Flags for the set of forwarding parameters in this scope. */
+ uint8_t forwarding;
} pm_options_scope_t;
+/** The default value for parameters. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_NONE = 0x0;
+
+/** When the scope is forwarding with the * parameter. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_POSITIONALS = 0x1;
+
+/** When the scope is forwarding with the ** parameter. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_KEYWORDS = 0x2;
+
+/** When the scope is forwarding with the & parameter. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_BLOCK = 0x4;
+
+/** When the scope is forwarding with the ... parameter. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_ALL = 0x8;
+
+// Forward declaration needed by the callback typedef.
+struct pm_options;
+
+/**
+ * The callback called when additional switches are found in a shebang comment
+ * that need to be processed by the runtime.
+ *
+ * @param options The options struct that may be updated by this callback.
+ * Certain fields will be checked for changes, specifically encoding,
+ * command_line, and frozen_string_literal.
+ * @param source The source of the shebang comment.
+ * @param length The length of the source.
+ * @param shebang_callback_data Any additional data that should be passed along
+ * to the callback.
+ */
+typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
+
/**
- * The version of prism that we should be parsing with. This is used to allow
- * consumers to specify which behavior they want in case they need to parse
- * exactly as a specific version of CRuby.
+ * The version of Ruby syntax that we should be parsing with. This is used to
+ * allow consumers to specify which behavior they want in case they need to
+ * parse in the same way as a specific version of CRuby would have.
*/
typedef enum {
- /** The current version of prism. */
- PM_OPTIONS_VERSION_LATEST = 0,
+ /** If an explicit version is not provided, the current version of prism will be used. */
+ PM_OPTIONS_VERSION_UNSET = 0,
+
+ /** The vendored version of prism in CRuby 3.3.x. */
+ PM_OPTIONS_VERSION_CRUBY_3_3 = 1,
+
+ /** The vendored version of prism in CRuby 3.4.x. */
+ PM_OPTIONS_VERSION_CRUBY_3_4 = 2,
+
+ /** The vendored version of prism in CRuby 4.0.x. */
+ PM_OPTIONS_VERSION_CRUBY_3_5 = 3,
+
+ /** The vendored version of prism in CRuby 4.0.x. */
+ PM_OPTIONS_VERSION_CRUBY_4_0 = 3,
+
+ /** The vendored version of prism in CRuby 4.1.x. */
+ PM_OPTIONS_VERSION_CRUBY_4_1 = 4,
- /** The vendored version of prism in CRuby 3.3.0. */
- PM_OPTIONS_VERSION_CRUBY_3_3_0 = 1
+ /** The current version of prism. */
+ PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_4_1
} pm_options_version_t;
/**
* The options that can be passed to the parser.
*/
-typedef struct {
+typedef struct pm_options {
+ /**
+ * The callback to call when additional switches are found in a shebang
+ * comment.
+ */
+ pm_options_shebang_callback_t shebang_callback;
+
+ /**
+ * Any additional data that should be passed along to the shebang callback
+ * if one was set.
+ */
+ void *shebang_callback_data;
+
/** The name of the file that is currently being parsed. */
pm_string_t filepath;
/**
* The line within the file that the parse starts on. This value is
- * 0-indexed.
+ * 1-indexed.
*/
int32_t line;
@@ -64,7 +143,8 @@ typedef struct {
/**
* The scopes surrounding the code that is being parsed. For most parses
* this will be NULL, but for evals it will be the locals that are in scope
- * surrounding the eval.
+ * surrounding the eval. Scopes are ordered from the outermost scope to the
+ * innermost one.
*/
pm_options_scope_t *scopes;
@@ -75,22 +155,106 @@ typedef struct {
*/
pm_options_version_t version;
- /** Whether or not the frozen string literal option has been set. */
- bool frozen_string_literal;
+ /** A bitset of the various options that were set on the command line. */
+ uint8_t command_line;
+
+ /**
+ * Whether or not the frozen string literal option has been set.
+ * May be:
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
+ */
+ int8_t frozen_string_literal;
+
+ /**
+ * Whether or not the encoding magic comments should be respected. This is a
+ * niche use-case where you want to parse a file with a specific encoding
+ * but ignore any encoding magic comments at the top of the file.
+ */
+ bool encoding_locked;
+
+ /**
+ * When the file being parsed is the main script, the shebang will be
+ * considered for command-line flags (or for implicit -x). The caller needs
+ * to pass this information to the parser so that it can behave correctly.
+ */
+ bool main_script;
/**
- * Whether or not we should suppress warnings. This is purposefully negated
- * so that the default is to not suppress warnings, which allows us to still
- * create an options struct with zeroed memory.
+ * When the file being parsed is considered a "partial" script, jumps will
+ * not be marked as errors if they are not contained within loops/blocks.
+ * This is used in the case that you're parsing a script that you know will
+ * be embedded inside another script later, but you do not have that context
+ * yet. For example, when parsing an ERB template that will be evaluated
+ * inside another script.
*/
- bool suppress_warnings;
+ bool partial_script;
+
+ /**
+ * Whether or not the parser should freeze the nodes that it creates. This
+ * makes it possible to have a deeply frozen AST that is safe to share
+ * between concurrency primitives.
+ */
+ bool freeze;
} pm_options_t;
/**
+ * A bit representing whether or not the command line -a option was set. -a
+ * splits the input line $_ into $F.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_A = 0x1;
+
+/**
+ * A bit representing whether or not the command line -e option was set. -e
+ * allows the user to specify a script to be executed. This is necessary for
+ * prism to know because certain warnings are not generated when -e is used.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_E = 0x2;
+
+/**
+ * A bit representing whether or not the command line -l option was set. -l
+ * chomps the input line by default.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_L = 0x4;
+
+/**
+ * A bit representing whether or not the command line -n option was set. -n
+ * wraps the script in a while gets loop.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_N = 0x8;
+
+/**
+ * A bit representing whether or not the command line -p option was set. -p
+ * prints the value of $_ at the end of each loop.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
+
+/**
+ * A bit representing whether or not the command line -x option was set. -x
+ * searches the input file for a shebang that matches the current Ruby engine.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
+
+/**
+ * Set the shebang callback option on the given options struct.
+ *
+ * @param options The options struct to set the shebang callback on.
+ * @param shebang_callback The shebang callback to set.
+ * @param shebang_callback_data Any additional data that should be passed along
+ * to the callback.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
+
+/**
* Set the filepath option on the given options struct.
*
* @param options The options struct to set the filepath on.
* @param filepath The filepath to set.
+ *
+ * \public \memberof pm_options
*/
PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath);
@@ -99,6 +263,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons
*
* @param options The options struct to set the line on.
* @param line The line to set.
+ *
+ * \public \memberof pm_options
*/
PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
@@ -107,24 +273,40 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
*
* @param options The options struct to set the encoding on.
* @param encoding The encoding to set.
+ *
+ * \public \memberof pm_options
*/
PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
/**
+ * Set the encoding_locked option on the given options struct.
+ *
+ * @param options The options struct to set the encoding_locked value on.
+ * @param encoding_locked The encoding_locked value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
+
+/**
* Set the frozen string literal option on the given options struct.
*
* @param options The options struct to set the frozen string literal value on.
* @param frozen_string_literal The frozen string literal value to set.
+ *
+ * \public \memberof pm_options
*/
PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal);
/**
- * Set the suppress warnings option on the given options struct.
+ * Sets the command line option on the given options struct.
+ *
+ * @param options The options struct to set the command line option on.
+ * @param command_line The command_line value to set.
*
- * @param options The options struct to set the suppress warnings value on.
- * @param suppress_warnings The suppress warnings value to set.
+ * \public \memberof pm_options
*/
-PRISM_EXPORTED_FUNCTION void pm_options_suppress_warnings_set(pm_options_t *options, bool suppress_warnings);
+PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line);
/**
* Set the version option on the given options struct by parsing the given
@@ -135,16 +317,51 @@ PRISM_EXPORTED_FUNCTION void pm_options_suppress_warnings_set(pm_options_t *opti
* @param version The version to set.
* @param length The length of the version string.
* @return Whether or not the version was parsed successfully.
+ *
+ * \public \memberof pm_options
*/
PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
/**
+ * Set the main script option on the given options struct.
+ *
+ * @param options The options struct to set the main script value on.
+ * @param main_script The main script value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
+
+/**
+ * Set the partial script option on the given options struct.
+ *
+ * @param options The options struct to set the partial script value on.
+ * @param partial_script The partial script value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
+
+/**
+ * Set the freeze option on the given options struct.
+ *
+ * @param options The options struct to set the freeze value on.
+ * @param freeze The freeze value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze);
+
+/**
* Allocate and zero out the scopes array on the given options struct.
*
* @param options The options struct to initialize the scopes array on.
* @param scopes_count The number of scopes to allocate.
+ * @return Whether or not the scopes array was initialized successfully.
+ *
+ * \public \memberof pm_options
*/
-PRISM_EXPORTED_FUNCTION void pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
+PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
/**
* Return a pointer to the scope at the given index within the given options.
@@ -152,6 +369,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_scopes_init(pm_options_t *options, size_
* @param options The options struct to get the scope from.
* @param index The index of the scope to get.
* @return A pointer to the scope at the given index.
+ *
+ * \public \memberof pm_options
*/
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index);
@@ -161,8 +380,11 @@ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm
*
* @param scope The scope struct to initialize.
* @param locals_count The number of locals to allocate.
+ * @return Whether or not the scope was initialized successfully.
+ *
+ * \public \memberof pm_options
*/
-PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
+PRISM_EXPORTED_FUNCTION bool pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
/**
* Return a pointer to the local at the given index within the given scope.
@@ -170,13 +392,27 @@ PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, si
* @param scope The scope struct to get the local from.
* @param index The index of the local to get.
* @return A pointer to the local at the given index.
+ *
+ * \public \memberof pm_options
*/
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index);
/**
+ * Set the forwarding option on the given scope struct.
+ *
+ * @param scope The scope struct to set the forwarding on.
+ * @param forwarding The forwarding value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding);
+
+/**
* Free the internal memory associated with the options.
*
* @param options The options struct whose internal memory should be freed.
+ *
+ * \public \memberof pm_options
*/
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
@@ -198,8 +434,15 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
* | `4` | the length the encoding |
* | ... | the encoding bytes |
* | `1` | frozen string literal |
- * | `1` | suppress warnings |
+ * | `1` | command line flags (bitset of PM_OPTIONS_COMMAND_LINE_*) |
* | `1` | the version |
+ * | `1` | encoding locked |
+ * | `1` | main script |
+ * | `1` | partial script |
+ * | `1` | freeze |
* | `4` | the number of scopes |
* | ... | the scopes |
*
@@ -210,14 +453,15 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
* | `0` | use the latest version of prism |
* | `1` | use the version of prism that is vendored in CRuby 3.3.0 |
*
- * Each scope is layed out as follows:
+ * Each scope is laid out as follows:
*
* | # bytes | field |
* | ------- | -------------------------- |
* | `4` | the number of locals |
+ * | `1` | the forwarding flags |
* | ... | the locals |
*
- * Each local is layed out as follows:
+ * Each local is laid out as follows:
*
* | # bytes | field |
* | ------- | -------------------------- |
@@ -232,8 +476,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
* * The encoding can have a length of 0, in which case we'll use the default
* encoding (UTF-8). If it's not 0, it should correspond to a name of an
* encoding that can be passed to `Encoding.find` in Ruby.
- * * The frozen string literal and suppress warnings fields are booleans, so
- * their values should be either 0 or 1.
+ * * The frozen string literal, encoding locked, main script, and partial script
+ * fields are booleans, so their values should be either 0 or 1.
* * The number of scopes can be 0.
*
* @param options The options struct to deserialize into.
diff --git a/prism/pack.c b/prism/pack.c
index d5bfc4d6fd..1388ca8a3b 100644
--- a/prism/pack.c
+++ b/prism/pack.c
@@ -1,16 +1,43 @@
#include "prism/pack.h"
+// We optionally support parsing String#pack templates. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_PACK define.
+#ifdef PRISM_EXCLUDE_PACK
+
+void pm_pack_parse(void) {}
+
+#else
+
#include <stdbool.h>
#include <errno.h>
static uintmax_t
-strtoumaxc(const char **format);
+strtoumaxc(const char **format) {
+ uintmax_t value = 0;
+ while (**format >= '0' && **format <= '9') {
+ if (value > UINTMAX_MAX / 10) {
+ errno = ERANGE;
+ }
+ value = value * 10 + ((uintmax_t) (**format - '0'));
+ (*format)++;
+ }
+ return value;
+}
PRISM_EXPORTED_FUNCTION pm_pack_result
-pm_pack_parse(pm_pack_variant variant, const char **format, const char *format_end,
- pm_pack_type *type, pm_pack_signed *signed_type, pm_pack_endian *endian, pm_pack_size *size,
- pm_pack_length_type *length_type, uint64_t *length, pm_pack_encoding *encoding) {
-
+pm_pack_parse(
+ pm_pack_variant variant,
+ const char **format,
+ const char *format_end,
+ pm_pack_type *type,
+ pm_pack_signed *signed_type,
+ pm_pack_endian *endian,
+ pm_pack_size *size,
+ pm_pack_length_type *length_type,
+ uint64_t *length,
+ pm_pack_encoding *encoding
+) {
if (*encoding == PM_PACK_ENCODING_START) {
*encoding = PM_PACK_ENCODING_US_ASCII;
}
@@ -479,15 +506,4 @@ pm_size_to_native(pm_pack_size size) {
}
}
-static uintmax_t
-strtoumaxc(const char **format) {
- uintmax_t value = 0;
- while (**format >= '0' && **format <= '9') {
- if (value > UINTMAX_MAX / 10) {
- errno = ERANGE;
- }
- value = value * 10 + ((uintmax_t) (**format - '0'));
- (*format)++;
- }
- return value;
-}
+#endif
diff --git a/prism/pack.h b/prism/pack.h
index e494848389..0b0b4b19cc 100644
--- a/prism/pack.h
+++ b/prism/pack.h
@@ -8,6 +8,15 @@
#include "prism/defines.h"
+// We optionally support parsing String#pack templates. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_PACK define.
+#ifdef PRISM_EXCLUDE_PACK
+
+void pm_pack_parse(void);
+
+#else
+
#include <stdint.h>
#include <stdlib.h>
@@ -150,3 +159,5 @@ pm_pack_parse(
PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
#endif
+
+#endif
diff --git a/prism/parser.h b/prism/parser.h
index c7ebb64b60..95d7aac710 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -6,14 +6,14 @@
#ifndef PRISM_PARSER_H
#define PRISM_PARSER_H
-#include "prism/ast.h"
#include "prism/defines.h"
+#include "prism/ast.h"
#include "prism/encoding.h"
#include "prism/options.h"
+#include "prism/static_literals.h"
#include "prism/util/pm_constant_pool.h"
#include "prism/util/pm_list.h"
#include "prism/util/pm_newline_list.h"
-#include "prism/util/pm_state_stack.h"
#include "prism/util/pm_string.h"
#include <stdbool.h>
@@ -83,6 +83,23 @@ typedef enum {
} pm_heredoc_indent_t;
/**
+ * All of the information that must be stored in order to lex a heredoc.
+ */
+typedef struct {
+ /** A pointer to the start of the heredoc identifier. */
+ const uint8_t *ident_start;
+
+ /** The length of the heredoc identifier. */
+ size_t ident_length;
+
+ /** The type of quote that the heredoc uses. */
+ pm_heredoc_quote_t quote;
+
+ /** The type of indentation that the heredoc uses. */
+ pm_heredoc_indent_t indent;
+} pm_heredoc_lex_mode_t;
+
+/**
* When lexing Ruby source, the lexer has a small amount of state to tell which
* kind of token it is currently lexing. For example, when we find the start of
* a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
@@ -173,7 +190,7 @@ typedef struct pm_lex_mode {
* This is the character set that should be used to delimit the
* tokens within the regular expression.
*/
- uint8_t breakpoints[6];
+ uint8_t breakpoints[7];
} regexp;
struct {
@@ -206,21 +223,14 @@ typedef struct pm_lex_mode {
* This is the character set that should be used to delimit the
* tokens within the string.
*/
- uint8_t breakpoints[6];
+ uint8_t breakpoints[7];
} string;
struct {
- /** A pointer to the start of the heredoc identifier. */
- const uint8_t *ident_start;
-
- /** The length of the heredoc identifier. */
- size_t ident_length;
-
- /** The type of quote that the heredoc uses. */
- pm_heredoc_quote_t quote;
-
- /** The type of indentation that the heredoc uses. */
- pm_heredoc_indent_t indent;
+ /**
+ * All of the data necessary to lex a heredoc.
+ */
+ pm_heredoc_lex_mode_t base;
/**
* This is the pointer to the character where lexing should resume
@@ -233,7 +243,10 @@ typedef struct pm_lex_mode {
* line so that we know how much to dedent each line in the case of
* a tilde heredoc.
*/
- size_t common_whitespace;
+ size_t *common_whitespace;
+
+ /** True if the previous token ended with a line continuation. */
+ bool line_continuation;
} heredoc;
} as;
@@ -259,15 +272,39 @@ typedef struct pm_parser pm_parser_t;
* token that is understood by a parent context but not by the current context.
*/
typedef enum {
+ /** a null context, used for returning a value from a function */
+ PM_CONTEXT_NONE = 0,
+
/** a begin statement */
PM_CONTEXT_BEGIN,
+ /** an ensure statement with an explicit begin */
+ PM_CONTEXT_BEGIN_ENSURE,
+
+ /** a rescue else statement with an explicit begin */
+ PM_CONTEXT_BEGIN_ELSE,
+
+ /** a rescue statement with an explicit begin */
+ PM_CONTEXT_BEGIN_RESCUE,
+
/** expressions in block arguments using braces */
PM_CONTEXT_BLOCK_BRACES,
/** expressions in block arguments using do..end */
PM_CONTEXT_BLOCK_KEYWORDS,
+ /** an ensure statement within a do..end block */
+ PM_CONTEXT_BLOCK_ENSURE,
+
+ /** a rescue else statement within a do..end block */
+ PM_CONTEXT_BLOCK_ELSE,
+
+ /** expressions in block parameters `foo do |...| end` */
+ PM_CONTEXT_BLOCK_PARAMETERS,
+
+ /** a rescue statement within a do..end block */
+ PM_CONTEXT_BLOCK_RESCUE,
+
/** a case when statements */
PM_CONTEXT_CASE_WHEN,
@@ -277,12 +314,33 @@ typedef enum {
/** a class declaration */
PM_CONTEXT_CLASS,
+ /** an ensure statement within a class statement */
+ PM_CONTEXT_CLASS_ENSURE,
+
+ /** a rescue else statement within a class statement */
+ PM_CONTEXT_CLASS_ELSE,
+
+ /** a rescue statement within a class statement */
+ PM_CONTEXT_CLASS_RESCUE,
+
/** a method definition */
PM_CONTEXT_DEF,
+ /** an ensure statement within a method definition */
+ PM_CONTEXT_DEF_ENSURE,
+
+ /** a rescue else statement within a method definition */
+ PM_CONTEXT_DEF_ELSE,
+
+ /** a rescue statement within a method definition */
+ PM_CONTEXT_DEF_RESCUE,
+
/** a method definition's parameters */
PM_CONTEXT_DEF_PARAMS,
+ /** a defined? expression */
+ PM_CONTEXT_DEFINED,
+
/** a method definition's default parameter */
PM_CONTEXT_DEFAULT_PARAMS,
@@ -295,12 +353,6 @@ typedef enum {
/** an interpolated expression */
PM_CONTEXT_EMBEXPR,
- /** an ensure statement */
- PM_CONTEXT_ENSURE,
-
- /** an ensure statement within a method definition */
- PM_CONTEXT_ENSURE_DEF,
-
/** a for loop */
PM_CONTEXT_FOR,
@@ -316,12 +368,36 @@ typedef enum {
/** a lambda expression with do..end */
PM_CONTEXT_LAMBDA_DO_END,
+ /** an ensure statement within a lambda expression */
+ PM_CONTEXT_LAMBDA_ENSURE,
+
+ /** a rescue else statement within a lambda expression */
+ PM_CONTEXT_LAMBDA_ELSE,
+
+ /** a rescue statement within a lambda expression */
+ PM_CONTEXT_LAMBDA_RESCUE,
+
+ /** the predicate clause of a loop statement */
+ PM_CONTEXT_LOOP_PREDICATE,
+
/** the top level context */
PM_CONTEXT_MAIN,
/** a module declaration */
PM_CONTEXT_MODULE,
+ /** an ensure statement within a module statement */
+ PM_CONTEXT_MODULE_ENSURE,
+
+ /** a rescue else statement within a module statement */
+ PM_CONTEXT_MODULE_ELSE,
+
+ /** a rescue statement within a module statement */
+ PM_CONTEXT_MODULE_RESCUE,
+
+ /** a multiple target expression */
+ PM_CONTEXT_MULTI_TARGET,
+
/** a parenthesized expression */
PM_CONTEXT_PARENS,
@@ -334,20 +410,23 @@ typedef enum {
/** a BEGIN block */
PM_CONTEXT_PREEXE,
- /** a rescue else statement */
- PM_CONTEXT_RESCUE_ELSE,
+ /** a modifier rescue clause */
+ PM_CONTEXT_RESCUE_MODIFIER,
- /** a rescue else statement within a method definition */
- PM_CONTEXT_RESCUE_ELSE_DEF,
+ /** a singleton class definition */
+ PM_CONTEXT_SCLASS,
- /** a rescue statement */
- PM_CONTEXT_RESCUE,
+ /** an ensure statement with a singleton class */
+ PM_CONTEXT_SCLASS_ENSURE,
- /** a rescue statement within a method definition */
- PM_CONTEXT_RESCUE_DEF,
+ /** a rescue else statement with a singleton class */
+ PM_CONTEXT_SCLASS_ELSE,
- /** a singleton class definition */
- PM_CONTEXT_SCLASS,
+ /** a rescue statement with a singleton class */
+ PM_CONTEXT_SCLASS_RESCUE,
+
+ /** a ternary expression */
+ PM_CONTEXT_TERNARY,
/** an unless statement */
PM_CONTEXT_UNLESS,
@@ -442,46 +521,133 @@ typedef struct {
void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
} pm_lex_callback_t;
+/** The type of shareable constant value that can be set. */
+typedef uint8_t pm_shareable_constant_value_t;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
+
+/**
+ * This tracks an individual local variable in a certain lexical context, as
+ * well as the number of times it is read.
+ */
+typedef struct {
+ /** The name of the local variable. */
+ pm_constant_id_t name;
+
+ /** The location of the local variable in the source. */
+ pm_location_t location;
+
+ /** The index of the local variable in the local table. */
+ uint32_t index;
+
+ /** The number of times the local variable is read. */
+ uint32_t reads;
+
+ /** The hash of the local variable. */
+ uint32_t hash;
+} pm_local_t;
+
+/**
+ * This is a set of local variables in a certain lexical context (method, class,
+ * module, etc.). We need to track how many times these variables are read in
+ * order to warn if they only get written.
+ */
+typedef struct pm_locals {
+ /** The number of local variables in the set. */
+ uint32_t size;
+
+ /** The capacity of the local variables set. */
+ uint32_t capacity;
+
+ /** The nullable allocated memory for the local variables in the set. */
+ pm_local_t *locals;
+} pm_locals_t;
+
+/** The flags about scope parameters that can be set. */
+typedef uint8_t pm_scope_parameters_t;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
+
/**
* This struct represents a node in a linked list of scopes. Some scopes can see
* into their parent scopes, while others cannot.
*/
typedef struct pm_scope {
- /** The IDs of the locals in the given scope. */
- pm_constant_id_list_t locals;
-
/** A pointer to the previous scope in the linked list. */
struct pm_scope *previous;
+ /** The IDs of the locals in the given scope. */
+ pm_locals_t locals;
+
/**
- * A boolean indicating whether or not this scope can see into its parent.
- * If closed is true, then the scope cannot see into its parent.
+ * This is a list of the implicit parameters contained within the block.
+ * These will be processed after the block is parsed to determine the kind
+ * of parameters node that should be used and to check if any errors need to
+ * be added.
*/
- bool closed;
+ pm_node_list_t implicit_parameters;
/**
- * A boolean indicating whether or not this scope has explicit parameters.
- * This is necessary to determine whether or not numbered parameters are
- * allowed.
+ * This is a bitfield that indicates the parameters that are being used in
+ * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
+ * There are three different kinds of parameters that can be used in a
+ * scope:
+ *
+ * - Ordinary parameters (e.g., def foo(bar); end)
+ * - Numbered parameters (e.g., def foo; _1; end)
+ * - The it parameter (e.g., def foo; it; end)
+ *
+ * If ordinary parameters are being used, then certain parameters can be
+ * forwarded to another method/structure. Those are indicated by four
+ * additional bits in the params field. For example, some combinations of:
+ *
+ * - def foo(*); end
+ * - def foo(**); end
+ * - def foo(&); end
+ * - def foo(...); end
*/
- bool explicit_params;
+ pm_scope_parameters_t parameters;
/**
- * An integer indicating the number of numbered parameters on this scope.
- * This is necessary to determine if child blocks are allowed to use
- * numbered parameters, and to pass information to consumers of the AST
- * about how many numbered parameters exist.
+ * The current state of constant shareability for this scope. This is
+ * changed by magic shareable_constant_value comments.
*/
- uint8_t numbered_parameters;
+ pm_shareable_constant_value_t shareable_constant;
+
+ /**
+ * A boolean indicating whether or not this scope can see into its parent.
+ * If closed is true, then the scope cannot see into its parent.
+ */
+ bool closed;
} pm_scope_t;
/**
+ * A struct that represents a stack of boolean values.
+ */
+typedef uint32_t pm_state_stack_t;
+
+/**
* This struct represents the overall parser. It contains a reference to the
* source file, as well as pointers that indicate where in the source it's
* currently parsing. It also contains the most recent and current token that
* it's considering.
*/
struct pm_parser {
+ /**
+ * The next node identifier that will be assigned. This is a unique
+ * identifier used to track nodes such that the syntax tree can be dropped
+ * but the node can be found through another parse.
+ */
+ uint32_t node_id;
+
/** The current state of the lexer. */
pm_lex_state_t lex_state;
@@ -577,6 +743,15 @@ struct pm_parser {
pm_context_node_t *current_context;
/**
+ * The hash keys for the hash that is currently being parsed. This is not
+ * usually necessary because they can be passed down the various call chains,
+ * but in the event that you're parsing a hash that is being directly
+ * pushed into another hash with **, we need to share the hash keys so that
+ * we can warn for the nested hash as well.
+ */
+ pm_static_literals_t *current_hash_keys;
+
+ /**
* The encoding functions for the current file is attached to the parser as
* it's parsing so that it can change with a magic comment.
*/
@@ -605,7 +780,7 @@ struct pm_parser {
* This is the path of the file being parsed. We use the filepath when
* constructing SourceFileNodes.
*/
- pm_string_t filepath_string;
+ pm_string_t filepath;
/**
* This constant pool keeps all of the constants defined throughout the file
@@ -667,12 +842,48 @@ struct pm_parser {
*/
const pm_encoding_t *explicit_encoding;
- /** The current parameter name id on parsing its default value. */
- pm_constant_id_t current_param_name;
+ /**
+ * When parsing block exits (e.g., break, next, redo), we need to validate
+ * that they are in correct contexts. For the most part we can do this by
+ * looking at our parent contexts. However, modifier while and until
+ * expressions can change that context to make block exits valid. In these
+ * cases, we need to keep track of the block exits and then validate them
+ * after the expression has been parsed.
+ *
+ * We use a pointer here because we don't want to keep a whole list attached
+ * since this will only be used in the context of begin/end expressions.
+ */
+ pm_node_list_t *current_block_exits;
/** The version of prism that we should use to parse. */
pm_options_version_t version;
+ /** The command line flags given from the options. */
+ uint8_t command_line;
+
+ /**
+ * Whether or not we have found a frozen_string_literal magic comment with
+ * a true or false value.
+ * May be:
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
+ */
+ int8_t frozen_string_literal;
+
+ /**
+ * Whether or not we are parsing an eval string. This impacts whether or not
+ * we should evaluate if block exits/yields are valid.
+ */
+ bool parsing_eval;
+
+ /**
+ * Whether or not we are parsing a "partial" script, which is a script that
+ * will be evaluated in the context of another script, so we should not
+ * check jumps (next/break/etc.) for validity.
+ */
+ bool partial_script;
+
/** Whether or not we're at the beginning of a command. */
bool command_start;
@@ -680,6 +891,14 @@ struct pm_parser {
bool recovering;
/**
+ * This is very specialized behavior for when you want to parse in a context
+ * that does not respect encoding comments. Its main use case is translating
+ * into the whitequark/parser AST which re-encodes source files in UTF-8
+ * before they are parsed and ignores encoding comments.
+ */
+ bool encoding_locked;
+
+ /**
* Whether or not the encoding has been changed by a magic comment. We use
* this to provide a fast path for the lexer instead of going through the
* function pointer.
@@ -702,17 +921,16 @@ struct pm_parser {
bool semantic_token_seen;
/**
- * Whether or not we have found a frozen_string_literal magic comment with
- * a true value.
+ * True if the current regular expression being lexed contains only ASCII
+ * characters.
*/
- bool frozen_string_literal;
+ bool current_regular_expression_ascii_only;
/**
- * Whether or not we should emit warnings. This will be set to false if the
- * consumer of the library specified it, usually because they are parsing
- * when $VERBOSE is nil.
+ * By default, Ruby always warns about mismatched indentation. This can be
+ * toggled with a magic comment.
*/
- bool suppress_warnings;
+ bool warn_mismatched_indentation;
};
#endif
diff --git a/prism/prettyprint.h b/prism/prettyprint.h
index 351b92df39..5a52b2b6b8 100644
--- a/prism/prettyprint.h
+++ b/prism/prettyprint.h
@@ -8,6 +8,12 @@
#include "prism/defines.h"
+#ifdef PRISM_EXCLUDE_PRETTYPRINT
+
+void pm_prettyprint(void);
+
+#else
+
#include <stdio.h>
#include "prism/ast.h"
@@ -24,3 +30,5 @@
PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node);
#endif
+
+#endif
diff --git a/prism/prism.c b/prism/prism.c
index bbeb3cffe9..b158e505b2 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -14,164 +14,27 @@ pm_version(void) {
*/
#define PM_TAB_WHITESPACE_SIZE 8
-#ifndef PM_DEBUG_LOGGING
-/**
- * Debugging logging will provide you with additional debugging functions as
- * well as automatically replace some functions with their debugging
- * counterparts.
- */
-#define PM_DEBUG_LOGGING 0
-#endif
-
-#if PM_DEBUG_LOGGING
+// Macros for min/max.
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
/******************************************************************************/
-/* Debugging */
+/* Helpful AST-related macros */
/******************************************************************************/
-PRISM_ATTRIBUTE_UNUSED static const char *
-debug_context(pm_context_t context) {
- switch (context) {
- case PM_CONTEXT_BEGIN: return "BEGIN";
- case PM_CONTEXT_CLASS: return "CLASS";
- case PM_CONTEXT_CASE_IN: return "CASE_IN";
- case PM_CONTEXT_CASE_WHEN: return "CASE_WHEN";
- case PM_CONTEXT_DEF: return "DEF";
- case PM_CONTEXT_DEF_PARAMS: return "DEF_PARAMS";
- case PM_CONTEXT_DEFAULT_PARAMS: return "DEFAULT_PARAMS";
- case PM_CONTEXT_ENSURE: return "ENSURE";
- case PM_CONTEXT_ENSURE_DEF: return "ENSURE_DEF";
- case PM_CONTEXT_ELSE: return "ELSE";
- case PM_CONTEXT_ELSIF: return "ELSIF";
- case PM_CONTEXT_EMBEXPR: return "EMBEXPR";
- case PM_CONTEXT_BLOCK_BRACES: return "BLOCK_BRACES";
- case PM_CONTEXT_BLOCK_KEYWORDS: return "BLOCK_KEYWORDS";
- case PM_CONTEXT_FOR: return "FOR";
- case PM_CONTEXT_FOR_INDEX: return "FOR_INDEX";
- case PM_CONTEXT_IF: return "IF";
- case PM_CONTEXT_MAIN: return "MAIN";
- case PM_CONTEXT_MODULE: return "MODULE";
- case PM_CONTEXT_PARENS: return "PARENS";
- case PM_CONTEXT_POSTEXE: return "POSTEXE";
- case PM_CONTEXT_PREDICATE: return "PREDICATE";
- case PM_CONTEXT_PREEXE: return "PREEXE";
- case PM_CONTEXT_RESCUE: return "RESCUE";
- case PM_CONTEXT_RESCUE_ELSE: return "RESCUE_ELSE";
- case PM_CONTEXT_RESCUE_ELSE_DEF: return "RESCUE_ELSE_DEF";
- case PM_CONTEXT_RESCUE_DEF: return "RESCUE_DEF";
- case PM_CONTEXT_SCLASS: return "SCLASS";
- case PM_CONTEXT_UNLESS: return "UNLESS";
- case PM_CONTEXT_UNTIL: return "UNTIL";
- case PM_CONTEXT_WHILE: return "WHILE";
- case PM_CONTEXT_LAMBDA_BRACES: return "LAMBDA_BRACES";
- case PM_CONTEXT_LAMBDA_DO_END: return "LAMBDA_DO_END";
- }
- return NULL;
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_contexts(pm_parser_t *parser) {
- pm_context_node_t *context_node = parser->current_context;
- fprintf(stderr, "CONTEXTS: ");
-
- if (context_node != NULL) {
- while (context_node != NULL) {
- fprintf(stderr, "%s", debug_context(context_node->context));
- context_node = context_node->prev;
- if (context_node != NULL) {
- fprintf(stderr, " <- ");
- }
- }
- } else {
- fprintf(stderr, "NONE");
- }
-
- fprintf(stderr, "\n");
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_node(const pm_parser_t *parser, const pm_node_t *node) {
- pm_buffer_t output_buffer = { 0 };
- pm_prettyprint(&output_buffer, parser, node);
-
- fprintf(stderr, "%.*s", (int) output_buffer.length, output_buffer.value);
- pm_buffer_free(&output_buffer);
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_lex_mode(pm_parser_t *parser) {
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
- bool first = true;
-
- while (lex_mode != NULL) {
- if (first) {
- first = false;
- } else {
- fprintf(stderr, " <- ");
- }
-
- switch (lex_mode->mode) {
- case PM_LEX_DEFAULT: fprintf(stderr, "DEFAULT"); break;
- case PM_LEX_EMBEXPR: fprintf(stderr, "EMBEXPR"); break;
- case PM_LEX_EMBVAR: fprintf(stderr, "EMBVAR"); break;
- case PM_LEX_HEREDOC: fprintf(stderr, "HEREDOC"); break;
- case PM_LEX_LIST: fprintf(stderr, "LIST (terminator=%c, interpolation=%d)", lex_mode->as.list.terminator, lex_mode->as.list.interpolation); break;
- case PM_LEX_REGEXP: fprintf(stderr, "REGEXP (terminator=%c)", lex_mode->as.regexp.terminator); break;
- case PM_LEX_STRING: fprintf(stderr, "STRING (terminator=%c, interpolation=%d)", lex_mode->as.string.terminator, lex_mode->as.string.interpolation); break;
- }
-
- lex_mode = lex_mode->prev;
- }
-
- fprintf(stderr, "\n");
-}
-
-PRISM_ATTRIBUTE_UNUSED static void
-debug_state(pm_parser_t *parser) {
- fprintf(stderr, "STATE: ");
- bool first = true;
-
- if (parser->lex_state == PM_LEX_STATE_NONE) {
- fprintf(stderr, "NONE\n");
- return;
- }
-
-#define CHECK_STATE(state) \
- if (parser->lex_state & state) { \
- if (!first) fprintf(stderr, "|"); \
- fprintf(stderr, "%s", #state); \
- first = false; \
- }
-
- CHECK_STATE(PM_LEX_STATE_BEG)
- CHECK_STATE(PM_LEX_STATE_END)
- CHECK_STATE(PM_LEX_STATE_ENDARG)
- CHECK_STATE(PM_LEX_STATE_ENDFN)
- CHECK_STATE(PM_LEX_STATE_ARG)
- CHECK_STATE(PM_LEX_STATE_CMDARG)
- CHECK_STATE(PM_LEX_STATE_MID)
- CHECK_STATE(PM_LEX_STATE_FNAME)
- CHECK_STATE(PM_LEX_STATE_DOT)
- CHECK_STATE(PM_LEX_STATE_CLASS)
- CHECK_STATE(PM_LEX_STATE_LABEL)
- CHECK_STATE(PM_LEX_STATE_LABELED)
- CHECK_STATE(PM_LEX_STATE_FITEM)
-
-#undef CHECK_STATE
+#define FL PM_NODE_FLAGS
+#define UP PM_NODE_UPCAST
- fprintf(stderr, "\n");
-}
+#define PM_TOKEN_START(token_) ((token_)->start)
+#define PM_TOKEN_END(token_) ((token_)->end)
-PRISM_ATTRIBUTE_UNUSED static void
-debug_token(pm_token_t * token) {
- fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
-}
+#define PM_NODE_START(node_) (UP(node_)->location.start)
+#define PM_NODE_END(node_) (UP(node_)->location.end)
-#endif
-
-// Macros for min/max.
-#define MIN(a,b) (((a)<(b))?(a):(b))
-#define MAX(a,b) (((a)>(b))?(a):(b))
+#define PM_LOCATION_NULL_VALUE(parser_) ((pm_location_t) { .start = (parser_)->start, .end = (parser_)->start })
+#define PM_LOCATION_TOKEN_VALUE(token_) ((pm_location_t) { .start = PM_TOKEN_START(token_), .end = PM_TOKEN_END(token_) })
+#define PM_LOCATION_NODE_VALUE(node_) ((pm_location_t) { .start = PM_NODE_START(node_), .end = PM_NODE_END(node_) })
+#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? ((pm_location_t) { 0 }) : PM_LOCATION_TOKEN_VALUE(token))
/******************************************************************************/
/* Lex mode manipulations */
@@ -225,7 +88,7 @@ lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
parser->lex_modes.index++;
if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
- parser->lex_modes.current = (pm_lex_mode_t *) malloc(sizeof(pm_lex_mode_t));
+ parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
if (parser->lex_modes.current == NULL) return false;
*parser->lex_modes.current = lex_mode;
@@ -259,10 +122,13 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
// We'll use strpbrk to find the first of these characters.
uint8_t *breakpoints = lex_mode.as.list.breakpoints;
memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
-
- // Now we'll add the terminator to the list of breakpoints.
size_t index = 7;
- breakpoints[index++] = terminator;
+
+ // Now we'll add the terminator to the list of breakpoints. If the
+ // terminator is not already a NULL byte, add it to the list.
+ if (terminator != '\0') {
+ breakpoints[index++] = terminator;
+ }
// If interpolation is allowed, then we're going to check for the #
// character. Otherwise we'll only look for escapes and the terminator.
@@ -307,16 +173,20 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
// regular expression. We'll use strpbrk to find the first of these
// characters.
uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
- memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+ memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+ size_t index = 4;
// First we'll add the terminator.
- breakpoints[3] = terminator;
+ if (terminator != '\0') {
+ breakpoints[index++] = terminator;
+ }
// Next, if there is an incrementor, then we'll check for that as well.
if (incrementor != '\0') {
- breakpoints[4] = incrementor;
+ breakpoints[index++] = incrementor;
}
+ parser->explicit_encoding = NULL;
return lex_mode_push(parser, lex_mode);
}
@@ -339,11 +209,14 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
// These are the places where we need to split up the content of the
// string. We'll use strpbrk to find the first of these characters.
uint8_t *breakpoints = lex_mode.as.string.breakpoints;
- memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
+ memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
+ size_t index = 3;
- // Now add in the terminator.
- size_t index = 2;
- breakpoints[index++] = terminator;
+ // Now add in the terminator. If the terminator is not already a NULL byte,
+ // then we'll add it.
+ if (terminator != '\0') {
+ breakpoints[index++] = terminator;
+ }
// If interpolation is allowed, then we're going to check for the #
// character. Otherwise we'll only look for escapes and the terminator.
@@ -386,7 +259,7 @@ lex_mode_pop(pm_parser_t *parser) {
} else {
parser->lex_modes.index--;
pm_lex_mode_t *prev = parser->lex_modes.current->prev;
- free(parser->lex_modes.current);
+ xfree(parser->lex_modes.current);
parser->lex_modes.current = prev;
}
}
@@ -395,7 +268,7 @@ lex_mode_pop(pm_parser_t *parser) {
* This is the equivalent of IS_lex_state is CRuby.
*/
static inline bool
-lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
+lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
return parser->lex_state & state;
}
@@ -458,8 +331,52 @@ lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
parser->lex_state = state;
}
+#ifndef PM_DEBUG_LOGGING
+/**
+ * Debugging logging will print additional information to stderr whenever the
+ * lexer state changes.
+ */
+#define PM_DEBUG_LOGGING 0
+#endif
+
#if PM_DEBUG_LOGGING
-static inline void
+PRISM_ATTRIBUTE_UNUSED static void
+debug_state(pm_parser_t *parser) {
+ fprintf(stderr, "STATE: ");
+ bool first = true;
+
+ if (parser->lex_state == PM_LEX_STATE_NONE) {
+ fprintf(stderr, "NONE\n");
+ return;
+ }
+
+#define CHECK_STATE(state) \
+ if (parser->lex_state & state) { \
+ if (!first) fprintf(stderr, "|"); \
+ fprintf(stderr, "%s", #state); \
+ first = false; \
+ }
+
+ CHECK_STATE(PM_LEX_STATE_BEG)
+ CHECK_STATE(PM_LEX_STATE_END)
+ CHECK_STATE(PM_LEX_STATE_ENDARG)
+ CHECK_STATE(PM_LEX_STATE_ENDFN)
+ CHECK_STATE(PM_LEX_STATE_ARG)
+ CHECK_STATE(PM_LEX_STATE_CMDARG)
+ CHECK_STATE(PM_LEX_STATE_MID)
+ CHECK_STATE(PM_LEX_STATE_FNAME)
+ CHECK_STATE(PM_LEX_STATE_DOT)
+ CHECK_STATE(PM_LEX_STATE_CLASS)
+ CHECK_STATE(PM_LEX_STATE_LABEL)
+ CHECK_STATE(PM_LEX_STATE_LABELED)
+ CHECK_STATE(PM_LEX_STATE_FITEM)
+
+#undef CHECK_STATE
+
+ fprintf(stderr, "\n");
+}
+
+static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
debug_state(parser);
@@ -473,6 +390,31 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
#endif
/******************************************************************************/
+/* Command-line macro helpers */
+/******************************************************************************/
+
+/** True if the parser has the given command-line option. */
+#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))
+
+/** True if the -a command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)
+
+/** True if the -e command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)
+
+/** True if the -l command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)
+
+/** True if the -n command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)
+
+/** True if the -p command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)
+
+/** True if the -x command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
+
+/******************************************************************************/
/* Diagnostic-related functions */
/******************************************************************************/
@@ -487,7 +429,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
/**
* Append an error to the list of errors on the parser using a format string.
* The parser argument is parenthesized in the expansion so that any expression
* yielding a parser pointer can be passed, matching the other macros here.
*/
-#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
+#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
+    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
/**
* Append an error to the list of errors on the parser using the location of the
@@ -502,7 +445,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
* Append an error to the list of errors on the parser using the given location
* using a format string.
*/
-#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (location)->start, (location)->end, diag_id, __VA_ARGS__)
+#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
+ PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__) // location is a pointer and is expanded twice, so it must be free of side effects
/**
* Append an error to the list of errors on the parser using the location of the
@@ -517,7 +461,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
* Append an error to the list of errors on the parser using the location of the
* given node and a format string.
*/
-#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, node->location.start, node->location.end, diag_id, __VA_ARGS__)
+#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
+ PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__) // node is a pointer and is expanded twice
+
+/**
+ * Append an error to the list of errors on the parser using the location of the
+ * given node and a format string, and add on the content of the node.
+ *
+ * The two format arguments supplied are the byte length of the node's source
+ * range (cast to int) followed by a pointer to its first byte. The node
+ * argument is expanded several times, so it must be free of side effects.
+ */
+#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
+ PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
/**
* Append an error to the list of errors on the parser using the location of the
@@ -541,16 +493,22 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
* Append an error to the list of errors on the parser using the location of the
* given token and a format string.
*/
-#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (token).start, (token).end, diag_id, __VA_ARGS__)
+#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
+ PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__) // token is a value (accessed with '.'), not a pointer; it is expanded twice
+
+/**
+ * Append an error to the list of errors on the parser using the location of the
+ * given token and a format string, and add on the content of the token.
+ *
+ * The two format arguments supplied are the byte length of the token (cast to
+ * int) followed by a pointer to its first byte. The token argument is expanded
+ * several times, so it must be free of side effects.
+ */
+#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
/**
* Append a warning to the list of warnings on the parser. The start and end
* pointers delimit the source range the warning is attached to. The warning
* is appended to the list without consulting any suppression flag.
*/
static inline void
pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
- if (!parser->suppress_warnings) {
- pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
- }
+ pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
}
/**
@@ -562,6 +520,483 @@ pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic
pm_parser_warn(parser, token->start, token->end, diag_id);
}
+/**
+ * Record a warning whose source range is the location of the given node.
+ *
+ * @param parser The parser whose warning list receives the diagnostic.
+ * @param node The node whose location is used for the warning.
+ * @param diag_id The identifier of the warning to record.
+ */
+static inline void
+pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
+    const uint8_t *start = node->location.start;
+    const uint8_t *end = node->location.end;
+
+    pm_parser_warn(parser, start, end, diag_id);
+}
+
+/**
+ * Append a warning to the list of warnings on the parser using a format string.
+ * The parser argument is parenthesized in the expansion so that any expression
+ * yielding a parser pointer can be passed, matching the other macros here.
+ */
+#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
+    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
+
+/**
+ * Append a warning to the list of warnings on the parser using the location of
+ * the given token and a format string. The token is a value (accessed with
+ * '.'), not a pointer, and is expanded twice.
+ */
+#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
+ PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+
+/**
+ * Append a warning to the list of warnings on the parser using the location of
+ * the given token and a format string, and add on the content of the token.
+ *
+ * The two format arguments supplied are the byte length of the token (cast to
+ * int) followed by a pointer to its first byte. The token argument is expanded
+ * several times, so it must be free of side effects.
+ */
+#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+
+/**
+ * Append a warning to the list of warnings on the parser using the location of
+ * the given node and a format string. The node is a pointer and is expanded
+ * twice.
+ */
+#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
+ PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
+
+/**
+ * Report a PM_ERR_HEREDOC_TERM error covering the heredoc identifier. The
+ * location is passed in as a raw pointer and length (rather than a node or a
+ * token) because the caller reads it off of a lex mode.
+ *
+ * @param parser The parser whose error list receives the diagnostic.
+ * @param ident_start The first byte of the heredoc identifier.
+ * @param ident_length The number of bytes in the heredoc identifier.
+ */
+static void
+pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
+    const uint8_t *ident_end = ident_start + ident_length;
+
+    PM_PARSER_ERR_FORMAT(parser, ident_start, ident_end, PM_ERR_HEREDOC_TERM, (int) ident_length, (const char *) ident_start);
+}
+
+/******************************************************************************/
+/* Scope-related functions */
+/******************************************************************************/
+
+/**
+ * Create a fresh scope and make it the parser's current scope. The new scope
+ * links back to the scope that was current on entry and inherits that scope's
+ * shareable constant setting (or PM_SCOPE_SHAREABLE_CONSTANT_NONE when there
+ * was no current scope).
+ *
+ * @param parser The parser whose scope stack is being extended.
+ * @param closed Stored on the new scope as its closed flag.
+ * @return False if the allocation failed, true otherwise.
+ */
+static bool
+pm_parser_scope_push(pm_parser_t *parser, bool closed) {
+    pm_scope_t *parent = parser->current_scope;
+
+    pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
+    if (scope == NULL) {
+        return false;
+    }
+
+    *scope = (pm_scope_t) {
+        .previous = parent,
+        .locals = { 0 },
+        .parameters = PM_SCOPE_PARAMETERS_NONE,
+        .implicit_parameters = { 0 },
+        .shareable_constant = (parent != NULL) ? parent->shareable_constant : PM_SCOPE_SHAREABLE_CONSTANT_NONE,
+        .closed = closed
+    };
+
+    parser->current_scope = scope;
+    return true;
+}
+
+/**
+ * Report whether the current scope counts as top level: either it is the
+ * outermost scope itself, or every scope between it and the outermost scope
+ * is open (not closed).
+ */
+static bool
+pm_parser_scope_toplevel_p(pm_parser_t *parser) {
+    // Every iteration returns or moves to a non-NULL parent, so the walk
+    // always terminates at the outermost scope at the latest.
+    for (pm_scope_t *scope = parser->current_scope; ; scope = scope->previous) {
+        if (scope->previous == NULL) {
+            return true;
+        }
+
+        if (scope->closed) {
+            return false;
+        }
+    }
+}
+
+/**
+ * Walk up the scope chain from the current scope and return the scope that is
+ * the given number of levels above it. A depth of 0 yields the current scope.
+ */
+static pm_scope_t *
+pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
+    pm_scope_t *scope = parser->current_scope;
+
+    for (uint32_t remaining = depth; remaining > 0; remaining--) {
+        assert(scope != NULL);
+        scope = scope->previous;
+    }
+
+    return scope;
+}
+
+/**
+ * The possible outcomes of checking whether a forwarding parameter can be
+ * used from the current scope.
+ */
+typedef enum {
+    /** The nearest closed scope declares the parameter and no open scope inside it does. */
+    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
+
+    /** The nearest closed scope declares the parameter, but so does an open scope inside it. */
+    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
+
+    /** No closed scope declaring the parameter was reached. */
+    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
+} pm_scope_forwarding_param_check_result_t;
+
+/**
+ * Walk outward from the current scope up to and including the nearest closed
+ * scope, checking each scope's parameters against the given mask.
+ *
+ * @param parser The parser whose scope chain is inspected.
+ * @param mask The parameter flag(s) to look for on each scope.
+ * @return PASS, CONFLICT, or FAIL as described on the result enum.
+ */
+static pm_scope_forwarding_param_check_result_t
+pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
+    bool declared_in_open_scope = false;
+
+    for (pm_scope_t *scope = parser->current_scope; scope != NULL; scope = scope->previous) {
+        const bool declared = (scope->parameters & mask) != 0;
+
+        if (scope->closed) {
+            // The search never continues past a closed scope.
+            if (!declared) break;
+
+            return declared_in_open_scope
+                ? PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT
+                : PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
+        }
+
+        if (declared) declared_in_open_scope = true;
+    }
+
+    return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
+}
+
+/**
+ * Check the scope chain for PM_SCOPE_PARAMETERS_FORWARDING_BLOCK and add an
+ * error on the given token if the check does not pass:
+ * PM_ERR_ARGUMENT_CONFLICT_AMPERSAND on a conflict, or
+ * PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND on a failure.
+ */
+static void
+pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
+    const pm_scope_forwarding_param_check_result_t result =
+        pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK);
+
+    if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
+    } else if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
+    }
+}
+
+/**
+ * Check the scope chain for PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS and add
+ * an error on the given token if the check does not pass:
+ * PM_ERR_ARGUMENT_CONFLICT_STAR on a conflict, or
+ * PM_ERR_ARGUMENT_NO_FORWARDING_STAR on a failure.
+ */
+static void
+pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
+    const pm_scope_forwarding_param_check_result_t result =
+        pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS);
+
+    if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
+    } else if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
+    }
+}
+
+/**
+ * Check the scope chain for PM_SCOPE_PARAMETERS_FORWARDING_ALL and add a
+ * PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES error on the given token if the check
+ * fails.
+ */
+static void
+pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
+    const pm_scope_forwarding_param_check_result_t result =
+        pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL);
+
+    // A conflict shouldn't happen, because ... is not allowed in the
+    // declaration of blocks. If we do see one, we assume an error has already
+    // been reported for it, so only an outright failure is reported here.
+    if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+    }
+}
+
+/**
+ * Check the scope chain for PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS and add an
+ * error on the given token if the check does not pass:
+ * PM_ERR_ARGUMENT_CONFLICT_STAR_STAR on a conflict, or
+ * PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR on a failure.
+ */
+static void
+pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
+    const pm_scope_forwarding_param_check_result_t result =
+        pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS);
+
+    if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
+    } else if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
+    }
+}
+
+/**
+ * Read the constant shareability setting stored on the current scope.
+ */
+static inline pm_shareable_constant_value_t
+pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
+    const pm_scope_t *scope = parser->current_scope;
+    return scope->shareable_constant;
+}
+
+/**
+ * Store the given constant shareability setting on the current scope and on
+ * each enclosing scope, stopping after the first closed scope. Writing it to
+ * every one of those scopes keeps reads down to a single field access.
+ */
+static void
+pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
+    pm_scope_t *scope = parser->current_scope;
+
+    for (;;) {
+        scope->shareable_constant = shareable_constant;
+
+        if (scope->closed) break;
+
+        scope = scope->previous;
+        if (scope == NULL) break;
+    }
+}
+
+/******************************************************************************/
+/* Local variable-related functions */
+/******************************************************************************/
+
+/**
+ * The point at which the set of locals switches from being a list to a hash.
+ * A locals set whose capacity is below this value is stored as a list that is
+ * scanned linearly; at or above this value it is probed as a hash table.
+ */
+#define PM_LOCALS_HASH_THRESHOLD 9
+
+/**
+ * Release the backing array of the given locals set. Nothing is freed when
+ * the capacity is zero.
+ */
+static void
+pm_locals_free(pm_locals_t *locals) {
+    if (locals->capacity == 0) return;
+
+    xfree(locals->locals);
+}
+
+/**
+ * Hash a constant id with a small integer mixer: two rounds of xor-shift
+ * followed by a multiply, then a final xor-shift. It is meant to be as cheap
+ * as possible while still mixing the bits properly.
+ */
+static uint32_t
+pm_locals_hash(pm_constant_id_t name) {
+    for (int round = 0; round < 2; round++) {
+        name ^= name >> 16;
+        name *= 0x45d9f3b;
+    }
+
+    name ^= name >> 16;
+    return name;
+}
+
+/**
+ * Resize the locals list to be twice its current size (or to 4 entries if it
+ * is currently empty). If the next capacity is at or above the threshold for
+ * switching to a hash, then the entries are placed into the new storage by
+ * hash instead of being copied over in order.
+ *
+ * The new storage comes from xcalloc; the process is aborted if that
+ * allocation fails. The previous storage is released before returning.
+ */
+static void
+pm_locals_resize(pm_locals_t *locals) {
+ uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
+ assert(next_capacity > locals->capacity); // catches wraparound of the doubled capacity
+
+ pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t)); // NOTE(review): empty slots are detected via PM_CONSTANT_ID_UNSET below, so this presumably relies on that value being 0 - confirm
+ if (next_locals == NULL) abort();
+
+ if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
+ if (locals->size > 0) {
+ memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t)); // still a list: entries keep their positions
+ }
+ } else {
+ // If we just switched from a list to a hash, then we need to fill in
+ // the hash values of all of the locals.
+ bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
+ uint32_t mask = next_capacity - 1;
+
+ for (uint32_t index = 0; index < locals->capacity; index++) {
+ pm_local_t *local = &locals->locals[index];
+
+ if (local->name != PM_CONSTANT_ID_UNSET) {
+ if (hash_needed) local->hash = pm_locals_hash(local->name);
+
+ uint32_t hash = local->hash;
+ while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++; // linear probe to the next free slot
+ next_locals[hash & mask] = *local;
+ }
+ }
+ }
+
+ pm_locals_free(locals);
+ locals->locals = next_locals;
+ locals->capacity = next_capacity;
+}
+
+/**
+ * Add a new local to the set of locals. This will automatically grow (and, in
+ * hash mode, rehash) the locals once the size has reached 3/4 of the capacity.
+ *
+ * @param locals The set of locals to add to.
+ * @param name The name of the local.
+ * @param start The source location that represents the start of the local. This
+ *     is used for the location of the warning in case this local is not read.
+ * @param end The source location that represents the end of the local. This is
+ *     used for the location of the warning in case this local is not read.
+ * @param reads The initial number of reads for this local. Usually this is set
+ *     to 0, but for some locals (like parameters) we want to initialize it with
+ *     1 so that we never warn on unused parameters.
+ * @return True if the local was added, and false if the local already exists.
+ */
+static bool
+pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+    if (locals->size >= (locals->capacity / 4 * 3)) {
+        pm_locals_resize(locals);
+    }
+
+    if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
+        // List mode: entries are packed at the front, so the first unset slot
+        // is where the new local goes.
+        for (uint32_t index = 0; index < locals->capacity; index++) {
+            pm_local_t *local = &locals->locals[index];
+
+            if (local->name == PM_CONSTANT_ID_UNSET) {
+                *local = (pm_local_t) {
+                    .name = name,
+                    .location = { .start = start, .end = end },
+                    .index = locals->size++,
+                    .reads = reads,
+                    .hash = 0
+                };
+                return true;
+            } else if (local->name == name) {
+                return false;
+            }
+        }
+    } else {
+        uint32_t mask = locals->capacity - 1;
+        uint32_t hash = pm_locals_hash(name);
+
+        // Keep the unmasked hash: it is stored on the entry so that a later
+        // resize can place it again using a different mask.
+        uint32_t initial_hash = hash;
+
+        do {
+            pm_local_t *local = &locals->locals[hash & mask];
+
+            if (local->name == PM_CONSTANT_ID_UNSET) {
+                *local = (pm_local_t) {
+                    .name = name,
+                    .location = { .start = start, .end = end },
+                    .index = locals->size++,
+                    .reads = reads,
+                    .hash = initial_hash
+                };
+                return true;
+            } else if (local->name == name) {
+                return false;
+            } else {
+                hash++;
+            }
+
+            // Both sides are masked so that the probe stops after one full
+            // pass over the table, the same way pm_locals_find does.
+        } while ((hash & mask) != (initial_hash & mask));
+    }
+
+    assert(false && "unreachable");
+    return true;
+}
+
+/**
+ * Look up a local by name and return the position of its entry in the backing
+ * array. Returns UINT32_MAX if no entry with that name exists.
+ */
+static uint32_t
+pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
+    if (locals->capacity >= PM_LOCALS_HASH_THRESHOLD) {
+        // Hash mode: probe linearly from the name's home slot until we hit
+        // the name, an empty slot, or come back around to where we started.
+        const uint32_t mask = locals->capacity - 1;
+        uint32_t probe = pm_locals_hash(name);
+        const uint32_t first_slot = probe & mask;
+
+        do {
+            const uint32_t slot = probe & mask;
+            const pm_local_t *entry = &locals->locals[slot];
+
+            if (entry->name == PM_CONSTANT_ID_UNSET) break;
+            if (entry->name == name) return slot;
+
+            probe++;
+        } while ((probe & mask) != first_slot);
+
+        return UINT32_MAX;
+    }
+
+    // List mode: only the first `size` entries are populated.
+    for (uint32_t index = 0; index < locals->size; index++) {
+        if (locals->locals[index].name == name) return index;
+    }
+
+    return UINT32_MAX;
+}
+
+/**
+ * Record that a variable was read in some lexical context by incrementing the
+ * read count of its entry. The local must already exist in the set.
+ */
+static void
+pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
+    const uint32_t slot = pm_locals_find(locals, name);
+    assert(slot != UINT32_MAX);
+
+    assert(locals->locals[slot].reads < UINT32_MAX);
+    locals->locals[slot].reads += 1;
+}
+
+/**
+ * Undo a previously recorded read by decrementing the read count of the
+ * entry. This is used when what looked like a variable read turns out to be a
+ * write because a write operator follows the variable name. The local must
+ * already exist in the set and have at least one recorded read.
+ */
+static void
+pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
+    const uint32_t slot = pm_locals_find(locals, name);
+    assert(slot != UINT32_MAX);
+
+    assert(locals->locals[slot].reads > 0);
+    locals->locals[slot].reads -= 1;
+}
+
+/**
+ * Return the number of reads recorded so far for the given local variable.
+ * The local must already exist in the set.
+ */
+static uint32_t
+pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
+    const uint32_t slot = pm_locals_find(locals, name);
+    assert(slot != UINT32_MAX);
+
+    const pm_local_t *entry = &locals->locals[slot];
+    return entry->reads;
+}
+
+/**
+ * Write out the locals into the given list of constant ids in the correct
+ * order. This is used to set the list of locals on the nodes in the tree once
+ * we're sure no additional locals will be added to the set.
+ *
+ * This function is also responsible for warning when a local variable has been
+ * written but not read in certain contexts. Locals whose name starts with an
+ * underscore are never warned about.
+ *
+ * NOTE(review): parser is marked PRISM_ATTRIBUTE_UNUSED but is used
+ * unconditionally below; the attribute looks stale - confirm.
+ */
+static void
+pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
+ pm_constant_id_list_init_capacity(list, locals->size);
+
+ // If we're still below the threshold for switching to a hash, then we only
+ // need to loop over the locals until we hit the size because the locals are
+ // stored in a list.
+ uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
+
+ // We will only warn for unused variables if we're not at the top level, or
+ // if we're parsing a file outside of eval or -e.
+ bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
+
+ for (uint32_t index = 0; index < capacity; index++) {
+ pm_local_t *local = &locals->locals[index];
+
+ if (local->name != PM_CONSTANT_ID_UNSET) {
+ pm_constant_id_list_insert(list, (size_t) local->index, local->name); // local->index is the insertion order assigned when the local was written
+
+ if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) { // only warn when the start line, or the line computed for this local, is non-negative
+ pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
+
+ if (constant->length >= 1 && *constant->start != '_') {
+ PM_PARSER_WARN_FORMAT(
+ parser,
+ local->location.start,
+ local->location.end,
+ PM_WARN_UNUSED_LOCAL_VARIABLE,
+ (int) constant->length,
+ (const char *) constant->start
+ );
+ }
+ }
+ }
+ }
+}
+
/******************************************************************************/
/* Node-related functions */
/******************************************************************************/
@@ -578,7 +1013,7 @@ pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const
* Retrieve the constant pool id for the given string.
*/
static inline pm_constant_id_t
-pm_parser_constant_id_owned(pm_parser_t *parser, const uint8_t *start, size_t length) {
+pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
}
@@ -612,9 +1047,9 @@ pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *toke
* If the node is a value node, it returns NULL.
* If not, it returns a pointer to the node to be inspected as a "void expression".
*/
-static pm_node_t*
-pm_check_value_expression(pm_node_t *node) {
- pm_node_t* void_node = NULL;
+static pm_node_t *
+pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
+ pm_node_t *void_node = NULL;
while (node != NULL) {
switch (PM_NODE_TYPE(node)) {
@@ -629,12 +1064,56 @@ pm_check_value_expression(pm_node_t *node) {
return NULL;
case PM_BEGIN_NODE: {
pm_begin_node_t *cast = (pm_begin_node_t *) node;
- node = (pm_node_t *) cast->statements;
+
+ if (cast->ensure_clause != NULL) {
+ if (cast->rescue_clause != NULL) {
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
+ if (vn != NULL) return vn;
+ }
+
+ if (cast->statements != NULL) {
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+ if (vn != NULL) return vn;
+ }
+
+ node = UP(cast->ensure_clause);
+ } else if (cast->rescue_clause != NULL) {
+ if (cast->statements == NULL) return NULL;
+
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+ if (vn == NULL) return NULL;
+ if (void_node == NULL) void_node = vn;
+
+ for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
+ pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
+ if (vn == NULL) {
+ void_node = NULL;
+ break;
+ }
+ if (void_node == NULL) {
+ void_node = vn;
+ }
+ }
+
+ if (cast->else_clause != NULL) {
+ node = UP(cast->else_clause);
+ } else {
+ return void_node;
+ }
+ } else {
+ node = UP(cast->statements);
+ }
+
+ break;
+ }
+ case PM_ENSURE_NODE: {
+ pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
+ node = UP(cast->statements);
break;
}
case PM_PARENTHESES_NODE: {
pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
- node = (pm_node_t *) cast->body;
+ node = UP(cast->body);
break;
}
case PM_STATEMENTS_NODE: {
@@ -644,37 +1123,37 @@ pm_check_value_expression(pm_node_t *node) {
}
case PM_IF_NODE: {
pm_if_node_t *cast = (pm_if_node_t *) node;
- if (cast->statements == NULL || cast->consequent == NULL) {
+ if (cast->statements == NULL || cast->subsequent == NULL) {
return NULL;
}
- pm_node_t *vn = pm_check_value_expression((pm_node_t *) cast->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
if (vn == NULL) {
return NULL;
}
if (void_node == NULL) {
void_node = vn;
}
- node = cast->consequent;
+ node = cast->subsequent;
break;
}
case PM_UNLESS_NODE: {
pm_unless_node_t *cast = (pm_unless_node_t *) node;
- if (cast->statements == NULL || cast->consequent == NULL) {
+ if (cast->statements == NULL || cast->else_clause == NULL) {
return NULL;
}
- pm_node_t *vn = pm_check_value_expression((pm_node_t *) cast->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
if (vn == NULL) {
return NULL;
}
if (void_node == NULL) {
void_node = vn;
}
- node = (pm_node_t *) cast->consequent;
+ node = UP(cast->else_clause);
break;
}
case PM_ELSE_NODE: {
pm_else_node_t *cast = (pm_else_node_t *) node;
- node = (pm_node_t *) cast->statements;
+ node = UP(cast->statements);
break;
}
case PM_AND_NODE: {
@@ -687,6 +1166,15 @@ pm_check_value_expression(pm_node_t *node) {
node = cast->left;
break;
}
+ case PM_LOCAL_VARIABLE_WRITE_NODE: {
+ pm_local_variable_write_node_t *cast = (pm_local_variable_write_node_t *) node;
+
+ pm_scope_t *scope = parser->current_scope;
+ for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
+
+ pm_locals_read(&scope->locals, cast->name);
+ return NULL;
+ }
default:
return NULL;
}
@@ -697,13 +1185,253 @@ pm_check_value_expression(pm_node_t *node) {
static inline void
pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
- pm_node_t *void_node = pm_check_value_expression(node);
+ pm_node_t *void_node = pm_check_value_expression(parser, node); // NULL means the node is usable as a value
if (void_node != NULL) {
pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION); // the error is attached to the offending sub-node, not to node itself
}
}
/**
+ * Warn if the given node is a "void" statement.
+ *
+ * The node's type selects a short label (and that label's length) describing
+ * what was used without effect: a variable, a constant, a literal, an
+ * operator, and so on. If a label was selected, a PM_WARN_VOID_STATEMENT
+ * warning is added at the node's location with the length and label as its
+ * format arguments. Node types that are not listed produce no warning.
+ *
+ * For call nodes, a label is only selected when the call has no call operator,
+ * has a message location, and its name is one of the operators listed below.
+ */
+static void
+pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
+ const char *type = NULL;
+ int length = 0;
+
+ switch (PM_NODE_TYPE(node)) {
+ case PM_BACK_REFERENCE_READ_NODE:
+ case PM_CLASS_VARIABLE_READ_NODE:
+ case PM_GLOBAL_VARIABLE_READ_NODE:
+ case PM_INSTANCE_VARIABLE_READ_NODE:
+ case PM_LOCAL_VARIABLE_READ_NODE:
+ case PM_NUMBERED_REFERENCE_READ_NODE:
+ type = "a variable";
+ length = 10;
+ break;
+ case PM_CALL_NODE: {
+ const pm_call_node_t *cast = (const pm_call_node_t *) node;
+ if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
+
+ const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
+ switch (message->length) {
+ case 1:
+ switch (message->start[0]) {
+ case '+':
+ case '-':
+ case '*':
+ case '/':
+ case '%':
+ case '|':
+ case '^':
+ case '&':
+ case '>':
+ case '<':
+ type = (const char *) message->start; // points at the method name itself, hence the explicit length
+ length = 1;
+ break;
+ }
+ break;
+ case 2:
+ switch (message->start[1]) {
+ case '=':
+ if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
+ type = (const char *) message->start;
+ length = 2;
+ }
+ break;
+ case '@':
+ if (message->start[0] == '+' || message->start[0] == '-') {
+ type = (const char *) message->start;
+ length = 2;
+ }
+ break;
+ case '*':
+ if (message->start[0] == '*') {
+ type = (const char *) message->start;
+ length = 2;
+ }
+ break;
+ }
+ break;
+ case 3:
+ if (memcmp(message->start, "<=>", 3) == 0) {
+ type = "<=>";
+ length = 3;
+ }
+ break;
+ }
+
+ break;
+ }
+ case PM_CONSTANT_PATH_NODE:
+ type = "::";
+ length = 2;
+ break;
+ case PM_CONSTANT_READ_NODE:
+ type = "a constant";
+ length = 10;
+ break;
+ case PM_DEFINED_NODE:
+ type = "defined?";
+ length = 8;
+ break;
+ case PM_FALSE_NODE:
+ type = "false";
+ length = 5;
+ break;
+ case PM_FLOAT_NODE:
+ case PM_IMAGINARY_NODE:
+ case PM_INTEGER_NODE:
+ case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
+ case PM_INTERPOLATED_STRING_NODE:
+ case PM_RATIONAL_NODE:
+ case PM_REGULAR_EXPRESSION_NODE:
+ case PM_SOURCE_ENCODING_NODE:
+ case PM_SOURCE_FILE_NODE:
+ case PM_SOURCE_LINE_NODE:
+ case PM_STRING_NODE:
+ case PM_SYMBOL_NODE:
+ type = "a literal";
+ length = 9;
+ break;
+ case PM_NIL_NODE:
+ type = "nil";
+ length = 3;
+ break;
+ case PM_RANGE_NODE: {
+ const pm_range_node_t *cast = (const pm_range_node_t *) node;
+
+ if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
+ type = "...";
+ length = 3;
+ } else {
+ type = "..";
+ length = 2;
+ }
+
+ break;
+ }
+ case PM_SELF_NODE:
+ type = "self";
+ length = 4;
+ break;
+ case PM_TRUE_NODE:
+ type = "true";
+ length = 4;
+ break;
+ default:
+ break;
+ }
+
+ if (type != NULL) {
+ PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
+ }
+}
+
+/**
+ * Run the void statement check over the statements in the given list. When
+ * last_value is true the final statement is skipped, otherwise every
+ * statement is checked. The list must not be empty.
+ */
+static void
+pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
+    assert(node->body.size > 0);
+
+    size_t count = node->body.size;
+    if (last_value) count--;
+
+    for (size_t position = 0; position < count; position++) {
+        pm_void_statement_check(parser, node->body.nodes[position]);
+    }
+}
+
+/**
+ * When we're handling the predicate of a conditional, we need to know our
+ * context in order to determine the kind of warning we should deliver to the
+ * user.
+ *
+ * For literal-in-predicate warnings, the CONDITIONAL context is reported as a
+ * "condition", the FLIP_FLOP context is reported as a "flip-flop", and the NOT
+ * context produces no warning at all.
+ */
+typedef enum {
+ PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,
+ PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,
+ PM_CONDITIONAL_PREDICATE_TYPE_NOT
+} pm_conditional_predicate_type_t;
+
+/**
+ * Warn that the predicate of a conditional is a literal. The wording depends
+ * on the context the predicate appears in; no warning is added for the NOT
+ * context.
+ *
+ * @param parser The parser whose warning list receives the diagnostic.
+ * @param node The literal node; its location is used for the warning.
+ * @param type The context the predicate was found in.
+ * @param diag_id The identifier of the warning to record.
+ * @param prefix The first format argument passed along with the warning.
+ */
+static void
+pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
+    const char *context = NULL;
+
+    if (type == PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL) {
+        context = "condition";
+    } else if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
+        context = "flip-flop";
+    }
+
+    if (context != NULL) {
+        PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, context);
+    }
+}
+
+/**
+ * Decide whether a value written inside the predicate of a conditional counts
+ * as a literal. Scalar literal nodes always count. Arrays and hashes count if
+ * they are flagged as static literals, or if every element (for hashes: every
+ * key and value of elements that are all assoc nodes) counts as a literal.
+ */
+static bool
+pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_ARRAY_NODE: {
+            if (!PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
+                const pm_array_node_t *array = (const pm_array_node_t *) node;
+
+                for (size_t position = 0; position < array->elements.size; position++) {
+                    if (!pm_conditional_predicate_warn_write_literal_p(array->elements.nodes[position])) {
+                        return false;
+                    }
+                }
+            }
+
+            return true;
+        }
+        case PM_HASH_NODE: {
+            if (!PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
+                const pm_hash_node_t *hash = (const pm_hash_node_t *) node;
+
+                for (size_t position = 0; position < hash->elements.size; position++) {
+                    const pm_node_t *element = hash->elements.nodes[position];
+
+                    // Anything other than a plain key/value pair disqualifies
+                    // the hash from being treated as a literal.
+                    if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) {
+                        return false;
+                    }
+
+                    const pm_assoc_node_t *pair = (const pm_assoc_node_t *) element;
+                    if (!pm_conditional_predicate_warn_write_literal_p(pair->key)) {
+                        return false;
+                    }
+                    if (!pm_conditional_predicate_warn_write_literal_p(pair->value)) {
+                        return false;
+                    }
+                }
+            }
+
+            return true;
+        }
+        case PM_FALSE_NODE:
+        case PM_FLOAT_NODE:
+        case PM_IMAGINARY_NODE:
+        case PM_INTEGER_NODE:
+        case PM_NIL_NODE:
+        case PM_RATIONAL_NODE:
+        case PM_REGULAR_EXPRESSION_NODE:
+        case PM_SOURCE_ENCODING_NODE:
+        case PM_SOURCE_FILE_NODE:
+        case PM_SOURCE_LINE_NODE:
+        case PM_STRING_NODE:
+        case PM_SYMBOL_NODE:
+        case PM_TRUE_NODE:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/**
+ * Warn when the value being written inside the predicate of a conditional is
+ * a literal. The diagnostic used depends on the parser's version: versions up
+ * to and including PM_OPTIONS_VERSION_CRUBY_3_3 use the 3.3 variant.
+ */
+static inline void
+pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
+    if (!pm_conditional_predicate_warn_write_literal_p(node)) return;
+
+    pm_diagnostic_id_t diag_id = PM_WARN_EQUAL_IN_CONDITIONAL;
+    if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
+        diag_id = PM_WARN_EQUAL_IN_CONDITIONAL_3_3;
+    }
+
+    pm_parser_warn_node(parser, node, diag_id);
+}
+
+/**
* The predicate of conditional nodes can change what would otherwise be regular
* nodes into specialized nodes. For example:
*
@@ -711,20 +1439,23 @@ pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
* if foo and bar .. baz => RangeNode becomes FlipFlopNode
* if /foo/ => RegularExpressionNode becomes MatchLastLineNode
* if /foo #{bar}/ => InterpolatedRegularExpressionNode becomes InterpolatedMatchLastLineNode
+ *
+ * We also want to warn the user if they're using a static literal as a
+ * predicate or writing a static literal as the predicate.
*/
static void
-pm_conditional_predicate(pm_node_t *node) {
+pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
switch (PM_NODE_TYPE(node)) {
case PM_AND_NODE: {
pm_and_node_t *cast = (pm_and_node_t *) node;
- pm_conditional_predicate(cast->left);
- pm_conditional_predicate(cast->right);
+ pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+ pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
break;
}
case PM_OR_NODE: {
pm_or_node_t *cast = (pm_or_node_t *) node;
- pm_conditional_predicate(cast->left);
- pm_conditional_predicate(cast->right);
+ pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+ pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
break;
}
case PM_PARENTHESES_NODE: {
@@ -732,19 +1463,24 @@ pm_conditional_predicate(pm_node_t *node) {
if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
- if (statements->body.size == 1) pm_conditional_predicate(statements->body.nodes[0]);
+ if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
}
break;
}
+ case PM_BEGIN_NODE: {
+ pm_begin_node_t *cast = (pm_begin_node_t *) node;
+ if (cast->statements != NULL) {
+ pm_statements_node_t *statements = cast->statements;
+ if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
+ }
+ break;
+ }
case PM_RANGE_NODE: {
pm_range_node_t *cast = (pm_range_node_t *) node;
- if (cast->left) {
- pm_conditional_predicate(cast->left);
- }
- if (cast->right) {
- pm_conditional_predicate(cast->right);
- }
+
+ if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
+ if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
// Here we change the range node into a flip flop node. We can do
// this since the nodes are exactly the same except for the type.
@@ -762,6 +1498,11 @@ pm_conditional_predicate(pm_node_t *node) {
// for the type.
assert(sizeof(pm_regular_expression_node_t) == sizeof(pm_match_last_line_node_t));
node->type = PM_MATCH_LAST_LINE_NODE;
+
+ if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
+ pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
+ }
+
break;
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
// Here we change the interpolated regular expression node into an
@@ -769,6 +1510,54 @@ pm_conditional_predicate(pm_node_t *node) {
// are exactly the same except for the type.
assert(sizeof(pm_interpolated_regular_expression_node_t) == sizeof(pm_interpolated_match_last_line_node_t));
node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
+
+ if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
+ pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
+ }
+
+ break;
+ case PM_INTEGER_NODE:
+ if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
+ if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
+ pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
+ }
+ } else {
+ pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
+ }
+ break;
+ case PM_STRING_NODE:
+ case PM_SOURCE_FILE_NODE:
+ case PM_INTERPOLATED_STRING_NODE:
+ pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
+ break;
+ case PM_SYMBOL_NODE:
+ case PM_INTERPOLATED_SYMBOL_NODE:
+ pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
+ break;
+ case PM_SOURCE_LINE_NODE:
+ case PM_SOURCE_ENCODING_NODE:
+ case PM_FLOAT_NODE:
+ case PM_RATIONAL_NODE:
+ case PM_IMAGINARY_NODE:
+ pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
+ break;
+ case PM_CLASS_VARIABLE_WRITE_NODE:
+ pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
+ break;
+ case PM_CONSTANT_WRITE_NODE:
+ pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
+ break;
+ case PM_GLOBAL_VARIABLE_WRITE_NODE:
+ pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
+ break;
+ case PM_INSTANCE_VARIABLE_WRITE_NODE:
+ pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
+ break;
+ case PM_LOCAL_VARIABLE_WRITE_NODE:
+ pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
+ break;
+ case PM_MULTI_WRITE_NODE:
+ pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
break;
default:
break;
@@ -777,9 +1566,9 @@ pm_conditional_predicate(pm_node_t *node) {
/**
* In a lot of places in the tree you can have tokens that are not provided but
- * that do not cause an error. For example, in a method call without
- * parentheses. In these cases we set the token to the "not provided" type. For
- * example:
+ * that do not cause an error. For example, this happens in a method call
+ * without parentheses. In these cases we set the token to the "not provided" type.
+ * For example:
*
* pm_token_t token = not_provided(parser);
*/
@@ -788,13 +1577,6 @@ not_provided(pm_parser_t *parser) {
return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
}
-#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = parser->start, .end = parser->start })
-#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
-#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
-#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
-#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
-#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
-
/**
* This is a special out parameter to the parse_arguments_list function that
* includes opening and closing parentheses in addition to the arguments since
@@ -864,7 +1646,187 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
// If we didn't hit a case before this check, then at this point we need to
// add a syntax error.
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
+}
+
+/******************************************************************************/
+/* Basic character checks */
+/******************************************************************************/
+
+/**
+ * This function is used extremely frequently to lex all of the identifiers in a
+ * source file, so it's important that it be as fast as possible. For this
+ * reason we have the encoding_changed boolean to check if we need to go through
+ * the function pointer or can just directly use the UTF-8 functions.
+ */
+static inline size_t
+char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
+ if (n <= 0) return 0;
+
+ if (parser->encoding_changed) {
+ size_t width;
+
+ if ((width = parser->encoding->alpha_char(b, n)) != 0) {
+ return width;
+ } else if (*b == '_') {
+ return 1;
+ } else if (*b >= 0x80) {
+ return parser->encoding->char_width(b, n);
+ } else {
+ return 0;
+ }
+ } else if (*b < 0x80) {
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
+ } else {
+ return pm_encoding_utf_8_char_width(b, n);
+ }
+}
+
+/**
+ * Similar to char_is_identifier but this function assumes that the encoding
+ * has not been changed.
+ */
+static inline size_t
+char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
+ if (n <= 0) {
+ return 0;
+ } else if (*b < 0x80) {
+ return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
+ } else {
+ return pm_encoding_utf_8_char_width(b, n);
+ }
+}
+
+/**
+ * Like the above, this function is also used extremely frequently to lex all of
+ * the identifiers in a source file once the first character has been found. So
+ * it's important that it be as fast as possible.
+ */
+static inline size_t
+char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
+ if (n <= 0) {
+ return 0;
+ } else if (parser->encoding_changed) {
+ size_t width;
+
+ if ((width = parser->encoding->alnum_char(b, n)) != 0) {
+ return width;
+ } else if (*b == '_') {
+ return 1;
+ } else if (*b >= 0x80) {
+ return parser->encoding->char_width(b, n);
+ } else {
+ return 0;
+ }
+ } else {
+ return char_is_identifier_utf8(b, n);
+ }
+}
+
+// Here we're defining a perfect hash for the characters that are allowed in
+// global names. This is used to quickly check the next character after a $ to
+// see if it's a valid character for a global name.
+#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
+#define PUNCT(idx) ( \
+ BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
+ BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
+ BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
+ BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
+ BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
+ BIT('0', idx))
+
+const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
+
+#undef BIT
+#undef PUNCT
+
+static inline bool
+char_is_global_name_punctuation(const uint8_t b) {
+ const unsigned int i = (const unsigned int) b;
+ if (i <= 0x20 || 0x7e < i) return false;
+
+ return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
+}
+
+static inline bool
+token_is_setter_name(pm_token_t *token) {
+ return (
+ (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
+ ((token->type == PM_TOKEN_IDENTIFIER) &&
+ (token->end - token->start >= 2) &&
+ (token->end[-1] == '='))
+ );
+}
+
+/**
+ * Returns true if the given local variable is a keyword.
+ */
+static bool
+pm_local_is_keyword(const char *source, size_t length) {
+#define KEYWORD(name) if (memcmp(source, name, length) == 0) return true
+
+ switch (length) {
+ case 2:
+ switch (source[0]) {
+ case 'd': KEYWORD("do"); return false;
+ case 'i': KEYWORD("if"); KEYWORD("in"); return false;
+ case 'o': KEYWORD("or"); return false;
+ default: return false;
+ }
+ case 3:
+ switch (source[0]) {
+ case 'a': KEYWORD("and"); return false;
+ case 'd': KEYWORD("def"); return false;
+ case 'e': KEYWORD("end"); return false;
+ case 'f': KEYWORD("for"); return false;
+ case 'n': KEYWORD("nil"); KEYWORD("not"); return false;
+ default: return false;
+ }
+ case 4:
+ switch (source[0]) {
+ case 'c': KEYWORD("case"); return false;
+ case 'e': KEYWORD("else"); return false;
+ case 'n': KEYWORD("next"); return false;
+ case 'r': KEYWORD("redo"); return false;
+ case 's': KEYWORD("self"); return false;
+ case 't': KEYWORD("then"); KEYWORD("true"); return false;
+ case 'w': KEYWORD("when"); return false;
+ default: return false;
+ }
+ case 5:
+ switch (source[0]) {
+ case 'a': KEYWORD("alias"); return false;
+ case 'b': KEYWORD("begin"); KEYWORD("break"); return false;
+ case 'c': KEYWORD("class"); return false;
+ case 'e': KEYWORD("elsif"); return false;
+ case 'f': KEYWORD("false"); return false;
+ case 'r': KEYWORD("retry"); return false;
+ case 's': KEYWORD("super"); return false;
+ case 'u': KEYWORD("undef"); KEYWORD("until"); return false;
+ case 'w': KEYWORD("while"); return false;
+ case 'y': KEYWORD("yield"); return false;
+ default: return false;
+ }
+ case 6:
+ switch (source[0]) {
+ case 'e': KEYWORD("ensure"); return false;
+ case 'm': KEYWORD("module"); return false;
+ case 'r': KEYWORD("rescue"); KEYWORD("return"); return false;
+ case 'u': KEYWORD("unless"); return false;
+ default: return false;
+ }
+ case 8:
+ KEYWORD("__LINE__");
+ KEYWORD("__FILE__");
+ return false;
+ case 12:
+ KEYWORD("__ENCODING__");
+ return false;
+ default:
+ return false;
+ }
+
+#undef KEYWORD
}
/******************************************************************************/
@@ -909,40 +1871,6 @@ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
/******************************************************************************/
/**
- * Parse the decimal number represented by the range of bytes. returns
- * UINT32_MAX if the number fails to parse. This function assumes that the range
- * of bytes has already been validated to contain only decimal digits.
- */
-static uint32_t
-parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- ptrdiff_t diff = end - start;
- assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
- size_t length = (size_t) diff;
-
- char *digits = calloc(length + 1, sizeof(char));
- memcpy(digits, start, length);
- digits[length] = '\0';
-
- char *endptr;
- errno = 0;
- unsigned long value = strtoul(digits, &endptr, 10);
-
- if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
- pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
- value = UINT32_MAX;
- }
-
- free(digits);
-
- if (value > UINT32_MAX) {
- pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
- value = UINT32_MAX;
- }
-
- return (uint32_t) value;
-}
-
-/**
* When you have an encoding flag on a regular expression, it takes precedence
* over all of the previously set encoding flags. So we need to mask off any
* previously set encoding flags before setting the new one.
@@ -953,10 +1881,12 @@ parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *e
* Parse out the options for a regular expression.
*/
static inline pm_node_flags_t
-pm_regular_expression_flags_create(const pm_token_t *closing) {
+pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
pm_node_flags_t flags = 0;
if (closing->type == PM_TOKEN_REGEXP_END) {
+ pm_buffer_t unknown_flags = { 0 };
+
for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
switch (*flag) {
case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
@@ -969,9 +1899,16 @@ pm_regular_expression_flags_create(const pm_token_t *closing) {
case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
- default: assert(false && "unreachable");
+ default: pm_buffer_append_byte(&unknown_flags, *flag);
}
}
+
+ size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
+ if (unknown_flags_length != 0) {
+ const char *word = unknown_flags_length >= 2 ? "options" : "option";
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
+ }
+ pm_buffer_free(&unknown_flags);
}
return flags;
@@ -983,7 +1920,7 @@ static pm_statements_node_t *
pm_statements_node_create(pm_parser_t *parser);
static void
-pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement);
+pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
static size_t
pm_statements_node_body_length(pm_statements_node_t *node);
@@ -993,24 +1930,44 @@ pm_statements_node_body_length(pm_statements_node_t *node);
* implement our own arena allocation.
*/
static inline void *
-pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
- void *memory = calloc(1, size);
+pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
+ void *memory = xcalloc(1, size);
if (memory == NULL) {
- fprintf(stderr, "Failed to allocate %zu bytes\n", size);
+ fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
abort();
}
return memory;
}
-#define PM_ALLOC_NODE(parser, type) (type *) pm_alloc_node(parser, sizeof(type))
+#define PM_NODE_ALLOC(parser_, type_) (type_ *) pm_node_alloc(parser_, sizeof(type_))
+#define PM_NODE_INIT(parser_, type_, flags_, start_, end_) (pm_node_t) { \
+ .type = (type_), \
+ .flags = (flags_), \
+ .node_id = ++(parser_)->node_id, \
+ .location = { .start = (start_), .end = (end_) } \
+}
+
+#define PM_NODE_INIT_UNSET(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, NULL, NULL)
+#define PM_NODE_INIT_BASE(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, (parser_)->start, (parser_)->start)
+#define PM_NODE_INIT_TOKEN(parser_, type_, flags_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_TOKEN_END(token_))
+#define PM_NODE_INIT_NODE(parser_, type_, flags_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_NODE_END(node_))
+
+#define PM_NODE_INIT_TOKENS(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(left_), PM_TOKEN_END(right_))
+#define PM_NODE_INIT_NODES(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(left_), PM_NODE_END(right_))
+#define PM_NODE_INIT_TOKEN_NODE(parser_, type_, flags_, token_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_NODE_END(node_))
+#define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_))
/**
* Allocate a new MissingNode node.
*/
static pm_missing_node_t *
pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- pm_missing_node_t *node = PM_ALLOC_NODE(parser, pm_missing_node_t);
- *node = (pm_missing_node_t) {{ .type = PM_MISSING_NODE, .location = { .start = start, .end = end } }};
+ pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
+
+ *node = (pm_missing_node_t) {
+ .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end)
+ };
+
return node;
}
@@ -1020,16 +1977,10 @@ pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t
static pm_alias_global_variable_node_t *
pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
- pm_alias_global_variable_node_t *node = PM_ALLOC_NODE(parser, pm_alias_global_variable_node_t);
+ pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
*node = (pm_alias_global_variable_node_t) {
- {
- .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
- .location = {
- .start = keyword->start,
- .end = old_name->location.end
- },
- },
+ .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, keyword, old_name),
.new_name = new_name,
.old_name = old_name,
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
@@ -1044,16 +1995,10 @@ pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyw
static pm_alias_method_node_t *
pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
- pm_alias_method_node_t *node = PM_ALLOC_NODE(parser, pm_alias_method_node_t);
+ pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
*node = (pm_alias_method_node_t) {
- {
- .type = PM_ALIAS_METHOD_NODE,
- .location = {
- .start = keyword->start,
- .end = old_name->location.end
- },
- },
+ .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_METHOD_NODE, 0, keyword, old_name),
.new_name = new_name,
.old_name = old_name,
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
@@ -1067,16 +2012,10 @@ pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_n
*/
static pm_alternation_pattern_node_t *
pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
- pm_alternation_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_alternation_pattern_node_t);
+ pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
*node = (pm_alternation_pattern_node_t) {
- {
- .type = PM_ALTERNATION_PATTERN_NODE,
- .location = {
- .start = left->location.start,
- .end = right->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_ALTERNATION_PATTERN_NODE, 0, left, right),
.left = left,
.right = right,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -1092,16 +2031,10 @@ static pm_and_node_t *
pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
pm_assert_value_expression(parser, left);
- pm_and_node_t *node = PM_ALLOC_NODE(parser, pm_and_node_t);
+ pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
*node = (pm_and_node_t) {
- {
- .type = PM_AND_NODE,
- .location = {
- .start = left->location.start,
- .end = right->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_AND_NODE, 0, left, right),
.left = left,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.right = right
@@ -1115,13 +2048,10 @@ pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *opera
*/
static pm_arguments_node_t *
pm_arguments_node_create(pm_parser_t *parser) {
- pm_arguments_node_t *node = PM_ALLOC_NODE(parser, pm_arguments_node_t);
+ pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
*node = (pm_arguments_node_t) {
- {
- .type = PM_ARGUMENTS_NODE,
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
+ .base = PM_NODE_INIT_BASE(parser, PM_ARGUMENTS_NODE, 0),
.arguments = { 0 }
};
@@ -1145,8 +2075,19 @@ pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argumen
node->base.location.start = argument->location.start;
}
- node->base.location.end = argument->location.end;
+ if (node->base.location.end < argument->location.end) {
+ node->base.location.end = argument->location.end;
+ }
+
pm_node_list_append(&node->arguments, argument);
+
+ if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
+ if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
+ pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
+ } else {
+ pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
+ }
+ }
}
/**
@@ -1154,14 +2095,10 @@ pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argumen
*/
static pm_array_node_t *
pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
- pm_array_node_t *node = PM_ALLOC_NODE(parser, pm_array_node_t);
+ pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
*node = (pm_array_node_t) {
- {
- .type = PM_ARRAY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(opening)
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.elements = { 0 }
@@ -1171,14 +2108,6 @@ pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
}
/**
- * Return the size of the given array node.
- */
-static inline size_t
-pm_array_node_size(pm_array_node_t *node) {
- return node->elements.size;
-}
-
-/**
* Append an argument to an array node.
*/
static inline void
@@ -1193,11 +2122,11 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
// If the element is not a static literal, then the array is not a static
// literal. Turn that flag off.
if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
- pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
}
if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
- pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
+ pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
}
}
@@ -1217,30 +2146,24 @@ pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
- pm_array_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_array_pattern_node_t);
+ pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
*node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .location = {
- .start = nodes->nodes[0]->location.start,
- .end = nodes->nodes[nodes->size - 1]->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_ARRAY_PATTERN_NODE, 0, nodes->nodes[0], nodes->nodes[nodes->size - 1]),
.constant = NULL,
.rest = NULL,
.requireds = { 0 },
.posts = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .opening_loc = { 0 },
+ .closing_loc = { 0 }
};
// For now we're going to just copy over each pointer manually. This could be
// much more efficient, as we could instead resize the node list.
bool found_rest = false;
- for (size_t index = 0; index < nodes->size; index++) {
- pm_node_t *child = nodes->nodes[index];
+ pm_node_t *child;
+ PM_NODE_LIST_FOREACH(nodes, index, child) {
if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
node->rest = child;
found_rest = true;
@@ -1259,19 +2182,16 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
- pm_array_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_array_pattern_node_t);
+ pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
*node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .location = rest->location,
- },
+ .base = PM_NODE_INIT_NODE(parser, PM_ARRAY_PATTERN_NODE, 0, rest),
.constant = NULL,
.rest = rest,
.requireds = { 0 },
.posts = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .opening_loc = { 0 },
+ .closing_loc = { 0 }
};
return node;
@@ -1283,16 +2203,10 @@ pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
- pm_array_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_array_pattern_node_t);
+ pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
*node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .location = {
- .start = constant->location.start,
- .end = closing->end
- },
- },
+ .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_ARRAY_PATTERN_NODE, 0, constant, closing),
.constant = constant,
.rest = NULL,
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1310,16 +2224,10 @@ pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant,
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_array_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_array_pattern_node_t);
+ pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
*node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_ARRAY_PATTERN_NODE, 0, opening, closing),
.constant = NULL,
.rest = NULL,
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1341,10 +2249,10 @@ pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t
*/
static pm_assoc_node_t *
pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
- pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
+ pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
const uint8_t *end;
- if (value != NULL) {
+ if (value != NULL && value->location.end > key->location.end) {
end = value->location.end;
} else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
end = operator->end;
@@ -1352,27 +2260,25 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
end = key->location.end;
}
+ // Hash string keys will be frozen, so we can mark them as frozen here so
+ // that the compiler picks them up and also when we check for static literal
+ // on the keys it gets factored in.
+ if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
+ key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
+ }
+
// If the key and value of this assoc node are both static literals, then
// we can mark this node as a static literal.
pm_node_flags_t flags = 0;
- if (value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)) {
+ if (
+ !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
+ value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
+ ) {
flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
}
- // Hash string keys should be frozen
- if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
- key->flags |= PM_STRING_FLAGS_FROZEN;
- }
-
*node = (pm_assoc_node_t) {
- {
- .type = PM_ASSOC_NODE,
- .flags = flags,
- .location = {
- .start = key->location.start,
- .end = end
- },
- },
+ .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, key->location.start, end),
.key = key,
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
.value = value
@@ -1387,16 +2293,14 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
static pm_assoc_splat_node_t *
pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
assert(operator->type == PM_TOKEN_USTAR_STAR);
- pm_assoc_splat_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_splat_node_t);
+ pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
*node = (pm_assoc_splat_node_t) {
- {
- .type = PM_ASSOC_SPLAT_NODE,
- .location = {
- .start = operator->start,
- .end = value == NULL ? operator->end : value->location.end
- },
- },
+ .base = (
+ (value == NULL)
+ ? PM_NODE_INIT_TOKEN(parser, PM_ASSOC_SPLAT_NODE, 0, operator)
+ : PM_NODE_INIT_TOKEN_NODE(parser, PM_ASSOC_SPLAT_NODE, 0, operator, value)
+ ),
.value = value,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
};
@@ -1410,13 +2314,10 @@ pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token
static pm_back_reference_read_node_t *
pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
assert(name->type == PM_TOKEN_BACK_REFERENCE);
- pm_back_reference_read_node_t *node = PM_ALLOC_NODE(parser, pm_back_reference_read_node_t);
+ pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
*node = (pm_back_reference_read_node_t) {
- {
- .type = PM_BACK_REFERENCE_READ_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_BACK_REFERENCE_READ_NODE, 0, name),
.name = pm_parser_constant_id_token(parser, name)
};
@@ -1428,19 +2329,17 @@ pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name)
*/
static pm_begin_node_t *
pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
- pm_begin_node_t *node = PM_ALLOC_NODE(parser, pm_begin_node_t);
+ pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
*node = (pm_begin_node_t) {
- {
- .type = PM_BEGIN_NODE,
- .location = {
- .start = begin_keyword->start,
- .end = statements == NULL ? begin_keyword->end : statements->base.location.end
- },
- },
+ .base = (
+ (statements == NULL)
+ ? PM_NODE_INIT_TOKEN(parser, PM_BEGIN_NODE, 0, begin_keyword)
+ : PM_NODE_INIT_TOKEN_NODE(parser, PM_BEGIN_NODE, 0, begin_keyword, statements)
+ ),
.begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
.statements = statements,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .end_keyword_loc = { 0 }
};
return node;
@@ -1493,16 +2392,14 @@ pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keywo
*/
static pm_block_argument_node_t *
pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
- pm_block_argument_node_t *node = PM_ALLOC_NODE(parser, pm_block_argument_node_t);
+ pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
*node = (pm_block_argument_node_t) {
- {
- .type = PM_BLOCK_ARGUMENT_NODE,
- .location = {
- .start = operator->start,
- .end = expression == NULL ? operator->end : expression->location.end
- },
- },
+ .base = (
+ (expression == NULL)
+ ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator)
+ : PM_NODE_INIT_TOKEN_NODE(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator, expression)
+ ),
.expression = expression,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
};
@@ -1514,16 +2411,12 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
* Allocate and initialize a new BlockNode node.
*/
static pm_block_node_t *
-pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
- pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
+pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
+ pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
*node = (pm_block_node_t) {
- {
- .type = PM_BLOCK_NODE,
- .location = { .start = opening->start, .end = closing->end },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_BLOCK_NODE, 0, opening, closing),
.locals = *locals,
- .locals_body_index = locals_body_index,
.parameters = parameters,
.body = body,
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1539,16 +2432,14 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
static pm_block_parameter_node_t *
pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
- pm_block_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_block_parameter_node_t);
+ pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
*node = (pm_block_parameter_node_t) {
- {
- .type = PM_BLOCK_PARAMETER_NODE,
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- },
- },
+ .base = (
+ (name->type == PM_TOKEN_NOT_PROVIDED)
+ ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_PARAMETER_NODE, 0, operator)
+ : PM_NODE_INIT_TOKENS(parser, PM_BLOCK_PARAMETER_NODE, 0, operator, name)
+ ),
.name = pm_parser_optional_constant_id_token(parser, name),
.name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -1562,7 +2453,7 @@ pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, cons
*/
static pm_block_parameters_node_t *
pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
- pm_block_parameters_node_t *node = PM_ALLOC_NODE(parser, pm_block_parameters_node_t);
+ pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
const uint8_t *start;
if (opening->type != PM_TOKEN_NOT_PROVIDED) {
@@ -1583,16 +2474,10 @@ pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *param
}
*node = (pm_block_parameters_node_t) {
- {
- .type = PM_BLOCK_PARAMETERS_NODE,
- .location = {
- .start = start,
- .end = end
- }
- },
+ .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, start, end),
.parameters = parameters,
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .closing_loc = { 0 },
.locals = { 0 }
};
@@ -1615,14 +2500,10 @@ pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_
*/
static pm_block_local_variable_node_t *
pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
- assert(name->type == PM_TOKEN_IDENTIFIER || name->type == PM_TOKEN_MISSING);
- pm_block_local_variable_node_t *node = PM_ALLOC_NODE(parser, pm_block_local_variable_node_t);
+ pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
*node = (pm_block_local_variable_node_t) {
- {
- .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, name),
.name = pm_parser_constant_id_token(parser, name)
};
@@ -1634,7 +2515,7 @@ pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name)
*/
static void
pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
- pm_node_list_append(&node->locals, (pm_node_t *) local);
+ pm_node_list_append(&node->locals, UP(local));
if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
node->base.location.end = local->base.location.end;
@@ -1646,16 +2527,14 @@ pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm
static pm_break_node_t *
pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
- pm_break_node_t *node = PM_ALLOC_NODE(parser, pm_break_node_t);
+ pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
*node = (pm_break_node_t) {
- {
- .type = PM_BREAK_NODE,
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- },
- },
+ .base = (
+ (arguments == NULL)
+ ? PM_NODE_INIT_TOKEN(parser, PM_BREAK_NODE, 0, keyword)
+ : PM_NODE_INIT_TOKEN_NODE(parser, PM_BREAK_NODE, 0, keyword, arguments)
+ ),
.arguments = arguments,
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
};
@@ -1663,6 +2542,15 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
return node;
}
+// There are certain flags that we want to use internally but don't want to
+// expose because they are not relevant beyond parsing. Therefore we'll define
+// them here and not define them in config.yml/a header file.
+static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
+
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
+
/**
* Allocate and initialize a new CallNode node. This sets everything to NULL or
* PM_TOKEN_NOT_PROVIDED as appropriate such that its values can be overridden
@@ -1670,20 +2558,17 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
*/
static pm_call_node_t *
pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
- pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
+ pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
*node = (pm_call_node_t) {
- {
- .type = PM_CALL_NODE,
- .flags = flags,
- .location = PM_LOCATION_NULL_VALUE(parser),
- },
+ .base = PM_NODE_INIT_BASE(parser, PM_CALL_NODE, flags),
.receiver = NULL,
- .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .call_operator_loc = { 0 },
+ .message_loc = { 0 },
+ .opening_loc = { 0 },
.arguments = NULL,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .closing_loc = { 0 },
+ .equal_loc = { 0 },
.block = NULL,
.name = 0
};
@@ -1708,7 +2593,12 @@ static pm_call_node_t *
pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
pm_assert_value_expression(parser, receiver);
- pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
+ pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
+ if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
+ flags |= PM_CALL_NODE_FLAGS_INDEX;
+ }
+
+ pm_call_node_t *node = pm_call_node_create(parser, flags);
node->base.location.start = receiver->location.start;
node->base.location.end = pm_arguments_end(arguments);
@@ -1730,11 +2620,11 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
* Allocate and initialize a new CallNode node from a binary expression.
*/
static pm_call_node_t *
-pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument) {
+pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
pm_assert_value_expression(parser, receiver);
pm_assert_value_expression(parser, argument);
- pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
node->base.location.start = MIN(receiver->location.start, argument->location.start);
node->base.location.end = MAX(receiver->location.end, argument->location.end);
@@ -1750,6 +2640,8 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
return node;
}
+static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
+
/**
* Allocate and initialize a new CallNode node from a call expression.
*/
@@ -1775,10 +2667,32 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
node->block = arguments->block;
if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
- pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+ pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
}
- node->name = pm_parser_constant_id_token(parser, message);
+ /**
+ * If the final character is `@` as is the case for `foo.~@`,
+ * we should ignore the @ in the same way we do for symbols.
+ */
+ node->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message));
+ return node;
+}
+
+/**
+ * Allocate and initialize a new synthesized CallNode node from a call expression.
+ */
+static pm_call_node_t *
+pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
+ pm_call_node_t *node = pm_call_node_create(parser, 0);
+ node->base.location.start = parser->start;
+ node->base.location.end = parser->end;
+
+ node->receiver = receiver;
+ node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
+ node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
+ node->arguments = arguments;
+
+ node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
return node;
}
@@ -1804,11 +2718,27 @@ pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments
}
/**
+ * Allocate and initialize a new CallNode node from a synthesized call to a
+ * method name with the given arguments.
+ */
+static pm_call_node_t *
+pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
+
+ node->base.location = PM_LOCATION_NULL_VALUE(parser);
+ node->arguments = arguments;
+
+ node->name = name;
+ return node;
+}
+
+/**
* Allocate and initialize a new CallNode node from a not expression.
*/
static pm_call_node_t *
pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
pm_assert_value_expression(parser, receiver);
+ if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
@@ -1816,6 +2746,7 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
if (arguments->closing_loc.start != NULL) {
node->base.location.end = arguments->closing_loc.end;
} else {
+ assert(receiver != NULL);
node->base.location.end = receiver->location.end;
}
@@ -1849,7 +2780,7 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
node->block = arguments->block;
if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
- pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+ pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
}
node->name = pm_parser_constant_id_constant(parser, "call", 4);
@@ -1891,39 +2822,16 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
}
/**
- * Returns whether or not this call node is a "vcall" (a call to a method name
- * without a receiver that could also have been a local variable read).
- */
-static inline bool
-pm_call_node_variable_call_p(pm_call_node_t *node) {
- return PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
-}
-
-/**
- * Returns whether or not this call is to the [] method in the index form without a block (as
- * opposed to `foo.[]` and `foo[] { }`).
- */
-static inline bool
-pm_call_node_index_p(pm_call_node_t *node) {
- return (
- (node->call_operator_loc.start == NULL) &&
- (node->message_loc.start != NULL) &&
- (node->message_loc.start[0] == '[') &&
- (node->message_loc.end[-1] == ']') &&
- (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE))
- );
-}
-
-/**
* Returns whether or not this call can be used on the left-hand side of an
* operator assignment.
*/
static inline bool
-pm_call_node_writable_p(pm_call_node_t *node) {
+pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
return (
(node->message_loc.start != NULL) &&
(node->message_loc.end[-1] != '!') &&
(node->message_loc.end[-1] != '?') &&
+ char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
(node->opening_loc.start == NULL) &&
(node->arguments == NULL) &&
(node->block == NULL)
@@ -1940,7 +2848,7 @@ pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, p
if (write_constant->length > 0) {
size_t length = write_constant->length - 1;
- void *memory = malloc(length);
+ void *memory = xmalloc(length);
memcpy(memory, write_constant->start, length);
*read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
@@ -1957,17 +2865,10 @@ static pm_call_and_write_node_t *
pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_call_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_call_and_write_node_t);
+ pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
*node = (pm_call_and_write_node_t) {
- {
- .type = PM_CALL_AND_WRITE_NODE,
- .flags = target->base.flags,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CALL_AND_WRITE_NODE, FL(target), target, value),
.receiver = target->receiver,
.call_operator_loc = target->call_operator_loc,
.message_loc = target->message_loc,
@@ -1982,34 +2883,53 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
// Here we're going to free the target, since it is no longer necessary.
// However, we don't want to call `pm_node_destroy` because we want to keep
// around all of its children since we just reused them.
- free(target);
+ xfree(target);
return node;
}
/**
+ * Validate that index expressions do not have keywords or blocks if we are
+ * parsing as Ruby 3.4+.
+ */
+static void
+pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
+ if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
+ pm_node_t *node;
+ PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
+ if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
+ pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
+ break;
+ }
+ }
+ }
+
+ if (block != NULL) {
+ pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
+ }
+ }
+}
+
+/**
* Allocate and initialize a new IndexAndWriteNode node.
*/
static pm_index_and_write_node_t *
pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
+ pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
+
+ pm_index_arguments_check(parser, target->arguments, target->block);
+ assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
*node = (pm_index_and_write_node_t) {
- {
- .type = PM_INDEX_AND_WRITE_NODE,
- .flags = target->base.flags,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_INDEX_AND_WRITE_NODE, FL(target), target, value),
.receiver = target->receiver,
.call_operator_loc = target->call_operator_loc,
.opening_loc = target->opening_loc,
.arguments = target->arguments,
.closing_loc = target->closing_loc,
- .block = target->block,
+ .block = (pm_block_argument_node_t *) target->block,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
};
@@ -2017,7 +2937,7 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
// Here we're going to free the target, since it is no longer necessary.
// However, we don't want to call `pm_node_destroy` because we want to keep
// around all of its children since we just reused them.
- free(target);
+ xfree(target);
return node;
}
@@ -2028,24 +2948,17 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
static pm_call_operator_write_node_t *
pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
- pm_call_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_call_operator_write_node_t);
+ pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
*node = (pm_call_operator_write_node_t) {
- {
- .type = PM_CALL_OPERATOR_WRITE_NODE,
- .flags = target->base.flags,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), target, value),
.receiver = target->receiver,
.call_operator_loc = target->call_operator_loc,
.message_loc = target->message_loc,
.read_name = 0,
.write_name = target->name,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
};
@@ -2054,7 +2967,7 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
// Here we're going to free the target, since it is no longer necessary.
// However, we don't want to call `pm_node_destroy` because we want to keep
// around all of its children since we just reused them.
- free(target);
+ xfree(target);
return node;
}
@@ -2064,32 +2977,28 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
*/
static pm_index_operator_write_node_t *
pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
+ pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
+
+ pm_index_arguments_check(parser, target->arguments, target->block);
+ assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
*node = (pm_index_operator_write_node_t) {
- {
- .type = PM_INDEX_OPERATOR_WRITE_NODE,
- .flags = target->base.flags,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), target, value),
.receiver = target->receiver,
.call_operator_loc = target->call_operator_loc,
.opening_loc = target->opening_loc,
.arguments = target->arguments,
.closing_loc = target->closing_loc,
- .block = target->block,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .block = (pm_block_argument_node_t *) target->block,
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
};
// Here we're going to free the target, since it is no longer necessary.
// However, we don't want to call `pm_node_destroy` because we want to keep
// around all of its children since we just reused them.
- free(target);
+ xfree(target);
return node;
}
@@ -2101,17 +3010,10 @@ static pm_call_or_write_node_t *
pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_call_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_call_or_write_node_t);
+ pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
*node = (pm_call_or_write_node_t) {
- {
- .type = PM_CALL_OR_WRITE_NODE,
- .flags = target->base.flags,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CALL_OR_WRITE_NODE, FL(target), target, value),
.receiver = target->receiver,
.call_operator_loc = target->call_operator_loc,
.message_loc = target->message_loc,
@@ -2126,7 +3028,7 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
// Here we're going to free the target, since it is no longer necessary.
// However, we don't want to call `pm_node_destroy` because we want to keep
// around all of its children since we just reused them.
- free(target);
+ xfree(target);
return node;
}
@@ -2137,23 +3039,19 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
static pm_index_or_write_node_t *
pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
+ pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
+ pm_index_arguments_check(parser, target->arguments, target->block);
+
+ assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
*node = (pm_index_or_write_node_t) {
- {
- .type = PM_INDEX_OR_WRITE_NODE,
- .flags = target->base.flags,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OR_WRITE_NODE, FL(target), target, value),
.receiver = target->receiver,
.call_operator_loc = target->call_operator_loc,
.opening_loc = target->opening_loc,
.arguments = target->arguments,
.closing_loc = target->closing_loc,
- .block = target->block,
+ .block = (pm_block_argument_node_t *) target->block,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
};
@@ -2161,7 +3059,7 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
// Here we're going to free the target, since it is no longer necessary.
// However, we don't want to call `pm_node_destroy` because we want to keep
// around all of its children since we just reused them.
- free(target);
+ xfree(target);
return node;
}
@@ -2172,24 +3070,31 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
*/
static pm_call_target_node_t *
pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
- pm_call_target_node_t *node = PM_ALLOC_NODE(parser, pm_call_target_node_t);
+ pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
*node = (pm_call_target_node_t) {
- {
- .type = PM_CALL_TARGET_NODE,
- .flags = target->base.flags,
- .location = target->base.location
- },
+ .base = PM_NODE_INIT_NODE(parser, PM_CALL_TARGET_NODE, FL(target), target),
.receiver = target->receiver,
.call_operator_loc = target->call_operator_loc,
.name = target->name,
.message_loc = target->message_loc
};
+ /* It is possible to get here where we have parsed an invalid syntax tree
+ * where the call operator was not present. In that case we will have a
+ * problem because it is a required location. In this case we need to fill
+ * it in with a fake location so that the syntax tree remains valid. */
+ if (node->call_operator_loc.start == NULL) {
+ node->call_operator_loc = (pm_location_t) {
+ .start = target->base.location.start,
+ .end = target->base.location.start
+ };
+ }
+
// Here we're going to free the target, since it is no longer necessary.
// However, we don't want to call `pm_node_destroy` because we want to keep
// around all of its children since we just reused them.
- free(target);
+ xfree(target);
return node;
}
@@ -2200,30 +3105,24 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
*/
static pm_index_target_node_t *
pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
- pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
- pm_node_flags_t flags = target->base.flags;
+ pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
- if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) {
- flags |= PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE;
- }
+ pm_index_arguments_check(parser, target->arguments, target->block);
+ assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
*node = (pm_index_target_node_t) {
- {
- .type = PM_INDEX_TARGET_NODE,
- .flags = flags,
- .location = target->base.location
- },
+ .base = PM_NODE_INIT_NODE(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, target),
.receiver = target->receiver,
.opening_loc = target->opening_loc,
.arguments = target->arguments,
.closing_loc = target->closing_loc,
- .block = target->block
+ .block = (pm_block_argument_node_t *) target->block,
};
// Here we're going to free the target, since it is no longer necessary.
// However, we don't want to call `pm_node_destroy` because we want to keep
// around all of its children since we just reused them.
- free(target);
+ xfree(target);
return node;
}
@@ -2232,17 +3131,11 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
* Allocate and initialize a new CapturePatternNode node.
*/
static pm_capture_pattern_node_t *
-pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *target, const pm_token_t *operator) {
- pm_capture_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_capture_pattern_node_t);
+pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
+ pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
*node = (pm_capture_pattern_node_t) {
- {
- .type = PM_CAPTURE_PATTERN_NODE,
- .location = {
- .start = value->location.start,
- .end = target->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CAPTURE_PATTERN_NODE, 0, value, target),
.value = value,
.target = target,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -2256,18 +3149,12 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t
*/
static pm_case_node_t *
pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
- pm_case_node_t *node = PM_ALLOC_NODE(parser, pm_case_node_t);
+ pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
*node = (pm_case_node_t) {
- {
- .type = PM_CASE_NODE,
- .location = {
- .start = case_keyword->start,
- .end = end_keyword->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_NODE, 0, case_keyword, end_keyword),
.predicate = predicate,
- .consequent = NULL,
+ .else_clause = NULL,
.case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
.end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
.conditions = { 0 }
@@ -2288,12 +3175,12 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
}
/**
- * Set the consequent of a CaseNode node.
+ * Set the else clause of a CaseNode node.
*/
static void
-pm_case_node_consequent_set(pm_case_node_t *node, pm_else_node_t *consequent) {
- node->consequent = consequent;
- node->base.location.end = consequent->base.location.end;
+pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
+ node->else_clause = else_clause;
+ node->base.location.end = else_clause->base.location.end;
}
/**
@@ -2310,18 +3197,12 @@ pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_key
*/
static pm_case_match_node_t *
pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
- pm_case_match_node_t *node = PM_ALLOC_NODE(parser, pm_case_match_node_t);
+ pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
*node = (pm_case_match_node_t) {
- {
- .type = PM_CASE_MATCH_NODE,
- .location = {
- .start = case_keyword->start,
- .end = end_keyword->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_MATCH_NODE, 0, case_keyword, end_keyword),
.predicate = predicate,
- .consequent = NULL,
+ .else_clause = NULL,
.case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
.end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
.conditions = { 0 }
@@ -2342,12 +3223,12 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi
}
/**
- * Set the consequent of a CaseMatchNode node.
+ * Set the else clause of a CaseMatchNode node.
*/
static void
-pm_case_match_node_consequent_set(pm_case_match_node_t *node, pm_else_node_t *consequent) {
- node->consequent = consequent;
- node->base.location.end = consequent->base.location.end;
+pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
+ node->else_clause = else_clause;
+ node->base.location.end = else_clause->base.location.end;
}
/**
@@ -2364,13 +3245,10 @@ pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_toke
*/
static pm_class_node_t *
pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_class_node_t *node = PM_ALLOC_NODE(parser, pm_class_node_t);
+ pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
*node = (pm_class_node_t) {
- {
- .type = PM_CLASS_NODE,
- .location = { .start = class_keyword->start, .end = end_keyword->end },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_CLASS_NODE, 0, class_keyword, end_keyword),
.locals = *locals,
.class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
.constant_path = constant_path,
@@ -2390,16 +3268,10 @@ pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
static pm_class_variable_and_write_node_t *
pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_class_variable_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_class_variable_and_write_node_t);
+ pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
*node = (pm_class_variable_and_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -2414,21 +3286,15 @@ pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_r
*/
static pm_class_variable_operator_write_node_t *
pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_class_variable_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_class_variable_operator_write_node_t);
+ pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
*node = (pm_class_variable_operator_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -2440,16 +3306,10 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
static pm_class_variable_or_write_node_t *
pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_class_variable_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_class_variable_or_write_node_t);
+ pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
*node = (pm_class_variable_or_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -2465,13 +3325,10 @@ pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_re
static pm_class_variable_read_node_t *
pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_CLASS_VARIABLE);
- pm_class_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_class_variable_read_node_t);
+ pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
*node = (pm_class_variable_read_node_t) {
- {
- .type = PM_CLASS_VARIABLE_READ_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_CLASS_VARIABLE_READ_NODE, 0, token),
.name = pm_parser_constant_id_token(parser, token)
};
@@ -2479,23 +3336,32 @@ pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token)
}
/**
+ * True if the given node is an implicit array node on a write, as in:
+ *
+ * a = *b
+ * a = 1, 2, 3
+ */
+static inline pm_node_flags_t
+pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
+ if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
+ return flags;
+ }
+ return 0;
+}
+
+/**
* Initialize a new ClassVariableWriteNode node from a ClassVariableRead node.
*/
static pm_class_variable_write_node_t *
pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
- pm_class_variable_write_node_t *node = PM_ALLOC_NODE(parser, pm_class_variable_write_node_t);
+ pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
+ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
*node = (pm_class_variable_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_WRITE_NODE,
- .location = {
- .start = read_node->base.location.start,
- .end = value->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, read_node, value),
.name = read_node->name,
- .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
+ .name_loc = PM_LOCATION_NODE_VALUE(UP(read_node)),
+ .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
};
@@ -2508,16 +3374,10 @@ pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_
static pm_constant_path_and_write_node_t *
pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_constant_path_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_and_write_node_t);
+ pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
*node = (pm_constant_path_and_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, target, value),
.target = target,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
@@ -2531,20 +3391,14 @@ pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_nod
*/
static pm_constant_path_operator_write_node_t *
pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_path_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_operator_write_node_t);
+ pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
*node = (pm_constant_path_operator_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, target, value),
.target = target,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -2556,16 +3410,10 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
static pm_constant_path_or_write_node_t *
pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_constant_path_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_or_write_node_t);
+ pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
*node = (pm_constant_path_or_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, target, value),
.target = target,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value
@@ -2578,23 +3426,32 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
* Allocate and initialize a new ConstantPathNode node.
*/
static pm_constant_path_node_t *
-pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
+pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
pm_assert_value_expression(parser, parent);
+ pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
- pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
+ pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
+ if (name_token->type == PM_TOKEN_CONSTANT) {
+ name = pm_parser_constant_id_token(parser, name_token);
+ }
- *node = (pm_constant_path_node_t) {
- {
- .type = PM_CONSTANT_PATH_NODE,
- .location = {
- .start = parent == NULL ? delimiter->start : parent->location.start,
- .end = child->location.end
- },
- },
- .parent = parent,
- .child = child,
- .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
- };
+ if (parent == NULL) {
+ *node = (pm_constant_path_node_t) {
+ .base = PM_NODE_INIT_TOKENS(parser, PM_CONSTANT_PATH_NODE, 0, delimiter, name_token),
+ .parent = parent,
+ .name = name,
+ .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
+ };
+ } else {
+ *node = (pm_constant_path_node_t) {
+ .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_CONSTANT_PATH_NODE, 0, parent, name_token),
+ .parent = parent,
+ .name = name,
+ .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
+ };
+ }
return node;
}
@@ -2604,16 +3461,11 @@ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_to
*/
static pm_constant_path_write_node_t *
pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_path_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_write_node_t);
+ pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
+ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
*node = (pm_constant_path_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, target, value),
.target = target,
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
.value = value
@@ -2628,16 +3480,10 @@ pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t
static pm_constant_and_write_node_t *
pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_constant_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_and_write_node_t);
+ pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
*node = (pm_constant_and_write_node_t) {
- {
- .type = PM_CONSTANT_AND_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_AND_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -2652,21 +3498,15 @@ pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *
*/
static pm_constant_operator_write_node_t *
pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_operator_write_node_t);
+ pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
*node = (pm_constant_operator_write_node_t) {
- {
- .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -2678,16 +3518,10 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
static pm_constant_or_write_node_t *
pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_constant_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_or_write_node_t);
+ pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
*node = (pm_constant_or_write_node_t) {
- {
- .type = PM_CONSTANT_OR_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OR_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -2703,13 +3537,10 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t
static pm_constant_read_node_t *
pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
- pm_constant_read_node_t *node = PM_ALLOC_NODE(parser, pm_constant_read_node_t);
+ pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
*node = (pm_constant_read_node_t) {
- {
- .type = PM_CONSTANT_READ_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(name)
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_CONSTANT_READ_NODE, 0, name),
.name = pm_parser_constant_id_token(parser, name)
};
@@ -2721,16 +3552,11 @@ pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
*/
static pm_constant_write_node_t *
pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_write_node_t);
+ pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
+ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
*node = (pm_constant_write_node_t) {
- {
- .type = PM_CONSTANT_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_WRITE_NODE, flags, target, value),
.name = target->name,
.name_loc = target->base.location,
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
@@ -2741,17 +3567,61 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
}
/**
+ * Check if the receiver of a `def` node is allowed.
+ */
+static void
+pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_BEGIN_NODE: {
+ const pm_begin_node_t *cast = (pm_begin_node_t *) node;
+ if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
+ break;
+ }
+ case PM_PARENTHESES_NODE: {
+ const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
+ if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
+ break;
+ }
+ case PM_STATEMENTS_NODE: {
+ const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
+ pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
+ break;
+ }
+ case PM_ARRAY_NODE:
+ case PM_FLOAT_NODE:
+ case PM_IMAGINARY_NODE:
+ case PM_INTEGER_NODE:
+ case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
+ case PM_INTERPOLATED_STRING_NODE:
+ case PM_INTERPOLATED_SYMBOL_NODE:
+ case PM_INTERPOLATED_X_STRING_NODE:
+ case PM_RATIONAL_NODE:
+ case PM_REGULAR_EXPRESSION_NODE:
+ case PM_SOURCE_ENCODING_NODE:
+ case PM_SOURCE_FILE_NODE:
+ case PM_SOURCE_LINE_NODE:
+ case PM_STRING_NODE:
+ case PM_SYMBOL_NODE:
+ case PM_X_STRING_NODE:
+ pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
* Allocate and initialize a new DefNode node.
*/
static pm_def_node_t *
pm_def_node_create(
pm_parser_t *parser,
- const pm_token_t *name,
+ pm_constant_id_t name,
+ const pm_token_t *name_loc,
pm_node_t *receiver,
pm_parameters_node_t *parameters,
pm_node_t *body,
pm_constant_id_list_t *locals,
- uint32_t locals_body_index,
const pm_token_t *def_keyword,
const pm_token_t *operator,
const pm_token_t *lparen,
@@ -2759,27 +3629,24 @@ pm_def_node_create(
const pm_token_t *equal,
const pm_token_t *end_keyword
) {
- pm_def_node_t *node = PM_ALLOC_NODE(parser, pm_def_node_t);
- const uint8_t *end;
+ pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
- if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
- end = body->location.end;
- } else {
- end = end_keyword->end;
+ if (receiver != NULL) {
+ pm_def_node_receiver_check(parser, receiver);
}
*node = (pm_def_node_t) {
- {
- .type = PM_DEF_NODE,
- .location = { .start = def_keyword->start, .end = end },
- },
- .name = pm_parser_constant_id_token(parser, name),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
+ .base = (
+ (end_keyword->type == PM_TOKEN_NOT_PROVIDED)
+ ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEF_NODE, 0, def_keyword, body)
+ : PM_NODE_INIT_TOKENS(parser, PM_DEF_NODE, 0, def_keyword, end_keyword)
+ ),
+ .name = name,
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
.receiver = receiver,
.parameters = parameters,
.body = body,
.locals = *locals,
- .locals_body_index = locals_body_index,
.def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
.lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
@@ -2795,21 +3662,19 @@ pm_def_node_create(
* Allocate a new DefinedNode node.
*/
static pm_defined_node_t *
-pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
- pm_defined_node_t *node = PM_ALLOC_NODE(parser, pm_defined_node_t);
+pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
+ pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
*node = (pm_defined_node_t) {
- {
- .type = PM_DEFINED_NODE,
- .location = {
- .start = keyword_loc->start,
- .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
- },
- },
+ .base = (
+ (rparen->type == PM_TOKEN_NOT_PROVIDED)
+ ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEFINED_NODE, 0, keyword, value)
+ : PM_NODE_INIT_TOKENS(parser, PM_DEFINED_NODE, 0, keyword, rparen)
+ ),
.lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
.value = value,
.rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
- .keyword_loc = *keyword_loc
+ .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
};
return node;
@@ -2820,22 +3685,14 @@ pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t
*/
static pm_else_node_t *
pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
- pm_else_node_t *node = PM_ALLOC_NODE(parser, pm_else_node_t);
- const uint8_t *end = NULL;
- if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
- end = statements->base.location.end;
- } else {
- end = end_keyword->end;
- }
+ pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
*node = (pm_else_node_t) {
- {
- .type = PM_ELSE_NODE,
- .location = {
- .start = else_keyword->start,
- .end = end,
- },
- },
+ .base = (
+ ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL))
+ ? PM_NODE_INIT_TOKEN_NODE(parser, PM_ELSE_NODE, 0, else_keyword, statements)
+ : PM_NODE_INIT_TOKENS(parser, PM_ELSE_NODE, 0, else_keyword, end_keyword)
+ ),
.else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
.statements = statements,
.end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
@@ -2849,16 +3706,10 @@ pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_stat
*/
static pm_embedded_statements_node_t *
pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_embedded_statements_node_t *node = PM_ALLOC_NODE(parser, pm_embedded_statements_node_t);
+ pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
*node = (pm_embedded_statements_node_t) {
- {
- .type = PM_EMBEDDED_STATEMENTS_NODE,
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, opening, closing),
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
.statements = statements,
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
@@ -2872,16 +3723,10 @@ pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *openin
*/
static pm_embedded_variable_node_t *
pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
- pm_embedded_variable_node_t *node = PM_ALLOC_NODE(parser, pm_embedded_variable_node_t);
+ pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
*node = (pm_embedded_variable_node_t) {
- {
- .type = PM_EMBEDDED_VARIABLE_NODE,
- .location = {
- .start = operator->start,
- .end = variable->location.end
- }
- },
+ .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_EMBEDDED_VARIABLE_NODE, 0, operator, variable),
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.variable = variable
};
@@ -2894,16 +3739,10 @@ pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator
*/
static pm_ensure_node_t *
pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
- pm_ensure_node_t *node = PM_ALLOC_NODE(parser, pm_ensure_node_t);
+ pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
*node = (pm_ensure_node_t) {
- {
- .type = PM_ENSURE_NODE,
- .location = {
- .start = ensure_keyword->start,
- .end = end_keyword->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_ENSURE_NODE, 0, ensure_keyword, end_keyword),
.ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
.statements = statements,
.end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
@@ -2918,13 +3757,11 @@ pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_
static pm_false_node_t *
pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_FALSE);
- pm_false_node_t *node = PM_ALLOC_NODE(parser, pm_false_node_t);
+ pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
- *node = (pm_false_node_t) {{
- .type = PM_FALSE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ *node = (pm_false_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+ };
return node;
}
@@ -2935,31 +3772,36 @@ pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_find_pattern_node_t *
pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
- pm_find_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_find_pattern_node_t);
+ pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
pm_node_t *left = nodes->nodes[0];
+ assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
+ pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
+
pm_node_t *right;
if (nodes->size == 1) {
- right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
+ right = UP(pm_missing_node_create(parser, left->location.end, left->location.end));
} else {
right = nodes->nodes[nodes->size - 1];
+ assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
}
+#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
+ // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
+ // The resulting AST will anyway be ignored, but this file still needs to compile.
+ pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
+#else
+ pm_node_t *right_splat_node = right;
+#endif
*node = (pm_find_pattern_node_t) {
- {
- .type = PM_FIND_PATTERN_NODE,
- .location = {
- .start = left->location.start,
- .end = right->location.end,
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, left, right),
.constant = NULL,
- .left = left,
- .right = right,
+ .left = left_splat_node,
+ .right = right_splat_node,
.requireds = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .opening_loc = { 0 },
+ .closing_loc = { 0 }
};
// For now we're going to just copy over each pointer manually. This could be
@@ -2973,18 +3815,90 @@ pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
}
/**
+ * Parse the value of a double, add appropriate errors if there is an issue, and
+ * return the value that should be saved on the PM_FLOAT_NODE node.
+ */
+static double
+pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
+ ptrdiff_t diff = token->end - token->start;
+ if (diff <= 0) return 0.0;
+
+ // First, get a buffer of the content.
+ size_t length = (size_t) diff;
+ char *buffer = xmalloc(sizeof(char) * (length + 1));
+ memcpy((void *) buffer, token->start, length);
+
+ // Next, determine if we need to replace the decimal point because of
+ // locale-specific options, and then normalize them if we have to.
+ char decimal_point = *localeconv()->decimal_point;
+ if (decimal_point != '.') {
+ for (size_t index = 0; index < length; index++) {
+ if (buffer[index] == '.') buffer[index] = decimal_point;
+ }
+ }
+
+ // Next, handle underscores by removing them from the buffer.
+ for (size_t index = 0; index < length; index++) {
+ if (buffer[index] == '_') {
+ memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
+ length--;
+ }
+ }
+
+ // Null-terminate the buffer so that strtod cannot read off the end.
+ buffer[length] = '\0';
+
+ // Now, call strtod to parse the value. Note that CRuby has their own
+ // version of strtod which avoids locales. We're okay using the locale-aware
+ // version because we've already validated through the parser that the token
+ // is in a valid format.
+ errno = 0;
+ char *eptr;
+ double value = strtod(buffer, &eptr);
+
+ // This should never happen, because we've already checked that the token
+ // is in a valid format. However it's good to be safe.
+ if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
+ xfree((void *) buffer);
+ return 0.0;
+ }
+
+ // If errno is set, then it should only be ERANGE. At this point we need to
+ // check if it's infinity (it should be).
+ if (errno == ERANGE && PRISM_ISINF(value)) {
+ int warn_width;
+ const char *ellipsis;
+
+ if (length > 20) {
+ warn_width = 20;
+ ellipsis = "...";
+ } else {
+ warn_width = (int) length;
+ ellipsis = "";
+ }
+
+ pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
+ value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
+ }
+
+ // Finally we can free the buffer and return the value.
+ xfree((void *) buffer);
+ return value;
+}
+
+/**
* Allocate and initialize a new FloatNode node.
*/
static pm_float_node_t *
pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT);
- pm_float_node_t *node = PM_ALLOC_NODE(parser, pm_float_node_t);
+ pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
- *node = (pm_float_node_t) {{
- .type = PM_FLOAT_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ *node = (pm_float_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+ .value = pm_double_parse(parser, token)
+ };
return node;
}
@@ -2996,44 +3910,68 @@ static pm_imaginary_node_t *
pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
- pm_imaginary_node_t *node = PM_ALLOC_NODE(parser, pm_imaginary_node_t);
+ pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
*node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+ .numeric = UP(pm_float_node_create(parser, &((pm_token_t) {
.type = PM_TOKEN_FLOAT,
.start = token->start,
.end = token->end - 1
- }))
+ })))
};
return node;
}
/**
- * Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token.
+ * Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
*/
static pm_rational_node_t *
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
- pm_rational_node_t *node = PM_ALLOC_NODE(parser, pm_rational_node_t);
+ pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
*node = (pm_rational_node_t) {
- {
- .type = PM_RATIONAL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
- .type = PM_TOKEN_FLOAT,
- .start = token->start,
- .end = token->end - 1
- }))
+ .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, token),
+ .numerator = { 0 },
+ .denominator = { 0 }
};
+ const uint8_t *start = token->start;
+ const uint8_t *end = token->end - 1; // r
+
+ while (start < end && *start == '0') start++; // 0.1 -> .1
+ while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
+
+ size_t length = (size_t) (end - start);
+ if (length == 1) {
+ node->denominator.value = 1;
+ return node;
+ }
+
+ const uint8_t *point = memchr(start, '.', length);
+ assert(point && "should have a decimal point");
+
+ uint8_t *digits = xmalloc(length);
+ if (digits == NULL) {
+ fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
+ abort();
+ }
+
+ memcpy(digits, start, (unsigned long) (point - start));
+ memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
+ pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
+
+ size_t fract_length = 0;
+ for (const uint8_t *fract = point; fract < end; ++fract) {
+ if (*fract != '_') ++fract_length;
+ }
+ digits[0] = '1';
+ if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
+ pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
+ xfree(digits);
+
+ pm_integers_reduce(&node->numerator, &node->denominator);
return node;
}
@@ -3045,18 +3983,14 @@ static pm_imaginary_node_t *
pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
- pm_imaginary_node_t *node = PM_ALLOC_NODE(parser, pm_imaginary_node_t);
+ pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
*node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+ .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) {
.type = PM_TOKEN_FLOAT_RATIONAL,
.start = token->start,
.end = token->end - 1
- }))
+ })))
};
return node;
@@ -3076,16 +4010,10 @@ pm_for_node_create(
const pm_token_t *do_keyword,
const pm_token_t *end_keyword
) {
- pm_for_node_t *node = PM_ALLOC_NODE(parser, pm_for_node_t);
+ pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
*node = (pm_for_node_t) {
- {
- .type = PM_FOR_NODE,
- .location = {
- .start = for_keyword->start,
- .end = end_keyword->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_FOR_NODE, 0, for_keyword, end_keyword),
.index = index,
.collection = collection,
.statements = statements,
@@ -3104,8 +4032,12 @@ pm_for_node_create(
static pm_forwarding_arguments_node_t *
pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
- pm_forwarding_arguments_node_t *node = PM_ALLOC_NODE(parser, pm_forwarding_arguments_node_t);
- *node = (pm_forwarding_arguments_node_t) {{ .type = PM_FORWARDING_ARGUMENTS_NODE, .location = PM_LOCATION_TOKEN_VALUE(token) }};
+ pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
+
+ *node = (pm_forwarding_arguments_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, token)
+ };
+
return node;
}
@@ -3115,8 +4047,12 @@ pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token
static pm_forwarding_parameter_node_t *
pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
- pm_forwarding_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_forwarding_parameter_node_t);
- *node = (pm_forwarding_parameter_node_t) {{ .type = PM_FORWARDING_PARAMETER_NODE, .location = PM_LOCATION_TOKEN_VALUE(token) }};
+ pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
+
+ *node = (pm_forwarding_parameter_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_PARAMETER_NODE, 0, token)
+ };
+
return node;
}
@@ -3127,7 +4063,7 @@ static pm_forwarding_super_node_t *
pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
assert(token->type == PM_TOKEN_KEYWORD_SUPER);
- pm_forwarding_super_node_t *node = PM_ALLOC_NODE(parser, pm_forwarding_super_node_t);
+ pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
pm_block_node_t *block = NULL;
if (arguments->block != NULL) {
@@ -3135,13 +4071,11 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm
}
*node = (pm_forwarding_super_node_t) {
- {
- .type = PM_FORWARDING_SUPER_NODE,
- .location = {
- .start = token->start,
- .end = block != NULL ? block->base.location.end : token->end
- },
- },
+ .base = (
+ (block == NULL)
+ ? PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_SUPER_NODE, 0, token)
+ : PM_NODE_INIT_TOKEN_NODE(parser, PM_FORWARDING_SUPER_NODE, 0, token, block)
+ ),
.block = block
};
@@ -3154,16 +4088,10 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm
*/
static pm_hash_pattern_node_t *
pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_hash_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_hash_pattern_node_t);
+ pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
*node = (pm_hash_pattern_node_t) {
- {
- .type = PM_HASH_PATTERN_NODE,
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_HASH_PATTERN_NODE, 0, opening, closing),
.constant = NULL,
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -3179,15 +4107,15 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening
*/
static pm_hash_pattern_node_t *
pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
- pm_hash_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_hash_pattern_node_t);
+ pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
const uint8_t *start;
const uint8_t *end;
if (elements->size > 0) {
if (rest) {
- start = elements->nodes[0]->location.start;
- end = rest->location.end;
+ start = MIN(rest->location.start, elements->nodes[0]->location.start);
+ end = MAX(rest->location.end, elements->nodes[elements->size - 1]->location.end);
} else {
start = elements->nodes[0]->location.start;
end = elements->nodes[elements->size - 1]->location.end;
@@ -3199,25 +4127,15 @@ pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *eleme
}
*node = (pm_hash_pattern_node_t) {
- {
- .type = PM_HASH_PATTERN_NODE,
- .location = {
- .start = start,
- .end = end
- },
- },
+ .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, start, end),
.constant = NULL,
.elements = { 0 },
.rest = rest,
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .opening_loc = { 0 },
+ .closing_loc = { 0 }
};
- for (size_t index = 0; index < elements->size; index++) {
- pm_node_t *element = elements->nodes[index];
- pm_node_list_append(&node->elements, element);
- }
-
+ pm_node_list_concat(&node->elements, elements);
return node;
}
@@ -3247,16 +4165,10 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
static pm_global_variable_and_write_node_t *
pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_global_variable_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_global_variable_and_write_node_t);
+ pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
*node = (pm_global_variable_and_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
.name = pm_global_variable_write_name(parser, target),
.name_loc = target->location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -3271,21 +4183,15 @@ pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
static pm_global_variable_operator_write_node_t *
pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_global_variable_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_global_variable_operator_write_node_t);
+ pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
*node = (pm_global_variable_operator_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
.name = pm_global_variable_write_name(parser, target),
.name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -3297,16 +4203,10 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
static pm_global_variable_or_write_node_t *
pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_global_variable_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_global_variable_or_write_node_t);
+ pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
*node = (pm_global_variable_or_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
.name = pm_global_variable_write_name(parser, target),
.name_loc = target->location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -3321,13 +4221,10 @@ pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
static pm_global_variable_read_node_t *
pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_global_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_global_variable_read_node_t);
+ pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
*node = (pm_global_variable_read_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_READ_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, name),
.name = pm_parser_constant_id_token(parser, name)
};
@@ -3335,20 +4232,30 @@ pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name)
}
/**
- * Allocate a new GlobalVariableWriteNode node.
+ * Allocate and initialize a new synthesized GlobalVariableReadNode node.
+ */
+static pm_global_variable_read_node_t *
+pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
+ pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
+
+ *node = (pm_global_variable_read_node_t) {
+ .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0),
+ .name = name
+ };
+
+ return node;
+}
+
+/**
+ * Allocate and initialize a new GlobalVariableWriteNode node.
*/
static pm_global_variable_write_node_t *
pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_global_variable_write_node_t *node = PM_ALLOC_NODE(parser, pm_global_variable_write_node_t);
+ pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
+ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
*node = (pm_global_variable_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
- .location = {
- .start = target->location.start,
- .end = value->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, target, value),
.name = pm_global_variable_write_name(parser, target),
.name_loc = PM_LOCATION_NODE_VALUE(target),
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
@@ -3359,19 +4266,33 @@ pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, con
}
/**
+ * Allocate and initialize a new synthesized GlobalVariableWriteNode node.
+ */
+static pm_global_variable_write_node_t *
+pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
+ pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
+
+ *node = (pm_global_variable_write_node_t) {
+ .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0),
+ .name = name,
+ .name_loc = PM_LOCATION_NULL_VALUE(parser),
+ .operator_loc = PM_LOCATION_NULL_VALUE(parser),
+ .value = value
+ };
+
+ return node;
+}
+
+/**
* Allocate a new HashNode node.
*/
static pm_hash_node_t *
pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
assert(opening != NULL);
- pm_hash_node_t *node = PM_ALLOC_NODE(parser, pm_hash_node_t);
+ pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
*node = (pm_hash_node_t) {
- {
- .type = PM_HASH_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(opening)
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_LOCATION_NULL_VALUE(parser),
.elements = { 0 }
@@ -3396,7 +4317,7 @@ pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
}
if (!static_literal) {
- pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
+ pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
}
}
@@ -3415,17 +4336,17 @@ pm_if_node_create(pm_parser_t *parser,
pm_node_t *predicate,
const pm_token_t *then_keyword,
pm_statements_node_t *statements,
- pm_node_t *consequent,
+ pm_node_t *subsequent,
const pm_token_t *end_keyword
) {
- pm_conditional_predicate(predicate);
- pm_if_node_t *node = PM_ALLOC_NODE(parser, pm_if_node_t);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+ pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
const uint8_t *end;
if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
end = end_keyword->end;
- } else if (consequent != NULL) {
- end = consequent->location.end;
+ } else if (subsequent != NULL) {
+ end = subsequent->location.end;
} else if (pm_statements_node_body_length(statements) != 0) {
end = statements->base.location.end;
} else {
@@ -3433,19 +4354,12 @@ pm_if_node_create(pm_parser_t *parser,
}
*node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .location = {
- .start = if_keyword->start,
- .end = end
- },
- },
+ .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, if_keyword->start, end),
.if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
.predicate = predicate,
.then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
.statements = statements,
- .consequent = consequent,
+ .subsequent = subsequent,
.end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
};
@@ -3457,27 +4371,20 @@ pm_if_node_create(pm_parser_t *parser,
*/
static pm_if_node_t *
pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
- pm_conditional_predicate(predicate);
- pm_if_node_t *node = PM_ALLOC_NODE(parser, pm_if_node_t);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+ pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(statements, statement);
+ pm_statements_node_body_append(parser, statements, statement, true);
*node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .location = {
- .start = statement->location.start,
- .end = predicate->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
.if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
.predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .then_keyword_loc = { 0 },
.statements = statements,
- .consequent = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .subsequent = NULL,
+ .end_keyword_loc = { 0 }
};
return node;
@@ -3489,34 +4396,27 @@ pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_t
static pm_if_node_t *
pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
pm_assert_value_expression(parser, predicate);
- pm_conditional_predicate(predicate);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
pm_statements_node_t *if_statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(if_statements, true_expression);
+ pm_statements_node_body_append(parser, if_statements, true_expression, true);
pm_statements_node_t *else_statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(else_statements, false_expression);
+ pm_statements_node_body_append(parser, else_statements, false_expression, true);
pm_token_t end_keyword = not_provided(parser);
pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
- pm_if_node_t *node = PM_ALLOC_NODE(parser, pm_if_node_t);
+ pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
*node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .location = {
- .start = predicate->location.start,
- .end = false_expression->location.end,
- },
- },
- .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, predicate, false_expression),
+ .if_keyword_loc = { 0 },
.predicate = predicate,
.then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
.statements = if_statements,
- .consequent = (pm_node_t *)else_node,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .subsequent = UP(else_node),
+ .end_keyword_loc = { 0 }
};
return node;
@@ -3540,13 +4440,10 @@ pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword
*/
static pm_implicit_node_t *
pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
- pm_implicit_node_t *node = PM_ALLOC_NODE(parser, pm_implicit_node_t);
+ pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
*node = (pm_implicit_node_t) {
- {
- .type = PM_IMPLICIT_NODE,
- .location = value->location
- },
+ .base = PM_NODE_INIT_NODE(parser, PM_IMPLICIT_NODE, 0, value),
.value = value
};
@@ -3560,13 +4457,10 @@ static pm_implicit_rest_node_t *
pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_COMMA);
- pm_implicit_rest_node_t *node = PM_ALLOC_NODE(parser, pm_implicit_rest_node_t);
+ pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
*node = (pm_implicit_rest_node_t) {
- {
- .type = PM_IMPLICIT_REST_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }
+ .base = PM_NODE_INIT_TOKEN(parser, PM_IMPLICIT_REST_NODE, 0, token)
};
return node;
@@ -3578,14 +4472,23 @@ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
static pm_integer_node_t *
pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER);
- pm_integer_node_t *node = PM_ALLOC_NODE(parser, pm_integer_node_t);
+ pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
- *node = (pm_integer_node_t) {{
- .type = PM_INTEGER_NODE,
- .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ *node = (pm_integer_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
+ .value = { 0 }
+ };
+
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
+ switch (base) {
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+ default: assert(false && "unreachable"); break;
+ }
+ pm_integer_parse(&node->value, integer_base, token->start, token->end);
return node;
}
@@ -3597,45 +4500,45 @@ static pm_imaginary_node_t *
pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
- pm_imaginary_node_t *node = PM_ALLOC_NODE(parser, pm_imaginary_node_t);
+ pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
*node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+ .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) {
.type = PM_TOKEN_INTEGER,
.start = token->start,
.end = token->end - 1
- }))
+ })))
};
return node;
}
/**
- * Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL
+ * Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
* token.
*/
static pm_rational_node_t *
pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
- pm_rational_node_t *node = PM_ALLOC_NODE(parser, pm_rational_node_t);
+ pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
*node = (pm_rational_node_t) {
- {
- .type = PM_RATIONAL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
- .type = PM_TOKEN_INTEGER,
- .start = token->start,
- .end = token->end - 1
- }))
+ .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
+ .numerator = { 0 },
+ .denominator = { .value = 1, 0 }
};
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
+ switch (base) {
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+ default: assert(false && "unreachable"); break;
+ }
+
+ pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
+
return node;
}
@@ -3647,18 +4550,14 @@ static pm_imaginary_node_t *
pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
- pm_imaginary_node_t *node = PM_ALLOC_NODE(parser, pm_imaginary_node_t);
+ pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
*node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+ .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
.type = PM_TOKEN_INTEGER_RATIONAL,
.start = token->start,
.end = token->end - 1
- }))
+ })))
};
return node;
@@ -3669,7 +4568,7 @@ pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t b
*/
static pm_in_node_t *
pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
- pm_in_node_t *node = PM_ALLOC_NODE(parser, pm_in_node_t);
+ pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
const uint8_t *end;
if (statements != NULL) {
@@ -3681,13 +4580,7 @@ pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t
}
*node = (pm_in_node_t) {
- {
- .type = PM_IN_NODE,
- .location = {
- .start = in_keyword->start,
- .end = end
- },
- },
+ .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, in_keyword->start, end),
.pattern = pattern,
.statements = statements,
.in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
@@ -3703,16 +4596,10 @@ pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t
static pm_instance_variable_and_write_node_t *
pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_instance_variable_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_instance_variable_and_write_node_t);
+ pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
*node = (pm_instance_variable_and_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -3727,21 +4614,15 @@ pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_vari
*/
static pm_instance_variable_operator_write_node_t *
pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_instance_variable_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_instance_variable_operator_write_node_t);
+ pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
*node = (pm_instance_variable_operator_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
};
return node;
@@ -3753,16 +4634,10 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
static pm_instance_variable_or_write_node_t *
pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_instance_variable_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_instance_variable_or_write_node_t);
+ pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
*node = (pm_instance_variable_or_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, target, value),
.name = target->name,
.name_loc = target->base.location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -3778,13 +4653,10 @@ pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_varia
static pm_instance_variable_read_node_t *
pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
- pm_instance_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_instance_variable_read_node_t);
+ pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
*node = (pm_instance_variable_read_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_READ_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, token),
.name = pm_parser_constant_id_token(parser, token)
};
@@ -3797,17 +4669,13 @@ pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *tok
*/
static pm_instance_variable_write_node_t *
pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
- pm_instance_variable_write_node_t *node = PM_ALLOC_NODE(parser, pm_instance_variable_write_node_t);
+ pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
+ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
+
*node = (pm_instance_variable_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
- .location = {
- .start = read_node->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, read_node, value),
.name = read_node->name,
- .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
+ .name_loc = PM_LOCATION_NODE_VALUE(read_node),
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
.value = value
};
@@ -3816,20 +4684,58 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
}
/**
+ * Append a part into a list of string parts. Importantly this handles nested
+ * interpolated strings by not necessarily removing the marker for static
+ * literals.
+ */
+static void
+pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
+ switch (PM_NODE_TYPE(part)) {
+ case PM_STRING_NODE:
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
+ break;
+ case PM_EMBEDDED_STATEMENTS_NODE: {
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
+
+ if (embedded == NULL) {
+ // If there are no statements or more than one statement, then
+ // we lose the static literal flag.
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
+ // If the embedded statement is a string, then we can keep the
+ // static literal flag and mark the string as frozen.
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
+ // If the embedded statement is an interpolated string and it's
+ // a static literal, then we can keep the static literal flag.
+ } else {
+ // Otherwise we lose the static literal flag.
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
+ }
+
+ break;
+ }
+ case PM_EMBEDDED_VARIABLE_NODE:
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
+ break;
+ default:
+ assert(false && "unexpected node type");
+ break;
+ }
+
+ pm_node_list_append(parts, part);
+}
+
+/**
* Allocate a new InterpolatedRegularExpressionNode node.
*/
static pm_interpolated_regular_expression_node_t *
pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
- pm_interpolated_regular_expression_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_regular_expression_node_t);
+ pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
*node = (pm_interpolated_regular_expression_node_t) {
- {
- .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
- .location = {
- .start = opening->start,
- .end = NULL,
- },
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
.parts = { 0 }
@@ -3846,14 +4752,128 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
if (node->base.location.end < part->location.end) {
node->base.location.end = part->location.end;
}
- pm_node_list_append(&node->parts, part);
+
+ pm_interpolated_node_append(UP(node), &node->parts, part);
}
+/**
+ * Record the closing token of an InterpolatedRegularExpressionNode node: store
+ * its location, move the end of the node to the end of the token, and set on
+ * the node the flags that pm_regular_expression_flags_create returns for it.
+ */
static inline void
-pm_interpolated_regular_expression_node_closing_set(pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
+pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
node->base.location.end = closing->end;
- pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(closing));
+ pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
+}
+
+/**
+ * Append a part to an InterpolatedStringNode node.
+ *
+ * This has some somewhat complicated semantics, because we need to update
+ * multiple flags that have somewhat confusing interactions.
+ *
+ * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
+ * single static literal string that can be pushed onto the stack on its own.
+ * Note that this flag on its own says nothing about whether the string will
+ * be frozen; the instructions in CRuby will be either putobject or putstring,
+ * depending on the combination of `--enable-frozen-string-literal`,
+ * `# frozen_string_literal: true`, and whether or not there is interpolation.
+ *
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
+ * explicitly frozen. This will only happen if the string is comprised entirely
+ * of string parts that are themselves static literals and frozen.
+ *
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
+ * be explicitly marked as mutable. This will happen from
+ * `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
+ * is necessary to indicate that the string should be left up to the runtime,
+ * which could potentially use a chilled string otherwise.
+ *
+ * Besides the flags, appending a part also updates the location of the node:
+ * the start is taken from the first part when the node has no opening, and the
+ * end is set to the MAX of the current end and the end of the part.
+ */
+static inline void
+pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
+ // Drop the static literal flag together with both mutability flags.
+#define CLEAR_FLAGS(node) \
+ (node)->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
+
+ // Mark the node as mutable and make sure it is not marked as frozen. Like
+ // CLEAR_FLAGS, this expands to a single expression without a trailing
+ // semicolon, so the call site supplies the semicolon.
+#define MUTABLE_FLAGS(node) \
+ (node)->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN)
+
+ if (node->parts.size == 0 && node->opening_loc.start == NULL) {
+ node->base.location.start = part->location.start;
+ }
+
+ node->base.location.end = MAX(node->base.location.end, part->location.end);
+
+ switch (PM_NODE_TYPE(part)) {
+ case PM_STRING_NODE:
+ // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
+ // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
+ // as long as this interpolation only consists of other string literals.
+ if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
+ }
+ // The part itself is always marked as a frozen static literal, and its
+ // mutable flag is removed.
+ part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
+ break;
+ case PM_INTERPOLATED_STRING_NODE:
+ if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
+ // If the string that we're concatenating is a static literal,
+ // then we can keep the static literal flag for this string.
+ } else {
+ // Otherwise, we lose the static literal flag here and we should
+ // also clear the mutability flags.
+ CLEAR_FLAGS(node);
+ }
+ break;
+ case PM_EMBEDDED_STATEMENTS_NODE: {
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
+
+ if (embedded == NULL) {
+ // If we're embedding multiple statements or no statements, then
+ // the string is no longer a static literal.
+ CLEAR_FLAGS(node);
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
+ // If the embedded statement is a string, then we can mark that
+ // string as frozen and static literal, and not touch the static
+ // literal status of this string.
+ embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
+
+ // A string that is still a static literal is marked mutable and
+ // not frozen.
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
+ MUTABLE_FLAGS(node);
+ }
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
+ // If the embedded statement is an interpolated string, but that
+ // string is marked as static literal, then we can keep our
+ // static literal status for this string. If we still have it, we
+ // are marked mutable and not frozen.
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
+ MUTABLE_FLAGS(node);
+ }
+ } else {
+ // In all other cases, we lose the static literal flag here, and
+ // the frozen and mutable flags are cleared along with it.
+ CLEAR_FLAGS(node);
+ }
+
+ break;
+ }
+ case PM_EMBEDDED_VARIABLE_NODE:
+ // Embedded variables clear static literal, which means we also
+ // should clear the mutability flags.
+ CLEAR_FLAGS(node);
+ break;
+ case PM_X_STRING_NODE:
+ case PM_INTERPOLATED_X_STRING_NODE:
+ case PM_SYMBOL_NODE:
+ case PM_INTERPOLATED_SYMBOL_NODE:
+ // These will only happen in error cases. But we want to handle it
+ // here so that we don't fail the assertion.
+ CLEAR_FLAGS(node);
+ break;
+ default:
+ assert(false && "unexpected node type");
+ break;
+ }
+
+ pm_node_list_append(&node->parts, part);
+
+#undef CLEAR_FLAGS
+#undef MUTABLE_FLAGS
}
/**
@@ -3861,46 +4881,56 @@ pm_interpolated_regular_expression_node_closing_set(pm_interpolated_regular_expr
*/
static pm_interpolated_string_node_t *
pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
- pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
+ pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
+ // Every interpolated string starts out flagged as a static literal; the
+ // parts appended below may clear that flag again.
+ pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
+
+ // Seed the frozen/mutable flag from the parser's frozen_string_literal
+ // setting. There is no default case, so any other value leaves both flags
+ // unset.
+ switch (parser->frozen_string_literal) {
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
+ break;
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
+ break;
+ }
*node = (pm_interpolated_string_node_t) {
- {
- .type = PM_INTERPOLATED_STRING_NODE,
- .location = {
- .start = opening->start,
- .end = closing->end,
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_STRING_NODE, flags, opening, closing),
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.parts = { 0 }
};
if (parts != NULL) {
- node->parts = *parts;
+ // Append the parts one at a time instead of copying the list, so that
+ // pm_interpolated_string_node_append updates the flags and the location
+ // of the node for every part.
+ pm_node_t *part;
+ PM_NODE_LIST_FOREACH(parts, index, part) {
+ pm_interpolated_string_node_append(node, part);
+ }
}
return node;
}
/**
- * Append a part to an InterpolatedStringNode node.
+ * Set the closing token of the given InterpolatedStringNode node.
+ *
+ * The location derived from the token is stored in closing_loc, and the end
+ * of the node is moved to the end of the token.
*/
-static inline void
-pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
+static void
+pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
+ node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
+ node->base.location.end = closing->end;
+}
+
+/**
+ * Append a part to an InterpolatedSymbolNode node.
+ *
+ * When this is the first part and the node has no opening location, the start
+ * of the node is taken from the part. The part is added to the list through
+ * pm_interpolated_node_append, and the end of the node is set to the MAX of
+ * its current end and the end of the part.
+ */
+static void
+pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
if (node->parts.size == 0 && node->opening_loc.start == NULL) {
node->base.location.start = part->location.start;
}
- pm_node_list_append(&node->parts, part);
- node->base.location.end = part->location.end;
+ pm_interpolated_node_append(UP(node), &node->parts, part);
+ node->base.location.end = MAX(node->base.location.end, part->location.end);
}
-/**
- * Set the closing token of the given InterpolatedStringNode node.
- */
+/**
+ * Set the closing token of the given InterpolatedSymbolNode node.
+ *
+ * The location derived from the token is stored in closing_loc, and the end
+ * of the node is moved to the end of the token.
+ */
static void
-pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
+pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
node->base.location.end = closing->end;
}
@@ -3910,53 +4940,34 @@ pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, con
*/
static pm_interpolated_symbol_node_t *
pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
- pm_interpolated_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_symbol_node_t);
+ pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
*node = (pm_interpolated_symbol_node_t) {
- {
- .type = PM_INTERPOLATED_SYMBOL_NODE,
- .location = {
- .start = opening->start,
- .end = closing->end,
- },
- },
+ // The node starts out flagged as a static literal.
+ .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.parts = { 0 }
};
if (parts != NULL) {
- node->parts = *parts;
+ // Append the parts one at a time instead of copying the list, so that
+ // each part goes through pm_interpolated_symbol_node_append.
+ pm_node_t *part;
+ PM_NODE_LIST_FOREACH(parts, index, part) {
+ pm_interpolated_symbol_node_append(node, part);
+ }
}
return node;
}
-static inline void
-pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
- if (node->parts.size == 0 && node->opening_loc.start == NULL) {
- node->base.location.start = part->location.start;
- }
-
- pm_node_list_append(&node->parts, part);
- node->base.location.end = part->location.end;
-}
-
/**
* Allocate a new InterpolatedXStringNode node.
*/
static pm_interpolated_x_string_node_t *
pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_interpolated_x_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_x_string_node_t);
+ pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
*node = (pm_interpolated_x_string_node_t) {
- {
- .type = PM_INTERPOLATED_X_STRING_NODE,
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_X_STRING_NODE, 0, opening, closing),
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.parts = { 0 }
@@ -3967,7 +4978,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
static inline void
pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
- pm_node_list_append(&node->parts, part);
+ // Add the part to the list through the shared pm_interpolated_node_append
+ // helper.
+ pm_interpolated_node_append(UP(node), &node->parts, part);
+ // The end of the node is overwritten with the end of the part. No MAX is
+ // applied here, unlike the string and symbol append functions.
node->base.location.end = part->location.end;
}
@@ -3978,18 +4989,42 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
}
/**
+ * Create a local variable read that is reading the implicit 'it' variable.
+ *
+ * Only the base of the node is initialized here: it is built from the given
+ * name token with no flags set.
+ */
+static pm_it_local_variable_read_node_t *
+pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
+ pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
+
+ *node = (pm_it_local_variable_read_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, name),
+ };
+
+ return node;
+}
+
+/**
+ * Allocate and initialize a new ItParametersNode node.
+ *
+ * Only the base of the node is initialized here: it is built from the given
+ * opening and closing tokens with no flags set.
+ */
+static pm_it_parameters_node_t *
+pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
+ pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
+
+ *node = (pm_it_parameters_node_t) {
+ .base = PM_NODE_INIT_TOKENS(parser, PM_IT_PARAMETERS_NODE, 0, opening, closing),
+ };
+
+ return node;
+}
+
+/**
* Allocate a new KeywordHashNode node.
*/
static pm_keyword_hash_node_t *
pm_keyword_hash_node_create(pm_parser_t *parser) {
- pm_keyword_hash_node_t *node = PM_ALLOC_NODE(parser, pm_keyword_hash_node_t);
+ pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
*node = (pm_keyword_hash_node_t) {
- .base = {
- .type = PM_KEYWORD_HASH_NODE,
- .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS
- },
+ .base = PM_NODE_INIT_UNSET(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS),
.elements = { 0 }
};
@@ -4001,11 +5036,10 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
*/
static void
pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
- // If the element being added is not an AssocNode or does not have a symbol key, then
- // we want to turn the STATIC_KEYS flag off.
- // TODO: Rename the flag to SYMBOL_KEYS instead.
+ // If the element being added is not an AssocNode or does not have a symbol
+ // key, then we want to turn the SYMBOL_KEYS flag off.
if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
- pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
+ pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
}
pm_node_list_append(&hash->elements, element);
@@ -4020,16 +5054,10 @@ pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *el
*/
static pm_required_keyword_parameter_node_t *
pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_required_keyword_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_required_keyword_parameter_node_t);
+ pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
*node = (pm_required_keyword_parameter_node_t) {
- {
- .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
- .location = {
- .start = name->start,
- .end = name->end
- },
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, name),
.name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
.name_loc = PM_LOCATION_TOKEN_VALUE(name),
};
@@ -4042,16 +5070,10 @@ pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t
*/
static pm_optional_keyword_parameter_node_t *
pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
- pm_optional_keyword_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_optional_keyword_parameter_node_t);
+ pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
*node = (pm_optional_keyword_parameter_node_t) {
- {
- .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
- .location = {
- .start = name->start,
- .end = value->location.end
- },
- },
+ .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, name, value),
.name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
.name_loc = PM_LOCATION_TOKEN_VALUE(name),
.value = value
@@ -4065,16 +5087,14 @@ pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t
*/
static pm_keyword_rest_parameter_node_t *
pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
- pm_keyword_rest_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_keyword_rest_parameter_node_t);
+ pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
*node = (pm_keyword_rest_parameter_node_t) {
- {
- .type = PM_KEYWORD_REST_PARAMETER_NODE,
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- },
- },
+ .base = (
+ (name->type == PM_TOKEN_NOT_PROVIDED)
+ ? PM_NODE_INIT_TOKEN(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator)
+ : PM_NODE_INIT_TOKENS(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator, name)
+ ),
.name = pm_parser_optional_constant_id_token(parser, name),
.name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -4090,25 +5110,17 @@ static pm_lambda_node_t *
pm_lambda_node_create(
pm_parser_t *parser,
pm_constant_id_list_t *locals,
- uint32_t locals_body_index,
const pm_token_t *operator,
const pm_token_t *opening,
const pm_token_t *closing,
pm_node_t *parameters,
pm_node_t *body
) {
- pm_lambda_node_t *node = PM_ALLOC_NODE(parser, pm_lambda_node_t);
+ pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
*node = (pm_lambda_node_t) {
- {
- .type = PM_LAMBDA_NODE,
- .location = {
- .start = operator->start,
- .end = closing->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_LAMBDA_NODE, 0, operator, closing),
.locals = *locals,
- .locals_body_index = locals_body_index,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -4124,18 +5136,12 @@ pm_lambda_node_create(
*/
static pm_local_variable_and_write_node_t *
pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
- assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
+ assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_local_variable_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_and_write_node_t);
+ pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
*node = (pm_local_variable_and_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
.name_loc = target->location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
@@ -4151,21 +5157,15 @@ pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
static pm_local_variable_operator_write_node_t *
pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
- pm_local_variable_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_operator_write_node_t);
+ pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
*node = (pm_local_variable_operator_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
.name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
.name = name,
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
.depth = depth
};
@@ -4177,18 +5177,12 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
*/
static pm_local_variable_or_write_node_t *
pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
- assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
+ assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_local_variable_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_or_write_node_t);
+ pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
*node = (pm_local_variable_or_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
.name_loc = target->location,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.value = value,
@@ -4200,23 +5194,16 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
}
/**
- * Allocate a new LocalVariableReadNode node.
+ * Allocate a new LocalVariableReadNode node with constant_id.
*/
static pm_local_variable_read_node_t *
-pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
- pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
+pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
+ if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
- if (parser->current_param_name == name_id) {
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
- }
-
- pm_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_read_node_t);
+ pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
*node = (pm_local_variable_read_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_READ_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(name)
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, name),
.name = name_id,
.depth = depth
};
@@ -4225,20 +5212,34 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
}
/**
+ * Allocate and initialize a new LocalVariableReadNode node.
+ *
+ * The constant id is looked up from the name token, and the node is then
+ * created by pm_local_variable_read_node_create_constant_id with `missing` set
+ * to false, so that function calls pm_locals_read for the scope at `depth`.
+ */
+static pm_local_variable_read_node_t *
+pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
+ return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
+}
+
+/**
+ * Allocate and initialize a new LocalVariableReadNode node for a missing local
+ * variable. (This will only happen when there is a syntax error.)
+ *
+ * This differs from pm_local_variable_read_node_create only in passing
+ * `missing` as true, which makes
+ * pm_local_variable_read_node_create_constant_id skip its pm_locals_read call.
+ */
+static pm_local_variable_read_node_t *
+pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
+ return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
+}
+
+/**
* Allocate and initialize a new LocalVariableWriteNode node.
*/
static pm_local_variable_write_node_t *
pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
- pm_local_variable_write_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_write_node_t);
+ pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
+ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
*node = (pm_local_variable_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_WRITE_NODE,
- .location = {
- .start = name_loc->start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, name_loc, value),
.name = name,
.depth = depth,
.value = value,
@@ -4250,6 +5251,14 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
}
/**
+ * Returns true if the given bounds comprise `it`.
+ *
+ * The length check comes first, so start[0] and start[1] are only read when
+ * exactly two bytes lie between start and end (end is treated as exclusive).
+ */
+static inline bool
+pm_token_is_it(const uint8_t *start, const uint8_t *end) {
+ return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
+}
+
+/**
* Returns true if the given bounds comprise a numbered parameter (i.e., they
* are of the form /^_\d$/).
*/
@@ -4274,14 +5283,12 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui
* name and depth.
*/
static pm_local_variable_target_node_t *
-pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
- pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t);
+pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
+ pm_refute_numbered_parameter(parser, location->start, location->end);
+ pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
*node = (pm_local_variable_target_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_TARGET_NODE,
- .location = *location
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, location),
.name = name,
.depth = depth
};
@@ -4290,52 +5297,16 @@ pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_locati
}
/**
- * Allocate and initialize a new LocalVariableTargetNode node.
- */
-static pm_local_variable_target_node_t *
-pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_refute_numbered_parameter(parser, name->start, name->end);
-
- return pm_local_variable_target_node_create_values(
- parser,
- &(pm_location_t) { .start = name->start, .end = name->end },
- pm_parser_constant_id_token(parser, name),
- 0
- );
-}
-
-/**
- * Allocate and initialize a new LocalVariableTargetNode node with the given depth.
- */
-static pm_local_variable_target_node_t *
-pm_local_variable_target_node_create_depth(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
- pm_refute_numbered_parameter(parser, name->start, name->end);
-
- return pm_local_variable_target_node_create_values(
- parser,
- &(pm_location_t) { .start = name->start, .end = name->end },
- pm_parser_constant_id_token(parser, name),
- depth
- );
-}
-
-/**
* Allocate and initialize a new MatchPredicateNode node.
*/
static pm_match_predicate_node_t *
pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
pm_assert_value_expression(parser, value);
- pm_match_predicate_node_t *node = PM_ALLOC_NODE(parser, pm_match_predicate_node_t);
+ pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
*node = (pm_match_predicate_node_t) {
- {
- .type = PM_MATCH_PREDICATE_NODE,
- .location = {
- .start = value->location.start,
- .end = pattern->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_MATCH_PREDICATE_NODE, 0, value, pattern),
.value = value,
.pattern = pattern,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -4351,16 +5322,10 @@ static pm_match_required_node_t *
pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
pm_assert_value_expression(parser, value);
- pm_match_required_node_t *node = PM_ALLOC_NODE(parser, pm_match_required_node_t);
+ pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
*node = (pm_match_required_node_t) {
- {
- .type = PM_MATCH_REQUIRED_NODE,
- .location = {
- .start = value->location.start,
- .end = pattern->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_MATCH_REQUIRED_NODE, 0, value, pattern),
.value = value,
.pattern = pattern,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -4374,13 +5339,10 @@ pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *
*/
static pm_match_write_node_t *
pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
- pm_match_write_node_t *node = PM_ALLOC_NODE(parser, pm_match_write_node_t);
+ pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
*node = (pm_match_write_node_t) {
- {
- .type = PM_MATCH_WRITE_NODE,
- .location = call->base.location
- },
+ .base = PM_NODE_INIT_NODE(parser, PM_MATCH_WRITE_NODE, 0, call),
.call = call,
.targets = { 0 }
};
@@ -4393,16 +5355,10 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
*/
static pm_module_node_t *
pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_module_node_t *node = PM_ALLOC_NODE(parser, pm_module_node_t);
+ pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
*node = (pm_module_node_t) {
- {
- .type = PM_MODULE_NODE,
- .location = {
- .start = module_keyword->start,
- .end = end_keyword->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_MODULE_NODE, 0, module_keyword, end_keyword),
.locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
.module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
.constant_path = constant_path,
@@ -4419,18 +5375,15 @@ pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const
*/
static pm_multi_target_node_t *
pm_multi_target_node_create(pm_parser_t *parser) {
- pm_multi_target_node_t *node = PM_ALLOC_NODE(parser, pm_multi_target_node_t);
+ pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
*node = (pm_multi_target_node_t) {
- {
- .type = PM_MULTI_TARGET_NODE,
- .location = { .start = NULL, .end = NULL }
- },
+ .base = PM_NODE_INIT_UNSET(parser, PM_MULTI_TARGET_NODE, 0),
.lefts = { 0 },
.rest = NULL,
.rights = { 0 },
- .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .lparen_loc = { 0 },
+ .rparen_loc = { 0 }
};
return node;
@@ -4441,13 +5394,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
*/
static void
pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
- if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE) || PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
+ if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
if (node->rest == NULL) {
node->rest = target;
} else {
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
pm_node_list_append(&node->rights, target);
}
+ } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
+ if (node->rest == NULL) {
+ node->rest = target;
+ } else {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
+ pm_node_list_append(&node->rights, target);
+ }
} else if (node->rest == NULL) {
pm_node_list_append(&node->lefts, target);
} else {
@@ -4486,16 +5446,11 @@ pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t
*/
static pm_multi_write_node_t *
pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_multi_write_node_t *node = PM_ALLOC_NODE(parser, pm_multi_write_node_t);
+ pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
+ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
*node = (pm_multi_write_node_t) {
- {
- .type = PM_MULTI_WRITE_NODE,
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_MULTI_WRITE_NODE, flags, target, value),
.lefts = target->lefts,
.rest = target->rest,
.rights = target->rights,
@@ -4507,7 +5462,7 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target,
// Explicitly do not call pm_node_destroy here because we want to keep
// around all of the information within the MultiWriteNode node.
- free(target);
+ xfree(target);
return node;
}
@@ -4518,16 +5473,14 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target,
static pm_next_node_t *
pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
- pm_next_node_t *node = PM_ALLOC_NODE(parser, pm_next_node_t);
+ pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
*node = (pm_next_node_t) {
- {
- .type = PM_NEXT_NODE,
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- }
- },
+ .base = (
+ (arguments == NULL)
+ ? PM_NODE_INIT_TOKEN(parser, PM_NEXT_NODE, 0, keyword)
+ : PM_NODE_INIT_TOKEN_NODE(parser, PM_NEXT_NODE, 0, keyword, arguments)
+ ),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.arguments = arguments
};
@@ -4541,13 +5494,11 @@ pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments
static pm_nil_node_t *
pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_NIL);
- pm_nil_node_t *node = PM_ALLOC_NODE(parser, pm_nil_node_t);
+ pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
- *node = (pm_nil_node_t) {{
- .type = PM_NIL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ *node = (pm_nil_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+ };
return node;
}
@@ -4559,16 +5510,10 @@ static pm_no_keywords_parameter_node_t *
pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
- pm_no_keywords_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_no_keywords_parameter_node_t);
+ pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
*node = (pm_no_keywords_parameter_node_t) {
- {
- .type = PM_NO_KEYWORDS_PARAMETER_NODE,
- .location = {
- .start = operator->start,
- .end = keyword->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, operator, keyword),
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
};
@@ -4581,13 +5526,10 @@ pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *oper
*/
static pm_numbered_parameters_node_t *
pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
- pm_numbered_parameters_node_t *node = PM_ALLOC_NODE(parser, pm_numbered_parameters_node_t);
+ pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
*node = (pm_numbered_parameters_node_t) {
- {
- .type = PM_NUMBERED_PARAMETERS_NODE,
- .location = *location
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_PARAMETERS_NODE, 0, location),
.maximum = maximum
};
@@ -4595,19 +5537,65 @@ pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *loc
}
/**
+ * The maximum numbered reference value is defined as the maximum value that an
+ * integer can hold minus 1 bit for CRuby instruction sequence operand tagging.
+ */
+#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
+
+/**
+ * Parse the decimal number represented by the range of bytes. Returns
+ * 0 if the number fails to parse or if the number is greater than the maximum
+ * value representable by a numbered reference. This function assumes that the
+ * range of bytes has already been validated to contain only decimal digits.
+ */
+static uint32_t
+pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
+ const uint8_t *start = token->start + 1;
+ const uint8_t *end = token->end;
+
+ ptrdiff_t diff = end - start;
+ assert(diff > 0);
+#if PTRDIFF_MAX > SIZE_MAX
+ assert(diff < (ptrdiff_t) SIZE_MAX);
+#endif
+ size_t length = (size_t) diff;
+
+ char *digits = xcalloc(length + 1, sizeof(char));
+ memcpy(digits, start, length);
+ digits[length] = '\0';
+
+ char *endptr;
+ errno = 0;
+ unsigned long value = strtoul(digits, &endptr, 10);
+
+ if ((digits == endptr) || (*endptr != '\0')) {
+ pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
+ value = 0;
+ }
+
+ xfree(digits);
+
+ if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
+ PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
+ value = 0;
+ }
+
+ return (uint32_t) value;
+}
+
+#undef NTH_REF_MAX
+
+/**
* Allocate and initialize a new NthReferenceReadNode node.
*/
static pm_numbered_reference_read_node_t *
pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
- pm_numbered_reference_read_node_t *node = PM_ALLOC_NODE(parser, pm_numbered_reference_read_node_t);
+ pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
*node = (pm_numbered_reference_read_node_t) {
- {
- .type = PM_NUMBERED_REFERENCE_READ_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .number = parse_decimal_number(parser, name->start + 1, name->end)
+ .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, name),
+ .number = pm_numbered_reference_read_node_number(parser, name)
};
return node;
@@ -4618,16 +5606,10 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na
*/
static pm_optional_parameter_node_t *
pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
- pm_optional_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_optional_parameter_node_t);
+ pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
*node = (pm_optional_parameter_node_t) {
- {
- .type = PM_OPTIONAL_PARAMETER_NODE,
- .location = {
- .start = name->start,
- .end = value->location.end
- }
- },
+ .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_PARAMETER_NODE, 0, name, value),
.name = pm_parser_constant_id_token(parser, name),
.name_loc = PM_LOCATION_TOKEN_VALUE(name),
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -4644,16 +5626,10 @@ static pm_or_node_t *
pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
pm_assert_value_expression(parser, left);
- pm_or_node_t *node = PM_ALLOC_NODE(parser, pm_or_node_t);
+ pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
*node = (pm_or_node_t) {
- {
- .type = PM_OR_NODE,
- .location = {
- .start = left->location.start,
- .end = right->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_OR_NODE, 0, left, right),
.left = left,
.right = right,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -4667,13 +5643,10 @@ pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operat
*/
static pm_parameters_node_t *
pm_parameters_node_create(pm_parser_t *parser) {
- pm_parameters_node_t *node = PM_ALLOC_NODE(parser, pm_parameters_node_t);
+ pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
*node = (pm_parameters_node_t) {
- {
- .type = PM_PARAMETERS_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
- },
+ .base = PM_NODE_INIT_UNSET(parser, PM_PARAMETERS_NODE, 0),
.rest = NULL,
.keyword_rest = NULL,
.block = NULL,
@@ -4718,8 +5691,8 @@ pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *par
*/
static void
pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
- pm_parameters_node_location_set(params, (pm_node_t *) param);
- pm_node_list_append(&params->optionals, (pm_node_t *) param);
+ pm_parameters_node_location_set(params, UP(param));
+ pm_node_list_append(&params->optionals, UP(param));
}
/**
@@ -4765,7 +5738,7 @@ pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *par
static void
pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
assert(params->block == NULL);
- pm_parameters_node_location_set(params, (pm_node_t *) param);
+ pm_parameters_node_location_set(params, UP(param));
params->block = param;
}
@@ -4774,16 +5747,10 @@ pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_no
*/
static pm_program_node_t *
pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
- pm_program_node_t *node = PM_ALLOC_NODE(parser, pm_program_node_t);
+ pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
*node = (pm_program_node_t) {
- {
- .type = PM_PROGRAM_NODE,
- .location = {
- .start = statements == NULL ? parser->start : statements->base.location.start,
- .end = statements == NULL ? parser->end : statements->base.location.end
- }
- },
+ .base = PM_NODE_INIT_NODE(parser, PM_PROGRAM_NODE, 0, statements),
.locals = *locals,
.statements = statements
};
@@ -4795,17 +5762,11 @@ pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_st
* Allocate and initialize new ParenthesesNode node.
*/
static pm_parentheses_node_t *
-pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
- pm_parentheses_node_t *node = PM_ALLOC_NODE(parser, pm_parentheses_node_t);
+pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
+ pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
*node = (pm_parentheses_node_t) {
- {
- .type = PM_PARENTHESES_NODE,
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_PARENTHESES_NODE, flags, opening, closing),
.body = body,
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
@@ -4819,16 +5780,10 @@ pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_no
*/
static pm_pinned_expression_node_t *
pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
- pm_pinned_expression_node_t *node = PM_ALLOC_NODE(parser, pm_pinned_expression_node_t);
+ pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
*node = (pm_pinned_expression_node_t) {
- {
- .type = PM_PINNED_EXPRESSION_NODE,
- .location = {
- .start = operator->start,
- .end = rparen->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_PINNED_EXPRESSION_NODE, 0, operator, rparen),
.expression = expression,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
@@ -4843,16 +5798,10 @@ pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, con
*/
static pm_pinned_variable_node_t *
pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
- pm_pinned_variable_node_t *node = PM_ALLOC_NODE(parser, pm_pinned_variable_node_t);
+ pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
*node = (pm_pinned_variable_node_t) {
- {
- .type = PM_PINNED_VARIABLE_NODE,
- .location = {
- .start = operator->start,
- .end = variable->location.end
- }
- },
+ .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_PINNED_VARIABLE_NODE, 0, operator, variable),
.variable = variable,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
};
@@ -4865,16 +5814,10 @@ pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator,
*/
static pm_post_execution_node_t *
pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_post_execution_node_t *node = PM_ALLOC_NODE(parser, pm_post_execution_node_t);
+ pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
*node = (pm_post_execution_node_t) {
- {
- .type = PM_POST_EXECUTION_NODE,
- .location = {
- .start = keyword->start,
- .end = closing->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_POST_EXECUTION_NODE, 0, keyword, closing),
.statements = statements,
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -4889,16 +5832,10 @@ pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, co
*/
static pm_pre_execution_node_t *
pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_pre_execution_node_t *node = PM_ALLOC_NODE(parser, pm_pre_execution_node_t);
+ pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
*node = (pm_pre_execution_node_t) {
- {
- .type = PM_PRE_EXECUTION_NODE,
- .location = {
- .start = keyword->start,
- .end = closing->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_PRE_EXECUTION_NODE, 0, keyword, closing),
.statements = statements,
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -4916,10 +5853,10 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
pm_assert_value_expression(parser, left);
pm_assert_value_expression(parser, right);
- pm_range_node_t *node = PM_ALLOC_NODE(parser, pm_range_node_t);
+ pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
pm_node_flags_t flags = 0;
- // Indicate that this node an exclusive range if the operator is `...`.
+ // Indicate that this node is an exclusive range if the operator is `...`.
if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
flags |= PM_RANGE_FLAGS_EXCLUDE_END;
}
@@ -4935,14 +5872,7 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
}
*node = (pm_range_node_t) {
- {
- .type = PM_RANGE_NODE,
- .flags = flags,
- .location = {
- .start = (left == NULL ? operator->start : left->location.start),
- .end = (right == NULL ? operator->end : right->location.end)
- }
- },
+ .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, (left == NULL ? operator->start : left->location.start), (right == NULL ? operator->end : right->location.end)),
.left = left,
.right = right,
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -4957,9 +5887,12 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
static pm_redo_node_t *
pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_REDO);
- pm_redo_node_t *node = PM_ALLOC_NODE(parser, pm_redo_node_t);
+ pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
+
+ *node = (pm_redo_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_REDO_NODE, 0, token)
+ };
- *node = (pm_redo_node_t) {{ .type = PM_REDO_NODE, .location = PM_LOCATION_TOKEN_VALUE(token) }};
return node;
}
@@ -4969,17 +5902,11 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_regular_expression_node_t *
pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
- pm_regular_expression_node_t *node = PM_ALLOC_NODE(parser, pm_regular_expression_node_t);
+ pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
+ pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL;
*node = (pm_regular_expression_node_t) {
- {
- .type = PM_REGULAR_EXPRESSION_NODE,
- .flags = pm_regular_expression_flags_create(closing) | PM_NODE_FLAG_STATIC_LITERAL,
- .location = {
- .start = MIN(opening->start, closing->start),
- .end = MAX(opening->end, closing->end)
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_REGULAR_EXPRESSION_NODE, flags, opening, closing),
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -5002,13 +5929,10 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
*/
static pm_required_parameter_node_t *
pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
- pm_required_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_required_parameter_node_t);
+ pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
*node = (pm_required_parameter_node_t) {
- {
- .type = PM_REQUIRED_PARAMETER_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_PARAMETER_NODE, 0, token),
.name = pm_parser_constant_id_token(parser, token)
};
@@ -5020,16 +5944,10 @@ pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token)
*/
static pm_rescue_modifier_node_t *
pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
- pm_rescue_modifier_node_t *node = PM_ALLOC_NODE(parser, pm_rescue_modifier_node_t);
+ pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
*node = (pm_rescue_modifier_node_t) {
- {
- .type = PM_RESCUE_MODIFIER_NODE,
- .location = {
- .start = expression->location.start,
- .end = rescue_expression->location.end
- }
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_RESCUE_MODIFIER_NODE, 0, expression, rescue_expression),
.expression = expression,
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.rescue_expression = rescue_expression
@@ -5039,22 +5957,20 @@ pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const
}
/**
- * Allocate and initiliaze a new RescueNode node.
+ * Allocate and initialize a new RescueNode node.
*/
static pm_rescue_node_t *
pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
- pm_rescue_node_t *node = PM_ALLOC_NODE(parser, pm_rescue_node_t);
+ pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
*node = (pm_rescue_node_t) {
- {
- .type = PM_RESCUE_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(keyword)
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_RESCUE_NODE, 0, keyword),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .operator_loc = { 0 },
+ .then_keyword_loc = { 0 },
.reference = NULL,
.statements = NULL,
- .consequent = NULL,
+ .subsequent = NULL,
.exceptions = { 0 }
};
@@ -5087,12 +6003,12 @@ pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *stat
}
/**
- * Set the consequent of a rescue node, and update the location.
+ * Set the subsequent of a rescue node, and update the location.
*/
static void
-pm_rescue_node_consequent_set(pm_rescue_node_t *node, pm_rescue_node_t *consequent) {
- node->consequent = consequent;
- node->base.location.end = consequent->base.location.end;
+pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
+ node->subsequent = subsequent;
+ node->base.location.end = subsequent->base.location.end;
}
/**
@@ -5109,16 +6025,14 @@ pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
*/
static pm_rest_parameter_node_t *
pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
- pm_rest_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_rest_parameter_node_t);
+ pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
*node = (pm_rest_parameter_node_t) {
- {
- .type = PM_REST_PARAMETER_NODE,
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- }
- },
+ .base = (
+ (name->type == PM_TOKEN_NOT_PROVIDED)
+ ? PM_NODE_INIT_TOKEN(parser, PM_REST_PARAMETER_NODE, 0, operator)
+ : PM_NODE_INIT_TOKENS(parser, PM_REST_PARAMETER_NODE, 0, operator, name)
+ ),
.name = pm_parser_optional_constant_id_token(parser, name),
.name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
@@ -5133,9 +6047,12 @@ pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, c
static pm_retry_node_t *
pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_RETRY);
- pm_retry_node_t *node = PM_ALLOC_NODE(parser, pm_retry_node_t);
+ pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
+
+ *node = (pm_retry_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_RETRY_NODE, 0, token)
+ };
- *node = (pm_retry_node_t) {{ .type = PM_RETRY_NODE, .location = PM_LOCATION_TOKEN_VALUE(token) }};
return node;
}
@@ -5144,16 +6061,14 @@ pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_return_node_t *
pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
- pm_return_node_t *node = PM_ALLOC_NODE(parser, pm_return_node_t);
+ pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
*node = (pm_return_node_t) {
- {
- .type = PM_RETURN_NODE,
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- }
- },
+ .base = (
+ (arguments == NULL)
+ ? PM_NODE_INIT_TOKEN(parser, PM_RETURN_NODE, 0, keyword)
+ : PM_NODE_INIT_TOKEN_NODE(parser, PM_RETURN_NODE, 0, keyword, arguments)
+ ),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.arguments = arguments
};
@@ -5167,12 +6082,26 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
static pm_self_node_t *
pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_SELF);
- pm_self_node_t *node = PM_ALLOC_NODE(parser, pm_self_node_t);
+ pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
- *node = (pm_self_node_t) {{
- .type = PM_SELF_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ *node = (pm_self_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_SELF_NODE, 0, token)
+ };
+
+ return node;
+}
+
+/**
+ * Allocate and initialize a new ShareableConstantNode node.
+ */
+static pm_shareable_constant_node_t *
+pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
+ pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
+
+ *node = (pm_shareable_constant_node_t) {
+ .base = PM_NODE_INIT_NODE(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, write),
+ .write = write
+ };
return node;
}
@@ -5182,16 +6111,10 @@ pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_singleton_class_node_t *
pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_singleton_class_node_t *node = PM_ALLOC_NODE(parser, pm_singleton_class_node_t);
+ pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
*node = (pm_singleton_class_node_t) {
- {
- .type = PM_SINGLETON_CLASS_NODE,
- .location = {
- .start = class_keyword->start,
- .end = end_keyword->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_SINGLETON_CLASS_NODE, 0, class_keyword, end_keyword),
.locals = *locals,
.class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
@@ -5209,13 +6132,11 @@ pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *local
static pm_source_encoding_node_t *
pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
- pm_source_encoding_node_t *node = PM_ALLOC_NODE(parser, pm_source_encoding_node_t);
+ pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
- *node = (pm_source_encoding_node_t) {{
- .type = PM_SOURCE_ENCODING_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ *node = (pm_source_encoding_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+ };
return node;
}
@@ -5225,16 +6146,23 @@ pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_source_file_node_t*
pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
- pm_source_file_node_t *node = PM_ALLOC_NODE(parser, pm_source_file_node_t);
+ pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
+ pm_node_flags_t flags = 0;
+
+ switch (parser->frozen_string_literal) {
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
+ flags |= PM_STRING_FLAGS_MUTABLE;
+ break;
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
+ flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
+ break;
+ }
+
*node = (pm_source_file_node_t) {
- {
- .type = PM_SOURCE_FILE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
- },
- .filepath = parser->filepath_string,
+ .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_FILE_NODE, flags, file_keyword),
+ .filepath = parser->filepath
};
return node;
@@ -5246,13 +6174,11 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
static pm_source_line_node_t *
pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD___LINE__);
- pm_source_line_node_t *node = PM_ALLOC_NODE(parser, pm_source_line_node_t);
+ pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
- *node = (pm_source_line_node_t) {{
- .type = PM_SOURCE_LINE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ *node = (pm_source_line_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+ };
return node;
}
@@ -5262,16 +6188,14 @@ pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_splat_node_t *
pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
- pm_splat_node_t *node = PM_ALLOC_NODE(parser, pm_splat_node_t);
+ pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
*node = (pm_splat_node_t) {
- {
- .type = PM_SPLAT_NODE,
- .location = {
- .start = operator->start,
- .end = (expression == NULL ? operator->end : expression->location.end)
- }
- },
+ .base = (
+ (expression == NULL)
+ ? PM_NODE_INIT_TOKEN(parser, PM_SPLAT_NODE, 0, operator)
+ : PM_NODE_INIT_TOKEN_NODE(parser, PM_SPLAT_NODE, 0, operator, expression)
+ ),
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
.expression = expression
};
@@ -5284,13 +6208,10 @@ pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t
*/
static pm_statements_node_t *
pm_statements_node_create(pm_parser_t *parser) {
- pm_statements_node_t *node = PM_ALLOC_NODE(parser, pm_statements_node_t);
+ pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
*node = (pm_statements_node_t) {
- {
- .type = PM_STATEMENTS_NODE,
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
+ .base = PM_NODE_INIT_BASE(parser, PM_STATEMENTS_NODE, 0),
.body = { 0 }
};
@@ -5314,20 +6235,54 @@ pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start
}
/**
- * Append a new node to the given StatementsNode node's body.
+ * Update the location of the statements node based on the statement that is
+ * being added to the list.
*/
-static void
-pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement) {
+static inline void
+pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
node->base.location.start = statement->location.start;
}
+
if (statement->location.end > node->base.location.end) {
node->base.location.end = statement->location.end;
}
+}
+
+/**
+ * Append a new node to the given StatementsNode node's body.
+ */
+static void
+pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
+ pm_statements_node_body_update(node, statement);
+
+ if (node->body.size > 0) {
+ const pm_node_t *previous = node->body.nodes[node->body.size - 1];
+
+ switch (PM_NODE_TYPE(previous)) {
+ case PM_BREAK_NODE:
+ case PM_NEXT_NODE:
+ case PM_REDO_NODE:
+ case PM_RETRY_NODE:
+ case PM_RETURN_NODE:
+ pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
+ break;
+ default:
+ break;
+ }
+ }
pm_node_list_append(&node->body, statement);
+ if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
+}
- // Every statement gets marked as a place where a newline can occur.
+/**
+ * Prepend a new node to the given StatementsNode node's body.
+ */
+static void
+pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
+ pm_statements_node_body_update(node, statement);
+ pm_node_list_prepend(&node->body, statement);
pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
}
@@ -5336,22 +6291,23 @@ pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement)
*/
static inline pm_string_node_t *
pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
- pm_string_node_t *node = PM_ALLOC_NODE(parser, pm_string_node_t);
+ pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
pm_node_flags_t flags = 0;
- if (parser->frozen_string_literal) {
- flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
+ switch (parser->frozen_string_literal) {
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
+ flags = PM_STRING_FLAGS_MUTABLE;
+ break;
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
+ flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
+ break;
}
+ const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start);
+ const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end);
+
*node = (pm_string_node_t) {
- {
- .type = PM_STRING_NODE,
- .flags = flags,
- .location = {
- .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
- .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
- }
- },
+ .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, start, end),
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
@@ -5386,7 +6342,7 @@ pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *open
static pm_super_node_t *
pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
- pm_super_node_t *node = PM_ALLOC_NODE(parser, pm_super_node_t);
+ pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
const uint8_t *end = pm_arguments_end(arguments);
if (end == NULL) {
@@ -5394,13 +6350,7 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
}
*node = (pm_super_node_t) {
- {
- .type = PM_SUPER_NODE,
- .location = {
- .start = keyword->start,
- .end = end,
- }
- },
+ .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, keyword->start, end),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.lparen_loc = arguments->opening_loc,
.arguments = arguments->arguments,
@@ -5412,22 +6362,228 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
}
/**
+ * Read through the contents of a string and check if it consists solely of
+ * US-ASCII code points.
+ */
+static bool
+pm_ascii_only_p(const pm_string_t *contents) {
+ const size_t length = pm_string_length(contents);
+ const uint8_t *source = pm_string_source(contents);
+
+ for (size_t index = 0; index < length; index++) {
+ if (source[index] & 0x80) return false;
+ }
+
+ return true;
+}
+
+/**
+ * Validate that the contents of the given symbol are all valid UTF-8.
+ */
+static void
+parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
+ size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
+
+ if (width == 0) {
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+ break;
+ }
+
+ cursor += width;
+ }
+}
+
+/**
+ * Validate that the contents of the given symbol are all valid in the encoding
+ * of the parser.
+ */
+static void
+parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
+ const pm_encoding_t *encoding = parser->encoding;
+
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
+ size_t width = encoding->char_width(cursor, end - cursor);
+
+ if (width == 0) {
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+ break;
+ }
+
+ cursor += width;
+ }
+}
+
+/**
+ * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
+ * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
+ * points. Otherwise, the encoding may be explicitly set with an escape
+ * sequence.
+ *
+ * If the validate flag is set, then it will check the contents of the symbol
+ * to ensure that all characters are valid in the encoding.
+ */
+static inline pm_node_flags_t
+parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
+ if (parser->explicit_encoding != NULL) {
+ // A Symbol may optionally have its encoding explicitly set. This will
+ // happen if an escape sequence results in a non-ASCII code point.
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
+ return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
+ } else if (validate) {
+ parse_symbol_encoding_validate_other(parser, location, contents);
+ }
+ } else if (pm_ascii_only_p(contents)) {
+ // Ruby stipulates that all source files must use an ASCII-compatible
+ // encoding. Thus, all symbols appearing in source are eligible for
+ // "downgrading" to US-ASCII.
+ return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
+ } else if (validate) {
+ parse_symbol_encoding_validate_other(parser, location, contents);
+ }
+
+ return 0;
+}
+
+static pm_node_flags_t
+parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
+ assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
+ (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
+ (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
+ (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
+
+ // There's special validation logic used if a string does not contain any character escape sequences.
+ if (parser->explicit_encoding == NULL) {
+ // If an ASCII-only string without character escapes is used with an encoding modifier, then the resulting Regexp
+ // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
+ // the US-ASCII encoding.
+ if (ascii_only) {
+ return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
+ }
+
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ if (!ascii_only) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+ }
+ } else if (parser->encoding != modifier_encoding) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
+
+ if (modifier == 'n' && !ascii_only) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
+ }
+ }
+
+ return flags;
+ }
+
+ // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
+ bool mixed_encoding = false;
+
+ if (mixed_encoding) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
+ } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+ // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
+ bool valid_string_in_modifier_encoding = true;
+
+ if (!valid_string_in_modifier_encoding) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
+ }
+ } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
+ if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
+ }
+ }
+
+ // The encoding is determined by the modifier flag already present in flags, so there is no need to force the encoding to anything else.
+ return flags;
+}
+
+/**
+ * Ruby "downgrades" the encoding of Regexps to US-ASCII if the associated encoding is ASCII-compatible and
+ * the unescaped representation of a Regexp source consists only of US-ASCII code points. This is true even
+ * when the Regexp is explicitly given an ASCII-8BIT encoding via the (/n) modifier. Otherwise, the encoding
+ * may be explicitly set with an escape sequence.
+ */
+static pm_node_flags_t
+parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
+ // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
+ bool valid_unicode_range = true;
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
+ return flags;
+ }
+
+ // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
+ // to multi-byte characters are allowed.
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
+ // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
+ // following error message appearing twice. We do the same for compatibility.
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+ }
+
+ /**
+ * Start checking modifier flags. We need to process these before considering any explicit encodings that may have
+ * been set by character literals. The order in which the encoding modifiers are checked does not matter. In the
+ * event that both an encoding modifier and an explicit encoding would result in the same encoding we do not set
+ * the corresponding "forced_<encoding>" flag. Instead, the caller should check the encoding modifier flag and
+ * determine the encoding that way.
+ */
+
+ if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
+ return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
+ }
+
+ if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
+ return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
+ }
+
+ if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
+ return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
+ }
+
+ if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
+ return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
+ }
+
+ // At this point no encoding modifiers will be present on the regular expression as they would have already
+ // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
+ // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
+ if (ascii_only) {
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
+ }
+
+ // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
+ // or by specifying a modifier.
+ //
+ // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US-ASCII code points.
+ if (parser->explicit_encoding != NULL) {
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
+ }
+ }
+
+ return 0;
+}
+
+/**
* Allocate and initialize a new SymbolNode node with the given unescaped
* string.
*/
static pm_symbol_node_t *
-pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
- pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
+pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
+ pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
+
+ const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start);
+ const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end);
*node = (pm_symbol_node_t) {
- {
- .type = PM_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = {
- .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
- .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
- }
- },
+ .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, start, end),
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.value_loc = PM_LOCATION_TOKEN_VALUE(value),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
@@ -5442,7 +6598,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
*/
static inline pm_symbol_node_t *
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
- return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
+ return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
}
/**
@@ -5450,7 +6606,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
*/
static pm_symbol_node_t *
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
parser->current_string = PM_STRING_EMPTY;
return node;
}
@@ -5472,6 +6628,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
assert((label.end - label.start) >= 0);
pm_string_shared_init(&node->unescaped, label.start, label.end);
+ pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
+
break;
}
case PM_TOKEN_MISSING: {
@@ -5492,6 +6650,23 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
}
/**
+ * Allocate and initialize a new synthesized SymbolNode node.
+ */
+static pm_symbol_node_t *
+pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
+ pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
+
+ *node = (pm_symbol_node_t) {
+ .base = PM_NODE_INIT_BASE(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING),
+ .value_loc = PM_LOCATION_NULL_VALUE(parser),
+ .unescaped = { 0 }
+ };
+
+ pm_string_constant_init(&node->unescaped, content, strlen(content));
+ return node;
+}
+
+/**
* Check if the given node is a label in a hash.
*/
static bool
@@ -5517,27 +6692,23 @@ pm_symbol_node_label_p(pm_node_t *node) {
*/
static pm_symbol_node_t *
pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
- pm_symbol_node_t *new_node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
+ pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
*new_node = (pm_symbol_node_t) {
- {
- .type = PM_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.value_loc = node->content_loc,
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.unescaped = node->unescaped
};
+ pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
+ pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
+
// We are explicitly _not_ using pm_node_destroy here because we don't want
// to trash the unescaped string. We could instead copy the string if we
// know that it is owned, but we're taking the fast path for now.
- free(node);
+ xfree(node);
return new_node;
}
@@ -5547,19 +6718,20 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
*/
static pm_string_node_t *
pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
- pm_string_node_t *new_node = PM_ALLOC_NODE(parser, pm_string_node_t);
+ pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
pm_node_flags_t flags = 0;
- if (parser->frozen_string_literal) {
- flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
+ switch (parser->frozen_string_literal) {
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
+ flags = PM_STRING_FLAGS_MUTABLE;
+ break;
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
+ flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
+ break;
}
*new_node = (pm_string_node_t) {
- {
- .type = PM_STRING_NODE,
- .flags = flags,
- .location = node->base.location
- },
+ .base = PM_NODE_INIT_NODE(parser, PM_STRING_NODE, flags, node),
.opening_loc = node->opening_loc,
.content_loc = node->value_loc,
.closing_loc = node->closing_loc,
@@ -5569,7 +6741,7 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
// We are explicitly _not_ using pm_node_destroy here because we don't want
// to trash the unescaped string. We could instead copy the string if we
// know that it is owned, but we're taking the fast path for now.
- free(node);
+ xfree(node);
return new_node;
}
@@ -5580,13 +6752,25 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
static pm_true_node_t *
pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_TRUE);
- pm_true_node_t *node = PM_ALLOC_NODE(parser, pm_true_node_t);
+ pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
- *node = (pm_true_node_t) {{
- .type = PM_TRUE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ *node = (pm_true_node_t) {
+ .base = PM_NODE_INIT_TOKEN(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+ };
+
+ return node;
+}
+
+/**
+ * Allocate and initialize a new synthesized TrueNode node.
+ */
+static pm_true_node_t *
+pm_true_node_synthesized_create(pm_parser_t *parser) {
+ pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
+
+ *node = (pm_true_node_t) {
+ .base = PM_NODE_INIT_BASE(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL)
+ };
return node;
}
@@ -5597,13 +6781,10 @@ pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
static pm_undef_node_t *
pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
- pm_undef_node_t *node = PM_ALLOC_NODE(parser, pm_undef_node_t);
+ pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
*node = (pm_undef_node_t) {
- {
- .type = PM_UNDEF_NODE,
- .location = PM_LOCATION_TOKEN_VALUE(token),
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_UNDEF_NODE, 0, token),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
.names = { 0 }
};
@@ -5625,31 +6806,19 @@ pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
*/
static pm_unless_node_t *
pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
- pm_conditional_predicate(predicate);
- pm_unless_node_t *node = PM_ALLOC_NODE(parser, pm_unless_node_t);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- const uint8_t *end;
- if (statements != NULL) {
- end = statements->base.location.end;
- } else {
- end = predicate->location.end;
- }
+ pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
+ pm_node_t *end = statements == NULL ? predicate : UP(statements);
*node = (pm_unless_node_t) {
- {
- .type = PM_UNLESS_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .location = {
- .start = keyword->start,
- .end = end
- },
- },
+ .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, keyword, end),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.predicate = predicate,
.then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
.statements = statements,
- .consequent = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .else_clause = NULL,
+ .end_keyword_loc = { 0 }
};
return node;
@@ -5660,27 +6829,20 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t
*/
static pm_unless_node_t *
pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
- pm_conditional_predicate(predicate);
- pm_unless_node_t *node = PM_ALLOC_NODE(parser, pm_unless_node_t);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+ pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(statements, statement);
+ pm_statements_node_body_append(parser, statements, statement, true);
*node = (pm_unless_node_t) {
- {
- .type = PM_UNLESS_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .location = {
- .start = statement->location.start,
- .end = predicate->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
.predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .then_keyword_loc = { 0 },
.statements = statements,
- .consequent = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
+ .else_clause = NULL,
+ .end_keyword_loc = { 0 }
};
return node;
@@ -5693,22 +6855,40 @@ pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end
}
/**
+ * Loop modifiers could potentially modify an expression that contains block
+ * exits. In this case we need to loop through them and remove them from the
+ * list of block exits so that they do not later get marked as invalid.
+ */
+static void
+pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
+ assert(parser->current_block_exits != NULL);
+
+ // All of the block exits that we want to remove should be within the
+ // statements, and since we are modifying the statements, we shouldn't have
+ // to check the end location.
+ const uint8_t *start = statements->base.location.start;
+
+ for (size_t index = parser->current_block_exits->size; index > 0; index--) {
+ pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
+ if (block_exit->location.start < start) break;
+
+ // Implicitly remove from the list by lowering the size.
+ parser->current_block_exits->size--;
+ }
+}
+
+/**
* Allocate a new UntilNode node.
*/
static pm_until_node_t *
-pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_until_node_t *node = PM_ALLOC_NODE(parser, pm_until_node_t);
+pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
+ pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
*node = (pm_until_node_t) {
- {
- .type = PM_UNTIL_NODE,
- .flags = flags,
- .location = {
- .start = keyword->start,
- .end = closing->end,
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_UNTIL_NODE, flags, keyword, closing),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.predicate = predicate,
.statements = statements
@@ -5722,19 +6902,15 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
*/
static pm_until_node_t *
pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_until_node_t *node = PM_ALLOC_NODE(parser, pm_until_node_t);
+ pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+ pm_loop_modifier_block_exits(parser, statements);
*node = (pm_until_node_t) {
- {
- .type = PM_UNTIL_NODE,
- .flags = flags,
- .location = {
- .start = statements->base.location.start,
- .end = predicate->location.end,
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_UNTIL_NODE, flags, statements, predicate),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .do_keyword_loc = { 0 },
+ .closing_loc = { 0 },
.predicate = predicate,
.statements = statements
};
@@ -5747,18 +6923,13 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
*/
static pm_when_node_t *
pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
- pm_when_node_t *node = PM_ALLOC_NODE(parser, pm_when_node_t);
+ pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
*node = (pm_when_node_t) {
- {
- .type = PM_WHEN_NODE,
- .location = {
- .start = keyword->start,
- .end = NULL
- }
- },
+ .base = PM_NODE_INIT_TOKEN(parser, PM_WHEN_NODE, 0, keyword),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.statements = NULL,
+ .then_keyword_loc = { 0 },
.conditions = { 0 }
};
@@ -5775,6 +6946,15 @@ pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
}
/**
+ * Set the location of the then keyword of a when node.
+ */
+static inline void
+pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
+ node->base.location.end = then_keyword->end;
+ node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
+}
+
+/**
* Set the statements list of a when node.
*/
static void
@@ -5790,19 +6970,14 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
* Allocate a new WhileNode node.
*/
static pm_while_node_t *
-pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_while_node_t *node = PM_ALLOC_NODE(parser, pm_while_node_t);
+pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
+ pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
*node = (pm_while_node_t) {
- {
- .type = PM_WHILE_NODE,
- .flags = flags,
- .location = {
- .start = keyword->start,
- .end = closing->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_WHILE_NODE, flags, keyword, closing),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.predicate = predicate,
.statements = statements
@@ -5816,19 +6991,34 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
*/
static pm_while_node_t *
pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_while_node_t *node = PM_ALLOC_NODE(parser, pm_while_node_t);
+ pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
+ pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+ pm_loop_modifier_block_exits(parser, statements);
*node = (pm_while_node_t) {
- {
- .type = PM_WHILE_NODE,
- .flags = flags,
- .location = {
- .start = statements->base.location.start,
- .end = predicate->location.end
- },
- },
+ .base = PM_NODE_INIT_NODES(parser, PM_WHILE_NODE, flags, statements, predicate),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+ .do_keyword_loc = { 0 },
+ .closing_loc = { 0 },
+ .predicate = predicate,
+ .statements = statements
+ };
+
+ return node;
+}
+
+/**
+ * Allocate and initialize a new synthesized while loop.
+ */
+static pm_while_node_t *
+pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
+ pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
+
+ *node = (pm_while_node_t) {
+ .base = PM_NODE_INIT_BASE(parser, PM_WHILE_NODE, 0),
+ .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
+ .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
+ .closing_loc = PM_LOCATION_NULL_VALUE(parser),
.predicate = predicate,
.statements = statements
};
@@ -5842,17 +7032,10 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
*/
static pm_x_string_node_t *
pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
- pm_x_string_node_t *node = PM_ALLOC_NODE(parser, pm_x_string_node_t);
+ pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
*node = (pm_x_string_node_t) {
- {
- .type = PM_X_STRING_NODE,
- .flags = PM_STRING_FLAGS_FROZEN,
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
+ .base = PM_NODE_INIT_TOKENS(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, opening, closing),
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -5875,7 +7058,7 @@ pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_
*/
static pm_yield_node_t *
pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
- pm_yield_node_t *node = PM_ALLOC_NODE(parser, pm_yield_node_t);
+ pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
const uint8_t *end;
if (rparen_loc->start != NULL) {
@@ -5889,13 +7072,7 @@ pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_lo
}
*node = (pm_yield_node_t) {
- {
- .type = PM_YIELD_NODE,
- .location = {
- .start = keyword->start,
- .end = end
- },
- },
+ .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, keyword->start, end),
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
.lparen_loc = *lparen_loc,
.arguments = arguments,
@@ -5905,60 +7082,6 @@ pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_lo
return node;
}
-#undef PM_ALLOC_NODE
-
-/******************************************************************************/
-/* Scope-related functions */
-/******************************************************************************/
-
-/**
- * Allocate and initialize a new scope. Push it onto the scope stack.
- */
-static bool
-pm_parser_scope_push(pm_parser_t *parser, bool closed) {
- pm_scope_t *scope = (pm_scope_t *) malloc(sizeof(pm_scope_t));
- if (scope == NULL) return false;
-
- *scope = (pm_scope_t) {
- .previous = parser->current_scope,
- .closed = closed,
- .explicit_params = false,
- .numbered_parameters = 0,
- };
-
- pm_constant_id_list_init(&scope->locals);
- parser->current_scope = scope;
-
- return true;
-}
-
-/**
- * Save the current param name as the return value and set it to the given
- * constant id.
- */
-static inline pm_constant_id_t
-pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
- pm_constant_id_t saved_param_name = parser->current_param_name;
- parser->current_param_name = current_param_name;
- return saved_param_name;
-}
-
-/**
- * Save the current param name as the return value and clear it.
- */
-static inline pm_constant_id_t
-pm_parser_current_param_name_unset(pm_parser_t *parser) {
- return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
-}
-
-/**
- * Restore the current param name from the given value.
- */
-static inline void
-pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
- parser->current_param_name = saved_param_name;
-}
-
/**
* Check if any of the currently visible scopes contain a local variable
* described by the given constant id.
@@ -5969,7 +7092,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
int depth = 0;
while (scope != NULL) {
- if (pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
+ if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
if (scope->closed) break;
scope = scope->previous;
@@ -5993,45 +7116,45 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
* Add a constant id to the local table of the current scope.
*/
static inline void
-pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
- if (!pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
- pm_constant_id_list_append(&parser->current_scope->locals, constant_id);
- }
-}
-
-/**
- * Set the numbered_parameters value of the current scope.
- */
-static inline void
-pm_parser_numbered_parameters_set(pm_parser_t *parser, uint8_t numbered_parameters) {
- parser->current_scope->numbered_parameters = numbered_parameters;
+pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+ pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
}
/**
* Add a local variable from a location to the current scope.
*/
static pm_constant_id_t
-pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
- if (constant_id != 0) pm_parser_local_add(parser, constant_id);
+ if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
return constant_id;
}
/**
* Add a local variable from a token to the current scope.
*/
-static inline void
-pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token) {
- pm_parser_local_add_location(parser, token->start, token->end);
+static inline pm_constant_id_t
+pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
+ return pm_parser_local_add_location(parser, token->start, token->end, reads);
}
/**
* Add a local variable from an owned string to the current scope.
*/
static pm_constant_id_t
-pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t length) {
+pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
- if (constant_id != 0) pm_parser_local_add(parser, constant_id);
+ if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
+ return constant_id;
+}
+
+/**
+ * Add a local variable from a constant string to the current scope.
+ */
+static pm_constant_id_t
+pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
+ pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
+ if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
return constant_id;
}
@@ -6052,10 +7175,10 @@ pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
// whether it's already in the current scope.
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
- if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
+ if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
// Add an error if the parameter doesn't start with _ and has been seen before
if ((name->start < name->end) && (*name->start != '_')) {
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
+ pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
}
return true;
}
@@ -6063,120 +7186,45 @@ pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
}
/**
- * Pop the current scope off the scope stack. Note that we specifically do not
- * free the associated constant list because we assume that we have already
- * transferred ownership of the list to the AST somewhere.
+ * Pop the current scope off the scope stack.
*/
static void
pm_parser_scope_pop(pm_parser_t *parser) {
pm_scope_t *scope = parser->current_scope;
parser->current_scope = scope->previous;
- free(scope);
+ pm_locals_free(&scope->locals);
+ pm_node_list_free(&scope->implicit_parameters);
+ xfree(scope);
}
/******************************************************************************/
-/* Basic character checks */
+/* Stack helpers */
/******************************************************************************/
/**
- * This function is used extremely frequently to lex all of the identifiers in a
- * source file, so it's important that it be as fast as possible. For this
- * reason we have the encoding_changed boolean to check if we need to go through
- * the function pointer or can just directly use the UTF-8 functions.
+ * Pushes a value onto the stack.
*/
-static inline size_t
-char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
- if (parser->encoding_changed) {
- size_t width;
- if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
- return width;
- } else if (*b == '_') {
- return 1;
- } else if (*b >= 0x80) {
- return parser->encoding->char_width(b, parser->end - b);
- } else {
- return 0;
- }
- } else if (*b < 0x80) {
- return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
- } else {
- return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
- }
+static inline void
+pm_state_stack_push(pm_state_stack_t *stack, bool value) {
+ *stack = (*stack << 1) | (value & 1);
}
/**
- * Similar to char_is_identifier but this function assumes that the encoding
- * has not been changed.
+ * Pops a value off the stack.
*/
-static inline size_t
-char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
- if (*b < 0x80) {
- return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
- } else {
- return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
- }
+static inline void
+pm_state_stack_pop(pm_state_stack_t *stack) {
+ *stack >>= 1;
}
/**
- * Like the above, this function is also used extremely frequently to lex all of
- * the identifiers in a source file once the first character has been found. So
- * it's important that it be as fast as possible.
+ * Returns the value at the top of the stack.
*/
-static inline size_t
-char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
- if (parser->encoding_changed) {
- size_t width;
- if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
- return width;
- } else if (*b == '_') {
- return 1;
- } else if (*b >= 0x80) {
- return parser->encoding->char_width(b, parser->end - b);
- } else {
- return 0;
- }
- }
- return char_is_identifier_utf8(b, parser->end);
-}
-
-// Here we're defining a perfect hash for the characters that are allowed in
-// global names. This is used to quickly check the next character after a $ to
-// see if it's a valid character for a global name.
-#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
-#define PUNCT(idx) ( \
- BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
- BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
- BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
- BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
- BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
- BIT('0', idx))
-
-const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
-
-#undef BIT
-#undef PUNCT
-
static inline bool
-char_is_global_name_punctuation(const uint8_t b) {
- const unsigned int i = (const unsigned int) b;
- if (i <= 0x20 || 0x7e < i) return false;
-
- return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
+pm_state_stack_p(const pm_state_stack_t *stack) {
+ return *stack & 1;
}
-static inline bool
-token_is_setter_name(pm_token_t *token) {
- return (
- (token->type == PM_TOKEN_IDENTIFIER) &&
- (token->end - token->start >= 2) &&
- (token->end[-1] == '=')
- );
-}
-
-/******************************************************************************/
-/* Stack helpers */
-/******************************************************************************/
-
static inline void
pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
// Use the negation of the value to prevent stack overflow.
@@ -6217,7 +7265,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
* is beyond the end of the source then return '\0'.
*/
static inline uint8_t
-peek_at(pm_parser_t *parser, const uint8_t *cursor) {
+peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
if (cursor < parser->end) {
return *cursor;
} else {
@@ -6240,13 +7288,13 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
* that position is beyond the end of the source then return '\0'.
*/
static inline uint8_t
-peek(pm_parser_t *parser) {
+peek(const pm_parser_t *parser) {
return peek_at(parser, parser->current.end);
}
/**
* If the character to be read matches the given value, then returns true and
- * advanced the current pointer.
+ * advances the current pointer.
*/
static inline bool
match(pm_parser_t *parser, uint8_t value) {
@@ -6306,6 +7354,14 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
}
/**
+ * This is equivalent to the predicate of warn_balanced in CRuby.
+ */
+static inline bool
+ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
+ return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
+}
+
+/**
* Here we're going to check if this is a "magic" comment, and perform whatever
* actions are necessary for it here.
*/
@@ -6314,12 +7370,12 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
const pm_encoding_t *encoding = pm_encoding_find(start, end);
if (encoding != NULL) {
- if (encoding != PM_ENCODING_UTF_8_ENTRY) {
+ if (parser->encoding != encoding) {
parser->encoding = encoding;
- parser->encoding_changed = true;
if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
}
+ parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
return true;
}
@@ -6382,16 +7438,24 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
}
}
+typedef enum {
+ PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,
+ PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,
+ PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
+} pm_magic_comment_boolean_value_t;
+
/**
* Check if this is a magic comment that includes the frozen_string_literal
* pragma. If it does, set that field on the parser.
*/
-static void
-parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
- parser->frozen_string_literal = true;
- } else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
- parser->frozen_string_literal = false;
+static pm_magic_comment_boolean_value_t
+parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
+ if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
+ return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
+ } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
+ return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
+ } else {
+ return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
}
}
@@ -6480,6 +7544,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
}
value_end = cursor;
+ if (cursor < end && *cursor == '"') cursor++;
} else {
value_start = cursor;
while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
@@ -6497,32 +7562,32 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// underscores. We only need to do this if there _is_ a dash in the key.
pm_string_t key;
const size_t key_length = (size_t) (key_end - key_start);
- const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, parser->encoding);
+ const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
if (dash == NULL) {
pm_string_shared_init(&key, key_start, key_end);
} else {
- size_t width = (size_t) (key_end - key_start);
- uint8_t *buffer = malloc(width);
+ uint8_t *buffer = xmalloc(key_length);
if (buffer == NULL) break;
- memcpy(buffer, key_start, width);
+ memcpy(buffer, key_start, key_length);
buffer[dash - key_start] = '_';
while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
buffer[dash - key_start] = '_';
}
- pm_string_owned_init(&key, buffer, width);
+ pm_string_owned_init(&key, buffer, key_length);
}
// Finally, we can start checking the key against the list of known
// magic comment keys, and potentially change state based on that.
const uint8_t *key_source = pm_string_source(&key);
+ uint32_t value_length = (uint32_t) (value_end - value_start);
// We only want to attempt to compare against encoding comments if it's
// the first line in the file (or the second in the case of a shebang).
- if (parser->current.start == parser->encoding_comment_start) {
+ if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
if (
(key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
(key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
@@ -6531,11 +7596,82 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
}
}
- // We only want to handle frozen string literal comments if it's before
- // any semantic tokens have been seen.
- if (!semantic_token_seen) {
- if (key_length == 21 && pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
- parser_lex_magic_comment_frozen_string_literal_value(parser, value_start, value_end);
+ if (key_length == 11) {
+ if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
+ switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
+ case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
+ PM_PARSER_WARN_TOKEN_FORMAT(
+ parser,
+ parser->current,
+ PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
+ (int) key_length,
+ (const char *) key_source,
+ (int) value_length,
+ (const char *) value_start
+ );
+ break;
+ case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
+ parser->warn_mismatched_indentation = false;
+ break;
+ case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
+ parser->warn_mismatched_indentation = true;
+ break;
+ }
+ }
+ } else if (key_length == 21) {
+ if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
+ // We only want to handle frozen string literal comments if it's
+ // before any semantic tokens have been seen.
+ if (semantic_token_seen) {
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
+ } else {
+ switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
+ case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
+ PM_PARSER_WARN_TOKEN_FORMAT(
+ parser,
+ parser->current,
+ PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
+ (int) key_length,
+ (const char *) key_source,
+ (int) value_length,
+ (const char *) value_start
+ );
+ break;
+ case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
+ parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
+ break;
+ case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
+ parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED;
+ break;
+ }
+ }
+ }
+ } else if (key_length == 24) {
+ if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
+ const uint8_t *cursor = parser->current.start;
+ while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
+
+ if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
+ } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
+ pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
+ } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
+ pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
+ } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
+ pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
+ } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
+ pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
+ } else {
+ PM_PARSER_WARN_TOKEN_FORMAT(
+ parser,
+ parser->current,
+ PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
+ (int) key_length,
+ (const char *) key_source,
+ (int) value_length,
+ (const char *) value_start
+ );
+ }
}
}
@@ -6545,11 +7681,11 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// Allocate a new magic comment node to append to the parser's list.
pm_magic_comment_t *magic_comment;
- if ((magic_comment = (pm_magic_comment_t *) calloc(sizeof(pm_magic_comment_t), 1)) != NULL) {
+ if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
magic_comment->key_start = key_start;
magic_comment->value_start = value_start;
magic_comment->key_length = (uint32_t) key_length;
- magic_comment->value_length = (uint32_t) (value_end - value_start);
+ magic_comment->value_length = value_length;
pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
}
}
@@ -6561,79 +7697,88 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
/* Context manipulations */
/******************************************************************************/
-static bool
-context_terminator(pm_context_t context, pm_token_t *token) {
- switch (context) {
- case PM_CONTEXT_MAIN:
- case PM_CONTEXT_DEF_PARAMS:
- return token->type == PM_TOKEN_EOF;
- case PM_CONTEXT_DEFAULT_PARAMS:
- return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
- case PM_CONTEXT_PREEXE:
- case PM_CONTEXT_POSTEXE:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_MODULE:
- case PM_CONTEXT_CLASS:
- case PM_CONTEXT_SCLASS:
- case PM_CONTEXT_LAMBDA_DO_END:
- case PM_CONTEXT_DEF:
- case PM_CONTEXT_BLOCK_KEYWORDS:
- return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
- case PM_CONTEXT_WHILE:
- case PM_CONTEXT_UNTIL:
- case PM_CONTEXT_ELSE:
- case PM_CONTEXT_FOR:
- case PM_CONTEXT_ENSURE:
- case PM_CONTEXT_ENSURE_DEF:
- return token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_FOR_INDEX:
- return token->type == PM_TOKEN_KEYWORD_IN;
- case PM_CONTEXT_CASE_WHEN:
- return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
- case PM_CONTEXT_CASE_IN:
- return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
- case PM_CONTEXT_IF:
- case PM_CONTEXT_ELSIF:
- return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_UNLESS:
- return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_EMBEXPR:
- return token->type == PM_TOKEN_EMBEXPR_END;
- case PM_CONTEXT_BLOCK_BRACES:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_PARENS:
- return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
- case PM_CONTEXT_BEGIN:
- case PM_CONTEXT_RESCUE:
- case PM_CONTEXT_RESCUE_DEF:
- return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_RESCUE_ELSE:
- case PM_CONTEXT_RESCUE_ELSE_DEF:
- return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_LAMBDA_BRACES:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_PREDICATE:
- return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
- }
+static const uint32_t context_terminators[] = {
+ [PM_CONTEXT_NONE] = 0,
+ [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
+ [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+ [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+ [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+ [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
+ [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
+ [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
+ [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+ [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
+ [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
+};
- return false;
+static inline bool
+context_terminator(pm_context_t context, pm_token_t *token) {
+ return token->type < 32 && (context_terminators[context] & (1U << token->type));
}
-static bool
-context_recoverable(pm_parser_t *parser, pm_token_t *token) {
+/**
+ * Returns the context that the given token is found to be terminating, or
+ * returns PM_CONTEXT_NONE.
+ */
+static pm_context_t
+context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
pm_context_node_t *context_node = parser->current_context;
while (context_node != NULL) {
- if (context_terminator(context_node->context, token)) return true;
+ if (context_terminator(context_node->context, token)) return context_node->context;
context_node = context_node->prev;
}
- return false;
+ return PM_CONTEXT_NONE;
}
static bool
context_push(pm_parser_t *parser, pm_context_t context) {
- pm_context_node_t *context_node = (pm_context_node_t *) malloc(sizeof(pm_context_node_t));
+ pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
if (context_node == NULL) return false;
*context_node = (pm_context_node_t) { .context = context, .prev = NULL };
@@ -6651,12 +7796,12 @@ context_push(pm_parser_t *parser, pm_context_t context) {
static void
context_pop(pm_parser_t *parser) {
pm_context_node_t *prev = parser->current_context->prev;
- free(parser->current_context);
+ xfree(parser->current_context);
parser->current_context = prev;
}
static bool
-context_p(pm_parser_t *parser, pm_context_t context) {
+context_p(const pm_parser_t *parser, pm_context_t context) {
pm_context_node_t *context_node = parser->current_context;
while (context_node != NULL) {
@@ -6668,20 +7813,29 @@ context_p(pm_parser_t *parser, pm_context_t context) {
}
static bool
-context_def_p(pm_parser_t *parser) {
+context_def_p(const pm_parser_t *parser) {
pm_context_node_t *context_node = parser->current_context;
while (context_node != NULL) {
switch (context_node->context) {
case PM_CONTEXT_DEF:
case PM_CONTEXT_DEF_PARAMS:
- case PM_CONTEXT_ENSURE_DEF:
- case PM_CONTEXT_RESCUE_DEF:
- case PM_CONTEXT_RESCUE_ELSE_DEF:
+ case PM_CONTEXT_DEF_ENSURE:
+ case PM_CONTEXT_DEF_RESCUE:
+ case PM_CONTEXT_DEF_ELSE:
return true;
case PM_CONTEXT_CLASS:
+ case PM_CONTEXT_CLASS_ENSURE:
+ case PM_CONTEXT_CLASS_RESCUE:
+ case PM_CONTEXT_CLASS_ELSE:
case PM_CONTEXT_MODULE:
+ case PM_CONTEXT_MODULE_ENSURE:
+ case PM_CONTEXT_MODULE_RESCUE:
+ case PM_CONTEXT_MODULE_ELSE:
case PM_CONTEXT_SCLASS:
+ case PM_CONTEXT_SCLASS_ENSURE:
+ case PM_CONTEXT_SCLASS_RESCUE:
+ case PM_CONTEXT_SCLASS_ELSE:
return false;
default:
context_node = context_node->prev;
@@ -6691,14 +7845,85 @@ context_def_p(pm_parser_t *parser) {
return false;
}
+/**
+ * Returns a human readable string for the given context, used in error
+ * messages.
+ */
+static const char *
+context_human(pm_context_t context) {
+ switch (context) {
+ case PM_CONTEXT_NONE:
+ assert(false && "unreachable");
+ return "";
+ case PM_CONTEXT_BEGIN: return "begin statement";
+ case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
+ case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
+ case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
+ case PM_CONTEXT_CASE_WHEN: return "'when' clause";
+ case PM_CONTEXT_CASE_IN: return "'in' clause";
+ case PM_CONTEXT_CLASS: return "class definition";
+ case PM_CONTEXT_DEF: return "method definition";
+ case PM_CONTEXT_DEF_PARAMS: return "method parameters";
+ case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
+ case PM_CONTEXT_DEFINED: return "'defined?' expression";
+ case PM_CONTEXT_ELSE:
+ case PM_CONTEXT_BEGIN_ELSE:
+ case PM_CONTEXT_BLOCK_ELSE:
+ case PM_CONTEXT_CLASS_ELSE:
+ case PM_CONTEXT_DEF_ELSE:
+ case PM_CONTEXT_LAMBDA_ELSE:
+ case PM_CONTEXT_MODULE_ELSE:
+ case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
+ case PM_CONTEXT_ELSIF: return "'elsif' clause";
+ case PM_CONTEXT_EMBEXPR: return "embedded expression";
+ case PM_CONTEXT_BEGIN_ENSURE:
+ case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_CLASS_ENSURE:
+ case PM_CONTEXT_DEF_ENSURE:
+ case PM_CONTEXT_LAMBDA_ENSURE:
+ case PM_CONTEXT_MODULE_ENSURE:
+ case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
+ case PM_CONTEXT_FOR: return "for loop";
+ case PM_CONTEXT_FOR_INDEX: return "for loop index";
+ case PM_CONTEXT_IF: return "if statement";
+ case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
+ case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
+ case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
+ case PM_CONTEXT_MAIN: return "top level context";
+ case PM_CONTEXT_MODULE: return "module definition";
+ case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
+ case PM_CONTEXT_PARENS: return "parentheses";
+ case PM_CONTEXT_POSTEXE: return "'END' block";
+ case PM_CONTEXT_PREDICATE: return "predicate";
+ case PM_CONTEXT_PREEXE: return "'BEGIN' block";
+ case PM_CONTEXT_BEGIN_RESCUE:
+ case PM_CONTEXT_BLOCK_RESCUE:
+ case PM_CONTEXT_CLASS_RESCUE:
+ case PM_CONTEXT_DEF_RESCUE:
+ case PM_CONTEXT_LAMBDA_RESCUE:
+ case PM_CONTEXT_MODULE_RESCUE:
+ case PM_CONTEXT_RESCUE_MODIFIER:
+ case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
+ case PM_CONTEXT_SCLASS: return "singleton class definition";
+ case PM_CONTEXT_TERNARY: return "ternary expression";
+ case PM_CONTEXT_UNLESS: return "unless statement";
+ case PM_CONTEXT_UNTIL: return "until statement";
+ case PM_CONTEXT_WHILE: return "while statement";
+ }
+
+ assert(false && "unreachable");
+ return "";
+}
+
/******************************************************************************/
/* Specific token lexers */
/******************************************************************************/
-static void
-pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
+static inline void
+pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
if (invalid != NULL) {
- pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
+ pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
+ pm_parser_err(parser, invalid, invalid + 1, diag_id);
}
}
@@ -6706,7 +7931,7 @@ static size_t
pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
const uint8_t *invalid = NULL;
size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
- pm_strspn_number_validate(parser, invalid);
+ pm_strspn_number_validate(parser, string, length, invalid);
return length;
}
@@ -6714,7 +7939,7 @@ static size_t
pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
const uint8_t *invalid = NULL;
size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
- pm_strspn_number_validate(parser, invalid);
+ pm_strspn_number_validate(parser, string, length, invalid);
return length;
}
@@ -6722,7 +7947,7 @@ static size_t
pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
const uint8_t *invalid = NULL;
size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
- pm_strspn_number_validate(parser, invalid);
+ pm_strspn_number_validate(parser, string, length, invalid);
return length;
}
@@ -6730,7 +7955,7 @@ static size_t
pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
const uint8_t *invalid = NULL;
size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
- pm_strspn_number_validate(parser, invalid);
+ pm_strspn_number_validate(parser, string, length, invalid);
return length;
}
@@ -6746,26 +7971,33 @@ lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
type = PM_TOKEN_FLOAT;
} else {
- // If we had a . and then something else, then it's not a float suffix on
- // a number it's a method call or something else.
+ // If we had a . and then something else, then it's not a float
+ // suffix on a number; it's a method call or something else.
return type;
}
}
// Here we're going to attempt to parse the optional exponent portion of a
// float. If it's not there, it's okay and we'll just continue on.
- if (match(parser, 'e') || match(parser, 'E')) {
- (void) (match(parser, '+') || match(parser, '-'));
- *seen_e = true;
+ if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
+ if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
+ parser->current.end += 2;
- if (pm_char_is_decimal_digit(peek(parser))) {
+ if (pm_char_is_decimal_digit(peek(parser))) {
+ parser->current.end++;
+ parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+ } else {
+ pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
+ }
+ } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
parser->current.end++;
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
- type = PM_TOKEN_FLOAT;
} else {
- pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
- type = PM_TOKEN_FLOAT;
+ return type;
}
+
+ *seen_e = true;
+ type = PM_TOKEN_FLOAT;
}
return type;
@@ -6785,6 +8017,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
if (pm_char_is_decimal_digit(peek(parser))) {
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
} else {
+ match(parser, '_');
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
}
@@ -6797,6 +8030,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
if (pm_char_is_binary_digit(peek(parser))) {
parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
} else {
+ match(parser, '_');
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
}
@@ -6810,6 +8044,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
if (pm_char_is_octal_digit(peek(parser))) {
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
} else {
+ match(parser, '_');
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
}
@@ -6837,6 +8072,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
if (pm_char_is_hexadecimal_digit(peek(parser))) {
parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
} else {
+ match(parser, '_');
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
}
@@ -6865,6 +8101,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
type = lex_optional_float_suffix(parser, seen_e);
}
+ // At this point we have a completed number, but we want to provide the user
+ // with a good experience if they put an additional .xxx fractional
+ // component on the end, so we'll check for that here.
+ if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
+ const uint8_t *fraction_start = parser->current.end;
+ const uint8_t *fraction_end = parser->current.end + 2;
+ fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
+ pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
+ }
+
return type;
}
@@ -6916,10 +8162,14 @@ lex_numeric(pm_parser_t *parser) {
static pm_token_type_t
lex_global_variable(pm_parser_t *parser) {
if (parser->current.end >= parser->end) {
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
+ pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
return PM_TOKEN_GLOBAL_VARIABLE;
}
+ // True if multiple characters are allowed after the declaration of the
+ // global variable. Not true when it starts with "$-".
+ bool allow_multiple = true;
+
switch (*parser->current.end) {
case '~': // $~: match-data
case '*': // $*: argv
@@ -6951,13 +8201,14 @@ lex_global_variable(pm_parser_t *parser) {
parser->current.end++;
size_t width;
- if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
+ if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
do {
parser->current.end += width;
- } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
+ } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
// $0 isn't allowed to be followed by anything.
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
+ pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
}
return PM_TOKEN_GLOBAL_VARIABLE;
@@ -6977,18 +8228,25 @@ lex_global_variable(pm_parser_t *parser) {
case '-':
parser->current.end++;
- /* fallthrough */
+ allow_multiple = false;
+ PRISM_FALLTHROUGH
default: {
size_t width;
- if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
+ if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
do {
parser->current.end += width;
- } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
+ } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
+ } else if (pm_char_is_whitespace(peek(parser))) {
+ // If we get here, then we have a $ followed by whitespace,
+ // which is not allowed.
+ pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
} else {
- // If we get here, then we have a $ followed by something that isn't
- // recognized as a global variable.
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
+ // If we get here, then we have a $ followed by something that
+ // isn't recognized as a global variable.
+ pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+ const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
}
return PM_TOKEN_GLOBAL_VARIABLE;
@@ -7043,11 +8301,11 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
bool encoding_changed = parser->encoding_changed;
if (encoding_changed) {
- while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
+ while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
current_end += width;
}
} else {
- while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
+ while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
current_end += width;
}
}
@@ -7205,7 +8463,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
return PM_TOKEN_STRING_CONTENT;
}
- // Now we'll check against the character the follows the #. If it constitutes
+ // Now we'll check against the character that follows the #. If it constitutes
// valid interpolation, we'll handle that, otherwise we'll return
// PM_TOKEN_NOT_PROVIDED.
switch (pound[1]) {
@@ -7221,7 +8479,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
const uint8_t *variable = pound + 2;
if (*variable == '@' && pound + 3 < parser->end) variable++;
- if (char_is_identifier_start(parser, variable)) {
+ if (char_is_identifier_start(parser, variable, parser->end - variable)) {
// At this point we're sure that we've either hit an embedded instance
// or class variable. In this case we'll first need to check if we've
// already consumed content.
@@ -7237,7 +8495,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
return PM_TOKEN_EMBVAR;
}
- // If we didn't get an valid interpolation, then this is just regular
+ // If we didn't get a valid interpolation, then this is just regular
// string content. This is like if we get "#@-". In this case the caller
// should keep lexing.
parser->current.end = pound + 1;
@@ -7270,7 +8528,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
// or a global name punctuation character, then we've hit an embedded
// global variable.
if (
- char_is_identifier_start(parser, check) ||
+ char_is_identifier_start(parser, check, parser->end - check) ||
(pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
) {
// In this case we've hit an embedded global variable. First check to
@@ -7359,12 +8617,24 @@ escape_hexadecimal_digit(const uint8_t value) {
* validated.
*/
static inline uint32_t
-escape_unicode(const uint8_t *string, size_t length) {
+escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location) {
uint32_t value = 0;
for (size_t index = 0; index < length; index++) {
if (index != 0) value <<= 4;
value |= escape_hexadecimal_digit(string[index]);
}
+
+ // Here we're going to verify that the value is a valid Unicode scalar
+ // value, i.e. that it is not a surrogate code point (U+D800..U+DFFF).
+ if (value >= 0xD800 && value <= 0xDFFF) {
+ if (error_location != NULL) {
+ pm_parser_err(parser, error_location->start, error_location->end, PM_ERR_ESCAPE_INVALID_UNICODE);
+ } else {
+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
+ }
+ return 0xFFFD;
+ }
+
return value;
}
@@ -7373,7 +8643,7 @@ escape_unicode(const uint8_t *string, size_t length) {
*/
static inline uint8_t
escape_byte(uint8_t value, const uint8_t flags) {
- if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
+ if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
return value;
}
@@ -7394,21 +8664,7 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla
parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
}
- if (value <= 0x7F) { // 0xxxxxxx
- pm_buffer_append_byte(buffer, (uint8_t) value);
- } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
- pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
- } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
- pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
- } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
- pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
- } else {
+ if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
pm_buffer_append_byte(buffer, 0xEF);
pm_buffer_append_byte(buffer, 0xBF);
@@ -7449,88 +8705,122 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
* source so that the regular expression engine will perform its own unescaping.
*/
static inline void
-escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
+escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
+ }
- uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
- uint8_t byte2 = (uint8_t) (byte & 0xF);
+ escape_write_byte_encoded(parser, buffer, byte);
+}
- if (byte1 >= 0xA) {
- pm_buffer_append_byte(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
- } else {
- pm_buffer_append_byte(buffer, (uint8_t) (byte1 + '0'));
- }
+/**
+ * Write each byte of the given escaped character into the buffer.
+ */
+static inline void
+escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
+ size_t width;
+ if (parser->encoding_changed) {
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ } else {
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
+ }
- if (byte2 >= 0xA) {
- pm_buffer_append_byte(buffer, (uint8_t) (byte2 - 0xA + 'A'));
- } else {
- pm_buffer_append_byte(buffer, (uint8_t) (byte2 + '0'));
- }
+ if (width == 1) {
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
+ } else if (width > 1) {
+ // Valid multibyte character. Just ignore escape.
+ pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
+ pm_buffer_append_bytes(b, parser->current.end, width);
+ parser->current.end += width;
} else {
- escape_write_byte_encoded(parser, buffer, byte);
+ // Assume the next character wasn't meant to be part of this escape
+ // sequence since it is invalid. Add an error and move on.
+ parser->current.end++;
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
}
}
/**
+ * Warn about using a space or a tab character in an escape, as opposed to using
+ * \\s or \\t. Note that we can't quite copy the source because the warning
+ * message replaces \\c with \\C.
+ */
+static void
+escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
+#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
+
+ PM_PARSER_WARN_TOKEN_FORMAT(
+ parser,
+ parser->current,
+ PM_WARN_INVALID_CHARACTER,
+ FLAG(flags),
+ FLAG(flag),
+ type
+ );
+
+#undef FLAG
+}
+
+/**
* Read the value of an escape into the buffer.
*/
static void
-escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
- switch (peek(parser)) {
+escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
+ uint8_t peeked = peek(parser);
+ switch (peeked) {
case '\\': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
return;
}
case '\'': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
return;
}
case 'a': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
return;
}
case 'b': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
return;
}
case 'e': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
return;
}
case 'f': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
return;
}
case 'n': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
return;
}
case 'r': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
return;
}
case 's': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
return;
}
case 't': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
return;
}
case 'v': {
parser->current.end++;
- escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
return;
}
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
@@ -7547,7 +8837,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
}
- escape_write_byte_encoded(parser, buffer, value);
+ value = escape_byte(value, flags);
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
return;
}
case 'x': {
@@ -7566,11 +8857,16 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
parser->current.end++;
}
+ value = escape_byte(value, flags);
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
- } else {
- escape_write_byte_encoded(parser, buffer, value);
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
+ pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
+ } else {
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+ }
}
+
+ escape_write_byte_encoded(parser, buffer, value);
} else {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
}
@@ -7581,27 +8877,27 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
const uint8_t *start = parser->current.end - 1;
parser->current.end++;
- if (
- (parser->current.end + 4 <= parser->end) &&
- pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
- pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
- pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
- pm_char_is_hexadecimal_digit(parser->current.end[3])
- ) {
- uint32_t value = escape_unicode(parser->current.end, 4);
-
- if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
- } else {
- escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
- }
-
- parser->current.end += 4;
+ if (parser->current.end == parser->end) {
+ const uint8_t *start = parser->current.end - 2;
+ PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
} else if (peek(parser) == '{') {
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
-
parser->current.end++;
- parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
+
+ size_t whitespace;
+ while (true) {
+ if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
+ parser->current.end += whitespace;
+ } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
+ // This is super hacky, but it gets us nicer error
+ // messages because we can still pass it off to the
+ // regular expression engine even if we hit an
+ // unterminated regular expression.
+ parser->current.end += 2;
+ } else {
+ break;
+ }
+ }
const uint8_t *extra_codepoints_start = NULL;
int codepoints_count = 0;
@@ -7615,7 +8911,17 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
} else if (hexadecimal_length == 0) {
// there are no hexadecimal characters
- pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
+
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // If this is a regular expression, we are going to
+ // let the regular expression engine handle this
+ // error instead of us.
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+ } else {
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
+ pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+ }
+
return;
}
@@ -7625,12 +8931,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
extra_codepoints_start = unicode_start;
}
- if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
- escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
- }
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL);
+ escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
- parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
}
// ?\u{nnnn} character literal should contain only one codepoint
@@ -7639,23 +8943,65 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
}
- if (peek(parser) == '}') {
+ if (parser->current.end == parser->end) {
+ PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
+ } else if (peek(parser) == '}') {
parser->current.end++;
} else {
- pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // If this is a regular expression, we are going to let
+ // the regular expression engine handle this error
+ // instead of us.
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+ } else {
+ pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+ }
}
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
+ pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
}
} else {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
+ size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
+
+ if (length == 0) {
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+ } else {
+ const uint8_t *start = parser->current.end - 2;
+ PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+ }
+ } else if (length == 4) {
+ uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL);
+
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
+ }
+
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
+ parser->current.end += 4;
+ } else {
+ parser->current.end += length;
+
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // If this is a regular expression, we are going to let
+ // the regular expression engine handle this error
+ // instead of us.
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+ } else {
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
+ }
+ }
}
return;
}
case 'c': {
parser->current.end++;
+ if (flags & PM_ESCAPE_FLAG_CONTROL) {
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
+ }
+
if (parser->current.end == parser->end) {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
return;
@@ -7665,16 +9011,28 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
switch (peeked) {
case '?': {
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
return;
}
case '\\':
- if (flags & PM_ESCAPE_FLAG_CONTROL) {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
+ parser->current.end++;
+
+ if (match(parser, 'u') || match(parser, 'U')) {
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}
+
+ escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+ return;
+ case ' ':
+ parser->current.end++;
+ escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+ return;
+ case '\t':
parser->current.end++;
- escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+ escape_read_warn(parser, flags, 0, "\\t");
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
default: {
if (!char_is_ascii_printable(peeked)) {
@@ -7683,15 +9041,20 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
}
}
}
case 'C': {
parser->current.end++;
+ if (flags & PM_ESCAPE_FLAG_CONTROL) {
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
+ }
+
if (peek(parser) != '-') {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}
@@ -7705,33 +9068,51 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
switch (peeked) {
case '?': {
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
return;
}
case '\\':
- if (flags & PM_ESCAPE_FLAG_CONTROL) {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
+ parser->current.end++;
+
+ if (match(parser, 'u') || match(parser, 'U')) {
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}
+
+ escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+ return;
+ case ' ':
parser->current.end++;
- escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+ escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+ return;
+ case '\t':
+ parser->current.end++;
+ escape_read_warn(parser, flags, 0, "\\t");
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
default: {
if (!char_is_ascii_printable(peeked)) {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
}
}
}
case 'M': {
parser->current.end++;
+ if (flags & PM_ESCAPE_FLAG_META) {
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
+ }
+
if (peek(parser) != '-') {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
return;
}
@@ -7742,24 +9123,38 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
uint8_t peeked = peek(parser);
- if (peeked == '\\') {
- if (flags & PM_ESCAPE_FLAG_META) {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
+ switch (peeked) {
+ case '\\':
+ parser->current.end++;
+
+ if (match(parser, 'u') || match(parser, 'U')) {
+ pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ return;
+ }
+
+ escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
return;
- }
- parser->current.end++;
- escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_META);
- return;
- }
+ case ' ':
+ parser->current.end++;
+ escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
+ return;
+ case '\t':
+ parser->current.end++;
+ escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
+ return;
+ default:
+ if (!char_is_ascii_printable(peeked)) {
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ return;
+ }
- if (!char_is_ascii_printable(peeked)) {
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
- return;
+ parser->current.end++;
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
+ return;
}
-
- parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
- return;
}
case '\r': {
if (peek_offset(parser, 1) == '\n') {
@@ -7767,11 +9162,18 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
return;
}
+ PRISM_FALLTHROUGH
}
- /* fallthrough */
default: {
+ if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ return;
+ }
if (parser->current.end < parser->end) {
- escape_write_byte_encoded(parser, buffer, *parser->current.end++);
+ escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
+ } else {
+ pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
}
return;
}
@@ -7829,7 +9231,7 @@ lex_question_mark(pm_parser_t *parser) {
pm_buffer_t buffer;
pm_buffer_init_capacity(&buffer, 3);
- escape_read(parser, &buffer, PM_ESCAPE_FLAG_SINGLE);
+ escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
return PM_TOKEN_CHARACTER_LITERAL;
@@ -7842,7 +9244,7 @@ lex_question_mark(pm_parser_t *parser) {
!(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
(
(parser->current.end + encoding_width >= parser->end) ||
- !char_is_identifier(parser, parser->current.end + encoding_width)
+ !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
)
) {
lex_state_set(parser, PM_LEX_STATE_END);
@@ -7862,18 +9264,26 @@ lex_question_mark(pm_parser_t *parser) {
static pm_token_type_t
lex_at_variable(pm_parser_t *parser) {
pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
- size_t width;
+ const uint8_t *end = parser->end;
- if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) {
+ size_t width;
+ if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
parser->current.end += width;
- while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
+ while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
parser->current.end += width;
}
- } else if (type == PM_TOKEN_CLASS_VARIABLE) {
- pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
+ } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
+ pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
+ if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
+ diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
+ }
+
+ size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
} else {
- pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
+ pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
+ pm_parser_err_token(parser, &parser->current, diag_id);
}
// If we're lexing an embedded variable, then we need to pop back into the
@@ -7900,7 +9310,7 @@ parser_lex_callback(pm_parser_t *parser) {
*/
static inline pm_comment_t *
parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
- pm_comment_t *comment = (pm_comment_t *) calloc(sizeof(pm_comment_t), 1);
+ pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
if (comment == NULL) return NULL;
*comment = (pm_comment_t) {
@@ -7935,15 +9345,23 @@ lex_embdoc(pm_parser_t *parser) {
pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
if (comment == NULL) return PM_TOKEN_EOF;
- // Now, loop until we find the end of the embedded documentation or the end of
- // the file.
+ // Now, loop until we find the end of the embedded documentation or the end
+ // of the file.
while (parser->current.end + 4 <= parser->end) {
parser->current.start = parser->current.end;
- // If we've hit the end of the embedded documentation then we'll return that
- // token here.
- if (memcmp(parser->current.end, "=end", 4) == 0 &&
- (parser->current.end + 4 == parser->end || pm_char_is_whitespace(parser->current.end[4]))) {
+ // If we've hit the end of the embedded documentation then we'll return
+ // that token here.
+ if (
+ (memcmp(parser->current.end, "=end", 4) == 0) &&
+ (
+ (parser->current.end + 4 == parser->end) || // end of file
+ pm_char_is_whitespace(parser->current.end[4]) || // whitespace
+ (parser->current.end[4] == '\0') || // NUL or end of script
+ (parser->current.end[4] == '\004') || // ^D
+ (parser->current.end[4] == '\032') // ^Z
+ )
+ ) {
const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
if (newline == NULL) {
@@ -8013,6 +9431,20 @@ parser_flush_heredoc_end(pm_parser_t *parser) {
}
/**
+ * Returns true if the parser has lexed the last token on the current line, i.e. only inline whitespace lies between the end of the current token and the next newline, '#', or the end of the input.
+*/
+static bool
+parser_end_of_line_p(const pm_parser_t *parser) {
+ const uint8_t *cursor = parser->current.end; // scan forward from the end of the current token
+
+ while (cursor < parser->end && *cursor != '\n' && *cursor != '#') { // stop at end of input, a newline, or a '#'
+ if (!pm_char_is_inline_whitespace(*cursor++)) return false; // any other byte means more content follows on this line
+ }
+
+ return true;
+}
+
+/**
* When we're lexing certain types (strings, symbols, lists, etc.) we have
* string content associated with the tokens. For example:
*
@@ -8025,7 +9457,7 @@ parser_flush_heredoc_end(pm_parser_t *parser) {
* "foo\n"
*
* then the bytes in the string are "f", "o", "o", "\", "n", but we want to
- * provide out consumers with the string content "f", "o", "o", "\n". In these
+ * provide our consumers with the string content "f", "o", "o", "\n". In these
* cases, when we find the first escape sequence, we initialize a pm_buffer_t
* to keep track of the string content. Then in the parser, it will
* automatically attach the string content to the node that it belongs to.
@@ -8045,23 +9477,98 @@ typedef struct {
} pm_token_buffer_t;
/**
+ * In order to properly set a regular expression's encoding and to validate
+ * the byte sequence for the underlying encoding we must process any escape
+ * sequences. The unescaped byte sequence will be stored in `buffer` just like
+ * for other string-like types. However, we also need to store the regular
+ * expression's source string. That string may be different from what we see
+ * during lexing because some escape sequences rewrite the source.
+ *
+ * This value will only be initialized for regular expressions and only if we
+ * receive an escape sequence. It will contain the regular expression's source
+ * string's byte sequence.
+ */
+typedef struct {
+ /** The embedded base token buffer; its `buffer` member receives the unescaped bytes. */
+ pm_token_buffer_t base;
+
+ /** The buffer holding the regexp source bytes, kept alongside the unescaped content. */
+ pm_buffer_t regexp_buffer;
+} pm_regexp_token_buffer_t;
+
+/**
* Push the given byte into the token buffer.
*/
static inline void
-pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
+pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) { // appends a single byte to token_buffer->buffer
pm_buffer_append_byte(&token_buffer->buffer, byte);
}
+static inline void
+pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) { // push one byte into the regexp source buffer
+ pm_buffer_append_byte(&token_buffer->regexp_buffer, byte); // only regexp_buffer is written; base.buffer is not touched here
+}
+
+/**
+ * Return the width in bytes of the character at the end of the current token, substituting 1 when the encoding reports a width of 0.
+ */
+static inline size_t
+parser_char_width(const pm_parser_t *parser) {
+ size_t width;
+ if (parser->encoding_changed) { // measure with the parser's own encoding callback
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ } else { // otherwise call the UTF-8 width routine directly
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
+ }
+
+ // TODO: If the character is invalid in the given encoding, then we'll just
+ // push one byte into the buffer. This should actually be an error.
+ return (width == 0 ? 1 : width);
+}
+
+/**
+ * Push the character at the parser's current position into the token buffer and advance the parser past it.
+ */
+static void
+pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
+ size_t width = parser_char_width(parser); // never 0: parser_char_width substitutes 1
+ pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width); // NOTE(review): presumably callers guarantee at least one byte remains before parser->end; confirm
+ parser->current.end += width; // advance past the character that was copied
+}
+
+static void
+pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) { // regexp counterpart of pm_token_buffer_push_escaped
+ size_t width = parser_char_width(parser); // never 0: parser_char_width substitutes 1
+ pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width); // the same bytes go to the unescaped buffer...
+ pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width); // ...and to the regexp source buffer
+ parser->current.end += width; // advance past the character that was copied
+}
+
+static bool
+pm_slice_ascii_only_p(const uint8_t *value, size_t length) { // true when every byte of value[0, length) is below 0x80
+ for (size_t index = 0; index < length; index++) {
+ if (value[index] & 0x80) return false; // high bit set: not 7-bit ASCII
+ }
+
+ return true; // also the result for an empty slice
+}
+
/**
* When we're about to return from lexing the current token and we know for sure
* that we have found an escape sequence, this function is called to copy the
- *
* contents of the token buffer into the current string on the parser so that it
* can be attached to the correct node.
*/
static inline void
pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
- pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
+ pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer)); // reads the buffer through its accessor functions rather than its fields
+}
+
+static inline void
+pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { // regexp counterpart of pm_token_buffer_copy
+ pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer)); // current_string is built from the unescaped bytes in base.buffer
+ parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer)); // the ASCII-only flag is computed from the regexp source bytes, not the unescaped ones
+ pm_buffer_free(&token_buffer->regexp_buffer); // only regexp_buffer is freed here; base.buffer presumably now belongs to current_string (TODO confirm in pm_string_owned_init)
}
/**
@@ -8070,7 +9577,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
* string. If we haven't pushed anything into the buffer, this means that we
* never found an escape sequence, so we can directly reference the bounds of
* the current string. Either way, at the return of this function it is expected
- *
* that parser->current_string is established in such a way that it can be
* attached to a node.
*/
@@ -8084,12 +9590,25 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
}
}
+static void
+pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { // regexp counterpart of pm_token_buffer_flush
+ if (token_buffer->base.cursor == NULL) { // nothing was buffered, so the token can reference the source bytes directly
+ pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
+ parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start)); // flag computed over the raw token bytes
+ } else {
+ pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor)); // copy the remaining bytes, from cursor to the token end, into the unescaped buffer...
+ pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor)); // ...and into the regexp source buffer
+ pm_regexp_token_buffer_copy(parser, token_buffer); // sets current_string and the ASCII-only flag, then frees regexp_buffer
+ }
+}
+
+#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
+
/**
* When we've found an escape sequence, we need to copy everything up to this
* point into the buffer because we're about to provide a string that has
* different content than a direct slice of the source.
*
- *
* It is expected that the parser's current token end will be pointing at one
* byte past the backslash that starts the escape sequence.
*/
@@ -8097,16 +9616,39 @@ static void
pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
const uint8_t *start;
if (token_buffer->cursor == NULL) {
- pm_buffer_init_capacity(&token_buffer->buffer, 16);
+ pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
start = parser->current.start;
} else {
start = token_buffer->cursor;
}
const uint8_t *end = parser->current.end - 1;
+ assert(end >= start);
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
+
+ token_buffer->cursor = end;
}
+static void
+pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { // regexp counterpart of pm_token_buffer_escape; keeps both buffers in step
+ const uint8_t *start;
+ if (token_buffer->base.cursor == NULL) { // no cursor recorded yet: initialize both buffers and copy from the token start
+ pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+ pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+ start = parser->current.start;
+ } else { // otherwise resume from where the previous copy stopped
+ start = token_buffer->base.cursor;
+ }
+
+ const uint8_t *end = parser->current.end - 1; // one byte before the token end; NOTE(review): pm_token_buffer_escape asserts end >= start at this point, this function does not
+ pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
+ pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
+
+ token_buffer->base.cursor = end; // remember how far the source has been copied
+}
+
+#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
+
/**
* Effectively the same thing as pm_strspn_inline_whitespace, but in the case of
* a tilde heredoc expands out tab characters to the nearest tab boundaries.
@@ -8162,9 +9704,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
}
- const uint8_t delimiter = *parser->current.end;
- parser->current.end += eol_length;
+ uint8_t delimiter = *parser->current.end;
+ // If our delimiter is \r\n, we want to treat it as if it's \n.
+ // For example, %\r\nfoo\r\n should be "foo"
+ if (eol_length == 2) {
+ delimiter = *(parser->current.end + 1);
+ }
+
+ parser->current.end += eol_length;
return delimiter;
}
@@ -8238,6 +9786,7 @@ parser_lex(pm_parser_t *parser) {
if (match_eol_offset(parser, 1)) {
chomping = false;
} else {
+ pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
parser->current.end++;
space_seen = true;
}
@@ -8273,6 +9822,14 @@ parser_lex(pm_parser_t *parser) {
// We'll check if we're at the end of the file. If we are, then we
// need to return the EOF token.
if (parser->current.end >= parser->end) {
+ // If we hit EOF, but the EOF came immediately after a newline,
+ // set the start of the token to the newline. This way any EOF
+ // errors will be reported as happening on that line rather than
+ // a line after. For example "foo(\n" should report an error
+ // on line 1 even though EOF technically occurs on line 2.
+ if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
+ parser->current.start -= 1;
+ }
LEX(PM_TOKEN_EOF);
}
@@ -8308,12 +9865,14 @@ parser_lex(pm_parser_t *parser) {
// pass and we're at the start of the file, then we need
// to do another pass to potentially find other patterns
// for encoding comments.
- if (length >= 10) parser_lex_magic_comment_encoding(parser);
+ if (length >= 10 && !parser->encoding_locked) {
+ parser_lex_magic_comment_encoding(parser);
+ }
}
lexed_comment = true;
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
case '\r':
case '\n': {
parser->semantic_token_seen = semantic_token_seen & 0x1;
@@ -8355,7 +9914,7 @@ parser_lex(pm_parser_t *parser) {
parser->current.type = PM_TOKEN_NEWLINE;
return;
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
case PM_IGNORED_NEWLINE_ALL:
if (!lexed_comment) parser_lex_ignored_newline(parser);
lexed_comment = false;
@@ -8392,14 +9951,37 @@ parser_lex(pm_parser_t *parser) {
following = next_newline(following, parser->end - following);
}
- // If the lex state was ignored, or we hit a '.' or a '&.',
- // we will lex the ignored newline
+ // If the lex state was ignored, we will lex the
+ // ignored newline.
+ if (lex_state_ignored_p(parser)) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lexed_comment = false;
+ goto lex_next_token;
+ }
+
+ // If we hit a '.' or a '&.' we will lex the ignored
+ // newline.
+ if (following && (
+ (peek_at(parser, following) == '.') ||
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
+ )) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lexed_comment = false;
+ goto lex_next_token;
+ }
+
+
+ // If we are parsing as CRuby 4.0 or later and we
+ // hit a '&&' or a '||' then we will lex the ignored
+ // newline.
if (
- lex_state_ignored_p(parser) ||
- (following && (
- (peek_at(parser, following) == '.') ||
- (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
- ))
+ (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) &&
+ following && (
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
+ (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
+ (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
+ (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
+ )
) {
if (!lexed_comment) parser_lex_ignored_newline(parser);
lexed_comment = false;
@@ -8411,7 +9993,7 @@ parser_lex(pm_parser_t *parser) {
// we need to return the call operator.
if (next_content[0] == '.') {
// To match ripper, we need to emit an ignored newline even though
- // its a real newline in the case that we have a beginless range
+ // it's a real newline in the case that we have a beginless range
// on a subsequent line.
if (peek_at(parser, next_content + 1) == '.') {
if (!lexed_comment) parser_lex_ignored_newline(parser);
@@ -8439,6 +10021,63 @@ parser_lex(pm_parser_t *parser) {
parser->next_start = NULL;
LEX(PM_TOKEN_AMPERSAND_DOT);
}
+
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+ // If we hit an && then we are in a logical chain
+ // and we need to return the logical operator.
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
+ }
+
+ // If we hit a || then we are in a logical chain and
+ // we need to return the logical operator.
+ if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ LEX(PM_TOKEN_PIPE_PIPE);
+ }
+
+ // If we hit an 'and' then we are in a logical chain
+ // and we need to return the logical operator.
+ if (
+ peek_at(parser, next_content) == 'a' &&
+ peek_at(parser, next_content + 1) == 'n' &&
+ peek_at(parser, next_content + 2) == 'd' &&
+ !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
+ ) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 3;
+ parser->next_start = NULL;
+ parser->command_start = true;
+ LEX(PM_TOKEN_KEYWORD_AND);
+ }
+
+ // If we hit an 'or' then we are in a logical chain
+ // and we need to return the logical operator.
+ if (
+ peek_at(parser, next_content) == 'o' &&
+ peek_at(parser, next_content + 1) == 'r' &&
+ !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
+ ) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ parser->command_start = true;
+ LEX(PM_TOKEN_KEYWORD_OR);
+ }
+ }
}
// At this point we know this is a regular newline, and we can set the
@@ -8452,6 +10091,10 @@ parser_lex(pm_parser_t *parser) {
// ,
case ',':
+ if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+ }
+
lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
LEX(PM_TOKEN_COMMA);
@@ -8569,8 +10212,13 @@ parser_lex(pm_parser_t *parser) {
pm_token_type_t type = PM_TOKEN_STAR_STAR;
- if (lex_state_spcarg_p(parser, space_seen) || lex_state_beg_p(parser)) {
+ if (lex_state_spcarg_p(parser, space_seen)) {
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
type = PM_TOKEN_USTAR_STAR;
+ } else if (lex_state_beg_p(parser)) {
+ type = PM_TOKEN_USTAR_STAR;
+ } else if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -8594,6 +10242,8 @@ parser_lex(pm_parser_t *parser) {
type = PM_TOKEN_USTAR;
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_USTAR;
+ } else if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -8628,9 +10278,13 @@ parser_lex(pm_parser_t *parser) {
// = => =~ == === =begin
case '=':
- if (current_token_starts_line(parser) && (parser->current.end + 5 <= parser->end) && memcmp(parser->current.end, "begin", 5) == 0 && pm_char_is_whitespace(peek_offset(parser, 5))) {
+ if (
+ current_token_starts_line(parser) &&
+ (parser->current.end + 5 <= parser->end) &&
+ memcmp(parser->current.end, "begin", 5) == 0 &&
+ (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
+ ) {
pm_token_type_t type = lex_embdoc(parser);
-
if (type == PM_TOKEN_EOF) {
LEX(type);
}
@@ -8693,38 +10347,45 @@ parser_lex(pm_parser_t *parser) {
if (parser->current.end >= parser->end) {
parser->current.end = end;
- } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
+ } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
parser->current.end = end;
} else {
if (quote == PM_HEREDOC_QUOTE_NONE) {
parser->current.end += width;
- while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
+ while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
parser->current.end += width;
}
} else {
// If we have quotes, then we're going to go until we find the
// end quote.
while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
+ if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
parser->current.end++;
}
}
size_t ident_length = (size_t) (parser->current.end - ident_start);
+ bool ident_error = false;
+
if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
- // TODO: handle unterminated heredoc
+ pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
+ ident_error = true;
}
parser->explicit_encoding = NULL;
lex_mode_push(parser, (pm_lex_mode_t) {
.mode = PM_LEX_HEREDOC,
.as.heredoc = {
- .ident_start = ident_start,
- .ident_length = ident_length,
+ .base = {
+ .ident_start = ident_start,
+ .ident_length = ident_length,
+ .quote = quote,
+ .indent = indent
+ },
.next_start = parser->current.end,
- .quote = quote,
- .indent = indent,
- .common_whitespace = (size_t) -1
+ .common_whitespace = NULL,
+ .line_continuation = false
}
});
@@ -8736,7 +10397,7 @@ parser_lex(pm_parser_t *parser) {
// this is not a valid heredoc declaration. In this case we
// will add an error, but we will still return a heredoc
// start.
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
+ if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
body_start = parser->end;
} else {
// Otherwise, we want to indicate that the body of the
@@ -8759,6 +10420,10 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_LESS_LESS_EQUAL);
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
+ }
+
if (lex_state_operator_p(parser)) {
lex_state_set(parser, PM_LEX_STATE_ARG);
} else {
@@ -8867,8 +10532,22 @@ parser_lex(pm_parser_t *parser) {
}
pm_token_type_t type = PM_TOKEN_AMPERSAND;
- if (lex_state_spcarg_p(parser, space_seen) || lex_state_beg_p(parser)) {
+ if (lex_state_spcarg_p(parser, space_seen)) {
+ if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
+ } else {
+ const uint8_t delim = peek_offset(parser, 1);
+
+ if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
+ }
+ }
+
type = PM_TOKEN_UAMPERSAND;
+ } else if (lex_state_beg_p(parser)) {
+ type = PM_TOKEN_UAMPERSAND;
+ } else if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -8927,12 +10606,10 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_PLUS_EQUAL);
}
- bool spcarg = lex_state_spcarg_p(parser, space_seen);
- if (spcarg) {
- pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
- }
-
- if (lex_state_beg_p(parser) || spcarg) {
+ if (
+ lex_state_beg_p(parser) ||
+ (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
+ ) {
lex_state_set(parser, PM_LEX_STATE_BEG);
if (pm_char_is_decimal_digit(peek(parser))) {
@@ -8945,6 +10622,10 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_UPLUS);
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
+ }
+
lex_state_set(parser, PM_LEX_STATE_BEG);
LEX(PM_TOKEN_PLUS);
}
@@ -8982,6 +10663,10 @@ parser_lex(pm_parser_t *parser) {
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
+ }
+
lex_state_set(parser, PM_LEX_STATE_BEG);
LEX(PM_TOKEN_MINUS);
}
@@ -8993,10 +10678,7 @@ parser_lex(pm_parser_t *parser) {
if (match(parser, '.')) {
if (match(parser, '.')) {
// If we're _not_ inside a range within default parameters
- if (
- !context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) &&
- context_p(parser, PM_CONTEXT_DEF_PARAMS)
- ) {
+ if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
if (lex_state_p(parser, PM_LEX_STATE_END)) {
lex_state_set(parser, PM_LEX_STATE_BEG);
} else {
@@ -9005,6 +10687,10 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_UDOT_DOT_DOT);
}
+ if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
+ }
+
lex_state_set(parser, PM_LEX_STATE_BEG);
LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
}
@@ -9076,6 +10762,10 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_REGEXP_BEGIN);
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
+ }
+
if (lex_state_operator_p(parser)) {
lex_state_set(parser, PM_LEX_STATE_ARG);
} else {
@@ -9111,7 +10801,7 @@ parser_lex(pm_parser_t *parser) {
// operator because we don't want to move into the string
// lex mode unnecessarily.
if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
- pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
+ pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
LEX(PM_TOKEN_PERCENT);
}
@@ -9130,10 +10820,7 @@ parser_lex(pm_parser_t *parser) {
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
-
- if (parser->current.end < parser->end) {
- LEX(PM_TOKEN_STRING_BEGIN);
- }
+ LEX(PM_TOKEN_STRING_BEGIN);
}
// Delimiters for %-literals cannot be alphanumeric. We
@@ -9252,7 +10939,7 @@ parser_lex(pm_parser_t *parser) {
}
default:
// If we get to this point, then we have a % that is completely
- // unparseable. In this case we'll just drop it from the parser
+ // unparsable. In this case we'll just drop it from the parser
// and skip past it and hope that the next token is something
// that we can parse.
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
@@ -9260,6 +10947,10 @@ parser_lex(pm_parser_t *parser) {
}
}
+ if (ambiguous_operator_p(parser, space_seen)) {
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
+ }
+
lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
LEX(PM_TOKEN_PERCENT);
}
@@ -9285,13 +10976,50 @@ parser_lex(pm_parser_t *parser) {
default: {
if (*parser->current.start != '_') {
- size_t width = char_is_identifier_start(parser, parser->current.start);
+ size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
- // If this isn't the beginning of an identifier, then it's an invalid
- // token as we've exhausted all of the other options. We'll skip past
- // it and return the next token.
+ // If this isn't the beginning of an identifier, then
+ // it's an invalid token as we've exhausted all of the
+ // other options. We'll skip past it and return the next
+ // token after adding an appropriate error message.
if (!width) {
- pm_parser_err_current(parser, PM_ERR_INVALID_TOKEN);
+ if (*parser->current.start >= 0x80) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
+ } else if (*parser->current.start == '\\') {
+ switch (peek_at(parser, parser->current.start + 1)) {
+ case ' ':
+ parser->current.end++;
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
+ break;
+ case '\f':
+ parser->current.end++;
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
+ break;
+ case '\t':
+ parser->current.end++;
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
+ break;
+ case '\v':
+ parser->current.end++;
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
+ break;
+ case '\r':
+ if (peek_at(parser, parser->current.start + 2) != '\n') {
+ parser->current.end++;
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
+ break;
+ }
+ PRISM_FALLTHROUGH
+ default:
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
+ break;
+ }
+ } else if (char_is_ascii_printable(*parser->current.start)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
+ } else {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
+ }
+
goto lex_next_token;
}
@@ -9300,19 +11028,19 @@ parser_lex(pm_parser_t *parser) {
pm_token_type_t type = lex_identifier(parser, previous_command_start);
- // If we've hit a __END__ and it was at the start of the line or the
- // start of the file and it is followed by either a \n or a \r\n, then
- // this is the last token of the file.
+ // If we've hit a __END__ and it was at the start of the
+ // line or the start of the file and it is followed by
+ // either a \n or a \r\n, then this is the last token of the
+ // file.
if (
((parser->current.end - parser->current.start) == 7) &&
current_token_starts_line(parser) &&
(memcmp(parser->current.start, "__END__", 7) == 0) &&
(parser->current.end == parser->end || match_eol(parser))
- )
- {
- // Since we know we're about to add an __END__ comment, we know we
- // need at add all of the newlines to get the correct column
- // information for it.
+ ) {
+ // Since we know we're about to add an __END__ comment,
+ // we know we need to add all of the newlines to get the
+ // correct column information for it.
const uint8_t *cursor = parser->current.end;
while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
pm_newline_list_append(&parser->newline_list, cursor++);
@@ -9398,19 +11126,13 @@ parser_lex(pm_parser_t *parser) {
// and then find the first one.
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
// If we haven't found an escape yet, then this buffer will be
// unallocated since we can refer directly to the source string.
- pm_token_buffer_t token_buffer = { { 0 }, 0 };
+ pm_token_buffer_t token_buffer = { 0 };
while (breakpoint != NULL) {
- // If we hit a null byte, skip directly past it.
- if (*breakpoint == '\0') {
- breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
- continue;
- }
-
// If we hit whitespace, then we must have received content by
// now, so we can return an element of the list.
if (pm_char_is_whitespace(*breakpoint)) {
@@ -9426,7 +11148,7 @@ parser_lex(pm_parser_t *parser) {
// we need to continue on past it.
if (lex_mode->as.list.nesting > 0) {
parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
lex_mode->as.list.nesting--;
continue;
}
@@ -9447,6 +11169,12 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_END);
}
+ // If we hit a null byte, skip directly past it.
+ if (*breakpoint == '\0') {
+ breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
+ continue;
+ }
+
// If we hit escapes, then we need to treat the next token
// literally. In this case we'll skip past the next character
// and find the next breakpoint.
@@ -9469,18 +11197,18 @@ parser_lex(pm_parser_t *parser) {
case '\t':
case '\v':
case '\\':
- pm_token_buffer_push(&token_buffer, peeked);
+ pm_token_buffer_push_byte(&token_buffer, peeked);
parser->current.end++;
break;
case '\r':
parser->current.end++;
if (peek(parser) != '\n') {
- pm_token_buffer_push(&token_buffer, '\r');
+ pm_token_buffer_push_byte(&token_buffer, '\r');
break;
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
case '\n':
- pm_token_buffer_push(&token_buffer, '\n');
+ pm_token_buffer_push_byte(&token_buffer, '\n');
if (parser->heredoc_end) {
// ... if we are on the same line as a heredoc,
@@ -9498,21 +11226,20 @@ parser_lex(pm_parser_t *parser) {
break;
default:
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
- pm_token_buffer_push(&token_buffer, peeked);
+ pm_token_buffer_push_byte(&token_buffer, peeked);
parser->current.end++;
} else if (lex_mode->as.list.interpolation) {
- escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+ escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
} else {
- pm_token_buffer_push(&token_buffer, '\\');
- pm_token_buffer_push(&token_buffer, peeked);
- parser->current.end++;
+ pm_token_buffer_push_byte(&token_buffer, '\\');
+ pm_token_buffer_push_escaped(&token_buffer, parser);
}
break;
}
token_buffer.cursor = parser->current.end;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
continue;
}
@@ -9525,7 +11252,7 @@ parser_lex(pm_parser_t *parser) {
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint.
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
continue;
}
@@ -9540,7 +11267,7 @@ parser_lex(pm_parser_t *parser) {
// and find the next breakpoint.
assert(*breakpoint == lex_mode->as.list.incrementor);
parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
lex_mode->as.list.nesting++;
continue;
}
@@ -9566,8 +11293,8 @@ parser_lex(pm_parser_t *parser) {
parser->next_start = NULL;
}
- // We'll check if we're at the end of the file. If we are, then we need to
- // return the EOF token.
+ // We'll check if we're at the end of the file. If we are, then we
+ // need to return the EOF token.
if (parser->current.end >= parser->end) {
LEX(PM_TOKEN_EOF);
}
@@ -9579,46 +11306,35 @@ parser_lex(pm_parser_t *parser) {
// regular expression. We'll use strpbrk to find the first of these
// characters.
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- pm_token_buffer_t token_buffer = { { 0 }, 0 };
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+ pm_regexp_token_buffer_t token_buffer = { 0 };
while (breakpoint != NULL) {
- // If we hit a null byte, skip directly past it.
- if (*breakpoint == '\0') {
- parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- continue;
- }
-
- // If we've hit a newline, then we need to track that in the
- // list of newlines.
- if (*breakpoint == '\n') {
- // For the special case of a newline-terminated regular expression, we will pass
- // through this branch twice -- once with PM_TOKEN_REGEXP_BEGIN and then again
- // with PM_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
- // tracking it only in the REGEXP_BEGIN case.
- if (
- !(lex_mode->as.regexp.terminator == '\n' && parser->current.type != PM_TOKEN_REGEXP_BEGIN)
- && parser->heredoc_end == NULL
- ) {
- pm_newline_list_append(&parser->newline_list, breakpoint);
+ uint8_t term = lex_mode->as.regexp.terminator;
+ bool is_terminator = (*breakpoint == term);
+
+ // If the terminator is newline, we need to consider \r\n _also_ a newline.
+ // For example: `%r\nfoo\r\n`
+ // The regular expression should be /foo/, not /foo\r/
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+ if (term == '\n') {
+ is_terminator = true;
}
- if (lex_mode->as.regexp.terminator != '\n') {
- // If the terminator is not a newline, then we can set
- // the next breakpoint and continue.
- parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- continue;
+ // If the terminator is a CR, but we see a CRLF, we need to
+ // treat the CRLF as a newline, meaning this is _not_ the
+ // terminator
+ if (term == '\r') {
+ is_terminator = false;
}
}
// If we hit the terminator, we need to determine what kind of
// token to return.
- if (*breakpoint == lex_mode->as.regexp.terminator) {
+ if (is_terminator) {
if (lex_mode->as.regexp.nesting > 0) {
parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
lex_mode->as.regexp.nesting--;
continue;
}
@@ -9628,13 +11344,27 @@ parser_lex(pm_parser_t *parser) {
// first.
if (breakpoint > parser->current.start) {
parser->current.end = breakpoint;
- pm_token_buffer_flush(parser, &token_buffer);
+ pm_regexp_token_buffer_flush(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
}
+ // Check here if we need to track the newline.
+ size_t eol_length = match_eol_at(parser, breakpoint);
+ if (eol_length) {
+ parser->current.end = breakpoint + eol_length;
+
+ // Track the newline if we're not in a heredoc that
+ // would already have added the newline to the
+ // list.
+ if (parser->heredoc_end == NULL) {
+ pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+ }
+ } else {
+ parser->current.end = breakpoint + 1;
+ }
+
// Since we've hit the terminator of the regular expression,
// we now need to parse the options.
- parser->current.end = breakpoint + 1;
parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
lex_mode_pop(parser);
@@ -9642,124 +11372,163 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_REGEXP_END);
}
- // If we hit escapes, then we need to treat the next token
- // literally. In this case we'll skip past the next character
+ // If we've hit the incrementor, then we need to skip past it
// and find the next breakpoint.
- if (*breakpoint == '\\') {
+ if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
parser->current.end = breakpoint + 1;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+ lex_mode->as.regexp.nesting++;
+ continue;
+ }
- // If we've hit the end of the file, then break out of the
- // loop by setting the breakpoint to NULL.
- if (parser->current.end == parser->end) {
- breakpoint = NULL;
- continue;
- }
-
- pm_token_buffer_escape(parser, &token_buffer);
- uint8_t peeked = peek(parser);
+ switch (*breakpoint) {
+ case '\0':
+ // If we hit a null byte, skip directly past it.
+ parser->current.end = breakpoint + 1;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+ break;
+ case '\r':
+ if (peek_at(parser, breakpoint + 1) != '\n') {
+ parser->current.end = breakpoint + 1;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+ break;
+ }
- switch (peeked) {
- case '\r':
- parser->current.end++;
- if (peek(parser) != '\n') {
- if (lex_mode->as.regexp.terminator != '\r') {
- pm_token_buffer_push(&token_buffer, '\\');
- }
- pm_token_buffer_push(&token_buffer, '\r');
- break;
- }
- /* fallthrough */
- case '\n':
- if (parser->heredoc_end) {
- // ... if we are on the same line as a heredoc,
- // flush the heredoc and continue parsing after
- // heredoc_end.
- parser_flush_heredoc_end(parser);
- pm_token_buffer_copy(parser, &token_buffer);
- LEX(PM_TOKEN_STRING_CONTENT);
- } else {
- // ... else track the newline.
- pm_newline_list_append(&parser->newline_list, parser->current.end);
- }
+ breakpoint++;
+ parser->current.end = breakpoint;
+ pm_regexp_token_buffer_escape(parser, &token_buffer);
+ token_buffer.base.cursor = breakpoint;
- parser->current.end++;
+ PRISM_FALLTHROUGH
+ case '\n':
+ // If we've hit a newline, then we need to track that in
+ // the list of newlines.
+ if (parser->heredoc_end == NULL) {
+ pm_newline_list_append(&parser->newline_list, breakpoint);
+ parser->current.end = breakpoint + 1;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
break;
- case 'c':
- case 'C':
- case 'M':
- case 'u':
- case 'x':
- escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
+ }
+
+ parser->current.end = breakpoint + 1;
+ parser_flush_heredoc_end(parser);
+ pm_regexp_token_buffer_flush(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
+ case '\\': {
+ // If we hit escapes, then we need to treat the next
+ // token literally. In this case we'll skip past the
+ // next character and find the next breakpoint.
+ parser->current.end = breakpoint + 1;
+
+ // If we've hit the end of the file, then break out of
+ // the loop by setting the breakpoint to NULL.
+ if (parser->current.end == parser->end) {
+ breakpoint = NULL;
break;
- default:
- if (lex_mode->as.regexp.terminator == peeked) {
- // Some characters when they are used as the
- // terminator also receive an escape. They are
- // enumerated here.
- switch (peeked) {
- case '$': case ')': case '*': case '+':
- case '.': case '>': case '?': case ']':
- case '^': case '|': case '}':
- pm_token_buffer_push(&token_buffer, '\\');
- break;
- default:
- break;
+ }
+
+ pm_regexp_token_buffer_escape(parser, &token_buffer);
+ uint8_t peeked = peek(parser);
+
+ switch (peeked) {
+ case '\r':
+ parser->current.end++;
+ if (peek(parser) != '\n') {
+ if (lex_mode->as.regexp.terminator != '\r') {
+ pm_token_buffer_push_byte(&token_buffer.base, '\\');
+ }
+ pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
+ pm_token_buffer_push_byte(&token_buffer.base, '\r');
+ break;
+ }
+ PRISM_FALLTHROUGH
+ case '\n':
+ if (parser->heredoc_end) {
+ // ... if we are on the same line as a heredoc,
+ // flush the heredoc and continue parsing after
+ // heredoc_end.
+ parser_flush_heredoc_end(parser);
+ pm_regexp_token_buffer_copy(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
+ } else {
+ // ... else track the newline.
+ pm_newline_list_append(&parser->newline_list, parser->current.end);
}
- pm_token_buffer_push(&token_buffer, peeked);
parser->current.end++;
break;
- }
-
- if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
- pm_token_buffer_push(&token_buffer, peeked);
- parser->current.end++;
- break;
- }
+ case 'c':
+ case 'C':
+ case 'M':
+ case 'u':
+ case 'x':
+ escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
+ break;
+ default:
+ if (lex_mode->as.regexp.terminator == peeked) {
+ // Some characters when they are used as the
+ // terminator also receive an escape. They are
+ // enumerated here.
+ switch (peeked) {
+ case '$': case ')': case '*': case '+':
+ case '.': case '>': case '?': case ']':
+ case '^': case '|': case '}':
+ pm_token_buffer_push_byte(&token_buffer.base, '\\');
+ break;
+ default:
+ break;
+ }
- token_buffer.cursor = parser->current.end;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- continue;
- }
+ pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
+ pm_token_buffer_push_byte(&token_buffer.base, peeked);
+ parser->current.end++;
+ break;
+ }
- // If we hit a #, then we will attempt to lex interpolation.
- if (*breakpoint == '#') {
- pm_token_type_t type = lex_interpolation(parser, breakpoint);
+ if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
+ pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
+ break;
+ }
- if (type == PM_TOKEN_NOT_PROVIDED) {
- // If we haven't returned at this point then we had
- // something that looked like an interpolated class or
- // instance variable like "#@" but wasn't actually. In
- // this case we'll just skip to the next breakpoint.
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- continue;
+ token_buffer.base.cursor = parser->current.end;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+ break;
}
+ case '#': {
+ // If we hit a #, then we will attempt to lex
+ // interpolation.
+ pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_STRING_CONTENT) {
- pm_token_buffer_flush(parser, &token_buffer);
- }
+ if (type == PM_TOKEN_NOT_PROVIDED) {
+ // If we haven't returned at this point then we had
+ // something that looked like an interpolated class or
+ // instance variable like "#@" but wasn't actually. In
+ // this case we'll just skip to the next breakpoint.
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+ break;
+ }
- LEX(type);
- }
+ if (type == PM_TOKEN_STRING_CONTENT) {
+ pm_regexp_token_buffer_flush(parser, &token_buffer);
+ }
- // If we've hit the incrementor, then we need to skip past it
- // and find the next breakpoint.
- assert(*breakpoint == lex_mode->as.regexp.incrementor);
- parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- lex_mode->as.regexp.nesting++;
- continue;
+ LEX(type);
+ }
+ default:
+ assert(false && "unreachable");
+ break;
+ }
}
if (parser->current.end > parser->current.start) {
- pm_token_buffer_flush(parser, &token_buffer);
+ pm_regexp_token_buffer_flush(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
}
// If we were unable to find a breakpoint, then this token hits the
// end of the file.
parser->current.end = parser->end;
- pm_token_buffer_flush(parser, &token_buffer);
+ pm_regexp_token_buffer_flush(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
}
case PM_LEX_STRING: {
@@ -9782,11 +11551,11 @@ parser_lex(pm_parser_t *parser) {
// string. We'll use strpbrk to find the first of these characters.
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
// If we haven't found an escape yet, then this buffer will be
// unallocated since we can refer directly to the source string.
- pm_token_buffer_t token_buffer = { { 0 }, 0 };
+ pm_token_buffer_t token_buffer = { 0 };
while (breakpoint != NULL) {
// If we hit the incrementor, then we'll increment then nesting and
@@ -9794,19 +11563,38 @@ parser_lex(pm_parser_t *parser) {
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
lex_mode->as.string.nesting++;
parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
continue;
}
+ uint8_t term = lex_mode->as.string.terminator;
+ bool is_terminator = (*breakpoint == term);
+
+ // If the terminator is newline, we need to consider \r\n _also_ a newline.
+ // For example: `%\nfoo\r\n`
+ // The string should be "foo", not "foo\r"
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+ if (term == '\n') {
+ is_terminator = true;
+ }
+
+ // If the terminator is a CR, but we see a CRLF, we need to
+ // treat the CRLF as a newline, meaning this is _not_ the
+ // terminator
+ if (term == '\r') {
+ is_terminator = false;
+ }
+ }
+
// Note that we have to check the terminator here first because we could
// potentially be parsing a % string that has a # character as the
// terminator.
- if (*breakpoint == lex_mode->as.string.terminator) {
+ if (is_terminator) {
// If this terminator doesn't actually close the string, then we need
// to continue on past it.
if (lex_mode->as.string.nesting > 0) {
parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
lex_mode->as.string.nesting--;
continue;
}
@@ -9824,7 +11612,13 @@ parser_lex(pm_parser_t *parser) {
size_t eol_length = match_eol_at(parser, breakpoint);
if (eol_length) {
parser->current.end = breakpoint + eol_length;
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+
+ // Track the newline if we're not in a heredoc that
+ // would already have added the newline to the
+ // list.
+ if (parser->heredoc_end == NULL) {
+ pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+ }
} else {
parser->current.end = breakpoint + 1;
}
@@ -9836,34 +11630,55 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_LABEL_END);
}
+ // When the delimiter itself is a newline, we won't
+ // get a chance to flush heredocs in the usual places since
+ // the newline is already consumed.
+ if (term == '\n' && parser->heredoc_end) {
+ parser_flush_heredoc_end(parser);
+ }
+
lex_state_set(parser, PM_LEX_STATE_END);
lex_mode_pop(parser);
LEX(PM_TOKEN_STRING_END);
}
- // When we hit a newline, we need to flush any potential heredocs. Note
- // that this has to happen after we check for the terminator in case the
- // terminator is a newline character.
- if (*breakpoint == '\n') {
- if (parser->heredoc_end == NULL) {
- pm_newline_list_append(&parser->newline_list, breakpoint);
- parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- continue;
- } else {
- parser->current.end = breakpoint + 1;
- parser_flush_heredoc_end(parser);
- pm_token_buffer_flush(parser, &token_buffer);
- LEX(PM_TOKEN_STRING_CONTENT);
- }
- }
-
switch (*breakpoint) {
case '\0':
// Skip directly past the null character.
parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
+ case '\r':
+ if (peek_at(parser, breakpoint + 1) != '\n') {
+ parser->current.end = breakpoint + 1;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+ break;
+ }
+
+ // If we hit a \r\n sequence, then we need to treat it
+ // as a newline.
+ breakpoint++;
+ parser->current.end = breakpoint;
+ pm_token_buffer_escape(parser, &token_buffer);
+ token_buffer.cursor = breakpoint;
+
+ PRISM_FALLTHROUGH
+ case '\n':
+ // When we hit a newline, we need to flush any potential
+ // heredocs. Note that this has to happen after we check
+ // for the terminator in case the terminator is a
+ // newline character.
+ if (parser->heredoc_end == NULL) {
+ pm_newline_list_append(&parser->newline_list, breakpoint);
+ parser->current.end = breakpoint + 1;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+ break;
+ }
+
+ parser->current.end = breakpoint + 1;
+ parser_flush_heredoc_end(parser);
+ pm_token_buffer_flush(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
case '\\': {
// Here we hit escapes.
parser->current.end = breakpoint + 1;
@@ -9880,23 +11695,23 @@ parser_lex(pm_parser_t *parser) {
switch (peeked) {
case '\\':
- pm_token_buffer_push(&token_buffer, '\\');
+ pm_token_buffer_push_byte(&token_buffer, '\\');
parser->current.end++;
break;
case '\r':
parser->current.end++;
if (peek(parser) != '\n') {
if (!lex_mode->as.string.interpolation) {
- pm_token_buffer_push(&token_buffer, '\\');
+ pm_token_buffer_push_byte(&token_buffer, '\\');
}
- pm_token_buffer_push(&token_buffer, '\r');
+ pm_token_buffer_push_byte(&token_buffer, '\r');
break;
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
case '\n':
if (!lex_mode->as.string.interpolation) {
- pm_token_buffer_push(&token_buffer, '\\');
- pm_token_buffer_push(&token_buffer, '\n');
+ pm_token_buffer_push_byte(&token_buffer, '\\');
+ pm_token_buffer_push_byte(&token_buffer, '\n');
}
if (parser->heredoc_end) {
@@ -9915,24 +11730,23 @@ parser_lex(pm_parser_t *parser) {
break;
default:
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
- pm_token_buffer_push(&token_buffer, peeked);
+ pm_token_buffer_push_byte(&token_buffer, peeked);
parser->current.end++;
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
- pm_token_buffer_push(&token_buffer, peeked);
+ pm_token_buffer_push_byte(&token_buffer, peeked);
parser->current.end++;
} else if (lex_mode->as.string.interpolation) {
- escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+ escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
} else {
- pm_token_buffer_push(&token_buffer, '\\');
- pm_token_buffer_push(&token_buffer, peeked);
- parser->current.end++;
+ pm_token_buffer_push_byte(&token_buffer, '\\');
+ pm_token_buffer_push_escaped(&token_buffer, parser);
}
break;
}
token_buffer.cursor = parser->current.end;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
}
case '#': {
@@ -9943,7 +11757,7 @@ parser_lex(pm_parser_t *parser) {
// looked like an interpolated class or instance variable like "#@"
// but wasn't actually. In this case we'll just skip to the next
// breakpoint.
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
}
@@ -9983,27 +11797,33 @@ parser_lex(pm_parser_t *parser) {
// Now let's grab the information about the identifier off of the
// current lex mode.
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+ pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
+
+ bool line_continuation = lex_mode->as.heredoc.line_continuation;
+ lex_mode->as.heredoc.line_continuation = false;
// We'll check if we're at the end of the file. If we are, then we
// will add an error (because we weren't able to find the
// terminator) but still continue parsing so that content after the
// declaration of the heredoc can be parsed.
if (parser->current.end >= parser->end) {
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
+ pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
parser->next_start = lex_mode->as.heredoc.next_start;
parser->heredoc_end = parser->current.end;
lex_state_set(parser, PM_LEX_STATE_END);
+ lex_mode_pop(parser);
LEX(PM_TOKEN_HEREDOC_END);
}
- const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
- size_t ident_length = lex_mode->as.heredoc.ident_length;
+ const uint8_t *ident_start = heredoc_lex_mode->ident_start;
+ size_t ident_length = heredoc_lex_mode->ident_length;
// If we are immediately following a newline and we have hit the
// terminator, then we need to return the ending of the heredoc.
if (current_token_starts_line(parser)) {
const uint8_t *start = parser->current.start;
- if (start + ident_length <= parser->end) {
+
+ if (!line_continuation && (start + ident_length <= parser->end)) {
const uint8_t *newline = next_newline(start, parser->end - start);
const uint8_t *ident_end = newline;
const uint8_t *terminator_end = newline;
@@ -10021,10 +11841,7 @@ parser_lex(pm_parser_t *parser) {
const uint8_t *terminator_start = ident_end - ident_length;
const uint8_t *cursor = start;
- if (
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE
- ) {
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
cursor++;
}
@@ -10047,41 +11864,58 @@ parser_lex(pm_parser_t *parser) {
}
lex_state_set(parser, PM_LEX_STATE_END);
+ lex_mode_pop(parser);
LEX(PM_TOKEN_HEREDOC_END);
}
}
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
if (
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
- (lex_mode->as.heredoc.common_whitespace > whitespace) &&
+ heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
+ lex_mode->as.heredoc.common_whitespace != NULL &&
+ (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
peek_at(parser, start) != '\n'
) {
- lex_mode->as.heredoc.common_whitespace = whitespace;
+ *lex_mode->as.heredoc.common_whitespace = whitespace;
}
}
// Otherwise we'll be parsing string content. These are the places
// where we need to split up the content of the heredoc. We'll use
// strpbrk to find the first of these characters.
- uint8_t breakpoints[] = "\n\\#";
+ uint8_t breakpoints[] = "\r\n\\#";
- pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
+ pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
- breakpoints[2] = '\0';
+ breakpoints[3] = '\0';
}
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- pm_token_buffer_t token_buffer = { { 0 }, 0 };
- bool was_escaped_newline = false;
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+ pm_token_buffer_t token_buffer = { 0 };
+ bool was_line_continuation = false;
while (breakpoint != NULL) {
switch (*breakpoint) {
case '\0':
// Skip directly past the null character.
parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
+ case '\r':
+ parser->current.end = breakpoint + 1;
+
+ if (peek_at(parser, breakpoint + 1) != '\n') {
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+ break;
+ }
+
+ // If we hit a \r\n sequence, then we want to replace it
+ // with a single \n character in the final string.
+ breakpoint++;
+ pm_token_buffer_escape(parser, &token_buffer);
+ token_buffer.cursor = breakpoint;
+
+ PRISM_FALLTHROUGH
case '\n': {
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
parser_flush_heredoc_end(parser);
@@ -10096,7 +11930,7 @@ parser_lex(pm_parser_t *parser) {
// some leading whitespace.
const uint8_t *start = breakpoint + 1;
- if (!was_escaped_newline && (start + ident_length <= parser->end)) {
+ if (!was_line_continuation && (start + ident_length <= parser->end)) {
// We want to match the terminator starting from the end of the line in case
// there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
const uint8_t *newline = next_newline(start, parser->end - start);
@@ -10114,8 +11948,7 @@ parser_lex(pm_parser_t *parser) {
// leading whitespace if we have a - or ~ heredoc.
const uint8_t *cursor = start;
- if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
- lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
cursor++;
}
@@ -10131,37 +11964,33 @@ parser_lex(pm_parser_t *parser) {
}
}
- size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
+ size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
// If we have hit a newline that is followed by a valid
// terminator, then we need to return the content of the
// heredoc here as string content. Then, the next time a
// token is lexed, it will match again and return the
// end of the heredoc.
-
- if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
- if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
- lex_mode->as.heredoc.common_whitespace = whitespace;
+ if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
+ if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
+ *lex_mode->as.heredoc.common_whitespace = whitespace;
}
parser->current.end = breakpoint + 1;
-
- if (!was_escaped_newline) {
- pm_token_buffer_flush(parser, &token_buffer);
- LEX(PM_TOKEN_STRING_CONTENT);
- }
+ pm_token_buffer_flush(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
}
// Otherwise we hit a newline and it wasn't followed by
// a terminator, so we can continue parsing.
parser->current.end = breakpoint + 1;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
}
case '\\': {
// If we hit an escape, then we need to skip past
// however many characters the escape takes up. However
- // it's important that if \n or \r\n are escaped that we
+ // it's important that if \n or \r\n are escaped, we
// stop looping before the newline and not after the
// newline so that we can still potentially find the
// terminator of the heredoc.
@@ -10182,21 +12011,20 @@ parser_lex(pm_parser_t *parser) {
case '\r':
parser->current.end++;
if (peek(parser) != '\n') {
- pm_token_buffer_push(&token_buffer, '\\');
- pm_token_buffer_push(&token_buffer, '\r');
+ pm_token_buffer_push_byte(&token_buffer, '\\');
+ pm_token_buffer_push_byte(&token_buffer, '\r');
break;
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
case '\n':
- pm_token_buffer_push(&token_buffer, '\\');
- pm_token_buffer_push(&token_buffer, '\n');
+ pm_token_buffer_push_byte(&token_buffer, '\\');
+ pm_token_buffer_push_byte(&token_buffer, '\n');
token_buffer.cursor = parser->current.end + 1;
breakpoint = parser->current.end;
continue;
default:
- parser->current.end++;
- pm_token_buffer_push(&token_buffer, '\\');
- pm_token_buffer_push(&token_buffer, peeked);
+ pm_token_buffer_push_byte(&token_buffer, '\\');
+ pm_token_buffer_push_escaped(&token_buffer, parser);
break;
}
} else {
@@ -10204,23 +12032,45 @@ parser_lex(pm_parser_t *parser) {
case '\r':
parser->current.end++;
if (peek(parser) != '\n') {
- pm_token_buffer_push(&token_buffer, '\r');
+ pm_token_buffer_push_byte(&token_buffer, '\r');
break;
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
case '\n':
- was_escaped_newline = true;
+ // If we are in a tilde (<<~) heredoc here, we
+ // should break out of the loop and return the
+ // string content.
+ if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
+ const uint8_t *end = parser->current.end;
+
+ if (parser->heredoc_end == NULL) {
+ pm_newline_list_append(&parser->newline_list, end);
+ }
+
+ // Here we want the buffer to only
+ // include up to the backslash.
+ parser->current.end = breakpoint;
+ pm_token_buffer_flush(parser, &token_buffer);
+
+ // Now we can advance the end of the
+ // token past the newline.
+ parser->current.end = end + 1;
+ lex_mode->as.heredoc.line_continuation = true;
+ LEX(PM_TOKEN_STRING_CONTENT);
+ }
+
+ was_line_continuation = true;
token_buffer.cursor = parser->current.end + 1;
breakpoint = parser->current.end;
continue;
default:
- escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+ escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
break;
}
}
token_buffer.cursor = parser->current.end;
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
}
case '#': {
@@ -10232,7 +12082,7 @@ parser_lex(pm_parser_t *parser) {
// or instance variable like "#@" but wasn't
// actually. In this case we'll just skip to the
// next breakpoint.
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
}
@@ -10246,7 +12096,7 @@ parser_lex(pm_parser_t *parser) {
assert(false && "unreachable");
}
- was_escaped_newline = false;
+ was_line_continuation = false;
}
if (parser->current.end > parser->current.start) {
@@ -10283,8 +12133,8 @@ parser_lex(pm_parser_t *parser) {
typedef enum {
PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
PM_BINDING_POWER_STATEMENT = 2,
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while
- PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
+ PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
+ PM_BINDING_POWER_MODIFIER = 6, // if unless until while
PM_BINDING_POWER_COMPOSITION = 8, // and or
PM_BINDING_POWER_NOT = 10, // not
PM_BINDING_POWER_MATCH = 12, // => in
@@ -10338,15 +12188,15 @@ typedef struct {
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
+ // rescue
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
+
// if unless until while
[PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
[PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
[PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
[PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
- // rescue
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
-
// and or
[PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
[PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
@@ -10355,7 +12205,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
[PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
[PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
- // &&= &= ^= = >>= <<= -= %= |= += /= *= **=
+ // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
[PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
[PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
[PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
@@ -10419,7 +12269,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
[PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
[PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
[PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
- [PM_TOKEN_USTAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
+ [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
// -@
[PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
@@ -10481,22 +12331,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
}
/**
- * Returns true if the current token is any of the five given types.
- */
-static inline bool
-match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
-}
-
-/**
- * Returns true if the current token is any of the six given types.
- */
-static inline bool
-match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
-}
-
-/**
* Returns true if the current token is any of the seven given types.
*/
static inline bool
@@ -10541,19 +12375,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
}
/**
- * If the current token is any of the three given types, lex forward by one
- * token and return true. Otherwise return false.
- */
-static inline bool
-accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
- if (match3(parser, type1, type2, type3)) {
- parser_lex(parser);
- return true;
- }
- return false;
-}
-
-/**
* This function indicates that the parser expects a token in a specific
* position. For example, if you're parsing a BEGIN block, you know that a { is
* expected immediately after the keyword. In that case you would call this
@@ -10591,28 +12412,46 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
}
/**
- * This function is the same as expect2, but it expects one of three token types.
+ * A special expect1 that expects a heredoc terminator and handles popping the
+ * lex mode accordingly.
*/
static void
-expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
- if (accept3(parser, type1, type2, type3)) return;
+expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
+ if (match1(parser, PM_TOKEN_HEREDOC_END)) {
+ parser_lex(parser);
+ } else {
+ pm_parser_err_heredoc_term(parser, ident_start, ident_length);
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ }
+}
- const uint8_t *location = parser->previous.end;
- pm_parser_err(parser, location, location, diag_id);
+/**
+ * A special expect1 that attaches the error to the opening token location
+ * rather than the current position. This is useful for errors about missing
+ * closing tokens, where we want to point to the line with the opening token
+ * (e.g., `def`, `class`, `if`, `{`) rather than the end of the file.
+ */
+static void
+expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
+ if (accept1(parser, type)) return;
- parser->previous.start = location;
+ pm_parser_err(parser, opening->start, opening->end, diag_id);
+
+ parser->previous.start = parser->previous.end;
parser->previous.type = PM_TOKEN_MISSING;
}
static pm_node_t *
-parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
/**
- * This is a wrapper of parse_expression, which also checks whether the resulting node is value expression.
+ * This is a wrapper of parse_expression, which also checks whether the
+ * resulting node is a value expression.
*/
static pm_node_t *
-parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
- pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, diag_id);
+parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
pm_assert_value_expression(parser, node);
return node;
}
@@ -10632,7 +12471,6 @@ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bo
* CRuby parsers that are generated would resolve this by using a lookahead and
* potentially backtracking. We attempt to do this by just looking at the next
* token and making a decision based on that. I am not sure if this is going to
- *
* work in all cases, it may need to be refactored later. But it appears to work
* for now.
*/
@@ -10665,7 +12503,7 @@ token_begins_expression_p(pm_token_type_t type) {
case PM_TOKEN_SEMICOLON:
// The reason we need this short-circuit is because we're using the
// binding powers table to tell us if the subsequent token could
- // potentially be the start of an expression . If there _is_ a binding
+ // potentially be the start of an expression. If there _is_ a binding
// power for one of these tokens, then we should remove it from this list
// and let it be handled by the default case below.
assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
@@ -10697,14 +12535,89 @@ token_begins_expression_p(pm_token_type_t type) {
* prefixed by the * operator.
*/
static pm_node_t *
-parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
+parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
if (accept1(parser, PM_TOKEN_USTAR)) {
pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
- return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ return UP(pm_splat_node_create(parser, &operator, expression));
+ }
+
+ return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
+}
+
+static bool
+pm_node_unreference_each(const pm_node_t *node, void *data) {
+ switch (PM_NODE_TYPE(node)) {
+ /* When we are about to destroy a set of nodes that could potentially
+ * contain block exits for the current scope, we need to check if they
+ * are contained in the list of block exits and remove them if they are.
+ */
+ case PM_BREAK_NODE:
+ case PM_NEXT_NODE:
+ case PM_REDO_NODE: {
+ pm_parser_t *parser = (pm_parser_t *) data;
+ size_t index = 0;
+
+ while (index < parser->current_block_exits->size) {
+ pm_node_t *block_exit = parser->current_block_exits->nodes[index];
+
+ if (block_exit == node) {
+ if (index + 1 < parser->current_block_exits->size) {
+ memmove(
+ &parser->current_block_exits->nodes[index],
+ &parser->current_block_exits->nodes[index + 1],
+ (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
+ );
+ }
+ parser->current_block_exits->size--;
+
+ /* Note that we return true here because these nodes could
+ * have arguments that are themselves block exits. */
+ return true;
+ }
+
+ index++;
+ }
+
+ return true;
+ }
+ /* When an implicit local variable is written to or targeted, it becomes
+ * a regular, named local variable. This branch removes it from the list
+ * of implicit parameters when that happens. */
+ case PM_LOCAL_VARIABLE_READ_NODE:
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_parser_t *parser = (pm_parser_t *) data;
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
+
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
+ if (implicit_parameters->nodes[index] == node) {
+ /* If the node is not the last one in the list, we need to
+ * shift the remaining nodes down to fill the gap. This is
+ * extremely unlikely to happen. */
+ if (index != implicit_parameters->size - 1) {
+ memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
+ }
+
+ implicit_parameters->size--;
+ break;
+ }
+ }
+
+ return false;
+ }
+ default:
+ return true;
}
+}
- return parse_value_expression(parser, binding_power, accepts_command_call, diag_id);
+/**
+ * When we are about to destroy a set of nodes that could potentially be
+ * referenced by one or more lists on the parser, then remove them from those
+ * lists so we don't get a use-after-free.
+ */
+static void
+pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
+ pm_visit_node(node, pm_node_unreference_each, parser);
}
/**
@@ -10719,7 +12632,7 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
// append an =.
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
size_t length = constant->length;
- uint8_t *name = calloc(length + 1, sizeof(uint8_t));
+ uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
if (name == NULL) return;
memcpy(name, constant->start, length);
@@ -10732,62 +12645,137 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
}
/**
+ * Certain expressions are not targetable, but in order to provide a better
+ * experience we give a specific error message. In order to maintain as much
+ * information in the tree as possible, we replace them with local variable
+ * targets.
+ */
+static pm_node_t *
+parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
+ switch (PM_NODE_TYPE(target)) {
+ case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
+ case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
+ case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
+ case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
+ case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
+ case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
+ case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
+ default: break;
+ }
+
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+ pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
+
+ pm_node_destroy(parser, target);
+ return UP(result);
+}
+
+/**
* Convert the given node into a valid target node.
+ *
+ * @param multiple Whether or not this target is part of a larger set of
+ * targets. If it is, then the &. operator is not allowed.
+ * @param splat_parent Whether or not this target is a child of a splat
+ * target. If it is, then fewer patterns are allowed.
*/
static pm_node_t *
-parse_target(pm_parser_t *parser, pm_node_t *target) {
+parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
switch (PM_NODE_TYPE(target)) {
case PM_MISSING_NODE:
return target;
+ case PM_SOURCE_ENCODING_NODE:
+ case PM_FALSE_NODE:
+ case PM_SOURCE_FILE_NODE:
+ case PM_SOURCE_LINE_NODE:
+ case PM_NIL_NODE:
+ case PM_SELF_NODE:
+ case PM_TRUE_NODE: {
+ // In these special cases, we have specific error messages and we
+ // will replace them with local variable targets.
+ return parse_unwriteable_target(parser, target);
+ }
case PM_CLASS_VARIABLE_READ_NODE:
assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
target->type = PM_CLASS_VARIABLE_TARGET_NODE;
return target;
case PM_CONSTANT_PATH_NODE:
+ if (context_def_p(parser)) {
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
+ }
+
assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
target->type = PM_CONSTANT_PATH_TARGET_NODE;
+
return target;
case PM_CONSTANT_READ_NODE:
+ if (context_def_p(parser)) {
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
+ }
+
assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
target->type = PM_CONSTANT_TARGET_NODE;
+
return target;
case PM_BACK_REFERENCE_READ_NODE:
case PM_NUMBERED_REFERENCE_READ_NODE:
- pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
return target;
case PM_GLOBAL_VARIABLE_READ_NODE:
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
return target;
- case PM_LOCAL_VARIABLE_READ_NODE:
+ case PM_LOCAL_VARIABLE_READ_NODE: {
if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
- PM_PARSER_ERR_NODE_FORMAT(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
- } else {
- assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
- target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
+ pm_node_unreference(parser, target);
}
+ const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
+ uint32_t name = cast->name;
+ uint32_t depth = cast->depth;
+ pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
+
+ assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
+ target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
+
return target;
+ }
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+ pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
+
+ pm_node_unreference(parser, target);
+ pm_node_destroy(parser, target);
+
+ return node;
+ }
case PM_INSTANCE_VARIABLE_READ_NODE:
assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
return target;
case PM_MULTI_TARGET_NODE:
+ if (splat_parent) {
+ // Multi target is not accepted in all positions. If this is one
+ // of them, then we need to add an error.
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ }
+
return target;
case PM_SPLAT_NODE: {
pm_splat_node_t *splat = (pm_splat_node_t *) target;
if (splat->expression != NULL) {
- splat->expression = parse_target(parser, splat->expression);
+ splat->expression = parse_target(parser, splat->expression, multiple, true);
}
- return (pm_node_t *) splat;
+ return UP(splat);
}
case PM_CALL_NODE: {
pm_call_node_t *call = (pm_call_node_t *) target;
// If we have no arguments to the call node and we need this to be a
- // target then this is either a method call or a local variable write.
+ // target then this is either a method call or a local variable
+ // write.
if (
(call->message_loc.start != NULL) &&
(call->message_loc.end[-1] != '!') &&
@@ -10806,36 +12794,32 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
// When it was parsed in the prefix position, foo was seen as a
// method call with no receiver and no arguments. Now we have an
// =, so we know it's a local variable write.
- const pm_location_t message = call->message_loc;
+ const pm_location_t message_loc = call->message_loc;
- pm_parser_local_add_location(parser, message.start, message.end);
+ pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
pm_node_destroy(parser, target);
- uint32_t depth = 0;
- const pm_token_t name = { .type = PM_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
- target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name, depth);
-
- assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
- target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
-
- pm_refute_numbered_parameter(parser, message.start, message.end);
- return target;
+ return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
}
- if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
+ if (peek_at(parser, call->message_loc.start) == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
+ if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
+ pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
+ }
+
parse_write_name(parser, &call->name);
- return (pm_node_t *) pm_call_target_node_create(parser, call);
+ return UP(pm_call_target_node_create(parser, call));
}
}
// If there is no call operator and the message is "[]" then this is
// an aref expression, and we can transform it into an aset
// expression.
- if (pm_call_node_index_p(call)) {
- return (pm_node_t *) pm_index_target_node_create(parser, call);
+ if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
+ return UP(pm_index_target_node_create(parser, call));
}
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
default:
// In this case we have a node that we don't know how to convert
// into a target. We need to treat it as an error. For now, we'll
@@ -10850,10 +12834,11 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
* assignment.
*/
static pm_node_t *
-parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
- pm_node_t *result = parse_target(parser, target);
+parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
+ pm_node_t *result = parse_target(parser, target, multiple, false);
- // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
+ // Ensure that the targets are followed by one of: an =, an 'in' (when
+ // parsing a for-loop index), or a ')' (when inside parentheses).
if (
!match1(parser, PM_TOKEN_EQUAL) &&
!(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
@@ -10866,6 +12851,21 @@ parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
}
/**
+ * Potentially wrap a constant write node in a shareable constant node depending
+ * on the current state.
+ */
+static pm_node_t *
+parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
+ pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
+
+ if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
+ return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
+ }
+
+ return write;
+}
+
+/**
* Convert the given node into a valid write node.
*/
static pm_node_t *
@@ -10877,46 +12877,72 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
case PM_CLASS_VARIABLE_READ_NODE: {
pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
pm_node_destroy(parser, target);
- return (pm_node_t *) node;
+ return UP(node);
+ }
+ case PM_CONSTANT_PATH_NODE: {
+ pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
+
+ if (context_def_p(parser)) {
+ pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
+ }
+
+ return parse_shareable_constant_write(parser, node);
}
- case PM_CONSTANT_PATH_NODE:
- return (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
case PM_CONSTANT_READ_NODE: {
- pm_constant_write_node_t *node = pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
+ pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
+
if (context_def_p(parser)) {
- pm_parser_err_node(parser, (pm_node_t *) node, PM_ERR_WRITE_TARGET_IN_METHOD);
+ pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
}
+
pm_node_destroy(parser, target);
- return (pm_node_t *) node;
+ return parse_shareable_constant_write(parser, node);
}
case PM_BACK_REFERENCE_READ_NODE:
case PM_NUMBERED_REFERENCE_READ_NODE:
- pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
- /* fallthrough */
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
+ PRISM_FALLTHROUGH
case PM_GLOBAL_VARIABLE_READ_NODE: {
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
pm_node_destroy(parser, target);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_LOCAL_VARIABLE_READ_NODE: {
- pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
- pm_constant_id_t constant_id = local_read->name;
+ pm_constant_id_t name = local_read->name;
+ pm_location_t name_loc = target->location;
+
uint32_t depth = local_read->depth;
+ pm_scope_t *scope = pm_parser_scope_find(parser, depth);
- pm_location_t name_loc = target->location;
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
+ pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
+ PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
+ pm_node_unreference(parser, target);
+ }
+
+ pm_locals_unread(&scope->locals, name);
+ pm_node_destroy(parser, target);
+
+ return UP(pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator));
+ }
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+ pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
+
+ pm_node_unreference(parser, target);
pm_node_destroy(parser, target);
- return (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, depth, value, &name_loc, operator);
+ return node;
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
- pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
+ pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
pm_node_destroy(parser, target);
return write_node;
}
case PM_MULTI_TARGET_NODE:
- return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
+ return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
case PM_SPLAT_NODE: {
pm_splat_node_t *splat = (pm_splat_node_t *) target;
@@ -10925,9 +12951,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
}
pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
+ pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
- return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
+ return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
}
case PM_CALL_NODE: {
pm_call_node_t *call = (pm_call_node_t *) target;
@@ -10955,17 +12981,17 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
// =, so we know it's a local variable write.
const pm_location_t message = call->message_loc;
- pm_parser_local_add_location(parser, message.start, message.end);
+ pm_parser_local_add_location(parser, message.start, message.end, 0);
pm_node_destroy(parser, target);
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
- target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
+ target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator));
pm_refute_numbered_parameter(parser, message.start, message.end);
return target;
}
- if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
+ if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
// When we get here, we have a method call, because it was
// previously marked as a method call but now we have an =. This
// looks like:
@@ -10981,17 +13007,19 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
pm_arguments_node_arguments_append(arguments, value);
call->base.location.end = arguments->base.location.end;
+ call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
parse_write_name(parser, &call->name);
- pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE);
- return (pm_node_t *) call;
+ pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+
+ return UP(call);
}
}
// If there is no call operator and the message is "[]" then this is
// an aref expression, and we can transform it into an aset
// expression.
- if (pm_call_node_index_p(call)) {
+ if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
if (call->arguments == NULL) {
call->arguments = pm_arguments_node_create(parser);
}
@@ -11001,18 +13029,29 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
// Replace the name with "[]=".
call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
- pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE);
+ call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+ // Ensure that the arguments for []= don't contain keywords
+ pm_index_arguments_check(parser, call->arguments, call->block);
+ pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+
return target;
}
- // If there are arguments on the call node, then it can't be a method
- // call ending with = or a local variable write, so it must be a
- // syntax error. In this case we'll fall through to our default
+ // If there are arguments on the call node, then it can't be a
+ // method call ending with = or a local variable write, so it must
+ // be a syntax error. In this case we'll fall through to our default
// handling. We need to free the value that we parsed because there
// is no way for us to attach it to the tree at this point.
+ //
+ // Since it is possible for the value to contain an implicit
+ // parameter somewhere in its subtree, we need to walk it and remove
+ // any implicit parameters from the list of implicit parameters for
+ // the current scope.
+ pm_node_unreference(parser, value);
pm_node_destroy(parser, value);
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
default:
// In this case we have a node that we don't know how to convert into a
// target. We need to treat it as an error. For now, we'll mark it as an
@@ -11023,6 +13062,32 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
}
/**
+ * Certain expressions are not writable, but in order to provide a better
+ * experience we give a specific error message. In order to maintain as much
+ * information in the tree as possible, we replace them with local variable
+ * writes.
+ */
+static pm_node_t *
+parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
+ switch (PM_NODE_TYPE(target)) {
+ case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
+ case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
+ case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
+ case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
+ case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
+ case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
+ case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
+ default: break;
+ }
+
+ pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
+ pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
+
+ pm_node_destroy(parser, target);
+ return UP(result);
+}
+
+/**
* Parse a list of targets for assignment. This is used in the case of a for
* loop or a multi-assignment. For example, in the following code:
*
@@ -11033,11 +13098,11 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
* target node or a multi-target node.
*/
static pm_node_t *
-parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
+parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
while (accept1(parser, PM_TOKEN_COMMA)) {
if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -11052,28 +13117,35 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
- name = parse_target(parser, name);
+ name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ name = parse_target(parser, name, true, true);
}
- pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
+ pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
pm_multi_target_node_targets_append(parser, result, splat);
has_rest = true;
+ } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+ context_push(parser, PM_CONTEXT_MULTI_TARGET);
+ pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+ target = parse_target(parser, target, true, false);
+
+ pm_multi_target_node_targets_append(parser, result, target);
+ context_pop(parser);
} else if (token_begins_expression_p(parser->current.type)) {
- pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
- target = parse_target(parser, target);
+ pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+ target = parse_target(parser, target, true, false);
pm_multi_target_node_targets_append(parser, result, target);
} else if (!match1(parser, PM_TOKEN_EOF)) {
// If we get here, then we have a trailing , in a multi target node.
- // We'll set the implicit rest flag to indicate this.
- pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+ // We'll add an implicit rest node to represent this.
+ pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
pm_multi_target_node_targets_append(parser, result, rest);
break;
}
}
- return (pm_node_t *) result;
+ return UP(result);
}
/**
@@ -11081,8 +13153,9 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
* assignment.
*/
static pm_node_t *
-parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
- pm_node_t *result = parse_targets(parser, first_target, binding_power);
+parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
+ pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
+ accept1(parser, PM_TOKEN_NEWLINE);
// Ensure that we have either an = or a ) after the targets.
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
@@ -11096,9 +13169,9 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
* Parse a list of statements separated by newlines or semicolons.
*/
static pm_statements_node_t *
-parse_statements(pm_parser_t *parser, pm_context_t context) {
- // First, skip past any optional terminators that might be at the beginning of
- // the statements.
+parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
+ // First, skip past any optional terminators that might be at the beginning
+ // of the statements.
while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
// If we have a terminator, then we can just return NULL.
@@ -11111,23 +13184,23 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
context_push(parser, context);
while (true) {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
- pm_statements_node_body_append(statements, node);
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+ pm_statements_node_body_append(parser, statements, node, true);
- // If we're recovering from a syntax error, then we need to stop parsing the
- // statements now.
+ // If we're recovering from a syntax error, then we need to stop parsing
+ // the statements now.
if (parser->recovering) {
- // If this is the level of context where the recovery has happened, then
- // we can mark the parser as done recovering.
+ // If this is the level of context where the recovery has happened,
+ // then we can mark the parser as done recovering.
if (context_terminator(context, &parser->current)) parser->recovering = false;
break;
}
- // If we have a terminator, then we will parse all consequtive terminators
- // and then continue parsing the statements list.
+ // If we have a terminator, then we will parse all consecutive
+ // terminators and then continue parsing the statements list.
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- // If we have a terminator, then we will continue parsing the statements
- // list.
+ // If we have a terminator, then we will continue parsing the
+ // statements list.
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
if (context_terminator(context, &parser->current)) break;
@@ -11135,40 +13208,112 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
continue;
}
- // At this point we have a list of statements that are not terminated by a
- // newline or semicolon. At this point we need to check if we're at the end
- // of the statements list. If we are, then we should break out of the loop.
+ // At this point we have a list of statements that are not terminated by
+ // a newline or semicolon, so we need to check whether we have reached
+ // the end of the statements list. If we have, then we should break out
+ // of the loop.
if (context_terminator(context, &parser->current)) break;
// At this point, we have a syntax error, because the statement was not
// terminated by a newline or semicolon, and we're not at the end of the
- // statements list. Ideally we should scan forward to determine if we should
- // insert a missing terminator or break out of parsing the statements list
- // at this point.
+ // statements list. Ideally we should scan forward to determine if we
+ // should insert a missing terminator or break out of parsing the
+ // statements list at this point.
//
- // We don't have that yet, so instead we'll do a more naive approach. If we
- // were unable to parse an expression, then we will skip past this token and
- // continue parsing the statements list. Otherwise we'll add an error and
- // continue parsing the statements list.
+ // We don't have that yet, so instead we'll do a more naive approach. If
+ // we were unable to parse an expression, then we will skip past this
+ // token and continue parsing the statements list. Otherwise we'll add
+ // an error and continue parsing the statements list.
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
parser_lex(parser);
+ // If we are at the end of the file, then we need to stop parsing
+ // the statements entirely at this point. Mark the parser as
+ // recovering, as we know that EOF closes the top-level context, and
+ // then break out of the loop.
+ if (match1(parser, PM_TOKEN_EOF)) {
+ parser->recovering = true;
+ break;
+ }
+
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
if (context_terminator(context, &parser->current)) break;
- } else {
- expect1(parser, PM_TOKEN_NEWLINE, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
+ } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
+ // This is an inlined version of expect1 because the error that we
+ // want to add has varargs. If this happens again, we should
+ // probably extract a helper function.
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
}
}
context_pop(parser);
+ bool last_value = true;
+ switch (context) {
+ case PM_CONTEXT_BEGIN_ENSURE:
+ case PM_CONTEXT_DEF_ENSURE:
+ last_value = false;
+ break;
+ default:
+ break;
+ }
+ pm_void_statements_check(parser, statements, last_value);
+
return statements;
}
/**
- * Parse all of the elements of a hash. returns true if a double splat was found.
+ * Add a node to a set of static literals that holds a set of hash keys. If the
+ * node is a duplicate, then add an appropriate warning.
+ */
+static void
+pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
+
+ if (duplicated != NULL) {
+ pm_buffer_t buffer = { 0 };
+ pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
+
+ pm_diagnostic_list_append_format(
+ &parser->warning_list,
+ duplicated->location.start,
+ duplicated->location.end,
+ PM_WARN_DUPLICATED_HASH_KEY,
+ (int) pm_buffer_length(&buffer),
+ pm_buffer_value(&buffer),
+ pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
+ );
+
+ pm_buffer_free(&buffer);
+ }
+}
+
+/**
+ * Add a node to a set of static literals that holds the conditions of when
+ * clauses. If the node is a duplicate, then add an appropriate warning.
+ */
+static void
+pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
+ pm_node_t *previous;
+
+ if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
+ pm_diagnostic_list_append_format(
+ &parser->warning_list,
+ node->location.start,
+ node->location.end,
+ PM_WARN_DUPLICATED_WHEN_CLAUSE,
+ pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
+ pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
+ );
+ }
+}
+
+/**
+ * Parse all of the elements of a hash. Return true if a double splat was found.
*/
static bool
-parse_assocs(pm_parser_t *parser, pm_node_t *node) {
+parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
bool contains_keyword_splat = false;
@@ -11181,13 +13326,20 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
pm_token_t operator = parser->previous;
pm_node_t *value = NULL;
- if (token_begins_expression_p(parser->current.type)) {
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
- } else if (pm_parser_local_depth(parser, &operator) == -1) {
- pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
+ if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
+ // If we're about to parse a nested hash that is being
+ // pushed into this hash directly with **, then we want the
+ // inner hash to share the static literals with the outer
+ // hash.
+ parser->current_hash_keys = literals;
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+ } else if (token_begins_expression_p(parser->current.type)) {
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+ } else {
+ pm_parser_scope_forwarding_keywords_check(parser, &operator);
}
- element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
+ element = UP(pm_assoc_splat_node_create(parser, value, &operator));
contains_keyword_splat = true;
break;
}
@@ -11195,38 +13347,54 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
pm_token_t label = parser->current;
parser_lex(parser);
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
+ pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
+ pm_hash_key_static_literals_add(parser, literals, key);
+
pm_token_t operator = not_provided(parser);
pm_node_t *value = NULL;
if (token_begins_expression_p(parser->current.type)) {
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
} else {
if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
- value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
+ value = UP(pm_constant_read_node_create(parser, &constant));
} else {
- int depth = pm_parser_local_depth(parser, &((pm_token_t) { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 }));
+ int depth = -1;
pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
+ if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
+ } else {
+ depth = pm_parser_local_depth(parser, &identifier);
+ }
+
if (depth == -1) {
- value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
+ value = UP(pm_call_node_variable_call_create(parser, &identifier));
} else {
- value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
+ value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
}
}
value->location.end++;
- value = (pm_node_t *) pm_implicit_node_create(parser, value);
+ value = UP(pm_implicit_node_create(parser, value));
}
- element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ element = UP(pm_assoc_node_create(parser, key, &operator, value));
break;
}
default: {
- pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_KEY);
- pm_token_t operator;
+ pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
+ // Hash keys that are strings are automatically frozen. We will
+ // mark that here.
+ if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
+ pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+ }
+
+ pm_hash_key_static_literals_add(parser, literals, key);
+
+ pm_token_t operator;
if (pm_symbol_node_label_p(key)) {
operator = not_provided(parser);
} else {
@@ -11234,8 +13402,8 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
operator = parser->previous;
}
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
- element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ element = UP(pm_assoc_node_create(parser, key, &operator, value));
break;
}
}
@@ -11260,9 +13428,34 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
// Otherwise by default we will exit out of this loop.
break;
}
+
return contains_keyword_splat;
}
+static inline bool
+argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
+ if (pm_symbol_node_label_p(argument)) {
+ return true;
+ }
+
+ switch (PM_NODE_TYPE(argument)) {
+ case PM_CALL_NODE: {
+ pm_call_node_t *cast = (pm_call_node_t *) argument;
+ if (cast->opening_loc.start == NULL && cast->arguments != NULL) {
+ if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
+ return false;
+ }
+ if (cast->block != NULL) {
+ return false;
+ }
+ }
+ break;
+ }
+ default: break;
+ }
+ return accept1(parser, PM_TOKEN_EQUAL_GREATER);
+}
+
/**
* Append an argument to a list of arguments.
*/
@@ -11279,11 +13472,11 @@ parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t
* Parse a list of arguments.
*/
static void
-parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator) {
+parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
- // First we need to check if the next token is one that could be the start of
- // an argument. If it's not, then we can just return.
+ // First we need to check if the next token is one that could be the start
+ // of an argument. If it's not, then we can just return.
if (
match2(parser, terminator, PM_TOKEN_EOF) ||
(binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
@@ -11298,9 +13491,6 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
bool parsed_forwarding_arguments = false;
while (!match1(parser, PM_TOKEN_EOF)) {
- if (parsed_block_argument) {
- pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
- }
if (parsed_forwarding_arguments) {
pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
}
@@ -11315,14 +13505,20 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
}
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
- argument = (pm_node_t *) hash;
+ argument = UP(hash);
+
+ pm_static_literals_t hash_keys = { 0 };
+ bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
- bool contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
- parsed_bare_hash = true;
parse_arguments_append(parser, arguments, argument);
- if (contains_keyword_splat) {
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
- }
+
+ pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+ pm_node_flag_set(UP(arguments->arguments), flags);
+
+ pm_static_literals_free(&hash_keys);
+ parsed_bare_hash = true;
+
break;
}
case PM_TOKEN_UAMPERSAND: {
@@ -11331,24 +13527,22 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
pm_node_t *expression = NULL;
if (token_begins_expression_p(parser->current.type)) {
- expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
} else {
- if (pm_parser_local_depth(parser, &operator) == -1) {
- // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
- pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
- if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
- }
- }
+ pm_parser_scope_forwarding_block_check(parser, &operator);
}
- argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
+ argument = UP(pm_block_argument_node_create(parser, &operator, expression));
if (parsed_block_argument) {
parse_arguments_append(parser, arguments, argument);
} else {
arguments->block = argument;
}
+ if (match1(parser, PM_TOKEN_COMMA)) {
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
+ }
+
parsed_block_argument = true;
break;
}
@@ -11357,19 +13551,19 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
pm_token_t operator = parser->previous;
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
+ argument = UP(pm_splat_node_create(parser, &operator, NULL));
+ if (parsed_bare_hash) {
+ pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
}
-
- argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
} else {
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
if (parsed_bare_hash) {
pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
}
- argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ argument = UP(pm_splat_node_create(parser, &operator, expression));
}
parse_arguments_append(parser, arguments, argument);
@@ -11380,35 +13574,46 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
parser_lex(parser);
if (token_begins_expression_p(parser->current.type)) {
- // If the token begins an expression then this ... was not actually
- // argument forwarding but was instead a range.
+ // If the token begins an expression then this ... was
+ // not actually argument forwarding but was instead a
+ // range.
pm_token_t operator = parser->previous;
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
- } else {
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+
+ // If we parse a range, we need to validate that we
+ // didn't accidentally violate the nonassoc rules of the
+ // ... operator.
+ if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
+ pm_range_node_t *range = (pm_range_node_t *) right;
+ pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
}
+
+ argument = UP(pm_range_node_create(parser, NULL, &operator, right));
+ } else {
+ pm_parser_scope_forwarding_all_check(parser, &parser->previous);
if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
}
- argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
+ argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
parse_arguments_append(parser, arguments, argument);
+ pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
arguments->has_forwarding = true;
parsed_forwarding_arguments = true;
break;
}
}
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
default: {
if (argument == NULL) {
- argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
+ argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
}
+ bool contains_keywords = false;
bool contains_keyword_splat = false;
- if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+
+ if (argument_allowed_for_bare_hash(parser, argument)){
if (parsed_bare_hash) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
}
@@ -11421,32 +13626,38 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
}
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
+ contains_keywords = true;
- // Finish parsing the one we are part way through
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
+ // Create the set of static literals for this hash.
+ pm_static_literals_t hash_keys = { 0 };
+ pm_hash_key_static_literals_add(parser, &hash_keys, argument);
+
+ // Finish parsing the one we are part way through.
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ argument = UP(pm_assoc_node_create(parser, argument, &operator, value));
- argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
pm_keyword_hash_node_elements_append(bare_hash, argument);
- argument = (pm_node_t *) bare_hash;
+ argument = UP(bare_hash);
// Then parse more if we have a comma
if (accept1(parser, PM_TOKEN_COMMA) && (
token_begins_expression_p(parser->current.type) ||
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
)) {
- contains_keyword_splat = parse_assocs(parser, (pm_node_t *) bare_hash);
+ contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
}
+ pm_static_literals_free(&hash_keys);
parsed_bare_hash = true;
- } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
- // TODO: Could we solve this with binding powers instead?
- pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
}
parse_arguments_append(parser, arguments, argument);
- if (contains_keyword_splat) {
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
- }
+
+ pm_node_flags_t flags = 0;
+ if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+ pm_node_flag_set(UP(arguments->arguments), flags);
+
break;
}
}
@@ -11456,23 +13667,43 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
// If parsing the argument failed, we need to stop parsing arguments.
if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
- // If the terminator of these arguments is not EOF, then we have a specific
- // token we're looking for. In that case we can accept a newline here
- // because it is not functioning as a statement terminator.
- if (terminator != PM_TOKEN_EOF) accept1(parser, PM_TOKEN_NEWLINE);
+ // If the terminator of these arguments is not EOF, then we have a
+ // specific token we're looking for. In that case we can accept a
+ // newline here because it is not functioning as a statement terminator.
+ bool accepted_newline = false;
+ if (terminator != PM_TOKEN_EOF) {
+ accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
+ }
if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
- // If we previously were on a comma and we just parsed a bare hash, then
- // we want to continue parsing arguments. This is because the comma was
- // grabbed up by the hash parser.
+ // If we previously were on a comma and we just parsed a bare hash,
+ // then we want to continue parsing arguments. This is because the
+ // comma was grabbed up by the hash parser.
+ } else if (accept1(parser, PM_TOKEN_COMMA)) {
+ // If there was a comma, then we need to check if we also accepted a
+ // newline. If we did, then this is a syntax error.
+ if (accepted_newline) {
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+ }
+
+ // If this is a command call and an argument takes a block,
+ // there can be no further arguments. For example,
+ // `foo(bar 1 do end, 2)` should be rejected.
+ if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
+ pm_call_node_t *call = (pm_call_node_t *) argument;
+ if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+ break;
+ }
+ }
} else {
- // If there is no comma at the end of the argument list then we're done
- // parsing arguments and can break out of this loop.
- if (!accept1(parser, PM_TOKEN_COMMA)) break;
+ // If there is no comma at the end of the argument list then we're
+ // done parsing arguments and can break out of this loop.
+ break;
}
- // If we hit the terminator, then that means we have a trailing comma so we
- // can accept that output as well.
+ // If we hit the terminator, then that means we have a trailing comma so
+ // we can accept that output as well.
if (match1(parser, terminator)) break;
}
}
@@ -11502,42 +13733,43 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
// commas, so here we'll assume this is a mistake of the user not
// knowing it's not allowed here.
if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+ param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
pm_multi_target_node_targets_append(parser, node, param);
pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
break;
}
if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
- param = (pm_node_t *) parse_required_destructured_parameter(parser);
+ param = UP(parse_required_destructured_parameter(parser));
} else if (accept1(parser, PM_TOKEN_USTAR)) {
pm_token_t star = parser->previous;
pm_node_t *value = NULL;
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
pm_token_t name = parser->previous;
- value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+ value = UP(pm_required_parameter_node_create(parser, &name));
if (pm_parser_parameter_name_check(parser, &name)) {
pm_node_flag_set_repeated_parameter(value);
}
- pm_parser_local_add_token(parser, &name);
+ pm_parser_local_add_token(parser, &name, 1);
}
- param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
+ param = UP(pm_splat_node_create(parser, &star, value));
} else {
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
pm_token_t name = parser->previous;
- param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+ param = UP(pm_required_parameter_node_create(parser, &name));
if (pm_parser_parameter_name_check(parser, &name)) {
pm_node_flag_set_repeated_parameter(param);
}
- pm_parser_local_add_token(parser, &name);
+ pm_parser_local_add_token(parser, &name, 1);
}
pm_multi_target_node_targets_append(parser, node, param);
} while (accept1(parser, PM_TOKEN_COMMA));
+ accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
pm_multi_target_node_closing_set(node, &parser->previous);
@@ -11558,7 +13790,6 @@ typedef enum {
PM_PARAMETERS_ORDER_OPTIONAL,
PM_PARAMETERS_ORDER_NAMED,
PM_PARAMETERS_ORDER_NONE,
-
} pm_parameters_order_t;
/**
@@ -11583,31 +13814,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
* Check if current parameter follows valid parameters ordering. If not it adds
* an error to the list without stopping the parsing, otherwise sets the
* parameters state to the one corresponding to the current parameter.
+ *
+ * It returns true if it was successful, and false otherwise.
*/
-static void
+static bool
update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
pm_parameters_order_t state = parameters_ordering[token->type];
- if (state == PM_PARAMETERS_NO_CHANGE) return;
+ if (state == PM_PARAMETERS_NO_CHANGE) return true;
// If we see another ordered argument after an optional argument
- // we only continue parsing ordered arguments until we stop seeing ordered arguments
+ // we only continue parsing ordered arguments until we stop seeing ordered arguments.
if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
*current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
- return;
+ return true;
} else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
- return;
+ return true;
}
if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
- }
-
- if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
+ return false;
+ } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
+ pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
+ return false;
+ } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
// We know what transition we failed on, so we can provide a better error here.
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
- } else if (state < *current) {
- *current = state;
+ return false;
}
+
+ if (state < *current) *current = state;
+ return true;
}
/**
@@ -11619,19 +13856,23 @@ parse_parameters(
pm_binding_power_t binding_power,
bool uses_parentheses,
bool allows_trailing_comma,
- bool allows_forwarding_parameters
+ bool allows_forwarding_parameters,
+ bool accepts_blocks_in_defaults,
+ bool in_block,
+ uint16_t depth
) {
- pm_parameters_node_t *params = pm_parameters_node_create(parser);
- bool looping = true;
-
pm_do_loop_stack_push(parser, false);
+
+ pm_parameters_node_t *params = pm_parameters_node_create(parser);
pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
- do {
+ while (true) {
+ bool parsing = true;
+
switch (parser->current.type) {
case PM_TOKEN_PARENTHESIS_LEFT: {
update_parameter_state(parser, &parser->current, &order);
- pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
+ pm_node_t *param = UP(parse_required_destructured_parameter(parser));
if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
pm_parameters_node_requireds_append(params, param);
@@ -11652,24 +13893,21 @@ parse_parameters(
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
name = parser->previous;
repeated = pm_parser_parameter_name_check(parser, &name);
- pm_parser_local_add_token(parser, &name);
+ pm_parser_local_add_token(parser, &name, 1);
} else {
name = not_provided(parser);
-
- if (allows_forwarding_parameters) {
- pm_parser_local_add_token(parser, &operator);
- }
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
}
pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
if (params->block == NULL) {
pm_parameters_node_block_set(params, param);
} else {
- pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
+ pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_BLOCK_MULTI);
+ pm_parameters_node_posts_append(params, UP(param));
}
break;
@@ -11679,29 +13917,22 @@ parse_parameters(
pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
}
- if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
- update_parameter_state(parser, &parser->current, &order);
- parser_lex(parser);
+ bool succeeded = update_parameter_state(parser, &parser->current, &order);
+ parser_lex(parser);
- if (allows_forwarding_parameters) {
- pm_parser_local_add_token(parser, &parser->previous);
- }
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
+ pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
- pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
- if (params->keyword_rest != NULL) {
- // If we already have a keyword rest parameter, then we replace it with the
- // forwarding parameter and move the keyword rest parameter to the posts list.
- pm_node_t *keyword_rest = params->keyword_rest;
- pm_parameters_node_posts_append(params, keyword_rest);
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
- params->keyword_rest = NULL;
- }
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
- } else {
- update_parameter_state(parser, &parser->current, &order);
- parser_lex(parser);
+ if (params->keyword_rest != NULL) {
+ // If we already have a keyword rest parameter, then we replace it with the
+ // forwarding parameter and move the keyword rest parameter to the posts list.
+ pm_node_t *keyword_rest = params->keyword_rest;
+ pm_parameters_node_posts_append(params, keyword_rest);
+ if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
+ params->keyword_rest = NULL;
}
+ pm_parameters_node_keyword_rest_set(params, UP(param));
break;
}
case PM_TOKEN_CLASS_VARIABLE:
@@ -11738,81 +13969,107 @@ parse_parameters(
pm_token_t name = parser->previous;
bool repeated = pm_parser_parameter_name_check(parser, &name);
- pm_parser_local_add_token(parser, &name);
+ pm_parser_local_add_token(parser, &name, 1);
- if (accept1(parser, PM_TOKEN_EQUAL)) {
- pm_token_t operator = parser->previous;
+ if (match1(parser, PM_TOKEN_EQUAL)) {
+ pm_token_t operator = parser->current;
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
+ parser_lex(parser);
+
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
+ uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
- pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
- pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
+ if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
+ pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
+ if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
+
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
pm_parameters_node_optionals_append(params, param);
- pm_parser_current_param_name_restore(parser, saved_param_name);
+ // If the value of the parameter increased the number of
+ // reads of that parameter, then we need to report an error
+ // because we have a circular definition.
+ if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
+ }
+
context_pop(parser);
// If parsing the value of the parameter resulted in error recovery,
// then we can put a missing node in its place and stop parsing the
// parameters entirely now.
if (parser->recovering) {
- looping = false;
+ parsing = false;
break;
}
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
- pm_parameters_node_requireds_append(params, (pm_node_t *) param);
+ pm_parameters_node_requireds_append(params, UP(param));
} else {
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
+ pm_parameters_node_posts_append(params, UP(param));
}
break;
}
case PM_TOKEN_LABEL: {
- if (!uses_parentheses) parser->in_keyword_arg = true;
+ if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
update_parameter_state(parser, &parser->current, &order);
+
+ context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
parser_lex(parser);
pm_token_t name = parser->previous;
pm_token_t local = name;
local.end -= 1;
+ if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
+ pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+ } else if (local.end[-1] == '!' || local.end[-1] == '?') {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
+ }
+
bool repeated = pm_parser_parameter_name_check(parser, &local);
- pm_parser_local_add_token(parser, &local);
+ pm_parser_local_add_token(parser, &local, 1);
switch (parser->current.type) {
case PM_TOKEN_COMMA:
case PM_TOKEN_PARENTHESIS_RIGHT:
case PM_TOKEN_PIPE: {
- pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ context_pop(parser);
+
+ pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
+
pm_parameters_node_keywords_append(params, param);
break;
}
case PM_TOKEN_SEMICOLON:
case PM_TOKEN_NEWLINE: {
+ context_pop(parser);
+
if (uses_parentheses) {
- looping = false;
+ parsing = false;
break;
}
- pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
+
pm_parameters_node_keywords_append(params, param);
break;
}
@@ -11820,30 +14077,35 @@ parse_parameters(
pm_node_t *param;
if (token_begins_expression_p(parser->current.type)) {
- context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
+ uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
- pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
- pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
+ if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
+ pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
+ if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
- pm_parser_current_param_name_restore(parser, saved_param_name);
- context_pop(parser);
+ if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
+ }
- param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
+ param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
}
else {
- param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ param = UP(pm_required_keyword_parameter_node_create(parser, &name));
}
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
+
+ context_pop(parser);
pm_parameters_node_keywords_append(params, param);
// If parsing the value of the parameter resulted in error recovery,
// then we can put a missing node in its place and stop parsing the
// parameters entirely now.
if (parser->recovering) {
- looping = false;
+ parsing = false;
break;
}
}
@@ -11860,22 +14122,21 @@ parse_parameters(
pm_token_t operator = parser->previous;
pm_token_t name;
bool repeated = false;
+
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
name = parser->previous;
repeated = pm_parser_parameter_name_check(parser, &name);
- pm_parser_local_add_token(parser, &name);
+ pm_parser_local_add_token(parser, &name, 1);
} else {
name = not_provided(parser);
-
- if (allows_forwarding_parameters) {
- pm_parser_local_add_token(parser, &operator);
- }
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
}
- pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
+ pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, &name));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
+
if (params->rest == NULL) {
pm_parameters_node_rest_set(params, param);
} else {
@@ -11887,6 +14148,7 @@ parse_parameters(
}
case PM_TOKEN_STAR_STAR:
case PM_TOKEN_USTAR_STAR: {
+ pm_parameters_order_t previous_order = order;
update_parameter_state(parser, &parser->current, &order);
parser_lex(parser);
@@ -11894,7 +14156,11 @@ parse_parameters(
pm_node_t *param;
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
- param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
+ if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
+ }
+
+ param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
} else {
pm_token_t name;
@@ -11902,16 +14168,13 @@ parse_parameters(
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
name = parser->previous;
repeated = pm_parser_parameter_name_check(parser, &name);
- pm_parser_local_add_token(parser, &name);
+ pm_parser_local_add_token(parser, &name, 1);
} else {
name = not_provided(parser);
-
- if (allows_forwarding_parameters) {
- pm_parser_local_add_token(parser, &operator);
- }
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
}
- param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
+ param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, &name));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
@@ -11928,36 +14191,53 @@ parse_parameters(
}
default:
if (parser->previous.type == PM_TOKEN_COMMA) {
- if (allows_trailing_comma) {
+ if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
// If we get here, then we have a trailing comma in a
// block parameter list.
- pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+ pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
if (params->rest == NULL) {
pm_parameters_node_rest_set(params, param);
} else {
- pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
+ pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
+ pm_parameters_node_posts_append(params, UP(param));
}
} else {
pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
}
}
- looping = false;
+ parsing = false;
break;
}
- if (looping && uses_parentheses) {
- accept1(parser, PM_TOKEN_NEWLINE);
+ // If we hit some kind of issue while parsing the parameter, this would
+ // have been set to false. In that case, we need to break out of the
+ // loop.
+ if (!parsing) break;
+
+ bool accepted_newline = false;
+ if (uses_parentheses) {
+ accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
}
- } while (looping && accept1(parser, PM_TOKEN_COMMA));
+
+ if (accept1(parser, PM_TOKEN_COMMA)) {
+ // If there was a comma, but we also accepted a newline, then this
+ // is a syntax error.
+ if (accepted_newline) {
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+ }
+ } else {
+ // If there was no comma, then we're done parsing parameters.
+ break;
+ }
+ }
pm_do_loop_stack_pop(parser);
// If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
if (params->base.location.start == params->base.location.end) {
- pm_node_destroy(parser, (pm_node_t *) params);
+ pm_node_destroy(parser, UP(params));
return NULL;
}
@@ -11965,14 +14245,128 @@ parse_parameters(
}
/**
+ * Accepts a parser and returns the index of the last newline in the file that
+ * was recorded before the current token within the newline list.
+ *
+ * When parser->heredoc_end is NULL this is simply the index of the last entry
+ * in the newline list. Otherwise the index is found by looking up the start of
+ * the current token in the newline list.
+ */
+static size_t
+token_newline_index(const pm_parser_t *parser) {
+ if (parser->heredoc_end == NULL) {
+ // This is the common case. In this case the most recently recorded
+ // newline is the one before the current token, so we can return the
+ // index of the last entry in the newline list.
+ return parser->newline_list.size - 1;
+ } else {
+ // This is unlikely. This is the case that we have already parsed the
+ // start of a heredoc, so we cannot rely on looking at the previous
+ // offset of the newline list, and instead must go through the whole
+ // process of a binary search for the line number.
+ return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
+ }
+}
+
+/**
+ * Accepts a parser, a newline index, and a token and returns the column. The
+ * important piece of this is that it expands tabs out to the next tab stop.
+ *
+ * The column is computed by walking every byte from the offset stored in the
+ * newline list at newline_index up to (but not including) the start of the
+ * token. A tab advances the column to the next multiple of
+ * PM_TAB_WHITESPACE_SIZE; every other byte advances it by one.
+ *
+ * If break_on_non_space is true and any byte other than a space or a tab is
+ * found before the token, this function returns -1 instead of a column.
+ *
+ * On the first line (newline_index == 0) a leading UTF-8 byte order mark
+ * (0xef 0xbb 0xbf) is skipped and does not contribute to the column.
+ *
+ * NOTE(review): the byte order mark check reads up to the first three bytes of
+ * the source without a length check that is visible here; confirm that the
+ * input is always long enough (or terminated) for this to be safe.
+ */
+static int64_t
+token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
+ const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
+ const uint8_t *end = token->start;
+
+ // Skip over the BOM if it is present.
+ if (
+ newline_index == 0 &&
+ parser->start[0] == 0xef &&
+ parser->start[1] == 0xbb &&
+ parser->start[2] == 0xbf
+ ) cursor += 3;
+
+ int64_t column = 0;
+ for (; cursor < end; cursor++) {
+ switch (*cursor) {
+ case '\t':
+ column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE; // round up to the next tab stop
+ break;
+ case ' ':
+ column++;
+ break;
+ default:
+ column++;
+ if (break_on_non_space) return -1; // something other than whitespace precedes the token
+ break;
+ }
+ }
+
+ return column;
+}
+
+/**
+ * Accepts a parser, the newline index and token of an opening keyword, and two
+ * flags. The closing token is not passed in: it is taken from the parser's
+ * current token, and its newline index is computed from the parser. This
+ * function warns if the indentation of the two tokens does not match.
+ *
+ * opening_newline_index is the index into the newline list of the line that
+ * holds opening_token; it is also used (offset by parser->start_line) as the
+ * line number reported in the warning.
+ *
+ * if_after_else, when true, computes the opening column even if something
+ * other than spaces or tabs comes before the opening token on its line.
+ *
+ * allow_indent, when true, suppresses the warning when the closing token is
+ * indented further than the opening token.
+ *
+ * No warning is emitted if parser->warn_mismatched_indentation is false, if
+ * both tokens are on the same line, if the closing token has anything other
+ * than spaces or tabs before it, or if the two columns are equal.
+ */
+static void
+parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
+ // If these warnings are disabled (unlikely), then we can just return.
+ if (!parser->warn_mismatched_indentation) return;
+
+ // If the tokens are on the same line, we do not warn.
+ size_t closing_newline_index = token_newline_index(parser);
+ if (opening_newline_index == closing_newline_index) return;
+
+ // If the opening token has anything other than spaces or tabs before it,
+ // then we do not warn. This is unless we are matching up an `if`/`end` pair
+ // and the `if` immediately follows an `else` keyword.
+ int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
+ if (!if_after_else && (opening_column == -1)) return;
+
+ // Get a reference to the closing token off the current parser. This assumes
+ // that the caller has placed this in the correct position.
+ pm_token_t *closing_token = &parser->current;
+
+ // If the tokens are at the same indentation, we do not warn. We also do not
+ // warn if the closing token is not the first thing on its line.
+ int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
+ if ((closing_column == -1) || (opening_column == closing_column)) return;
+
+ // If the closing column is greater than the opening column and we are
+ // allowing indentation, then we do not warn.
+ if (allow_indent && (closing_column > opening_column)) return;
+
+ // Otherwise, add a warning. The format arguments are the closing token's
+ // text, the opening token's text, and the line of the opening token.
+ PM_PARSER_WARN_FORMAT(
+ parser,
+ closing_token->start,
+ closing_token->end,
+ PM_WARN_INDENTATION_MISMATCH,
+ (int) (closing_token->end - closing_token->start),
+ (const char *) closing_token->start,
+ (int) (opening_token->end - opening_token->start),
+ (const char *) opening_token->start,
+ ((int32_t) opening_newline_index) + parser->start_line
+ );
+}
+
+typedef enum { // kind of construct whose rescue/else clauses parse_rescues is parsing
+ PM_RESCUES_BEGIN = 1, // selects PM_CONTEXT_BEGIN_RESCUE / PM_CONTEXT_BEGIN_ELSE
+ PM_RESCUES_BLOCK, // selects PM_CONTEXT_BLOCK_RESCUE / PM_CONTEXT_BLOCK_ELSE
+ PM_RESCUES_CLASS, // selects PM_CONTEXT_CLASS_RESCUE / PM_CONTEXT_CLASS_ELSE
+ PM_RESCUES_DEF, // selects PM_CONTEXT_DEF_RESCUE / PM_CONTEXT_DEF_ELSE
+ PM_RESCUES_LAMBDA, // selects PM_CONTEXT_LAMBDA_RESCUE / PM_CONTEXT_LAMBDA_ELSE
+ PM_RESCUES_MODULE, // selects PM_CONTEXT_MODULE_RESCUE / PM_CONTEXT_MODULE_ELSE
+ PM_RESCUES_SCLASS // selects PM_CONTEXT_SCLASS_RESCUE / PM_CONTEXT_SCLASS_ELSE
+} pm_rescues_type_t;
+
+/**
* Parse any number of rescue clauses. This will form a linked list of rescue
* nodes pointing to each other from the top.
*/
static inline void
-parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
+parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
pm_rescue_node_t *current = NULL;
- while (accept1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
+ while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
+ if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
+ parser_lex(parser);
+
pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
switch (parser->current.type) {
@@ -11983,8 +14377,8 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
parser_lex(parser);
pm_rescue_node_operator_set(rescue, &parser->previous);
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
- reference = parse_target(parser, reference);
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+ reference = parse_target(parser, reference, false, false);
pm_rescue_node_reference_set(rescue, reference);
break;
@@ -11992,8 +14386,8 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
case PM_TOKEN_NEWLINE:
case PM_TOKEN_SEMICOLON:
case PM_TOKEN_KEYWORD_THEN:
- // Here we have a terminator for the rescue keyword, in which case we're
- // going to just continue on.
+ // Here we have a terminator for the rescue keyword, in which
+ // case we're going to just continue on.
break;
default: {
if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
@@ -12001,7 +14395,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
// we'll attempt to parse it here and any others delimited by commas.
do {
- pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION);
+ pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
pm_rescue_node_exceptions_append(rescue, expression);
// If we hit a newline, then this is the end of the rescue expression. We
@@ -12013,8 +14407,8 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
pm_rescue_node_operator_set(rescue, &parser->previous);
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
- reference = parse_target(parser, reference);
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+ reference = parse_target(parser, reference, false, false);
pm_rescue_node_reference_set(rescue, reference);
break;
@@ -12025,17 +14419,32 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
}
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- accept1(parser, PM_TOKEN_KEYWORD_THEN);
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+ rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
+ }
} else {
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
+ rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
}
if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- pm_statements_node_t *statements = parse_statements(parser, def_p ? PM_CONTEXT_RESCUE_DEF : PM_CONTEXT_RESCUE);
- if (statements) {
- pm_rescue_node_statements_set(rescue, statements);
+ pm_context_t context;
+
+ switch (type) {
+ case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
+ case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
+ case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
+ case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
+ case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
+ case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
+ case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
+ default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
}
+
+ pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
+ if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
+
pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
}
@@ -12043,49 +14452,92 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
if (current == NULL) {
pm_begin_node_rescue_clause_set(parent_node, rescue);
} else {
- pm_rescue_node_consequent_set(current, rescue);
+ pm_rescue_node_subsequent_set(current, rescue);
}
current = rescue;
}
// The end node locations on rescue nodes will not be set correctly
- // since we won't know the end until we've found all consequent
- // clauses. This sets the end location on all rescues once we know it
- if (current) {
+ // since we won't know the end until we've found all subsequent
+ // clauses. This sets the end location on all rescues once we know it.
+ if (current != NULL) {
const uint8_t *end_to_set = current->base.location.end;
- current = parent_node->rescue_clause;
- while (current) {
- current->base.location.end = end_to_set;
- current = current->consequent;
+ pm_rescue_node_t *clause = parent_node->rescue_clause;
+
+ while (clause != NULL) {
+ clause->base.location.end = end_to_set;
+ clause = clause->subsequent;
}
}
- if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
- pm_token_t else_keyword = parser->previous;
+ pm_token_t else_keyword;
+ if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+ if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
+ opening_newline_index = token_newline_index(parser);
+
+ else_keyword = parser->current;
+ opening = &else_keyword;
+
+ parser_lex(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
pm_statements_node_t *else_statements = NULL;
if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
pm_accepts_block_stack_push(parser, true);
- else_statements = parse_statements(parser, def_p ? PM_CONTEXT_RESCUE_ELSE_DEF : PM_CONTEXT_RESCUE_ELSE);
+ pm_context_t context;
+
+ switch (type) {
+ case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
+ case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
+ case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
+ case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
+ case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
+ case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
+ case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
+ default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
+ }
+
+ else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
+
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
}
pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
pm_begin_node_else_clause_set(parent_node, else_clause);
+
+ // If we don't have a `current` rescue node, then this is a dangling
+ // else, and it's an error.
+ if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
}
- if (accept1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
- pm_token_t ensure_keyword = parser->previous;
+ if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
+ if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
+ pm_token_t ensure_keyword = parser->current;
+
+ parser_lex(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
pm_statements_node_t *ensure_statements = NULL;
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- ensure_statements = parse_statements(parser, def_p ? PM_CONTEXT_ENSURE_DEF : PM_CONTEXT_ENSURE);
+ pm_context_t context;
+
+ switch (type) {
+ case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
+ case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
+ case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
+ case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
+ case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
+ case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
+ case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
+ default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
+ }
+
+ ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
+
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
}
@@ -12093,7 +14545,8 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
}
- if (parser->current.type == PM_TOKEN_KEYWORD_END) {
+ if (match1(parser, PM_TOKEN_KEYWORD_END)) {
+ if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
pm_begin_node_end_keyword_set(parent_node, &parser->current);
} else {
pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
@@ -12101,28 +14554,19 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
}
}
-static inline pm_begin_node_t *
-parse_rescues_as_begin(pm_parser_t *parser, pm_statements_node_t *statements, bool def_p) {
- pm_token_t no_begin_token = not_provided(parser);
- pm_begin_node_t *begin_node = pm_begin_node_create(parser, &no_begin_token, statements);
- parse_rescues(parser, begin_node, def_p);
+/**
+ * Parse a set of rescue clauses with an implicit begin (for example when on a
+ * class, module, def, etc.).
+ *
+ * A begin node is created around the given statements using a not-provided
+ * begin keyword, and the clauses are parsed into it through parse_rescues.
+ * The opening_newline_index, opening, and type arguments are forwarded to
+ * parse_rescues unchanged, and depth is forwarded incremented by one. Once
+ * the clauses have been parsed, the start of the node's location is set to
+ * the start pointer supplied by the caller, and the node is returned.
+ */
+static pm_begin_node_t *
+parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
+ pm_token_t begin_keyword = not_provided(parser);
+ pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
- // All nodes within a begin node are optional, so we look
- // for the earliest possible node that we can use to set
- // the BeginNode's start location
- const uint8_t *start = begin_node->base.location.start;
- if (begin_node->statements) {
- start = begin_node->statements->base.location.start;
- } else if (begin_node->rescue_clause) {
- start = begin_node->rescue_clause->base.location.start;
- } else if (begin_node->else_clause) {
- start = begin_node->else_clause->base.location.start;
- } else if (begin_node->ensure_clause) {
- start = begin_node->ensure_clause->base.location.start;
- }
+ parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
+ node->base.location.start = start;
- begin_node->base.location.start = start;
- return begin_node;
+ return node;
}
/**
@@ -12133,61 +14577,197 @@ parse_block_parameters(
pm_parser_t *parser,
bool allows_trailing_comma,
const pm_token_t *opening,
- bool is_lambda_literal
+ bool is_lambda_literal,
+ bool accepts_blocks_in_defaults,
+ uint16_t depth
) {
pm_parameters_node_t *parameters = NULL;
if (!match1(parser, PM_TOKEN_SEMICOLON)) {
+ if (!is_lambda_literal) {
+ context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
+ }
parameters = parse_parameters(
parser,
is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
false,
allows_trailing_comma,
- false
+ false,
+ accepts_blocks_in_defaults,
+ true,
+ (uint16_t) (depth + 1)
);
+ if (!is_lambda_literal) {
+ context_pop(parser);
+ }
}
pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
- if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
- do {
- expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
- bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
- pm_parser_local_add_token(parser, &parser->previous);
+ if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
+ accept1(parser, PM_TOKEN_NEWLINE);
- pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
- if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)local);
- }
- pm_block_parameters_node_append_local(block_parameters, local);
- } while (accept1(parser, PM_TOKEN_COMMA));
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ do {
+ switch (parser->current.type) {
+ case PM_TOKEN_CONSTANT:
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+ parser_lex(parser);
+ break;
+ case PM_TOKEN_INSTANCE_VARIABLE:
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
+ parser_lex(parser);
+ break;
+ case PM_TOKEN_GLOBAL_VARIABLE:
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
+ parser_lex(parser);
+ break;
+ case PM_TOKEN_CLASS_VARIABLE:
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
+ parser_lex(parser);
+ break;
+ default:
+ expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
+ break;
+ }
+
+ bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
+ pm_parser_local_add_token(parser, &parser->previous, 1);
+
+ pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
+ if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
+
+ pm_block_parameters_node_append_local(block_parameters, local);
+ } while (accept1(parser, PM_TOKEN_COMMA));
+ }
}
return block_parameters;
}
/**
+ * Return true if any of the visible scopes to the current context are using
+ * numbered parameters.
+ *
+ * The walk starts at the scope that encloses the current one and moves
+ * outward through the `previous` links. It stops at the end of the chain or
+ * at the first closed scope, which is itself not inspected. A scope counts as
+ * using numbered parameters when PM_SCOPE_PARAMETERS_NUMBERED_FOUND is set in
+ * its `parameters` flags. The current scope is never inspected.
+ */
+static bool
+outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
+ if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
+ }
+
+ return false;
+}
+
+/**
+ * These are the names of the various numbered parameters. We have them here so
+ * that when we insert them into the constant pool we can use a constant string
+ * and not have to allocate.
+ *
+ * The table is ordered, so the entry at index i is the name of numbered
+ * parameter i + 1 (index 0 is "_1" and index 8 is "_9").
+ */
+static const char * const pm_numbered_parameter_names[] = {
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
+};
+
+/**
+ * Return the node that should be used in the parameters field of a block-like
+ * (block or lambda) node, depending on the kind of parameters that were
+ * declared in the current scope.
+ *
+ * The result is chosen as follows:
+ *
+ * - If ordinary parameters were given (parameters is not NULL), they are
+ *   returned as-is. If the current scope also recorded implicit parameters,
+ *   an error is reported against the first recorded one
+ *   (PM_ERR_NUMBERED_PARAMETER_ORDINARY for a local variable read,
+ *   PM_ERR_IT_NOT_ALLOWED_ORDINARY for an `it` read).
+ * - If there are neither ordinary nor implicit parameters, NULL is returned.
+ * - Otherwise every recorded implicit parameter is validated. Numbered
+ *   parameters and `it` may not be mixed, and a numbered parameter is
+ *   rejected when an enclosing visible scope already uses numbered
+ *   parameters or when the current scope is flagged with
+ *   PM_SCOPE_PARAMETERS_NUMBERED_INNER.
+ * - If at least one numbered parameter was accepted, every enclosing scope up
+ *   to (but not including) the first closed scope is flagged with
+ *   PM_SCOPE_PARAMETERS_NUMBERED_INNER, and a numbered parameters node is
+ *   returned. It carries the highest accepted parameter number and spans from
+ *   the start of opening to the end of closing.
+ * - If only `it` was accepted, an it-parameters node built from opening and
+ *   closing is returned.
+ * - If every implicit parameter was rejected with an error, NULL is returned.
+ */
+static pm_node_t *
+parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
+
+ // If we have ordinary parameters, then we will return them as the set of
+ // parameters.
+ if (parameters != NULL) {
+ // If we also have implicit parameters, then this is an error.
+ if (implicit_parameters->size > 0) {
+ pm_node_t *node = implicit_parameters->nodes[0];
+
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
+ } else {
+ assert(false && "unreachable");
+ }
+ }
+
+ return parameters;
+ }
+
+ // If we don't have any implicit parameters, then the set of parameters is
+ // NULL.
+ if (implicit_parameters->size == 0) {
+ return NULL;
+ }
+
+ // If we don't have ordinary parameters, then we now must validate our set
+ // of implicit parameters. We can only have numbered parameters or it, but
+ // they cannot be mixed.
+ uint8_t numbered_parameter = 0;
+ bool it_parameter = false;
+
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
+ pm_node_t *node = implicit_parameters->nodes[index];
+
+ if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
+ if (it_parameter) {
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
+ } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
+ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
+ } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+ numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
+ } else {
+ assert(false && "unreachable");
+ }
+ } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ if (numbered_parameter > 0) {
+ pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
+ } else {
+ it_parameter = true;
+ }
+ }
+ }
+
+ if (numbered_parameter > 0) {
+ // Go through the parent scopes and mark them as being disallowed from
+ // using numbered parameters because this inner scope is using them.
+ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
+ scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
+ }
+
+ const pm_location_t location = { .start = opening->start, .end = closing->end };
+ return UP(pm_numbered_parameters_node_create(parser, &location, numbered_parameter));
+ }
+
+ if (it_parameter) {
+ return UP(pm_it_parameters_node_create(parser, opening, closing));
+ }
+
+ return NULL;
+}
+
+/**
* Parse a block.
*/
static pm_block_node_t *
-parse_block(pm_parser_t *parser) {
+parse_block(pm_parser_t *parser, uint16_t depth) {
pm_token_t opening = parser->previous;
accept1(parser, PM_TOKEN_NEWLINE);
- pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
pm_accepts_block_stack_push(parser, true);
pm_parser_scope_push(parser, false);
pm_block_parameters_node_t *block_parameters = NULL;
if (accept1(parser, PM_TOKEN_PIPE)) {
- parser->current_scope->explicit_params = true;
pm_token_t block_parameters_opening = parser->previous;
-
if (match1(parser, PM_TOKEN_PIPE)) {
block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
parser->command_start = true;
parser_lex(parser);
} else {
- block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false);
+ block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
parser->command_start = true;
expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
@@ -12196,52 +14776,40 @@ parse_block(pm_parser_t *parser) {
pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
}
- uint32_t locals_body_index = 0;
-
- if (block_parameters) {
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
- }
-
accept1(parser, PM_TOKEN_NEWLINE);
pm_node_t *statements = NULL;
if (opening.type == PM_TOKEN_BRACE_LEFT) {
if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES);
+ statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
}
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
} else {
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS);
+ statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
pm_accepts_block_stack_pop(parser);
}
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
+ statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
}
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
}
- pm_node_t *parameters = (pm_node_t *) block_parameters;
- uint8_t maximum = parser->current_scope->numbered_parameters;
-
- if (parameters == NULL && (maximum > 0)) {
- parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
- locals_body_index = maximum;
- }
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
+ pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
- pm_constant_id_list_t locals = parser->current_scope->locals;
pm_parser_scope_pop(parser);
pm_accepts_block_stack_pop(parser);
- pm_parser_current_param_name_restore(parser, saved_param_name);
- return pm_block_node_create(parser, &locals, locals_body_index, &opening, parameters, statements, &parser->previous);
+ return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
}
/**
@@ -12250,7 +14818,7 @@ parse_block(pm_parser_t *parser) {
* arguments, or blocks).
*/
static bool
-parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call) {
+parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
bool found = false;
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
@@ -12261,10 +14829,15 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
} else {
pm_accepts_block_stack_push(parser, true);
- parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_ARGUMENT_TERM_PAREN);
- pm_accepts_block_stack_pop(parser);
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
+
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ }
+ pm_accepts_block_stack_pop(parser);
arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
}
} else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
@@ -12274,13 +14847,13 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
// If we get here, then the subsequent token cannot be used as an infix
// operator. In this case we assume the subsequent token is part of an
// argument to this method call.
- parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
// If we have done with the arguments and still not consumed the comma,
// then we have a trailing comma where we need to check whether it is
// allowed or not.
if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
- pm_parser_err_previous(parser, PM_ERR_EXPECT_ARGUMENT);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
}
pm_accepts_block_stack_pop(parser);
@@ -12294,31 +14867,26 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
found |= true;
- block = parse_block(parser);
+ block = parse_block(parser, (uint16_t) (depth + 1));
pm_arguments_validate_block(parser, arguments, block);
} else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
found |= true;
- block = parse_block(parser);
+ block = parse_block(parser, (uint16_t) (depth + 1));
}
if (block != NULL) {
if (arguments->block == NULL && !arguments->has_forwarding) {
- arguments->block = (pm_node_t *) block;
+ arguments->block = UP(block);
} else {
- if (arguments->has_forwarding) {
- if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
- }
- } else {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
- }
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
+
if (arguments->block != NULL) {
if (arguments->arguments == NULL) {
arguments->arguments = pm_arguments_node_create(parser);
}
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
}
- arguments->block = (pm_node_t *) block;
+ arguments->block = UP(block);
}
}
}
@@ -12326,11 +14894,246 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
return found;
}
+/**
+ * Check that the return is allowed in the current context. If it isn't, add an
+ * error to the parser.
+ *
+ * The context stack is walked from the current context outward through the
+ * `prev` links, and the first decisive context determines the outcome:
+ *
+ * - Class and module contexts (including their rescue/else/ensure variants)
+ *   are invalid: PM_ERR_RETURN_INVALID is reported against node and the walk
+ *   stops.
+ * - Block, def, and lambda contexts (including their parameter and
+ *   rescue/else/ensure variants) are valid: the walk stops with no error.
+ * - Singleton class contexts are remembered and the walk continues.
+ * - All other listed contexts are transparent and the walk continues.
+ *
+ * If the walk reaches the end of the stack without a decision, the return is
+ * reported as invalid only when a singleton class context was seen and the
+ * parser version is at least PM_OPTIONS_VERSION_CRUBY_3_4.
+ */
+static void
+parse_return(pm_parser_t *parser, pm_node_t *node) {
+ bool in_sclass = false;
+ for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
+ switch (context_node->context) {
+ case PM_CONTEXT_BEGIN_ELSE:
+ case PM_CONTEXT_BEGIN_ENSURE:
+ case PM_CONTEXT_BEGIN_RESCUE:
+ case PM_CONTEXT_BEGIN:
+ case PM_CONTEXT_CASE_IN:
+ case PM_CONTEXT_CASE_WHEN:
+ case PM_CONTEXT_DEFAULT_PARAMS:
+ case PM_CONTEXT_DEFINED:
+ case PM_CONTEXT_ELSE:
+ case PM_CONTEXT_ELSIF:
+ case PM_CONTEXT_EMBEXPR:
+ case PM_CONTEXT_FOR_INDEX:
+ case PM_CONTEXT_FOR:
+ case PM_CONTEXT_IF:
+ case PM_CONTEXT_LOOP_PREDICATE:
+ case PM_CONTEXT_MAIN:
+ case PM_CONTEXT_MULTI_TARGET:
+ case PM_CONTEXT_PARENS:
+ case PM_CONTEXT_POSTEXE:
+ case PM_CONTEXT_PREDICATE:
+ case PM_CONTEXT_PREEXE:
+ case PM_CONTEXT_RESCUE_MODIFIER:
+ case PM_CONTEXT_TERNARY:
+ case PM_CONTEXT_UNLESS:
+ case PM_CONTEXT_UNTIL:
+ case PM_CONTEXT_WHILE:
+ // Keep iterating up the lists of contexts, because returns can
+ // see through these.
+ continue;
+ case PM_CONTEXT_SCLASS_ELSE:
+ case PM_CONTEXT_SCLASS_ENSURE:
+ case PM_CONTEXT_SCLASS_RESCUE:
+ case PM_CONTEXT_SCLASS:
+ in_sclass = true;
+ continue;
+ case PM_CONTEXT_CLASS_ELSE:
+ case PM_CONTEXT_CLASS_ENSURE:
+ case PM_CONTEXT_CLASS_RESCUE:
+ case PM_CONTEXT_CLASS:
+ case PM_CONTEXT_MODULE_ELSE:
+ case PM_CONTEXT_MODULE_ENSURE:
+ case PM_CONTEXT_MODULE_RESCUE:
+ case PM_CONTEXT_MODULE:
+ // These contexts are invalid for a return.
+ pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
+ return;
+ case PM_CONTEXT_BLOCK_BRACES:
+ case PM_CONTEXT_BLOCK_ELSE:
+ case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_BLOCK_KEYWORDS:
+ case PM_CONTEXT_BLOCK_RESCUE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
+ case PM_CONTEXT_DEF_ELSE:
+ case PM_CONTEXT_DEF_ENSURE:
+ case PM_CONTEXT_DEF_PARAMS:
+ case PM_CONTEXT_DEF_RESCUE:
+ case PM_CONTEXT_DEF:
+ case PM_CONTEXT_LAMBDA_BRACES:
+ case PM_CONTEXT_LAMBDA_DO_END:
+ case PM_CONTEXT_LAMBDA_ELSE:
+ case PM_CONTEXT_LAMBDA_ENSURE:
+ case PM_CONTEXT_LAMBDA_RESCUE:
+ // These contexts are valid for a return, and we should not
+ // continue to loop.
+ return;
+ case PM_CONTEXT_NONE:
+ // This case should never happen.
+ assert(false && "unreachable");
+ break;
+ }
+ }
+ if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
+ pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
+ }
+}
+
+/**
+ * Check that the block exit (next, break, redo) is allowed in the current
+ * context. If it isn't, add an error to the parser.
+ *
+ * The context stack is walked from the current context outward through the
+ * `prev` links until a decisive context is found:
+ *
+ * - Block, lambda, loop, for, defined, and post-execution contexts accept the
+ *   block exit, and the function returns without doing anything.
+ * - Def, main, pre-execution, and singleton class contexts do not accept it.
+ *   No error is reported here; instead the node is appended to
+ *   parser->current_block_exits (which must not be NULL) so that it can be
+ *   validated later, since a trailing while/until could still make it valid.
+ * - All other listed contexts are transparent and the walk continues.
+ *
+ * If the walk reaches the end of the stack without finding a decisive
+ * context, nothing is recorded and no error is added.
+ */
+static void
+parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
+ for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
+ switch (context_node->context) {
+ case PM_CONTEXT_BLOCK_BRACES:
+ case PM_CONTEXT_BLOCK_KEYWORDS:
+ case PM_CONTEXT_BLOCK_ELSE:
+ case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
+ case PM_CONTEXT_BLOCK_RESCUE:
+ case PM_CONTEXT_DEFINED:
+ case PM_CONTEXT_FOR:
+ case PM_CONTEXT_LAMBDA_BRACES:
+ case PM_CONTEXT_LAMBDA_DO_END:
+ case PM_CONTEXT_LAMBDA_ELSE:
+ case PM_CONTEXT_LAMBDA_ENSURE:
+ case PM_CONTEXT_LAMBDA_RESCUE:
+ case PM_CONTEXT_LOOP_PREDICATE:
+ case PM_CONTEXT_POSTEXE:
+ case PM_CONTEXT_UNTIL:
+ case PM_CONTEXT_WHILE:
+ // These are the good cases. We're allowed to have a block exit
+ // in these contexts.
+ return;
+ case PM_CONTEXT_DEF:
+ case PM_CONTEXT_DEF_PARAMS:
+ case PM_CONTEXT_DEF_ELSE:
+ case PM_CONTEXT_DEF_ENSURE:
+ case PM_CONTEXT_DEF_RESCUE:
+ case PM_CONTEXT_MAIN:
+ case PM_CONTEXT_PREEXE:
+ case PM_CONTEXT_SCLASS:
+ case PM_CONTEXT_SCLASS_ELSE:
+ case PM_CONTEXT_SCLASS_ENSURE:
+ case PM_CONTEXT_SCLASS_RESCUE:
+ // These are the bad cases. We're not allowed to have a block
+ // exit in these contexts.
+ //
+ // If we get here, then we're about to mark this block exit
+ // as invalid. However, it could later _become_ valid if we
+ // find a trailing while/until on the expression. In this
+ // case instead of adding the error here, we'll add the
+ // block exit to the list of exits for the expression, and
+ // the node parsing will handle validating it instead.
+ assert(parser->current_block_exits != NULL);
+ pm_node_list_append(parser->current_block_exits, node);
+ return;
+ case PM_CONTEXT_BEGIN_ELSE:
+ case PM_CONTEXT_BEGIN_ENSURE:
+ case PM_CONTEXT_BEGIN_RESCUE:
+ case PM_CONTEXT_BEGIN:
+ case PM_CONTEXT_CASE_IN:
+ case PM_CONTEXT_CASE_WHEN:
+ case PM_CONTEXT_CLASS_ELSE:
+ case PM_CONTEXT_CLASS_ENSURE:
+ case PM_CONTEXT_CLASS_RESCUE:
+ case PM_CONTEXT_CLASS:
+ case PM_CONTEXT_DEFAULT_PARAMS:
+ case PM_CONTEXT_ELSE:
+ case PM_CONTEXT_ELSIF:
+ case PM_CONTEXT_EMBEXPR:
+ case PM_CONTEXT_FOR_INDEX:
+ case PM_CONTEXT_IF:
+ case PM_CONTEXT_MODULE_ELSE:
+ case PM_CONTEXT_MODULE_ENSURE:
+ case PM_CONTEXT_MODULE_RESCUE:
+ case PM_CONTEXT_MODULE:
+ case PM_CONTEXT_MULTI_TARGET:
+ case PM_CONTEXT_PARENS:
+ case PM_CONTEXT_PREDICATE:
+ case PM_CONTEXT_RESCUE_MODIFIER:
+ case PM_CONTEXT_TERNARY:
+ case PM_CONTEXT_UNLESS:
+ // In these contexts we should continue walking up the list of
+ // contexts.
+ break;
+ case PM_CONTEXT_NONE:
+ // This case should never happen.
+ assert(false && "unreachable");
+ break;
+ }
+ }
+}
+
+/**
+ * When we hit an expression that could contain block exits, we need to stash
+ * the previous set and create a new one.
+ *
+ * The given list is installed as parser->current_block_exits, and the list
+ * that was installed beforehand (possibly NULL) is returned so that the
+ * caller can hand it back to pop_block_exits or flush_block_exits later.
+ */
+static pm_node_list_t *
+push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
+ pm_node_list_t *previous_block_exits = parser->current_block_exits;
+ parser->current_block_exits = current_block_exits;
+ return previous_block_exits;
+}
+
+/**
+ * If we did not match a trailing while/until and this was the last chance to do
+ * so, then all of the block exits in the list are invalid and we need to add an
+ * error for each of them.
+ *
+ * A PM_ERR_INVALID_BLOCK_EXIT error is reported for every node in
+ * parser->current_block_exits, formatted with the keyword that matches the
+ * node type ("break", "next", or "redo"). Afterwards
+ * parser->current_block_exits is set to previous_block_exits.
+ */
+static void
+flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
+ pm_node_t *block_exit;
+ PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
+ const char *type;
+
+ switch (PM_NODE_TYPE(block_exit)) {
+ case PM_BREAK_NODE: type = "break"; break;
+ case PM_NEXT_NODE: type = "next"; break;
+ case PM_REDO_NODE: type = "redo"; break;
+ default: assert(false && "unreachable"); type = ""; break;
+ }
+
+ PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
+ }
+
+ parser->current_block_exits = previous_block_exits;
+}
+
+/**
+ * Pop the current level of block exits from the parser, and add errors to the
+ * parser if any of them are deemed to be invalid.
+ *
+ * In every branch parser->current_block_exits ends up set to
+ * previous_block_exits; the branches differ only in what happens to the
+ * block exits collected at the current level.
+ */
+static void
+pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
+ if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
+ // If we matched a trailing while/until, then all of the block exits in
+ // the contained list are valid. In this case we do not need to do
+ // anything.
+ parser->current_block_exits = previous_block_exits;
+ } else if (previous_block_exits != NULL) {
+ // If we did not match a trailing while/until, then all of the block
+ // exits contained in the list are invalid for this specific context.
+ // However, they could still become valid in a higher level context if
+ // there is another list above this one. In this case we'll push all of
+ // the block exits up to the previous list.
+ pm_node_list_concat(previous_block_exits, parser->current_block_exits);
+ parser->current_block_exits = previous_block_exits;
+ } else {
+ // If we did not match a trailing while/until and this was the last
+ // chance to do so, then all of the block exits in the list are invalid
+ // and we need to add an error for each of them.
+ flush_block_exits(parser, previous_block_exits);
+ }
+}
+
static inline pm_node_t *
-parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword) {
+parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
context_push(parser, PM_CONTEXT_PREDICATE);
pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, error_id);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
// Predicates are closed by a term, a "then", or a term and then a "then".
bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -12349,16 +15152,19 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
}
static inline pm_node_t *
-parse_conditional(pm_parser_t *parser, pm_context_t context) {
+parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
pm_token_t keyword = parser->previous;
pm_token_t then_keyword = not_provided(parser);
- pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword);
+ pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
pm_statements_node_t *statements = NULL;
if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- statements = parse_statements(parser, context);
+ statements = parse_statements(parser, context, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
}
@@ -12368,10 +15174,10 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
switch (context) {
case PM_CONTEXT_IF:
- parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
+ parent = UP(pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
break;
case PM_CONTEXT_UNLESS:
- parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
+ parent = UP(pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements));
break;
default:
assert(false && "unreachable");
@@ -12383,48 +15189,59 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
// Parse any number of elsif clauses. This will form a linked list of if
// nodes pointing to each other from the top.
if (context == PM_CONTEXT_IF) {
- while (accept1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
- pm_token_t elsif_keyword = parser->previous;
- pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword);
+ while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
+ if (parser_end_of_line_p(parser)) {
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+ }
+
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
+ pm_token_t elsif_keyword = parser->current;
+ parser_lex(parser);
+
+ pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
pm_accepts_block_stack_push(parser, true);
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF);
- pm_accepts_block_stack_pop(parser);
+ pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
+ pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
- ((pm_if_node_t *) current)->consequent = elsif;
+ pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
+ ((pm_if_node_t *) current)->subsequent = elsif;
current = elsif;
}
}
if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
+ opening_newline_index = token_newline_index(parser);
+
parser_lex(parser);
pm_token_t else_keyword = parser->previous;
pm_accepts_block_stack_push(parser, true);
- pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE);
+ pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
switch (context) {
case PM_CONTEXT_IF:
- ((pm_if_node_t *) current)->consequent = (pm_node_t *) else_node;
+ ((pm_if_node_t *) current)->subsequent = UP(else_node);
break;
case PM_CONTEXT_UNLESS:
- ((pm_unless_node_t *) parent)->consequent = else_node;
+ ((pm_unless_node_t *) parent)->else_clause = else_node;
break;
default:
assert(false && "unreachable");
break;
}
} else {
- // We should specialize this error message to refer to 'if' or 'unless' explicitly.
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
}
// Set the appropriate end location for all of the nodes in the subtree.
@@ -12437,7 +15254,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
switch (PM_NODE_TYPE(current)) {
case PM_IF_NODE:
pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
- current = ((pm_if_node_t *) current)->consequent;
+ current = ((pm_if_node_t *) current)->subsequent;
recursing = current != NULL;
break;
case PM_ELSE_NODE:
@@ -12460,6 +15277,9 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
break;
}
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
return parent;
}
@@ -12523,7 +15343,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
- case PM_NUMBERED_REFERENCE_READ_NODE
+ case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
// Assert here that the flags are the same so that we can safely switch the type
// of the node without having to move the flags.
@@ -12537,8 +15357,14 @@ static inline pm_node_flags_t
parse_unescaped_encoding(const pm_parser_t *parser) {
if (parser->explicit_encoding != NULL) {
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ // If there's an explicit encoding and it's using a UTF-8 escape
+ // sequence, then mark the string as UTF-8.
return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ // If there's a non-UTF-8 escape sequence being used, then the
+ // string uses the source encoding, unless the source is marked as
+ // US-ASCII. In that case the string is forced as ASCII-8BIT in
+ // order to keep the string valid.
return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
}
}
@@ -12550,7 +15376,7 @@ parse_unescaped_encoding(const pm_parser_t *parser) {
* parsed as a string part, then NULL is returned.
*/
static pm_node_t *
-parse_string_part(pm_parser_t *parser) {
+parse_string_part(pm_parser_t *parser, uint16_t depth) {
switch (parser->current.type) {
// Here the lexer has returned to us plain string content. In this case
// we'll create a string node that has no opening or closing and return that
@@ -12562,7 +15388,7 @@ parse_string_part(pm_parser_t *parser) {
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ pm_node_t *node = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
pm_node_flag_set(node, parse_unescaped_encoding(parser));
parser_lex(parser);
@@ -12575,6 +15401,10 @@ parse_string_part(pm_parser_t *parser) {
// "aaa #{bbb} #@ccc ddd"
// ^^^^^^
case PM_TOKEN_EMBEXPR_BEGIN: {
+ // Ruby disallows seeing encoding around interpolation in strings,
+ // even though it is known at parse time.
+ parser->explicit_encoding = NULL;
+
pm_lex_state_t state = parser->lex_state;
int brace_nesting = parser->brace_nesting;
@@ -12587,7 +15417,7 @@ parse_string_part(pm_parser_t *parser) {
if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
pm_accepts_block_stack_push(parser, true);
- statements = parse_statements(parser, PM_CONTEXT_EMBEXPR);
+ statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
}
@@ -12597,7 +15427,14 @@ parse_string_part(pm_parser_t *parser) {
expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
pm_token_t closing = parser->previous;
- return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
+ // If this set of embedded statements only contains a single
+ // statement, then Ruby does not consider it as a possible statement
+ // that could emit a line event.
+ if (statements != NULL && statements->body.size == 1) {
+ pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
+ }
+
+ return UP(pm_embedded_statements_node_create(parser, &opening, statements, &closing));
}
// Here the lexer has returned the beginning of an embedded variable.
@@ -12607,6 +15444,10 @@ parse_string_part(pm_parser_t *parser) {
// "aaa #{bbb} #@ccc ddd"
// ^^^^^
case PM_TOKEN_EMBVAR: {
+ // Ruby disallows seeing encoding around interpolation in strings,
+ // even though it is known at parse time.
+ parser->explicit_encoding = NULL;
+
lex_state_set(parser, PM_LEX_STATE_BEG);
parser_lex(parser);
@@ -12618,42 +15459,42 @@ parse_string_part(pm_parser_t *parser) {
// create a global variable read node.
case PM_TOKEN_BACK_REFERENCE:
parser_lex(parser);
- variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
break;
// In this case an nth reference is being interpolated. We'll
// create a numbered reference read node.
case PM_TOKEN_NUMBERED_REFERENCE:
parser_lex(parser);
- variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
break;
// In this case a global variable is being interpolated. We'll
// create a global variable read node.
case PM_TOKEN_GLOBAL_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
break;
// In this case an instance variable is being interpolated.
// We'll create an instance variable read node.
case PM_TOKEN_INSTANCE_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
break;
// In this case a class variable is being interpolated. We'll
// create a class variable read node.
case PM_TOKEN_CLASS_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
break;
// We can hit here if we got an invalid token. In that case
// we'll not attempt to lex this token and instead just return a
// missing node.
default:
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
- variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ variable = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
break;
}
- return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
+ return UP(pm_embedded_variable_node_create(parser, &operator, variable));
}
default:
parser_lex(parser);
@@ -12667,26 +15508,32 @@ parse_string_part(pm_parser_t *parser) {
* automatically drop trailing `@` characters. This happens at the parser level,
* such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
*/
+static const uint8_t *
+parse_operator_symbol_name(const pm_token_t *name) {
+ switch (name->type) {
+ case PM_TOKEN_TILDE:
+ case PM_TOKEN_BANG:
+ if (name->end[-1] == '@') return name->end - 1;
+ PRISM_FALLTHROUGH
+ default:
+ return name->end;
+ }
+}
+
static pm_node_t *
parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
pm_token_t closing = not_provided(parser);
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
- const uint8_t *end = parser->current.end;
- switch (parser->current.type) {
- case PM_TOKEN_TILDE:
- case PM_TOKEN_BANG:
- if (parser->current.end[-1] == '@') end--;
- break;
- default:
- break;
- }
+ const uint8_t *end = parse_operator_symbol_name(&parser->current);
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
parser_lex(parser);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
- return (pm_node_t *) symbol;
+ pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
+
+ return UP(symbol);
}
/**
@@ -12695,7 +15542,7 @@ parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_sta
* symbols.
*/
static pm_node_t *
-parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
+parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
const pm_token_t opening = parser->previous;
if (lex_mode->mode != PM_LEX_STRING) {
@@ -12724,7 +15571,9 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- return (pm_node_t *) symbol;
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+
+ return UP(symbol);
}
if (lex_mode->as.string.interpolation) {
@@ -12735,11 +15584,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
pm_token_t content = not_provided(parser);
pm_token_t closing = parser->previous;
- return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
+ return UP(pm_symbol_node_create(parser, &opening, &content, &closing));
}
// Now we can parse the first part of the symbol.
- pm_node_t *part = parse_string_part(parser);
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
// If we got a string part, then it's possible that we could transform
// what looks like an interpolated symbol into a regular symbol.
@@ -12747,17 +15596,15 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
- return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
+ return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
}
- // Create a node_list first. We'll use this to check if it should be an
- // InterpolatedSymbolNode or a SymbolNode.
- pm_node_list_t node_list = { 0 };
- if (part) pm_node_list_append(&node_list, part);
+ pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
+ if (part) pm_interpolated_symbol_node_append(symbol, part);
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
- pm_node_list_append(&node_list, part);
+ if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
+ pm_interpolated_symbol_node_append(symbol, part);
}
}
@@ -12768,7 +15615,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
}
- return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
+ pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
+ return UP(symbol);
}
pm_token_t content;
@@ -12789,14 +15637,14 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
// In this case, the best way we have to represent this is as an
// interpolated symbol node, so that's what we'll do here.
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- pm_node_list_t parts = { 0 };
+ pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
pm_token_t bounds = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
- pm_node_list_append(&parts, part);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped));
+ pm_interpolated_symbol_node_append(symbol, part);
- part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
- pm_node_list_append(&parts, part);
+ part = UP(pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string));
+ pm_interpolated_symbol_node_append(symbol, part);
if (next_state != PM_LEX_STATE_NONE) {
lex_state_set(parser, next_state);
@@ -12804,7 +15652,9 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
parser_lex(parser);
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
- return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+
+ pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
+ return UP(symbol);
}
} else {
content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
@@ -12820,7 +15670,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
}
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+
+ return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
}
/**
@@ -12828,7 +15679,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
* constant, or an interpolated symbol.
*/
static inline pm_node_t *
-parse_undef_argument(pm_parser_t *parser) {
+parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
switch (parser->current.type) {
case PM_CASE_OPERATOR: {
const pm_token_t opening = not_provided(parser);
@@ -12845,17 +15696,19 @@ parse_undef_argument(pm_parser_t *parser) {
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- return (pm_node_t *) symbol;
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+
+ return UP(symbol);
}
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
- return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE);
+ return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
}
default:
pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
}
}
@@ -12866,7 +15719,7 @@ parse_undef_argument(pm_parser_t *parser) {
* between the first and second arguments.
*/
static inline pm_node_t *
-parse_alias_argument(pm_parser_t *parser, bool first) {
+parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
switch (parser->current.type) {
case PM_CASE_OPERATOR: {
const pm_token_t opening = not_provided(parser);
@@ -12884,95 +15737,70 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- return (pm_node_t *) symbol;
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+
+ return UP(symbol);
}
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
- return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
+ return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
}
case PM_TOKEN_BACK_REFERENCE:
parser_lex(parser);
- return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ return UP(pm_back_reference_read_node_create(parser, &parser->previous));
case PM_TOKEN_NUMBERED_REFERENCE:
parser_lex(parser);
- return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
case PM_TOKEN_GLOBAL_VARIABLE:
parser_lex(parser);
- return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ return UP(pm_global_variable_read_node_create(parser, &parser->previous));
default:
pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
}
}
/**
- * Return true if any of the visible scopes to the current context are using
- * numbered parameters.
- */
-static bool
-outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
- for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
- if (scope->numbered_parameters) return true;
- }
-
- return false;
-}
-
-/**
* Parse an identifier into a local variable read. If the local variable
* is not found, it returns NULL instead.
*/
-static pm_local_variable_read_node_t *
+static pm_node_t *
parse_variable(pm_parser_t *parser) {
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
int depth;
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
- return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
- }
-
- if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
- // Now that we know we have a numbered parameter, we need to check
- // if it's allowed in this context. If it is, then we will create a
- // local variable read. If it's not, then we'll create a normal call
- // node but add an error.
- if (parser->current_scope->explicit_params) {
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
- } else {
- // Indicate that this scope is using numbered params so that child
- // scopes cannot.
- uint8_t number = parser->previous.start[1];
+ bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
- // We subtract the value for the character '0' to get the actual
- // integer value of the number (only _1 through _9 are valid)
- uint8_t numbered_parameters = (uint8_t) (number - '0');
- if (numbered_parameters > parser->current_scope->numbered_parameters) {
- parser->current_scope->numbered_parameters = numbered_parameters;
- pm_parser_numbered_parameters_set(parser, numbered_parameters);
- }
+ if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
+ return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
+ }
- // When you use a numbered parameter, it implies the existence
- // of all of the locals that exist before it. For example,
- // referencing _2 means that _1 must exist. Therefore here we
- // loop through all of the possibilities and add them into the
- // constant pool.
- uint8_t current = '1';
- uint8_t *value;
+ pm_scope_t *current_scope = parser->current_scope;
+ if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
+ if (is_numbered_param) {
+ // When you use a numbered parameter, it implies the existence of
+ // all of the locals that exist before it. For example, referencing
+ // _2 means that _1 must exist. Therefore here we loop through all
+ // of the possibilities and add them into the constant pool.
+ uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
+ for (uint8_t number = 1; number <= maximum; number++) {
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
+ }
- while (current < number) {
- value = malloc(2);
- value[0] = '_';
- value[1] = current++;
- pm_parser_local_add_owned(parser, value, 2);
+ if (!match1(parser, PM_TOKEN_EQUAL)) {
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
}
- // Now we can add the actual token that is being used. For
- // this one we can add a shared version since it is directly
- // referenced in the source.
- pm_parser_local_add_token(parser, &parser->previous);
- return pm_local_variable_read_node_create(parser, &parser->previous, 0);
+ pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
+ pm_node_list_append(&current_scope->implicit_parameters, node);
+
+ return node;
+ } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
+ pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
+ pm_node_list_append(&current_scope->implicit_parameters, node);
+
+ return node;
}
}
@@ -12987,15 +15815,15 @@ parse_variable_call(pm_parser_t *parser) {
pm_node_flags_t flags = 0;
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
- pm_local_variable_read_node_t *node = parse_variable(parser);
- if (node != NULL) return (pm_node_t *) node;
+ pm_node_t *node = parse_variable(parser);
+ if (node != NULL) return node;
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
}
pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
- pm_node_flag_set((pm_node_t *)node, flags);
+ pm_node_flag_set(UP(node), flags);
- return (pm_node_t *) node;
+ return UP(node);
}
/**
@@ -13020,6 +15848,7 @@ parse_method_definition_name(pm_parser_t *parser) {
parser_lex(parser);
return parser->previous;
default:
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
}
}
@@ -13068,7 +15897,7 @@ parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
static void
parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
// The next node should be dedented if it's the first node in the list or if
- // if follows a string node.
+ // it follows a string node.
bool dedent_next = true;
// Iterate over all nodes, and trim whitespace accordingly. We're going to
@@ -13076,9 +15905,8 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
// the whitespace from a node, then we'll drop it from the list entirely.
size_t write_index = 0;
- for (size_t read_index = 0; read_index < nodes->size; read_index++) {
- pm_node_t *node = nodes->nodes[read_index];
-
+ pm_node_t *node;
+ PM_NODE_LIST_FOREACH(nodes, read_index, node) {
// We're not manipulating child nodes that aren't strings. In this case
// we'll skip past it and indicate that the subsequent node should not
// be dedented.
@@ -13106,22 +15934,274 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
nodes->size = write_index;
}
+/**
+ * Return a string content token at a particular location that is empty.
+ */
+static pm_token_t
+parse_strings_empty_content(const uint8_t *location) {
+ return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
+}
+
+/**
+ * Parse a set of strings that could be concatenated together.
+ */
+static inline pm_node_t *
+parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
+ assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
+ bool concating = false;
+
+ while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
+ pm_node_t *node = NULL;
+
+ // Here we have found a string literal. We'll parse it and add it to
+ // the list of strings.
+ const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+ assert(lex_mode->mode == PM_LEX_STRING);
+ bool lex_interpolation = lex_mode->as.string.interpolation;
+ bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
+
+ pm_token_t opening = parser->current;
+ parser_lex(parser);
+
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
+ // If we get here, then we have an end immediately after a
+ // start. In that case we'll create an empty content token and
+ // return an uninterpolated string.
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
+ pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
+
+ pm_string_shared_init(&string->unescaped, content.start, content.end);
+ node = UP(string);
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
+ // If we get here, then we have an end of a label immediately
+ // after a start. In that case we'll create an empty symbol
+ // node.
+ pm_token_t content = parse_strings_empty_content(parser->previous.start);
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
+
+ pm_string_shared_init(&symbol->unescaped, content.start, content.end);
+ node = UP(symbol);
+
+ if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+ } else if (!lex_interpolation) {
+ // If we don't accept interpolation then we expect the string to
+ // start with a single string content node.
+ pm_string_t unescaped;
+ pm_token_t content;
+
+ if (match1(parser, PM_TOKEN_EOF)) {
+ unescaped = PM_STRING_EMPTY;
+ content = not_provided(parser);
+ } else {
+ unescaped = parser->current_string;
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
+ content = parser->previous;
+ }
+
+ // It is unfortunately possible to have multiple string content
+ // nodes in a row in the case that there's heredoc content in
+ // the middle of the string, like this cursed example:
+ //
+ // <<-END+'b
+ // a
+ // END
+ // c'+'d'
+ //
+ // In that case we need to switch to an interpolated string to
+ // be able to contain all of the parts.
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_node_list_t parts = { 0 };
+
+ pm_token_t delimiters = not_provided(parser);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped));
+ pm_node_list_append(&parts, part);
+
+ do {
+ part = UP(pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters));
+ pm_node_list_append(&parts, part);
+ parser_lex(parser);
+ } while (match1(parser, PM_TOKEN_STRING_CONTENT));
+
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
+
+ pm_node_list_free(&parts);
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
+ node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
+ if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
+ } else {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
+ }
+ } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ // In this case we've hit string content so we know the string
+ // at least has something in it. We'll need to check if the
+ // following token is the end (in which case we can return a
+ // plain string) or if it's not then it has interpolation.
+ pm_token_t content = parser->current;
+ pm_string_t unescaped = parser->current_string;
+ parser_lex(parser);
+
+ if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
+ pm_node_flag_set(node, parse_unescaped_encoding(parser));
+
+ // Kind of odd behavior, but basically if we have an
+ // unterminated string and it ends in a newline, we back up one
+ // character so that the error message is on the last line of
+ // content in the string.
+ if (!accept1(parser, PM_TOKEN_STRING_END)) {
+ const uint8_t *location = parser->previous.end;
+ if (location > parser->start && location[-1] == '\n') location--;
+ pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
+
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ }
+ } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
+ node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
+ if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+ } else {
+ // If we get here, then we have interpolation so we'll need
+ // to create a string or symbol node with interpolation.
+ pm_node_list_t parts = { 0 };
+ pm_token_t string_opening = not_provided(parser);
+ pm_token_t string_closing = not_provided(parser);
+
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped));
+ pm_node_flag_set(part, parse_unescaped_encoding(parser));
+ pm_node_list_append(&parts, part);
+
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
+ if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
+ pm_node_list_append(&parts, part);
+ }
+ }
+
+ if (accept1(parser, PM_TOKEN_LABEL_END)) {
+ node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
+ if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
+ }
+
+ pm_node_list_free(&parts);
+ }
+ } else {
+ // If we get here, then the first part of the string is not plain
+ // string content, in which case we need to parse the string as an
+ // interpolated string.
+ pm_node_list_t parts = { 0 };
+ pm_node_t *part;
+
+ while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
+ if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
+ pm_node_list_append(&parts, part);
+ }
+ }
+
+ if (accept1(parser, PM_TOKEN_LABEL_END)) {
+ node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
+ if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
+ }
+
+ pm_node_list_free(&parts);
+ }
+
+ if (current == NULL) {
+ // If the node we just parsed is a symbol node, then we can't
+ // concatenate it with anything else, so we can now return that
+ // node.
+ if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
+ return node;
+ }
+
+ // If we don't already have a node, then it's fine and we can just
+ // set the result to be the node we just parsed.
+ current = node;
+ } else {
+ // Otherwise we need to check the type of the node we just parsed.
+ // If it cannot be concatenated with the previous node, then we'll
+ // need to add a syntax error.
+ if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
+ pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
+ }
+
+ // If we haven't already created our container for concatenation,
+ // we'll do that now.
+ if (!concating) {
+ if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
+ }
+
+ concating = true;
+ pm_token_t bounds = not_provided(parser);
+
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
+ pm_interpolated_string_node_append(container, current);
+ current = UP(container);
+ }
+
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
+ }
+ }
+
+ return current;
+}
+
+#define PM_PARSE_PATTERN_SINGLE 0 // flags value with no bits set
+#define PM_PARSE_PATTERN_TOP 1 // bit 0 of the parse_pattern flags; callers OR it with PM_PARSE_PATTERN_MULTI
+#define PM_PARSE_PATTERN_MULTI 2 // bit 1 of the parse_pattern flags
+
static pm_node_t *
-parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id);
+parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
+
+/**
+ * Add the newly created local to the list of captures for this pattern matching
+ * expression. If it is duplicated from a previous local, then we'll need to add
+ * an error to the parser.
+ *
+ * The capture is identified by the `capture` constant id, and `location` is
+ * the span of its name in the source. Names that start with an underscore are
+ * never recorded and never reported as duplicates. Otherwise, if `captures`
+ * already contains the id, a PM_ERR_PATTERN_CAPTURE_DUPLICATE error covering
+ * `location` is added to the parser; if it does not, the id is appended to
+ * `captures`.
+ */
+static void
+parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
+ // Skip this capture if it starts with an underscore.
+ if (peek_at(parser, location->start) == '_') return;
+
+ if (pm_constant_id_list_includes(captures, capture)) {
+ pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
+ } else {
+ pm_constant_id_list_append(captures, capture);
+ }
+}
/**
* Accept any number of constants joined by :: delimiters.
*/
static pm_node_t *
-parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
+parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
// Now, if there are any :: operators that follow, parse them as constant
// path nodes.
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
pm_token_t delimiter = parser->previous;
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
-
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
- node = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
+ node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
// If there is a [ or ( that follows, then this is part of a larger pattern
@@ -13140,19 +16220,21 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
accept1(parser, PM_TOKEN_NEWLINE);
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
- inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
+ expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
}
closing = parser->previous;
} else {
parser_lex(parser);
opening = parser->previous;
+ accept1(parser, PM_TOKEN_NEWLINE);
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
+ accept1(parser, PM_TOKEN_NEWLINE);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
}
closing = parser->previous;
@@ -13161,7 +16243,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
if (!inner) {
// If there was no inner pattern, then we have something like Foo() or
// Foo[]. In that case we'll create an array pattern with no requireds.
- return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
+ return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
}
// Now that we have the inner pattern, check to see if it's an array, find,
@@ -13180,7 +16262,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -13196,7 +16278,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -13212,7 +16294,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -13226,25 +16308,37 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
// attach our constant to it.
pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
pm_array_pattern_node_requireds_append(pattern_node, inner);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
/**
* Parse a rest pattern.
*/
static pm_splat_node_t *
-parse_pattern_rest(pm_parser_t *parser) {
+parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
assert(parser->previous.type == PM_TOKEN_USTAR);
pm_token_t operator = parser->previous;
pm_node_t *name = NULL;
// Rest patterns don't necessarily have a name associated with them. So we
- // will check for that here. If they do, then we'll add it to the local table
- // since this pattern will cause it to become a local variable.
+ // will check for that here. If they do, then we'll add it to the local
+ // table since this pattern will cause it to become a local variable.
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
pm_token_t identifier = parser->previous;
- pm_parser_local_add_token(parser, &identifier);
- name = (pm_node_t *) pm_local_variable_target_node_create(parser, &identifier);
+ pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
+
+ int depth;
+ if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+ pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
+ }
+
+ parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
+ name = UP(pm_local_variable_target_node_create(
+ parser,
+ &PM_LOCATION_TOKEN_VALUE(&identifier),
+ constant_id,
+ (uint32_t) (depth == -1 ? 0 : depth)
+ ));
}
// Finally we can return the created node.
@@ -13255,7 +16349,7 @@ parse_pattern_rest(pm_parser_t *parser) {
* Parse a keyword rest node.
*/
static pm_node_t *
-parse_pattern_keyword_rest(pm_parser_t *parser) {
+parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
assert(parser->current.type == PM_TOKEN_USTAR_STAR);
parser_lex(parser);
@@ -13263,65 +16357,170 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
pm_node_t *value = NULL;
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
- return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
+ return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
}
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
- pm_parser_local_add_token(parser, &parser->previous);
- value = (pm_node_t *) pm_local_variable_target_node_create(parser, &parser->previous);
+ pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
+
+ int depth;
+ if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+ pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
+ }
+
+ parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+ value = UP(pm_local_variable_target_node_create(
+ parser,
+ &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ constant_id,
+ (uint32_t) (depth == -1 ? 0 : depth)
+ ));
+ }
+
+ return UP(pm_assoc_splat_node_create(parser, value, &operator));
+}
+
+/**
+ * Check that the slice of the source given by the bounds parameters constitutes
+ * a valid local variable name.
+ *
+ * The slice runs from `start` up to (but not including) `end`. It is rejected
+ * when it is empty, when it does not begin with an identifier-start character,
+ * when that first character is uppercase, or when anything after it is not an
+ * identifier character. The uppercase test goes through the parser's own
+ * encoding if parser->encoding_changed is set, and through the UTF-8 check
+ * otherwise.
+ */
+static bool
+pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+ ptrdiff_t length = end - start;
+ if (length == 0) return false;
+
+ // First ensure that it starts with a valid identifier starting character.
+ size_t width = char_is_identifier_start(parser, start, end - start);
+ if (width == 0) return false;
+
+ // Next, ensure that it's not an uppercase character.
+ if (parser->encoding_changed) {
+ if (parser->encoding->isupper_char(start, length)) return false;
+ } else {
+ if (pm_encoding_utf_8_isupper_char(start, length)) return false;
+ }
+
+ // Next, iterate through all of the bytes of the string to ensure that they
+ // are all valid identifier characters.
+ const uint8_t *cursor = start + width;
+ while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
+ return cursor == end;
+}
+
+/**
+ * Create an implicit node for the value of a hash pattern that has omitted the
+ * value. This will use an implicit local variable target.
+ *
+ * The name of the target is taken from the value location of `key`. When that
+ * name is a valid local variable name, its depth is looked up through
+ * pm_parser_local_depth_constant_id. When it is not, a
+ * PM_ERR_PATTERN_HASH_KEY_LOCALS error is added over the whole key, plus a
+ * PM_ERR_INVALID_LOCAL_VARIABLE_WRITE error if the name ends in `!` or `?`.
+ * If no depth was found (it is still -1, which includes the invalid-name
+ * case), pm_parser_local_add is called for the name and the target is created
+ * with a depth of 0. The name is also passed to parse_pattern_capture along
+ * with `captures`. The returned node is an implicit node wrapping the local
+ * variable target.
+ */
+static pm_node_t *
+parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
+ const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
+
+ pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
+ int depth = -1;
+
+ if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
+ depth = pm_parser_local_depth_constant_id(parser, constant_id);
+ } else {
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
+
+ if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
+ }
+ }
+
+ if (depth == -1) {
+ pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
}
- return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
+ parse_pattern_capture(parser, captures, constant_id, value_loc);
+ pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
+ parser,
+ value_loc,
+ constant_id,
+ (uint32_t) (depth == -1 ? 0 : depth)
+ );
+
+ return UP(pm_implicit_node_create(parser, UP(target)));
+}
+
+/**
+ * Add a node to the list of keys for a hash pattern, and if it is a duplicate
+ * then add an error to the parser.
+ *
+ * The node is registered in `keys` through pm_static_literals_add. A non-NULL
+ * result from that call is treated as meaning the key was already present, in
+ * which case a PM_ERR_PATTERN_HASH_KEY_DUPLICATE error is added on `node`.
+ */
+static void
+parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
+ }
}
/**
* Parse a hash pattern.
*/
static pm_hash_pattern_node_t *
-parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
+parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
pm_node_list_t assocs = { 0 };
+ pm_static_literals_t keys = { 0 };
pm_node_t *rest = NULL;
- switch (PM_NODE_TYPE(first_assoc)) {
- case PM_ASSOC_NODE: {
- if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- // Here we have a value for the first assoc in the list, so we will
- // parse it now and update the first assoc.
- pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
+ switch (PM_NODE_TYPE(first_node)) {
+ case PM_ASSOC_SPLAT_NODE:
+ case PM_NO_KEYWORDS_PARAMETER_NODE:
+ rest = first_node;
+ break;
+ case PM_SYMBOL_NODE: {
+ if (pm_symbol_node_label_p(first_node)) {
+ parse_pattern_hash_key(parser, &keys, first_node);
+ pm_node_t *value;
+
+ if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
+ // If the key is immediately followed by a token that ends this
+ // entry, there is no explicit value, so we will create an
+ // implicit local variable target for the value.
+ value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+ } else {
+ // Here we have a value for the first assoc in the list, so
+ // we will parse it now.
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
+ }
- pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
- assoc->base.location.end = value->location.end;
- assoc->value = value;
- } else {
- pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
+ pm_token_t operator = not_provided(parser);
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
- if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
- const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
- pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
- }
+ pm_node_list_append(&assocs, assoc);
+ break;
}
+ }
+ PRISM_FALLTHROUGH
+ default: {
+ // If we get anything else, then this is an error. For this we'll
+ // create a missing node for the value and create an assoc node for
+ // the first node in the list.
+ pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
+ pm_parser_err_node(parser, first_node, diag_id);
+
+ pm_token_t operator = not_provided(parser);
+ pm_node_t *value = UP(pm_missing_node_create(parser, first_node->location.start, first_node->location.end));
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
- pm_node_list_append(&assocs, first_assoc);
+ pm_node_list_append(&assocs, assoc);
break;
}
- case PM_ASSOC_SPLAT_NODE:
- case PM_NO_KEYWORDS_PARAMETER_NODE:
- rest = first_assoc;
- break;
- default:
- assert(false);
- break;
}
// If there are any other assocs, then we'll parse them now.
while (accept1(parser, PM_TOKEN_COMMA)) {
// Here we need to break to support trailing commas.
- if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
+ // Trailing commas are not allowed to follow a rest pattern.
+ if (rest != NULL) {
+ pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
+ }
+
break;
}
if (match1(parser, PM_TOKEN_USTAR_STAR)) {
- pm_node_t *assoc = parse_pattern_keyword_rest(parser);
+ pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
if (rest == NULL) {
rest = assoc;
@@ -13330,19 +16529,36 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
pm_node_list_append(&assocs, assoc);
}
} else {
- expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+ pm_node_t *key;
+
+ if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
+ key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
+
+ if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
+ } else if (!pm_symbol_node_label_p(key)) {
+ pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
+ }
+ } else {
+ expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
+ key = UP(pm_symbol_node_label_create(parser, &parser->previous));
+ }
+
+ parse_pattern_hash_key(parser, &keys, key);
pm_node_t *value = NULL;
- if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
+ if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
+ value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
+ } else {
+ value = UP(pm_missing_node_create(parser, key->location.end, key->location.end));
+ }
} else {
- const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
- pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
}
pm_token_t operator = not_provided(parser);
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, &operator, value));
if (rest != NULL) {
pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
@@ -13353,8 +16569,9 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
}
pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
- free(assocs.nodes);
+ xfree(assocs.nodes);
+ pm_static_literals_free(&keys);
return node;
}
@@ -13362,18 +16579,25 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
* Parse a pattern expression primitive.
*/
static pm_node_t *
-parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
switch (parser->current.type) {
case PM_TOKEN_IDENTIFIER:
case PM_TOKEN_METHOD_NAME: {
parser_lex(parser);
- pm_token_t name = parser->previous;
- int depth = pm_parser_local_depth(parser, &name);
- if (depth < 0) {
- depth = 0;
- pm_parser_local_add_token(parser, &name);
+ pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
+
+ int depth;
+ if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+ pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- return (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &name, (uint32_t) depth);
+
+ parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+ return UP(pm_local_variable_target_node_create(
+ parser,
+ &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ constant_id,
+ (uint32_t) (depth == -1 ? 0 : depth)
+ ));
}
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
pm_token_t opening = parser->current;
@@ -13382,16 +16606,15 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
// If we have an empty array pattern, then we'll just return a new
// array pattern node.
- return (pm_node_t *)pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
+ return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
}
// Otherwise, we'll parse the inner pattern, then deal with it depending
// on the type it returns.
- pm_node_t *inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
+ pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
-
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
+ expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
pm_token_t closing = parser->previous;
switch (PM_NODE_TYPE(inner)) {
@@ -13404,7 +16627,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -13418,7 +16641,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -13429,7 +16652,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
pm_array_pattern_node_requireds_append(node, inner);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_BRACE_LEFT: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -13444,48 +16667,32 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
// pattern node.
node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
} else {
- pm_node_t *first_assoc;
+ pm_node_t *first_node;
switch (parser->current.type) {
- case PM_TOKEN_LABEL: {
+ case PM_TOKEN_LABEL:
parser_lex(parser);
-
- pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
- pm_token_t operator = not_provided(parser);
-
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
+ first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
break;
- }
case PM_TOKEN_USTAR_STAR:
- first_assoc = parse_pattern_keyword_rest(parser);
+ first_node = parse_pattern_keyword_rest(parser, captures);
break;
- case PM_TOKEN_STRING_BEGIN: {
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
- pm_token_t operator = not_provided(parser);
-
- if (!pm_symbol_node_label_p(key)) {
- pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
- }
-
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
+ case PM_TOKEN_STRING_BEGIN:
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
break;
- }
default: {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
parser_lex(parser);
- pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
-
- pm_missing_node_t *key = pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
- pm_token_t operator = not_provided(parser);
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
+ first_node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
break;
}
}
- node = parse_pattern_hash(parser, first_assoc);
+ node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
pm_token_t closing = parser->previous;
node->base.location.start = opening.start;
@@ -13496,7 +16703,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
}
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UDOT_DOT:
case PM_TOKEN_UDOT_DOT_DOT: {
@@ -13507,18 +16714,31 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
// expression as the right side of the range.
switch (parser->current.type) {
case PM_CASE_PRIMITIVE: {
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
default: {
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
- pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ pm_node_t *right = UP(pm_missing_node_create(parser, operator.start, operator.end));
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
}
}
case PM_CASE_PRIMITIVE: {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, diag_id);
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
+
+ // If we found a label, we need to immediately return to the caller.
+ if (pm_symbol_node_label_p(node)) return node;
+
+ // Call nodes (arithmetic operations) are not allowed in patterns
+ if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
+ pm_parser_err_node(parser, node, diag_id);
+ pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
+
+ pm_node_unreference(parser, node);
+ pm_node_destroy(parser, node);
+ return UP(missing_node);
+ }
// Now that we have a primitive, we need to check if it's part of a range.
if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
@@ -13529,11 +16749,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
// node. Otherwise, we'll create an endless range.
switch (parser->current.type) {
case PM_CASE_PRIMITIVE: {
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
- return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+ return UP(pm_range_node_create(parser, node, &operator, right));
}
default:
- return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
+ return UP(pm_range_node_create(parser, node, &operator, NULL));
}
}
@@ -13548,43 +16768,44 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
switch (parser->current.type) {
case PM_TOKEN_IDENTIFIER: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) parse_variable(parser);
+ pm_node_t *variable = UP(parse_variable(parser));
+
if (variable == NULL) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
- variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
+ variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
}
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_INSTANCE_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_GLOBAL_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_NUMBERED_REFERENCE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_BACK_REFERENCE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_PARENTHESIS_LEFT: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -13593,19 +16814,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
pm_token_t lparen = parser->current;
parser_lex(parser);
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
- return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
+ return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
}
default: {
// If we get here, then we have a pin operator followed by something
// not understood. We'll create a missing node and return that.
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
- pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end));
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
}
}
@@ -13614,34 +16835,56 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
parser_lex(parser);
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
- pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
+ pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
- return parse_pattern_constant_path(parser, (pm_node_t *)node);
+ return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
}
case PM_TOKEN_CONSTANT: {
pm_token_t constant = parser->current;
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
- return parse_pattern_constant_path(parser, node);
+ pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
+ return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
}
default:
pm_parser_err_current(parser, diag_id);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
}
}
+/**
+ * Callback handed to pm_visit_node by parse_pattern_alternation_error. `data`
+ * is the parser, passed through as an opaque pointer. When the visited node is
+ * a local variable target, an error is added at that node's location and false
+ * is returned; for every other node type true is returned.
+ *
+ * NOTE(review): presumably the return value tells pm_visit_node whether to
+ * keep descending into the node's children; confirm against pm_visit_node.
+ */
+static bool
+parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_LOCAL_VARIABLE_TARGET_NODE:
+ pm_parser_err((pm_parser_t *) data, node->location.start, node->location.end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
+ return false;
+ default:
+ return true;
+ }
+}
+
+/**
+ * When we get here, we know that we already have a syntax error, because we
+ * know we have captured a variable and that we are in an alternation.
+ *
+ * This hands the given pattern node to pm_visit_node with
+ * parse_pattern_alternation_error_each as the callback and the parser as the
+ * callback data, so that the callback can add an error for the local variable
+ * targets it is invoked on.
+ */
+static void
+parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
+ pm_visit_node(node, parse_pattern_alternation_error_each, parser);
+}
+
/**
* Parse any number of primitives joined by alternation and ended optionally by
* assignment.
*/
static pm_node_t *
-parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
- pm_node_t *node = NULL;
+parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ pm_node_t *node = first_node;
+ bool alternation = false;
- do {
- pm_token_t operator = parser->previous;
+ while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
+ if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
+ parse_pattern_alternation_error(parser, node);
+ }
switch (parser->current.type) {
case PM_TOKEN_IDENTIFIER:
@@ -13653,55 +16896,76 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
case PM_TOKEN_UDOT_DOT:
case PM_TOKEN_UDOT_DOT_DOT:
case PM_CASE_PRIMITIVE: {
- if (node == NULL) {
- node = parse_pattern_primitive(parser, diag_id);
+ if (!alternation) {
+ node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
} else {
- pm_node_t *right = parse_pattern_primitive(parser, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE);
- node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+ pm_token_t operator = parser->previous;
+ pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
+
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
}
break;
}
- case PM_TOKEN_PARENTHESIS_LEFT: {
+ case PM_TOKEN_PARENTHESIS_LEFT:
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
+ pm_token_t operator = parser->previous;
+ pm_token_t opening = parser->current;
parser_lex(parser);
- if (node != NULL) {
- pm_node_destroy(parser, node);
+
+ pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
+ accept1(parser, PM_TOKEN_NEWLINE);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
+ pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
+
+ if (!alternation) {
+ node = right;
+ } else {
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
}
- node = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
break;
}
default: {
pm_parser_err_current(parser, diag_id);
- pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ pm_node_t *right = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
- if (node == NULL) {
+ if (!alternation) {
node = right;
} else {
- node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
}
break;
}
}
- } while (accept1(parser, PM_TOKEN_PIPE));
+ }
// If we have an =>, then we are assigning this pattern to a variable.
// In this case we should create an assignment node.
while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
pm_token_t operator = parser->previous;
-
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
- pm_token_t identifier = parser->previous;
- int depth = pm_parser_local_depth(parser, &identifier);
- if (depth < 0) {
- depth = 0;
- pm_parser_local_add_token(parser, &identifier);
+
+ pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
+ int depth;
+
+ if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
+ pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &identifier, (uint32_t) depth);
- node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
+ parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+ pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
+ parser,
+ &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ constant_id,
+ (uint32_t) (depth == -1 ? 0 : depth)
+ );
+
+ node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
}
return node;
@@ -13711,7 +16975,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
* Parse a pattern matching expression.
*/
static pm_node_t *
-parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id) {
+parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
pm_node_t *node = NULL;
bool leading_rest = false;
@@ -13720,85 +16984,121 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
switch (parser->current.type) {
case PM_TOKEN_LABEL: {
parser_lex(parser);
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
- pm_token_t operator = not_provided(parser);
+ pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
+ node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
+
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
+ }
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
+ return node;
}
case PM_TOKEN_USTAR_STAR: {
- node = parse_pattern_keyword_rest(parser);
- return (pm_node_t *) parse_pattern_hash(parser, node);
+ node = parse_pattern_keyword_rest(parser, captures);
+ node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
+
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
+ }
+
+ return node;
+ }
+ case PM_TOKEN_STRING_BEGIN: {
+ // We need special handling for string beginnings because they could
+ // be dynamic symbols leading to hash patterns.
+ node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
+
+ if (pm_symbol_node_label_p(node)) {
+ node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
+
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
+ }
+
+ return node;
+ }
+
+ node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
+ break;
}
case PM_TOKEN_USTAR: {
- if (top_pattern) {
+ if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
parser_lex(parser);
- node = (pm_node_t *) parse_pattern_rest(parser);
+ node = UP(parse_pattern_rest(parser, captures));
leading_rest = true;
break;
}
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
default:
- node = parse_pattern_primitives(parser, diag_id);
+ node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
break;
}
// If we got a dynamic label symbol, then we need to treat it like the
// beginning of a hash pattern.
if (pm_symbol_node_label_p(node)) {
- pm_token_t operator = not_provided(parser);
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
+ return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
}
- if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
- // If we have a comma, then we are now parsing either an array pattern or a
- // find pattern. We need to parse all of the patterns, put them into a big
- // list, and then determine which type of node we have.
+ if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
+ // If we have a comma, then we are now parsing either an array pattern
+ // or a find pattern. We need to parse all of the patterns, put them
+ // into a big list, and then determine which type of node we have.
pm_node_list_t nodes = { 0 };
pm_node_list_append(&nodes, node);
// Gather up all of the patterns into the list.
while (accept1(parser, PM_TOKEN_COMMA)) {
// Break early here in case we have a trailing comma.
- if (match5(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+ if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
+ node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
pm_node_list_append(&nodes, node);
+ trailing_rest = true;
break;
}
if (accept1(parser, PM_TOKEN_USTAR)) {
- node = (pm_node_t *) parse_pattern_rest(parser);
+ node = UP(parse_pattern_rest(parser, captures));
- // If we have already parsed a splat pattern, then this is an error. We
- // will continue to parse the rest of the patterns, but we will indicate
- // it as an error.
+ // If we have already parsed a splat pattern, then this is an
+ // error. We will continue to parse the rest of the patterns,
+ // but we will indicate it as an error.
if (trailing_rest) {
pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
}
trailing_rest = true;
} else {
- node = parse_pattern_primitives(parser, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
+ node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
}
pm_node_list_append(&nodes, node);
}
- // If the first pattern and the last pattern are rest patterns, then we will
- // call this a find pattern, regardless of how many rest patterns are in
- // between because we know we already added the appropriate errors.
- // Otherwise we will create an array pattern.
- if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
- node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
+ // If the first pattern and the last pattern are rest patterns, then we
+ // will call this a find pattern, regardless of how many rest patterns
+ // are in between because we know we already added the appropriate
+ // errors. Otherwise we will create an array pattern.
+ if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
+ node = UP(pm_find_pattern_node_create(parser, &nodes));
+
+ if (nodes.size == 2) {
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
+ }
} else {
- node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
+ node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
+
+ if (leading_rest && trailing_rest) {
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
+ }
}
- free(nodes.nodes);
+ xfree(nodes.nodes);
} else if (leading_rest) {
- // Otherwise, if we parsed a single splat pattern, then we know we have an
- // array pattern, so we can go ahead and create that node.
- node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
+ // Otherwise, if we parsed a single splat pattern, then we know we have
+ // an array pattern, so we can go ahead and create that node.
+ node = UP(pm_array_pattern_node_rest_create(parser, node));
}
return node;
@@ -13812,14 +17112,24 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
static inline void
parse_negative_numeric(pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
- case PM_INTEGER_NODE:
- case PM_FLOAT_NODE:
- node->location.start--;
+ case PM_INTEGER_NODE: {
+ pm_integer_node_t *cast = (pm_integer_node_t *) node;
+ cast->base.location.start--;
+ cast->value.negative = true;
break;
- case PM_RATIONAL_NODE:
- node->location.start--;
- parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
+ }
+ case PM_FLOAT_NODE: {
+ pm_float_node_t *cast = (pm_float_node_t *) node;
+ cast->base.location.start--;
+ cast->value = -cast->value;
break;
+ }
+ case PM_RATIONAL_NODE: {
+ pm_rational_node_t *cast = (pm_rational_node_t *) node;
+ cast->base.location.start--;
+ cast->numerator.negative = true;
+ break;
+ }
case PM_IMAGINARY_NODE:
node->location.start--;
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
@@ -13831,212 +17141,288 @@ parse_negative_numeric(pm_node_t *node) {
}
/**
- * Returns a string content token at a particular location that is empty.
+ * Append an error to the error list on the parser using the given diagnostic
+ * ID. This function is a specialization that handles formatting the specific
+ * kind of error that is being appended.
+ *
+ * The diagnostic ID selects which token is passed to the formatting macro and
+ * which format arguments accompany it:
+ *
+ * - PM_ERR_HASH_KEY: the previous token, with the human-readable name of the
+ *   previous token's type.
+ * - PM_ERR_HASH_VALUE, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
+ *   PM_ERR_UNARY_DISALLOWED, PM_ERR_EXPECT_ARGUMENT: the current token, with
+ *   the human-readable name of the current token's type.
+ * - PM_ERR_UNARY_RECEIVER: the previous token, with the human-readable name of
+ *   the current token's type ("end-of-input" when the current token is EOF)
+ *   and the first byte of the previous token.
+ * - Any other ID: no formatting; the error is added through
+ *   pm_parser_err_previous.
*/
-static pm_token_t
-parse_strings_empty_content(const uint8_t *location) {
- return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
+static void
+pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+ switch (diag_id) {
+ case PM_ERR_HASH_KEY: {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
+ break;
+ }
+ case PM_ERR_HASH_VALUE:
+ case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+ break;
+ }
+ case PM_ERR_UNARY_RECEIVER: {
+ const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
+ break;
+ }
+ case PM_ERR_UNARY_DISALLOWED:
+ case PM_ERR_EXPECT_ARGUMENT: {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+ break;
+ }
+ default:
+ pm_parser_err_previous(parser, diag_id);
+ break;
+ }
}
/**
- * Parse a set of strings that could be concatenated together.
+ * Ensures that the current retry token is valid in the current context.
*/
-static inline pm_node_t *
-parse_strings(pm_parser_t *parser, pm_node_t *current) {
- assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
-
- bool concating = false;
- bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
-
- while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
- pm_node_t *node = NULL;
-
- // Here we have found a string literal. We'll parse it and add it to
- // the list of strings.
- const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
- assert(lex_mode->mode == PM_LEX_STRING);
- bool lex_interpolation = lex_mode->as.string.interpolation;
-
- pm_token_t opening = parser->current;
- parser_lex(parser);
-
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
- // If we get here, then we have an end immediately after a
- // start. In that case we'll create an empty content token and
- // return an uninterpolated string.
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
- pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
-
- pm_string_shared_init(&string->unescaped, content.start, content.end);
- node = (pm_node_t *) string;
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
- // If we get here, then we have an end of a label immediately
- // after a start. In that case we'll create an empty symbol
- // node.
- pm_token_t opening = not_provided(parser);
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
-
- pm_string_shared_init(&symbol->unescaped, content.start, content.end);
- node = (pm_node_t *) symbol;
- } else if (!lex_interpolation) {
- // If we don't accept interpolation then we expect the string to
- // start with a single string content node.
- pm_string_t unescaped;
- pm_token_t content;
- if (match1(parser, PM_TOKEN_EOF)) {
- unescaped = PM_STRING_EMPTY;
- content = not_provided(parser);
- } else {
- unescaped = parser->current_string;
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
- content = parser->previous;
- }
+static void
+parse_retry(pm_parser_t *parser, const pm_node_t *node) {
+ // Walk up the context stack starting at the current context. `context`
+ // remembers the most recent else or ensure clause passed on the way up so
+ // that the error added on `node` at a disallowing context can be specific.
+ // If the walk runs off the top of the stack, no error is added.
+#define CONTEXT_NONE 0
+#define CONTEXT_THROUGH_ENSURE 1
+#define CONTEXT_THROUGH_ELSE 2
- // It is unfortunately possible to have multiple string content
- // nodes in a row in the case that there's heredoc content in
- // the middle of the string, like this cursed example:
- //
- // <<-END+'b
- // a
- // END
- // c'+'d'
- //
- // In that case we need to switch to an interpolated string to
- // be able to contain all of the parts.
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- pm_node_list_t parts = { 0 };
+ pm_context_node_t *context_node = parser->current_context;
+ int context = CONTEXT_NONE;
- pm_token_t delimiters = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
- pm_node_list_append(&parts, part);
+ while (context_node != NULL) {
+ switch (context_node->context) {
+ case PM_CONTEXT_BEGIN_RESCUE:
+ case PM_CONTEXT_BLOCK_RESCUE:
+ case PM_CONTEXT_CLASS_RESCUE:
+ case PM_CONTEXT_DEF_RESCUE:
+ case PM_CONTEXT_LAMBDA_RESCUE:
+ case PM_CONTEXT_MODULE_RESCUE:
+ case PM_CONTEXT_SCLASS_RESCUE:
+ case PM_CONTEXT_DEFINED:
+ case PM_CONTEXT_RESCUE_MODIFIER:
+ // These are the good cases. We're allowed to have a retry here.
+ return;
+ case PM_CONTEXT_CLASS:
+ case PM_CONTEXT_DEF:
+ case PM_CONTEXT_DEF_PARAMS:
+ case PM_CONTEXT_MAIN:
+ case PM_CONTEXT_MODULE:
+ case PM_CONTEXT_PREEXE:
+ case PM_CONTEXT_SCLASS:
+ // These are the bad cases. We're not allowed to have a retry in
+ // these contexts.
+ if (context == CONTEXT_NONE) {
+ pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
+ } else if (context == CONTEXT_THROUGH_ENSURE) {
+ pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
+ } else if (context == CONTEXT_THROUGH_ELSE) {
+ pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
+ }
+ return;
+ case PM_CONTEXT_BEGIN_ELSE:
+ case PM_CONTEXT_BLOCK_ELSE:
+ case PM_CONTEXT_CLASS_ELSE:
+ case PM_CONTEXT_DEF_ELSE:
+ case PM_CONTEXT_LAMBDA_ELSE:
+ case PM_CONTEXT_MODULE_ELSE:
+ case PM_CONTEXT_SCLASS_ELSE:
+ // These are also bad cases, but with a more specific error
+ // message indicating the else.
+ context = CONTEXT_THROUGH_ELSE;
+ break;
+ case PM_CONTEXT_BEGIN_ENSURE:
+ case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_CLASS_ENSURE:
+ case PM_CONTEXT_DEF_ENSURE:
+ case PM_CONTEXT_LAMBDA_ENSURE:
+ case PM_CONTEXT_MODULE_ENSURE:
+ case PM_CONTEXT_SCLASS_ENSURE:
+ // These are also bad cases, but with a more specific error
+ // message indicating the ensure.
+ context = CONTEXT_THROUGH_ENSURE;
+ break;
+ case PM_CONTEXT_NONE:
+ // This case should never happen.
+ assert(false && "unreachable");
+ break;
+ case PM_CONTEXT_BEGIN:
+ case PM_CONTEXT_BLOCK_BRACES:
+ case PM_CONTEXT_BLOCK_KEYWORDS:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
+ case PM_CONTEXT_CASE_IN:
+ case PM_CONTEXT_CASE_WHEN:
+ case PM_CONTEXT_DEFAULT_PARAMS:
+ case PM_CONTEXT_ELSE:
+ case PM_CONTEXT_ELSIF:
+ case PM_CONTEXT_EMBEXPR:
+ case PM_CONTEXT_FOR_INDEX:
+ case PM_CONTEXT_FOR:
+ case PM_CONTEXT_IF:
+ case PM_CONTEXT_LAMBDA_BRACES:
+ case PM_CONTEXT_LAMBDA_DO_END:
+ case PM_CONTEXT_LOOP_PREDICATE:
+ case PM_CONTEXT_MULTI_TARGET:
+ case PM_CONTEXT_PARENS:
+ case PM_CONTEXT_POSTEXE:
+ case PM_CONTEXT_PREDICATE:
+ case PM_CONTEXT_TERNARY:
+ case PM_CONTEXT_UNLESS:
+ case PM_CONTEXT_UNTIL:
+ case PM_CONTEXT_WHILE:
+ // In these contexts we should continue walking up the list of
+ // contexts.
+ break;
+ }
- do {
- part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
- pm_node_list_append(&parts, part);
- parser_lex(parser);
- } while (match1(parser, PM_TOKEN_STRING_CONTENT));
+ context_node = context_node->prev;
+ }
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
- } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
- } else if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
- }
- } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- // In this case we've hit string content so we know the string
- // at least has something in it. We'll need to check if the
- // following token is the end (in which case we can return a
- // plain string) or if it's not then it has interpolation.
- pm_token_t content = parser->current;
- pm_string_t unescaped = parser->current_string;
- parser_lex(parser);
+ // Undefine exactly the names defined at the top of this function so that
+ // none of the helper macros outlive it.
+#undef CONTEXT_NONE
+#undef CONTEXT_THROUGH_ENSURE
+#undef CONTEXT_THROUGH_ELSE
+}
- if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
- pm_node_flag_set(node, parse_unescaped_encoding(parser));
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
- } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
- } else {
- // If we get here, then we have interpolation so we'll need
- // to create a string or symbol node with interpolation.
- pm_node_list_t parts = { 0 };
- pm_token_t string_opening = not_provided(parser);
- pm_token_t string_closing = not_provided(parser);
+/**
+ * Ensures that the current yield token is valid in the current context.
+ *
+ * Walks up the context stack from the parser's current context. Reaching a
+ * method definition context (or a defined? context) ends the walk with no
+ * error; reaching a class, module, singleton class, or main context adds
+ * PM_ERR_INVALID_YIELD on the given node. Every other context is skipped. If
+ * the walk runs off the top of the stack, no error is added.
+ */
+static void
+parse_yield(pm_parser_t *parser, const pm_node_t *node) {
+ pm_context_node_t *context_node = parser->current_context;
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
- pm_node_flag_set(part, parse_unescaped_encoding(parser));
- pm_node_list_append(&parts, part);
+ while (context_node != NULL) {
+ switch (context_node->context) {
+ case PM_CONTEXT_DEF:
+ case PM_CONTEXT_DEF_PARAMS:
+ case PM_CONTEXT_DEFINED:
+ case PM_CONTEXT_DEF_ENSURE:
+ case PM_CONTEXT_DEF_RESCUE:
+ case PM_CONTEXT_DEF_ELSE:
+ // These are the good cases. We're allowed to have a yield in
+ // these contexts.
+ return;
+ case PM_CONTEXT_CLASS:
+ case PM_CONTEXT_CLASS_ENSURE:
+ case PM_CONTEXT_CLASS_RESCUE:
+ case PM_CONTEXT_CLASS_ELSE:
+ case PM_CONTEXT_MAIN:
+ case PM_CONTEXT_MODULE:
+ case PM_CONTEXT_MODULE_ENSURE:
+ case PM_CONTEXT_MODULE_RESCUE:
+ case PM_CONTEXT_MODULE_ELSE:
+ case PM_CONTEXT_SCLASS:
+ case PM_CONTEXT_SCLASS_RESCUE:
+ case PM_CONTEXT_SCLASS_ENSURE:
+ case PM_CONTEXT_SCLASS_ELSE:
+ // These are the bad cases. We're not allowed to have a yield in
+ // these contexts.
+ pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
+ return;
+ case PM_CONTEXT_NONE:
+ // This case should never happen.
+ assert(false && "unreachable");
+ break;
+ case PM_CONTEXT_BEGIN:
+ case PM_CONTEXT_BEGIN_ELSE:
+ case PM_CONTEXT_BEGIN_ENSURE:
+ case PM_CONTEXT_BEGIN_RESCUE:
+ case PM_CONTEXT_BLOCK_BRACES:
+ case PM_CONTEXT_BLOCK_KEYWORDS:
+ case PM_CONTEXT_BLOCK_ELSE:
+ case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
+ case PM_CONTEXT_BLOCK_RESCUE:
+ case PM_CONTEXT_CASE_IN:
+ case PM_CONTEXT_CASE_WHEN:
+ case PM_CONTEXT_DEFAULT_PARAMS:
+ case PM_CONTEXT_ELSE:
+ case PM_CONTEXT_ELSIF:
+ case PM_CONTEXT_EMBEXPR:
+ case PM_CONTEXT_FOR_INDEX:
+ case PM_CONTEXT_FOR:
+ case PM_CONTEXT_IF:
+ case PM_CONTEXT_LAMBDA_BRACES:
+ case PM_CONTEXT_LAMBDA_DO_END:
+ case PM_CONTEXT_LAMBDA_ELSE:
+ case PM_CONTEXT_LAMBDA_ENSURE:
+ case PM_CONTEXT_LAMBDA_RESCUE:
+ case PM_CONTEXT_LOOP_PREDICATE:
+ case PM_CONTEXT_MULTI_TARGET:
+ case PM_CONTEXT_PARENS:
+ case PM_CONTEXT_POSTEXE:
+ case PM_CONTEXT_PREDICATE:
+ case PM_CONTEXT_PREEXE:
+ case PM_CONTEXT_RESCUE_MODIFIER:
+ case PM_CONTEXT_TERNARY:
+ case PM_CONTEXT_UNLESS:
+ case PM_CONTEXT_UNTIL:
+ case PM_CONTEXT_WHILE:
+ // In these contexts we should continue walking up the list of
+ // contexts.
+ break;
+ }
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
- pm_node_list_append(&parts, part);
- }
- }
+ context_node = context_node->prev;
+ }
+}
- if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
- } else if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
- }
- }
- } else {
- // If we get here, then the first part of the string is not plain
- // string content, in which case we need to parse the string as an
- // interpolated string.
- pm_node_list_t parts = { 0 };
- pm_node_t *part;
+/**
+ * This struct is used to pass information between the regular expression parser
+ * and the error callback. It is handed to pm_regexp_parse as the opaque data
+ * pointer and cast back inside parse_regular_expression_error.
+ */
+typedef struct {
+ /** The parser that we are parsing the regular expression for. */
+ pm_parser_t *parser;
- while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
- pm_node_list_append(&parts, part);
- }
- }
+ /** The start of the regular expression node's location in the source. */
+ const uint8_t *start;
- if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
- } else if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
- }
- }
+ /** The end of the regular expression node's location in the source. */
+ const uint8_t *end;
- if (current == NULL) {
- // If the node we just parsed is a symbol node, then we can't
- // concatenate it with anything else, so we can now return that
- // node.
- if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
- return node;
- }
+ /**
+ * Whether or not the source of the regular expression is shared. This
+ * impacts the location of error messages, because if it is shared then we
+ * can use the location reported by the regular expression parser directly
+ * and if it is not, then we use the bounds of the regular expression
+ * itself (the start and end fields above).
+ */
+ bool shared;
+} parse_regular_expression_error_data_t;
- // If we don't already have a node, then it's fine and we can just
- // set the result to be the node we just parsed.
- current = node;
- } else {
- // Otherwise we need to check the type of the node we just parsed.
- // If it cannot be concatenated with the previous node, then we'll
- // need to add a syntax error.
- if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
- pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
- }
+/**
+ * This callback is called when the regular expression parser encounters a
+ * syntax error.
+ *
+ * `data` is a parse_regular_expression_error_data_t. When its `shared` flag is
+ * set, the error is placed at the `start`/`end` bounds passed to this
+ * callback; otherwise it is placed at the bounds stored in the callback data.
+ * `message` is used as the format argument of PM_ERR_REGEXP_PARSE_ERROR.
+ */
+static void
+parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
+ parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
+ pm_location_t location;
- // If we haven't already created our container for concatenation,
- // we'll do that now.
- if (!concating) {
- concating = true;
- pm_token_t bounds = not_provided(parser);
+ if (callback_data->shared) {
+ location = (pm_location_t) { .start = start, .end = end };
+ } else {
+ location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
+ }
- pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
- pm_interpolated_string_node_append(container, current);
- current = (pm_node_t *) container;
- }
+ PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
+}
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
- }
- }
+/**
+ * Parse the errors for the regular expression and add them to the parser.
+ *
+ * Runs pm_regexp_parse over the node's unescaped source, passing whether the
+ * node has the extended flag set, and registers
+ * parse_regular_expression_error as the error callback. The callback data
+ * carries the parser, the node's location bounds, and whether the unescaped
+ * string is of type PM_STRING_SHARED.
+ */
+static void
+parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
+ const pm_string_t *unescaped = &node->unescaped;
+ parse_regular_expression_error_data_t error_data = {
+ .parser = parser,
+ .start = node->base.location.start,
+ .end = node->base.location.end,
+ .shared = unescaped->type == PM_STRING_SHARED
+ };
- return current;
+ pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
}
/**
* Parse an expression that begins with the previous node that we just lexed.
*/
static inline pm_node_t *
-parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
+parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
switch (parser->current.type) {
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
parser_lex(parser);
@@ -14046,14 +17432,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
bool parsed_bare_hash = false;
while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
+ bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
+
// Handle the case where we don't have a comma and we have a
// newline followed by a right bracket.
- if (accept1(parser, PM_TOKEN_NEWLINE) && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
+ if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
break;
}
- if (pm_array_node_size(array) != 0) {
- expect1(parser, PM_TOKEN_COMMA, PM_ERR_ARRAY_SEPARATOR);
+ // Ensure that we have a comma between elements in the array.
+ if (array->elements.size > 0) {
+ if (accept1(parser, PM_TOKEN_COMMA)) {
+ // If there was a comma but we also accepted a newline,
+ // then this is a syntax error.
+ if (accepted_newline) {
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+ }
+ } else {
+ // If there was no comma, then we need to add a syntax
+ // error.
+ const uint8_t *location = parser->previous.end;
+ PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
+
+ parser->previous.start = location;
+ parser->previous.type = PM_TOKEN_MISSING;
+ }
}
// If we have a right bracket immediately following a comma,
@@ -14068,29 +17471,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *expression = NULL;
if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
- }
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
} else {
- expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ element = UP(pm_splat_node_create(parser, &operator, expression));
} else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
if (parsed_bare_hash) {
pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
}
- pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
- element = (pm_node_t *)hash;
+ element = UP(pm_keyword_hash_node_create(parser));
+ pm_static_literals_t hash_keys = { 0 };
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
- parse_assocs(parser, (pm_node_t *) hash);
+ parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
}
+ pm_static_literals_free(&hash_keys);
parsed_bare_hash = true;
} else {
- element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
+ element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
if (parsed_bare_hash) {
@@ -14098,6 +17500,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
+ pm_static_literals_t hash_keys = { 0 };
+ pm_hash_key_static_literals_add(parser, &hash_keys, element);
pm_token_t operator;
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
@@ -14106,15 +17510,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
operator = not_provided(parser);
}
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, &operator, value));
pm_keyword_hash_node_elements_append(hash, assoc);
- element = (pm_node_t *)hash;
+ element = UP(hash);
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
- parse_assocs(parser, (pm_node_t *) hash);
+ parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
}
+ pm_static_literals_free(&hash_keys);
parsed_bare_hash = true;
}
}
@@ -14124,38 +17529,73 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_ARRAY_TERM);
+
+ if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ }
+
pm_array_node_close_set(array, &parser->previous);
pm_accepts_block_stack_pop(parser);
- return (pm_node_t *) array;
+ return UP(array);
}
case PM_TOKEN_PARENTHESIS_LEFT:
case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
pm_token_t opening = parser->current;
+ pm_node_flags_t flags = 0;
+
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
parser_lex(parser);
- while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
+ while (true) {
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+ break;
+ }
+ }
// If this is the end of the file or we match a right parenthesis, then
// we have an empty parentheses node, and we can immediately return.
if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
+
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
+ return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags));
}
// Otherwise, we're going to parse the first statement in the list
// of statements within the parentheses.
pm_accepts_block_stack_push(parser, true);
context_push(parser, PM_CONTEXT_PARENS);
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
context_pop(parser);
// Determine if this statement is followed by a terminator. In the
// case of a single statement, this is fine. But in the case of
// multiple statements it's required.
- bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+ bool terminator_found = false;
+
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ terminator_found = true;
+ flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
+ terminator_found = true;
+ }
+
if (terminator_found) {
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+ while (true) {
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+ break;
+ }
+ }
}
// If we hit a right parenthesis, then we're done parsing the
@@ -14165,9 +17605,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
lex_state_set(parser, PM_LEX_STATE_ENDARG);
}
+
parser_lex(parser);
pm_accepts_block_stack_pop(parser);
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
// If we have a single statement and are ending on a right
// parenthesis, then we need to check if this is possibly a
@@ -14189,42 +17633,62 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
multi_target->base.location.start = lparen_loc.start;
multi_target->base.location.end = rparen_loc.end;
- if (match1(parser, PM_TOKEN_COMMA)) {
- if (binding_power == PM_BINDING_POWER_STATEMENT) {
- return parse_targets_validate(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX);
- }
- return (pm_node_t *) multi_target;
+ pm_node_t *result;
+ if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
+ result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ accept1(parser, PM_TOKEN_NEWLINE);
+ } else {
+ result = UP(multi_target);
}
- return parse_target_validate(parser, (pm_node_t *) multi_target);
+ if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+ // All set, this is explicitly allowed by the parent
+ // context.
+ } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
+ // All set, we're inside a for loop and we're parsing
+ // multiple targets.
+ } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
+ // Multi targets are not allowed when it's not a
+ // statement level.
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ // Multi targets must be followed by an equal sign in
+ // order to be valid (or a right parenthesis if they are
+ // nested).
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ }
+
+ return result;
}
// If we have a single statement and are ending on a right parenthesis
// and we didn't return a multiple assignment node, then we can return a
// regular parentheses node now.
pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(statements, statement);
+ pm_statements_node_body_append(parser, statements, statement, true);
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
+ return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
}
// If we have more than one statement in the set of parentheses,
// then we are going to parse all of them as a list of statements.
// We'll do that here.
context_push(parser, PM_CONTEXT_PARENS);
+ flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+
pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(statements, statement);
+ pm_statements_node_body_append(parser, statements, statement, true);
// If we didn't find a terminator and we didn't find a right
// parenthesis, then this is a syntax error.
- if (!terminator_found) {
- pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
+ if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
}
// Parse each statement within the parentheses.
while (true) {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
- pm_statements_node_body_append(statements, node);
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+ pm_statements_node_body_append(parser, statements, node, true);
// If we're recovering from a syntax error, then we need to stop
// parsing the statements now.
@@ -14246,8 +17710,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
break;
- } else {
- pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
+ // If we're at the end of the file, then we're going to add
+ // an error after this for the ) anyway.
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
}
}
@@ -14255,53 +17721,111 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_accepts_block_stack_pop(parser);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
+ // When we're parsing multi targets, we allow them to be followed by
+ // a right parenthesis if they are at the statement level. This is
+ // only possible if they are the final statement in a parentheses.
+ // We need to explicitly reject that here.
+ {
+ pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
+
+ if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+ pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
+
+ statement = UP(multi_target);
+ statements->body.nodes[statements->body.size - 1] = statement;
+ }
+
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
+ const uint8_t *offset = statement->location.end;
+ pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
+ pm_node_t *value = UP(pm_missing_node_create(parser, offset, offset));
+
+ statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
+ statements->body.nodes[statements->body.size - 1] = statement;
+
+ pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ }
+ }
+
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
+ pm_void_statements_check(parser, statements, true);
+ return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
}
case PM_TOKEN_BRACE_LEFT: {
+ // If we were passed a current_hash_keys via the parser, then that
+ // means we're already parsing a hash and we want to share the set
+ // of hash keys with this inner hash we're about to parse for the
+ // sake of warnings. We'll set it to NULL after we grab it to make
+ // sure subsequent expressions don't use it. Effectively this is a
+ // way of getting around passing it to every call to
+ // parse_expression.
+ pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
+ parser->current_hash_keys = NULL;
+
pm_accepts_block_stack_push(parser, true);
parser_lex(parser);
- pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
+
+ pm_token_t opening = parser->previous;
+ pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
- parse_assocs(parser, (pm_node_t *) node);
+ if (current_hash_keys != NULL) {
+ parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
+ } else {
+ pm_static_literals_t hash_keys = { 0 };
+ parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
+ pm_static_literals_free(&hash_keys);
+ }
+
accept1(parser, PM_TOKEN_NEWLINE);
}
pm_accepts_block_stack_pop(parser);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
pm_hash_node_closing_loc_set(node, &parser->previous);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_CHARACTER_LITERAL: {
- parser_lex(parser);
-
- pm_token_t opening = parser->previous;
- opening.type = PM_TOKEN_STRING_BEGIN;
- opening.end = opening.start + 1;
-
- pm_token_t content = parser->previous;
- content.type = PM_TOKEN_STRING_CONTENT;
- content.start = content.start + 1;
-
pm_token_t closing = not_provided(parser);
- pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
+ pm_node_t *node = UP(pm_string_node_create_current_string(
+ parser,
+ &(pm_token_t) {
+ .type = PM_TOKEN_STRING_BEGIN,
+ .start = parser->current.start,
+ .end = parser->current.start + 1
+ },
+ &(pm_token_t) {
+ .type = PM_TOKEN_STRING_CONTENT,
+ .start = parser->current.start + 1,
+ .end = parser->current.end
+ },
+ &closing
+ ));
+
pm_node_flag_set(node, parse_unescaped_encoding(parser));
+ // Skip past the character literal here, since now we have handled
+ // parser->explicit_encoding correctly.
+ parser_lex(parser);
+
// Characters can be followed by strings in which case they are
// automatically concatenated.
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
- return parse_strings(parser, node);
+ return parse_strings(parser, node, false, (uint16_t) (depth + 1));
}
return node;
}
case PM_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return node;
@@ -14315,34 +17839,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (
match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
+ match1(parser, PM_TOKEN_BRACE_LEFT)
) {
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call);
- return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
+ parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
}
- pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
// If we get here, then we have a comma immediately following a
// constant, so we're going to parse this as a multiple assignment.
- node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return node;
}
case PM_TOKEN_UCOLON_COLON: {
parser_lex(parser);
-
pm_token_t delimiter = parser->previous;
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
- pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter, constant);
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+ pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
- node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return node;
@@ -14352,47 +17875,56 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t operator = parser->current;
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+
+ // Unary .. and ... are special because these are non-associative
+ // operators that can also be unary operators. In this case we need
+ // to explicitly reject code that has a .. or ... that follows this
+ // expression.
+ if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
+ pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
+ }
+
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
case PM_TOKEN_FLOAT:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
+ return UP(pm_float_node_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_IMAGINARY:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
+ return UP(pm_float_node_imaginary_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_RATIONAL:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
+ return UP(pm_float_node_rational_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
+ return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
case PM_TOKEN_NUMBERED_REFERENCE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return node;
}
case PM_TOKEN_GLOBAL_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return node;
}
case PM_TOKEN_BACK_REFERENCE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return node;
@@ -14411,27 +17943,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_call_node_t *call = (pm_call_node_t *) node;
pm_arguments_t arguments = { 0 };
- if (parse_arguments_list(parser, &arguments, true, accepts_command_call)) {
+ if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
// Since we found arguments, we need to turn off the
// variable call bit in the flags.
- pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
+ pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
call->opening_loc = arguments.opening_loc;
call->arguments = arguments.arguments;
call->closing_loc = arguments.closing_loc;
call->block = arguments.block;
- if (arguments.block != NULL) {
- call->base.location.end = arguments.block->location.end;
- } else if (arguments.closing_loc.start == NULL) {
- if (arguments.arguments != NULL) {
- call->base.location.end = arguments.arguments->base.location.end;
- } else {
- call->base.location.end = call->message_loc.end;
- }
- } else {
- call->base.location.end = arguments.closing_loc.end;
+ const uint8_t *end = pm_arguments_end(&arguments);
+ if (!end) {
+ end = call->message_loc.end;
}
+ call->base.location.end = end;
}
} else {
// Otherwise, we know the identifier is in the local table. This
@@ -14439,19 +17965,40 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// a block, so we need to check for that here.
if (
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
+ match1(parser, PM_TOKEN_BRACE_LEFT)
) {
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call);
-
+ parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
+
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ // If we're about to convert an 'it' implicit local
+ // variable read into a method call, we need to remove
+ // it from the list of implicit local variables.
+ pm_node_unreference(parser, node);
+ } else {
+ // Otherwise, we're about to convert a regular local
+ // variable read into a method call, in which case we
+ // need to indicate that this was not a read for the
+ // purposes of warnings.
+ assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
+
+ if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
+ pm_node_unreference(parser, node);
+ } else {
+ pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
+ pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
+ }
+ }
+
pm_node_destroy(parser, node);
- return (pm_node_t *) fcall;
+ return UP(fcall);
}
}
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
- node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return node;
@@ -14459,10 +18006,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
case PM_TOKEN_HEREDOC_START: {
// Here we have found a heredoc. We'll parse it and add it to the
// list of strings.
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
- assert(lex_mode->mode == PM_LEX_HEREDOC);
- pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
- pm_heredoc_indent_t indent = lex_mode->as.heredoc.indent;
+ assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
+ pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
+
+ size_t common_whitespace = (size_t) -1;
+ parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
parser_lex(parser);
pm_token_t opening = parser->previous;
@@ -14473,22 +18021,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
// If we get here, then we have an empty heredoc. We'll create
// an empty content token and return an empty string node.
- lex_mode_pop(parser);
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
pm_token_t content = parse_strings_empty_content(parser->previous.start);
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
- node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
+ node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
} else {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
}
node->location.end = opening.end;
- } else if ((part = parse_string_part(parser)) == NULL) {
+ } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
// If we get here, then we tried to find something in the
// heredoc but couldn't actually parse anything, so we'll just
// return a missing node.
- node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ //
+ // parse_string_part handles its own errors, so there is no need
+ // for us to add one here.
+ node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
// If we get here, then the part that we parsed was plain string
// content and we're at the end of the heredoc, so we can return
@@ -14501,19 +18051,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
cast->base.location = cast->opening_loc;
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
cast->base.type = PM_X_STRING_NODE;
}
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
- if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
+ if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
}
- node = (pm_node_t *) cast;
- lex_mode_pop(parser);
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+ node = UP(cast);
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
} else {
// If we get here, then we have multiple parts in the heredoc,
// so we'll need to create an interpolated string node to hold
@@ -14522,41 +18070,38 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_list_append(&parts, part);
while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
+ if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
pm_node_list_append(&parts, part);
}
}
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
-
// Now that we have all of the parts, create the correct type of
// interpolated node.
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
cast->parts = parts;
- lex_mode_pop(parser);
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
-
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
+
cast->base.location = cast->opening_loc;
- node = (pm_node_t *) cast;
+ node = UP(cast);
} else {
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
+ pm_node_list_free(&parts);
- lex_mode_pop(parser);
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
-
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
pm_interpolated_string_node_closing_set(cast, &parser->previous);
+
cast->base.location = cast->opening_loc;
- node = (pm_node_t *) cast;
+ node = UP(cast);
}
// If this is a heredoc that is indented with a ~, then we need
// to dedent each line by the common leading whitespace.
- if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
+ if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
pm_node_list_t *nodes;
- if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
+ if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
} else {
nodes = &((pm_interpolated_string_node_t *) node)->parts;
@@ -14567,17 +18112,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
- return parse_strings(parser, node);
+ return parse_strings(parser, node, false, (uint16_t) (depth + 1));
}
return node;
}
case PM_TOKEN_INSTANCE_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return node;
@@ -14585,32 +18130,32 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
case PM_TOKEN_INTEGER: {
pm_node_flags_t base = parser->integer_base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_IMAGINARY: {
pm_node_flags_t base = parser->integer_base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_RATIONAL: {
pm_node_flags_t base = parser->integer_base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
pm_node_flags_t base = parser->integer_base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
}
case PM_TOKEN_KEYWORD___ENCODING__:
parser_lex(parser);
- return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
+ return UP(pm_source_encoding_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD___FILE__:
parser_lex(parser);
- return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
+ return UP(pm_source_file_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD___LINE__:
parser_lex(parser);
- return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
+ return UP(pm_source_line_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_ALIAS: {
if (binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
@@ -14619,8 +18164,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t keyword = parser->previous;
- pm_node_t *new_name = parse_alias_argument(parser, true);
- pm_node_t *old_name = parse_alias_argument(parser, false);
+ pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
+ pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
switch (PM_NODE_TYPE(new_name)) {
case PM_BACK_REFERENCE_READ_NODE:
@@ -14628,13 +18173,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
case PM_GLOBAL_VARIABLE_READ_NODE: {
if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
- pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+ pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
}
} else {
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
}
- return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
+ return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
}
case PM_SYMBOL_NODE:
case PM_INTERPOLATED_SYMBOL_NODE: {
@@ -14642,16 +18187,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
}
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
default:
- return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
+ return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
}
}
case PM_TOKEN_KEYWORD_CASE: {
+ size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
+
pm_token_t case_keyword = parser->previous;
pm_node_t *predicate = NULL;
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
predicate = NULL;
@@ -14660,61 +18210,86 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
} else if (!token_begins_expression_p(parser->current.type)) {
predicate = NULL;
} else {
- predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
+ predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
}
- if (accept1(parser, PM_TOKEN_KEYWORD_END)) {
+ if (match1(parser, PM_TOKEN_KEYWORD_END)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+ parser_lex(parser);
+
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
- return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
+ return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
}
- // At this point we can create a case node, though we don't yet know if it
- // is a case-in or case-when node.
+ // At this point we can create a case node, though we don't yet know
+ // if it is a case-in or case-when node.
pm_token_t end_keyword = not_provided(parser);
pm_node_t *node;
if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
+ pm_static_literals_t literals = { 0 };
// At this point we've seen a when keyword, so we know this is a
- // case-when node. We will continue to parse the when nodes until we hit
- // the end of the list.
- while (accept1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+ // case-when node. We will continue to parse the when nodes
+ // until we hit the end of the list.
+ while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+ parser_lex(parser);
+
pm_token_t when_keyword = parser->previous;
pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
do {
if (accept1(parser, PM_TOKEN_USTAR)) {
pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
- pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
+ pm_when_node_conditions_append(when_node, UP(splat_node));
if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
} else {
- pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
+ pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
pm_when_node_conditions_append(when_node, condition);
+ // If we found a missing node, then this is a syntax
+ // error and we should stop looping.
if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
+
+ // If this is a string node, then we need to mark it
+ // as frozen because when clause strings are frozen.
+ if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
+ pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+ } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
+ pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
+ }
+
+ pm_when_clause_static_literals_add(parser, &literals, condition);
}
} while (accept1(parser, PM_TOKEN_COMMA));
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- accept1(parser, PM_TOKEN_KEYWORD_THEN);
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+ pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
+ }
} else {
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
+ pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
}
if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN);
+ pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
if (statements != NULL) {
pm_when_node_statements_set(when_node, statements);
}
}
- pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
+ pm_case_node_condition_append(case_node, UP(when_node));
}
// If we didn't parse any conditions (in or when) then we need
@@ -14723,7 +18298,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
}
- node = (pm_node_t *) case_node;
+ pm_static_literals_free(&literals);
+ node = UP(case_node);
} else {
pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
@@ -14733,9 +18309,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
}
- // At this point we expect that we're parsing a case-in node. We will
- // continue to parse the in nodes until we hit the end of the list.
+ // At this point we expect that we're parsing a case-in node. We
+ // will continue to parse the in nodes until we hit the end of
+ // the list.
while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
parser->pattern_matching_newlines = true;
@@ -14744,24 +18323,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t in_keyword = parser->previous;
- pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
+
+ pm_constant_id_list_t captures = { 0 };
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
+
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+ pm_constant_id_list_free(&captures);
- // Since we're in the top-level of the case-in node we need to check
- // for guard clauses in the form of `if` or `unless` statements.
+ // Since we're in the top-level of the case-in node we need
+ // to check for guard clauses in the form of `if` or
+ // `unless` statements.
if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
- pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+ pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
} else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
- pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+ pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
}
- // Now we need to check for the terminator of the in node's pattern.
- // It can be a newline or semicolon optionally followed by a `then`
- // keyword.
+ // Now we need to check for the terminator of the in node's
+ // pattern. It can be a newline or semicolon optionally
+ // followed by a `then` keyword.
pm_token_t then_keyword;
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
@@ -14770,22 +18354,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
then_keyword = not_provided(parser);
}
} else {
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
then_keyword = parser->previous;
}
- // Now we can actually parse the statements associated with the in
- // node.
+ // Now we can actually parse the statements associated with
+ // the in node.
pm_statements_node_t *statements;
if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
statements = NULL;
} else {
- statements = parse_statements(parser, PM_CONTEXT_CASE_IN);
+ statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
}
- // Now that we have the full pattern and statements, we can create the
- // node and attach it to the case node.
- pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
+ // Now that we have the full pattern and statements, we can
+ // create the node and attach it to the case node.
+ pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword));
pm_case_match_node_condition_append(case_node, condition);
}
@@ -14795,7 +18379,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
}
- node = (pm_node_t *) case_node;
+ node = UP(case_node);
}
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -14804,55 +18388,66 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_else_node_t *else_node;
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
- else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE), &parser->current);
+ else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
} else {
else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
}
if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
- pm_case_node_consequent_set((pm_case_node_t *) node, else_node);
+ pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
} else {
- pm_case_match_node_consequent_set((pm_case_match_node_t *) node, else_node);
+ pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
}
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
+
if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
} else {
pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
}
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
return node;
}
case PM_TOKEN_KEYWORD_BEGIN: {
+ size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
pm_token_t begin_keyword = parser->previous;
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
pm_statements_node_t *begin_statements = NULL;
- if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN);
+ begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
}
pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
- parse_rescues(parser, begin_node, false);
+ parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
begin_node->base.location.end = parser->previous.end;
pm_begin_node_end_keyword_set(begin_node, &parser->previous);
- if ((begin_node->else_clause != NULL) && (begin_node->rescue_clause == NULL)) {
- pm_parser_err_node(parser, (pm_node_t *) begin_node->else_clause, PM_ERR_BEGIN_LONELY_ELSE);
- }
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
- return (pm_node_t *) begin_node;
+ return UP(begin_node);
}
case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
if (binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
}
@@ -14862,14 +18457,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
pm_token_t opening = parser->previous;
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE);
+ pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
pm_context_t context = parser->current_context->context;
if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
}
- return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
+
+ flush_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
+ return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
}
case PM_TOKEN_KEYWORD_BREAK:
case PM_TOKEN_KEYWORD_NEXT:
@@ -14886,27 +18485,35 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
- parse_arguments(parser, &arguments, false, PM_TOKEN_EOF);
+ pm_token_t next = parser->current;
+ parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+
+ // Reject `foo && return bar`.
+ if (!accepts_command_call && arguments.arguments != NULL) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
+ }
}
}
switch (keyword.type) {
- case PM_TOKEN_KEYWORD_BREAK:
- return (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
- case PM_TOKEN_KEYWORD_NEXT:
- return (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
+ case PM_TOKEN_KEYWORD_BREAK: {
+ pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
+ if (!parser->partial_script) parse_block_exit(parser, node);
+ return node;
+ }
+ case PM_TOKEN_KEYWORD_NEXT: {
+ pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
+ if (!parser->partial_script) parse_block_exit(parser, node);
+ return node;
+ }
case PM_TOKEN_KEYWORD_RETURN: {
- if (
- (parser->current_context->context == PM_CONTEXT_CLASS) ||
- (parser->current_context->context == PM_CONTEXT_MODULE)
- ) {
- pm_parser_err_current(parser, PM_ERR_RETURN_INVALID);
- }
- return (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
+ pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
+ parse_return(parser, node);
+ return node;
}
default:
assert(false && "unreachable");
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
}
}
case PM_TOKEN_KEYWORD_SUPER: {
@@ -14914,63 +18521,89 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t keyword = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call);
+ parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
if (
arguments.opening_loc.start == NULL &&
arguments.arguments == NULL &&
((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
) {
- return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
+ return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
}
- return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
+ return UP(pm_super_node_create(parser, &keyword, &arguments));
}
case PM_TOKEN_KEYWORD_YIELD: {
parser_lex(parser);
pm_token_t keyword = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, false, accepts_command_call);
+ parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
+
+ // It's possible that we've parsed a block argument through our
+ // call to parse_arguments_list. If we found one, we should mark it
+ // as invalid and destroy it, as we don't have a place for it on the
+ // yield node.
+ if (arguments.block != NULL) {
+ pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
+ pm_node_unreference(parser, arguments.block);
+ pm_node_destroy(parser, arguments.block);
+ arguments.block = NULL;
+ }
+
+ pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
+ if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
- return (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
+ return node;
}
case PM_TOKEN_KEYWORD_CLASS: {
+ size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
+
pm_token_t class_keyword = parser->previous;
pm_do_loop_stack_push(parser, false);
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
if (accept1(parser, PM_TOKEN_LESS_LESS)) {
pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
- pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
pm_parser_scope_push(parser, true);
- accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+ if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
+ }
pm_node_t *statements = NULL;
- if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS);
+ statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
pm_accepts_block_stack_pop(parser);
}
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
- pm_constant_id_list_t locals = parser->current_scope->locals;
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
pm_parser_scope_pop(parser);
pm_do_loop_stack_pop(parser);
- pm_parser_current_param_name_restore(parser, saved_param_name);
- return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
+ flush_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
+ return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
}
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_CLASS_NAME);
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
pm_token_t name = parser->previous;
if (name.type != PM_TOKEN_CONSTANT) {
pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
@@ -14986,13 +18619,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser->command_start = true;
parser_lex(parser);
- superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CLASS_SUPERCLASS);
+ superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
} else {
inheritance_operator = not_provided(parser);
superclass = NULL;
}
- pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
pm_parser_scope_push(parser, true);
if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
@@ -15002,53 +18634,63 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_node_t *statements = NULL;
- if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS);
+ statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
pm_accepts_block_stack_pop(parser);
}
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
if (context_def_p(parser)) {
pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
pm_parser_scope_pop(parser);
pm_do_loop_stack_pop(parser);
- pm_parser_current_param_name_restore(parser, saved_param_name);
if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
}
- return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
+ return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous));
}
case PM_TOKEN_KEYWORD_DEF: {
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
pm_token_t def_keyword = parser->current;
+ size_t opening_newline_index = token_newline_index(parser);
pm_node_t *receiver = NULL;
pm_token_t operator = not_provided(parser);
- pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
+ pm_token_t name;
// This context is necessary for lexing `...` in a bare params
// correctly. It must be pushed before lexing the first param, so it
// is here.
context_push(parser, PM_CONTEXT_DEF_PARAMS);
- pm_constant_id_t saved_param_name;
-
parser_lex(parser);
+ // This will be false if the method name is not a valid identifier
+ // but could be followed by an operator.
+ bool valid_name = true;
+
switch (parser->current.type) {
case PM_CASE_OPERATOR:
- saved_param_name = pm_parser_current_param_name_unset(parser);
pm_parser_scope_push(parser, true);
lex_state_set(parser, PM_LEX_STATE_ENDFN);
parser_lex(parser);
@@ -15061,7 +18703,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
receiver = parse_variable_call(parser);
- saved_param_name = pm_parser_current_param_name_unset(parser);
pm_parser_scope_push(parser, true);
lex_state_set(parser, PM_LEX_STATE_FNAME);
parser_lex(parser);
@@ -15069,7 +18710,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
operator = parser->previous;
name = parse_method_definition_name(parser);
} else {
- saved_param_name = pm_parser_current_param_name_unset(parser);
pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
pm_parser_scope_push(parser, true);
@@ -15078,10 +18718,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
break;
}
- case PM_TOKEN_CONSTANT:
case PM_TOKEN_INSTANCE_VARIABLE:
case PM_TOKEN_CLASS_VARIABLE:
case PM_TOKEN_GLOBAL_VARIABLE:
+ valid_name = false;
+ PRISM_FALLTHROUGH
+ case PM_TOKEN_CONSTANT:
case PM_TOKEN_KEYWORD_NIL:
case PM_TOKEN_KEYWORD_SELF:
case PM_TOKEN_KEYWORD_TRUE:
@@ -15089,7 +18731,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
case PM_TOKEN_KEYWORD___FILE__:
case PM_TOKEN_KEYWORD___LINE__:
case PM_TOKEN_KEYWORD___ENCODING__: {
- saved_param_name = pm_parser_current_param_name_unset(parser);
pm_parser_scope_push(parser, true);
parser_lex(parser);
@@ -15102,37 +18743,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
switch (identifier.type) {
case PM_TOKEN_CONSTANT:
- receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
+ receiver = UP(pm_constant_read_node_create(parser, &identifier));
break;
case PM_TOKEN_INSTANCE_VARIABLE:
- receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
+ receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
break;
case PM_TOKEN_CLASS_VARIABLE:
- receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
+ receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
break;
case PM_TOKEN_GLOBAL_VARIABLE:
- receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
+ receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
break;
case PM_TOKEN_KEYWORD_NIL:
- receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
+ receiver = UP(pm_nil_node_create(parser, &identifier));
break;
case PM_TOKEN_KEYWORD_SELF:
- receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
+ receiver = UP(pm_self_node_create(parser, &identifier));
break;
case PM_TOKEN_KEYWORD_TRUE:
- receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
+ receiver = UP(pm_true_node_create(parser, &identifier));
break;
case PM_TOKEN_KEYWORD_FALSE:
- receiver = (pm_node_t *)pm_false_node_create(parser, &identifier);
+ receiver = UP(pm_false_node_create(parser, &identifier));
break;
case PM_TOKEN_KEYWORD___FILE__:
- receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
+ receiver = UP(pm_source_file_node_create(parser, &identifier));
break;
case PM_TOKEN_KEYWORD___LINE__:
- receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
+ receiver = UP(pm_source_line_node_create(parser, &identifier));
break;
case PM_TOKEN_KEYWORD___ENCODING__:
- receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
+ receiver = UP(pm_source_encoding_node_create(parser, &identifier));
break;
default:
break;
@@ -15140,19 +18781,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
name = parse_method_definition_name(parser);
} else {
+ if (!valid_name) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
+ }
+
name = identifier;
}
break;
}
case PM_TOKEN_PARENTHESIS_LEFT: {
- // The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner expression
- // of this parenthesis should not be processed under this context.
- // Thus, the context is popped here.
+ // The current context is `PM_CONTEXT_DEF_PARAMS`, however
+ // the inner expression of this parenthesis should not be
+ // processed under this context. Thus, the context is popped
+ // here.
context_pop(parser);
parser_lex(parser);
pm_token_t lparen = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
@@ -15162,34 +18808,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
operator = parser->previous;
- receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
+ receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
- saved_param_name = pm_parser_current_param_name_unset(parser);
+ // To push `PM_CONTEXT_DEF_PARAMS` again is for the same
+ // reason as described the above.
pm_parser_scope_push(parser, true);
-
- // To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as described the above.
context_push(parser, PM_CONTEXT_DEF_PARAMS);
name = parse_method_definition_name(parser);
break;
}
default:
- saved_param_name = pm_parser_current_param_name_unset(parser);
pm_parser_scope_push(parser, true);
-
name = parse_method_definition_name(parser);
break;
}
- // If, after all that, we were unable to find a method name, add an
- // error to the error list.
- if (name.type == PM_TOKEN_MISSING) {
- pm_parser_err_previous(parser, PM_ERR_DEF_NAME);
- }
-
pm_token_t lparen;
pm_token_t rparen;
pm_parameters_node_t *params;
+ bool accept_endless_def = true;
switch (parser->current.type) {
case PM_TOKEN_PARENTHESIS_LEFT: {
parser_lex(parser);
@@ -15198,13 +18836,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
params = NULL;
} else {
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true);
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
}
lex_state_set(parser, PM_LEX_STATE_BEG);
parser->command_start = true;
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
+ context_pop(parser);
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = PM_TOKEN_MISSING;
+ }
+
rparen = parser->previous;
break;
}
@@ -15217,20 +18861,25 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
lparen = not_provided(parser);
rparen = not_provided(parser);
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true);
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
+
+ // Reject `def * = 1` and similar. We have to specifically check
+ // for them because they create ambiguity with optional arguments.
+ accept_endless_def = false;
+
+ context_pop(parser);
break;
}
default: {
lparen = not_provided(parser);
rparen = not_provided(parser);
params = NULL;
+
+ context_pop(parser);
break;
}
}
- uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
-
- context_pop(parser);
pm_node_t *statements = NULL;
pm_token_t equal;
pm_token_t end_keyword;
@@ -15239,22 +18888,42 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (token_is_setter_name(&name)) {
pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
}
+ if (!accept_endless_def) {
+ pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
+ }
+ if (
+ parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
+ parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
+ ) {
+ PM_PARSER_ERR_FORMAT(parser, def_keyword.start, parser->previous.end, PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
+ }
equal = parser->previous;
context_push(parser, PM_CONTEXT_DEF);
pm_do_loop_stack_push(parser, false);
- statements = (pm_node_t *) pm_statements_node_create(parser);
+ statements = UP(pm_statements_node_create(parser));
+
+ bool allow_command_call;
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+ allow_command_call = accepts_command_call;
+ } else {
+ // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
+ allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
+ }
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, PM_ERR_DEF_ENDLESS);
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+ context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
pm_token_t rescue_keyword = parser->previous;
- pm_node_t *value = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
- pm_rescue_modifier_node_t *rescue_node = pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
- statement = (pm_node_t *)rescue_node;
+ pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ context_pop(parser);
+
+ statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
}
- pm_statements_node_body_append((pm_statements_node_t *) statements, statement);
+ pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
pm_do_loop_stack_pop(parser);
context_pop(parser);
end_keyword = not_provided(parser);
@@ -15272,43 +18941,55 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_accepts_block_stack_push(parser, true);
pm_do_loop_stack_push(parser, false);
- if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF);
+ statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
pm_accepts_block_stack_pop(parser);
}
- if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+ if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, true);
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
}
pm_accepts_block_stack_pop(parser);
pm_do_loop_stack_pop(parser);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
+
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
end_keyword = parser->previous;
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
-
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
pm_parser_scope_pop(parser);
- pm_parser_current_param_name_restore(parser, saved_param_name);
- return (pm_node_t *) pm_def_node_create(
+ /**
+ * If the final character is `@` as is the case when defining
+ * methods to override the unary operators, we should ignore
+ * the @ in the same way we do for symbols.
+ */
+ pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
+
+ flush_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
+ return UP(pm_def_node_create(
parser,
+ name_id,
&name,
receiver,
params,
statements,
&locals,
- locals_body_index,
&def_keyword,
&operator,
&lparen,
&rparen,
&equal,
&end_keyword
- );
+ ));
}
case PM_TOKEN_KEYWORD_DEFINED: {
parser_lex(parser);
@@ -15318,30 +18999,41 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t rparen;
pm_node_t *expression;
+ context_push(parser, PM_CONTEXT_DEFINED);
+ bool newline = accept1(parser, PM_TOKEN_NEWLINE);
+
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
lparen = parser->previous;
- expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_DEFINED_EXPRESSION);
- if (parser->recovering) {
+ if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
+ lparen = not_provided(parser);
rparen = not_provided(parser);
} else {
- accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- rparen = parser->previous;
+ expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+
+ if (parser->recovering) {
+ rparen = not_provided(parser);
+ } else {
+ accept1(parser, PM_TOKEN_NEWLINE);
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+ rparen = parser->previous;
+ }
}
} else {
lparen = not_provided(parser);
rparen = not_provided(parser);
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_DEFINED_EXPRESSION);
+ expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
}
- return (pm_node_t *) pm_defined_node_create(
+ context_pop(parser);
+ return UP(pm_defined_node_create(
parser,
&lparen,
expression,
&rparen,
- &PM_LOCATION_TOKEN_VALUE(&keyword)
- );
+ &keyword
+ ));
}
case PM_TOKEN_KEYWORD_END_UPCASE: {
if (binding_power != PM_BINDING_POWER_STATEMENT) {
@@ -15357,16 +19049,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
pm_token_t opening = parser->previous;
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE);
+ pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
- return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
+ return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
}
case PM_TOKEN_KEYWORD_FALSE:
parser_lex(parser);
- return (pm_node_t *)pm_false_node_create(parser, &parser->previous);
+ return UP(pm_false_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_FOR: {
+ size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
+
pm_token_t for_keyword = parser->previous;
pm_node_t *index;
@@ -15378,22 +19072,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
+ index = UP(pm_splat_node_create(parser, &star_operator, name));
} else if (token_begins_expression_p(parser->current.type)) {
- index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
+ index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
} else {
pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
- index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
+ index = UP(pm_missing_node_create(parser, for_keyword.start, for_keyword.end));
}
// Now, if there are multiple index expressions, parse them out.
if (match1(parser, PM_TOKEN_COMMA)) {
- index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
+ index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
} else {
- index = parse_target(parser, index);
+ index = parse_target(parser, index, false, false);
}
context_pop(parser);
@@ -15402,7 +19096,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
pm_token_t in_keyword = parser->previous;
- pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_FOR_COLLECTION);
+ pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
pm_do_loop_stack_pop(parser);
pm_token_t do_keyword;
@@ -15410,21 +19104,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
do_keyword = parser->previous;
} else {
do_keyword = not_provided(parser);
+ if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
+ }
}
- accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
pm_statements_node_t *statements = NULL;
-
- if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
- statements = parse_statements(parser, PM_CONTEXT_FOR);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
+ if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+ statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
}
- return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
+
+ return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous));
}
case PM_TOKEN_KEYWORD_IF:
+ if (parser_end_of_line_p(parser)) {
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+ }
+
+ size_t opening_newline_index = token_newline_index(parser);
+ bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
parser_lex(parser);
- return parse_conditional(parser, PM_CONTEXT_IF);
+
+ return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
case PM_TOKEN_KEYWORD_UNDEF: {
if (binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
@@ -15432,7 +19136,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
- pm_node_t *name = parse_undef_argument(parser);
+ pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
pm_node_destroy(parser, name);
@@ -15442,7 +19146,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
while (match1(parser, PM_TOKEN_COMMA)) {
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
parser_lex(parser);
- name = parse_undef_argument(parser);
+ name = parse_undef_argument(parser, (uint16_t) (depth + 1));
if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
pm_node_destroy(parser, name);
@@ -15453,7 +19157,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
}
- return (pm_node_t *) undef;
+ return UP(undef);
}
case PM_TOKEN_KEYWORD_NOT: {
parser_lex(parser);
@@ -15462,16 +19166,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_arguments_t arguments = { 0 };
pm_node_t *receiver = NULL;
+ // If we do not accept a command call, then we also do not accept a
+ // not without parentheses. In this case we need to reject this
+ // syntax.
+ if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+ if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
+ pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
+ } else {
+ accept1(parser, PM_TOKEN_NEWLINE);
+ pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
+ }
+
+ return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
+ }
+
accept1(parser, PM_TOKEN_NEWLINE);
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
- arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ pm_token_t lparen = parser->previous;
if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
} else {
- receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_NOT_EXPRESSION);
- pm_conditional_predicate(receiver);
+ arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
+ receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
if (!parser->recovering) {
accept1(parser, PM_TOKEN_NEWLINE);
@@ -15480,36 +19198,43 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
}
} else {
- receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_NOT_EXPRESSION);
- pm_conditional_predicate(receiver);
+ receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
}
- return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
+ return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
}
- case PM_TOKEN_KEYWORD_UNLESS:
+ case PM_TOKEN_KEYWORD_UNLESS: {
+ size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
- return parse_conditional(parser, PM_CONTEXT_UNLESS);
+
+ return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
+ }
case PM_TOKEN_KEYWORD_MODULE: {
- parser_lex(parser);
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+ size_t opening_newline_index = token_newline_index(parser);
+ parser_lex(parser);
pm_token_t module_keyword = parser->previous;
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_MODULE_NAME);
+
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
pm_token_t name;
// If we can recover from a syntax error that occurred while parsing
// the name of the module, then we'll handle that here.
if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
+ return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
}
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
pm_token_t double_colon = parser->previous;
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
-
- constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
+ constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
}
// Here we retrieve the name of the module. If it wasn't a constant,
@@ -15520,108 +19245,181 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
}
- pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
pm_parser_scope_push(parser, true);
-
accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
pm_node_t *statements = NULL;
- if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE);
+ statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
pm_accepts_block_stack_pop(parser);
}
- if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+ if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
- pm_parser_scope_pop(parser);
- pm_parser_current_param_name_restore(parser, saved_param_name);
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
+ pm_parser_scope_pop(parser);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
if (context_def_p(parser)) {
pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
}
- return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
+ return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
}
case PM_TOKEN_KEYWORD_NIL:
parser_lex(parser);
- return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
- case PM_TOKEN_KEYWORD_REDO:
+ return UP(pm_nil_node_create(parser, &parser->previous));
+ case PM_TOKEN_KEYWORD_REDO: {
parser_lex(parser);
- return (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
- case PM_TOKEN_KEYWORD_RETRY:
+
+ pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
+ if (!parser->partial_script) parse_block_exit(parser, node);
+
+ return node;
+ }
+ case PM_TOKEN_KEYWORD_RETRY: {
parser_lex(parser);
- return (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
+
+ pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
+ parse_retry(parser, node);
+
+ return node;
+ }
case PM_TOKEN_KEYWORD_SELF:
parser_lex(parser);
- return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
+ return UP(pm_self_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_TRUE:
parser_lex(parser);
- return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
+ return UP(pm_true_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_UNTIL: {
+ size_t opening_newline_index = token_newline_index(parser);
+
+ context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
pm_do_loop_stack_push(parser, true);
+
parser_lex(parser);
pm_token_t keyword = parser->previous;
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
pm_do_loop_stack_pop(parser);
+ context_pop(parser);
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
- pm_statements_node_t *statements = NULL;
+ pm_token_t do_keyword;
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
+ do_keyword = parser->previous;
+ } else {
+ do_keyword = not_provided(parser);
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
+ }
- if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
+ pm_statements_node_t *statements = NULL;
+ if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- statements = parse_statements(parser, PM_CONTEXT_UNTIL);
+ statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
}
- return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
+
+ return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
}
case PM_TOKEN_KEYWORD_WHILE: {
+ size_t opening_newline_index = token_newline_index(parser);
+
+ context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
pm_do_loop_stack_push(parser, true);
+
parser_lex(parser);
pm_token_t keyword = parser->previous;
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
pm_do_loop_stack_pop(parser);
+ context_pop(parser);
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
- pm_statements_node_t *statements = NULL;
+ pm_token_t do_keyword;
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
+ do_keyword = parser->previous;
+ } else {
+ do_keyword = not_provided(parser);
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
+ }
- if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
+ pm_statements_node_t *statements = NULL;
+ if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
pm_accepts_block_stack_push(parser, true);
- statements = parse_statements(parser, PM_CONTEXT_WHILE);
+ statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
}
- return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
+
+ return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
}
case PM_TOKEN_PERCENT_LOWER_I: {
parser_lex(parser);
pm_token_t opening = parser->previous;
pm_array_node_t *array = pm_array_node_create(parser, &opening);
+ pm_node_t *current = NULL;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ // Interpolation is not possible but nested heredocs can still lead to
+ // consecutive (disjoint) string tokens when the final newline is escaped.
+ while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
+
+ // Record the string node, moving to interpolation if needed.
+ if (current == NULL) {
+ current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
+ parser_lex(parser);
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+ parser_lex(parser);
+ pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+ pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+ pm_token_t bounds = not_provided(parser);
+
+ pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
+ pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
+ pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
+ parser_lex(parser);
+
+ pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+ pm_interpolated_symbol_node_append(interpolated, first_string);
+ pm_interpolated_symbol_node_append(interpolated, second_string);
+
+ xfree(current);
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
}
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+ if (current) {
+ pm_array_node_elements_append(array, current);
+ current = NULL;
+ } else {
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+ }
}
pm_token_t closing = parser->current;
@@ -15633,7 +19431,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_array_node_close_set(array, &closing);
- return (pm_node_t *) array;
+ return UP(array);
}
case PM_TOKEN_PERCENT_UPPER_I: {
parser_lex(parser);
@@ -15668,27 +19466,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// If we hit content and the current node is NULL, then this is
// the first string content we've seen. In that case we're going
// to create a new string node and set that to the current.
- current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
+ current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
parser_lex(parser);
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
// If we hit string content and the current node is an
// interpolated string, then we need to append the string content
// to the list of child nodes.
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
parser_lex(parser);
pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
} else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
- // If we hit string content and the current node is a string node,
+ // If we hit string content and the current node is a symbol node,
// then we need to convert the current node into an interpolated
// string and add the string content to the list of child nodes.
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
+ pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+ pm_token_t bounds = not_provided(parser);
+
+ pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
+ pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
+ pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
parser_lex(parser);
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_symbol_node_append(interpolated, current);
- pm_interpolated_symbol_node_append(interpolated, string);
- current = (pm_node_t *) interpolated;
+ pm_interpolated_symbol_node_append(interpolated, first_string);
+ pm_interpolated_symbol_node_append(interpolated, second_string);
+
+ xfree(current);
+ current = UP(interpolated);
} else {
assert(false && "unreachable");
}
@@ -15703,7 +19508,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// node to a new interpolated string.
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+ current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
} else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
// If we hit an embedded variable and the current node is a string
// node, then we'll convert the current into an interpolated
@@ -15712,17 +19517,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t closing = not_provided(parser);
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
+ current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
pm_interpolated_symbol_node_append(interpolated, current);
interpolated->base.location.start = current->location.start;
start_location_set = true;
- current = (pm_node_t *) interpolated;
+ current = UP(interpolated);
} else {
// If we hit an embedded variable and the current node is an
// interpolated string, then we'll just add the embedded variable.
}
- pm_node_t *part = parse_string_part(parser);
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
if (!start_location_set) {
current->location.start = part->location.start;
@@ -15737,7 +19542,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// node to a new interpolated string.
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+ current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
} else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
// If we hit an embedded expression and the current node is a
// string node, then we'll convert the current into an
@@ -15747,11 +19552,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t closing = not_provided(parser);
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
+ current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
pm_interpolated_symbol_node_append(interpolated, current);
interpolated->base.location.start = current->location.start;
start_location_set = true;
- current = (pm_node_t *) interpolated;
+ current = UP(interpolated);
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
// If we hit an embedded expression and the current node is an
// interpolated string, then we'll just continue on.
@@ -15759,7 +19564,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
assert(false && "unreachable");
}
- pm_node_t *part = parse_string_part(parser);
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
if (!start_location_set) {
current->location.start = part->location.start;
@@ -15787,29 +19592,48 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_array_node_close_set(array, &closing);
- return (pm_node_t *) array;
+ return UP(array);
}
case PM_TOKEN_PERCENT_LOWER_W: {
parser_lex(parser);
pm_token_t opening = parser->previous;
pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- // skip all leading whitespaces
- accept1(parser, PM_TOKEN_WORDS_SEP);
+ pm_node_t *current = NULL;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ // Interpolation is not possible but nested heredocs can still lead to
+ // consecutive (disjoint) string tokens when the final newline is escaped.
+ while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
- pm_array_node_elements_append(array, string);
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+
+ // Record the string node, moving to interpolation if needed.
+ if (current == NULL) {
+ current = string;
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+ pm_interpolated_string_node_append(interpolated, current);
+ pm_interpolated_string_node_append(interpolated, string);
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
+ parser_lex(parser);
}
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ if (current) {
+ pm_array_node_elements_append(array, current);
+ current = NULL;
+ } else {
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ }
}
pm_token_t closing = parser->current;
@@ -15821,7 +19645,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_array_node_close_set(array, &closing);
- return (pm_node_t *) array;
+ return UP(array);
}
case PM_TOKEN_PERCENT_UPPER_W: {
parser_lex(parser);
@@ -15857,7 +19681,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
pm_node_flag_set(string, parse_unescaped_encoding(parser));
parser_lex(parser);
@@ -15880,7 +19704,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
pm_interpolated_string_node_append(interpolated, current);
pm_interpolated_string_node_append(interpolated, string);
- current = (pm_node_t *) interpolated;
+ current = UP(interpolated);
} else {
assert(false && "unreachable");
}
@@ -15895,7 +19719,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// interpolated string.
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+ current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
// If we hit an embedded variable and the current
// node is a string node, then we'll convert the
@@ -15905,14 +19729,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t closing = not_provided(parser);
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
pm_interpolated_string_node_append(interpolated, current);
- current = (pm_node_t *) interpolated;
+ current = UP(interpolated);
} else {
// If we hit an embedded variable and the current
// node is an interpolated string, then we'll just
// add the embedded variable.
}
- pm_node_t *part = parse_string_part(parser);
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
break;
}
@@ -15924,7 +19748,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// interpolated string.
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+ current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
// If we hit an embedded expression and the current
// node is a string node, then we'll convert the
@@ -15934,7 +19758,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t closing = not_provided(parser);
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
pm_interpolated_string_node_append(interpolated, current);
- current = (pm_node_t *) interpolated;
+ current = UP(interpolated);
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
// If we hit an embedded expression and the current
// node is an interpolated string, then we'll just
@@ -15943,7 +19767,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
assert(false && "unreachable");
}
- pm_node_t *part = parse_string_part(parser);
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
break;
}
@@ -15968,7 +19792,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_array_node_close_set(array, &closing);
- return (pm_node_t *) array;
+ return UP(array);
}
case PM_TOKEN_REGEXP_BEGIN: {
pm_token_t opening = parser->current;
@@ -15985,10 +19809,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
};
parser_lex(parser);
- return (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
+
+ pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->previous));
+ pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
+
+ return node;
}
- pm_interpolated_regular_expression_node_t *node;
+ pm_interpolated_regular_expression_node_t *interpolated;
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the regular
@@ -15997,36 +19825,56 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// regular expression) or if it's not then it has interpolation.
pm_string_t unescaped = parser->current_string;
pm_token_t content = parser->current;
+ bool ascii_only = parser->current_regular_expression_ascii_only;
parser_lex(parser);
- // If we hit an end, then we can create a regular expression node
- // without interpolation, which can be represented more succinctly and
- // more easily compiled.
+ // If we hit an end, then we can create a regular expression
+ // node without interpolation, which can be represented more
+ // succinctly and more easily compiled.
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
- return (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+
+ // If we're not immediately followed by a =~, then we want
+ // to parse all of the errors at this point. If it is
+ // followed by a =~, then it will get parsed higher up while
+ // parsing the named captures as well.
+ if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
+ parse_regular_expression_errors(parser, node);
+ }
+
+ pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
+ return UP(node);
}
// If we get here, then we have interpolation so we'll need to create
// a regular expression node with interpolation.
- node = pm_interpolated_regular_expression_node_create(parser, &opening);
+ interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
- pm_interpolated_regular_expression_node_append(node, part);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
+
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ // This is extremely strange, but the first string part of a
+ // regular expression will always be tagged as binary if we
+ // are in a US-ASCII file, no matter its contents.
+ pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
+ }
+
+ pm_interpolated_regular_expression_node_append(interpolated, part);
} else {
// If the first part of the body of the regular expression is not a
// string content, then we have interpolation and we need to create an
// interpolated regular expression node.
- node = pm_interpolated_regular_expression_node_create(parser, &opening);
+ interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
}
// Now that we're here and we have interpolation, we'll parse all of the
// parts into the list.
pm_node_t *part;
while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
- pm_interpolated_regular_expression_node_append(node, part);
+ if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
+ pm_interpolated_regular_expression_node_append(interpolated, part);
}
}
@@ -16037,9 +19885,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
} else {
expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
}
- pm_interpolated_regular_expression_node_closing_set(node, &closing);
- return (pm_node_t *) node;
+ pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
+ return UP(interpolated);
}
case PM_TOKEN_BACKTICK:
case PM_TOKEN_PERCENT_LOWER_X: {
@@ -16061,7 +19909,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
};
parser_lex(parser);
- return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
+ return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
}
pm_interpolated_x_string_node_t *node;
@@ -16076,7 +19924,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
if (match1(parser, PM_TOKEN_STRING_END)) {
- pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
pm_node_flag_set(node, parse_unescaped_encoding(parser));
parser_lex(parser);
return node;
@@ -16089,7 +19937,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
pm_node_flag_set(part, parse_unescaped_encoding(parser));
pm_interpolated_xstring_node_append(node, part);
@@ -16102,7 +19950,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *part;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
+ if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
pm_interpolated_xstring_node_append(node, part);
}
}
@@ -16116,7 +19964,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_interpolated_xstring_node_closing_set(node, &closing);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_USTAR: {
parser_lex(parser);
@@ -16125,63 +19973,74 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// context of a multiple assignment. We enforce that here. We'll
// still lex past it though and create a missing node place.
if (binding_power != PM_BINDING_POWER_STATEMENT) {
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ pm_parser_err_prefix(parser, diag_id);
+ return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
}
pm_token_t operator = parser->previous;
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
+ pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
if (match1(parser, PM_TOKEN_COMMA)) {
- return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
+ return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
} else {
- return parse_target_validate(parser, splat);
+ return parse_target_validate(parser, splat, true);
}
}
case PM_TOKEN_BANG: {
+ if (binding_power > PM_BINDING_POWER_UNARY) {
+ pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
+ }
+
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER_BANG);
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
- pm_conditional_predicate(receiver);
- return (pm_node_t *) node;
+ pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
+ return UP(node);
}
case PM_TOKEN_TILDE: {
+ if (binding_power > PM_BINDING_POWER_UNARY) {
+ pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
+ }
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_TILDE);
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UMINUS: {
+ if (binding_power > PM_BINDING_POWER_UNARY) {
+ pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
+ }
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UMINUS_NUM: {
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
+ pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
pm_token_t exponent_operator = parser->previous;
- pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, PM_ERR_EXPECT_ARGUMENT);
- node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent);
- node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
+ pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+ node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
+ node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
} else {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE:
@@ -16191,7 +20050,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parse_negative_numeric(node);
break;
default:
- node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
+ node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
break;
}
}
@@ -16202,25 +20061,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
parser->lambda_enclosure_nesting = parser->enclosure_nesting;
+ size_t opening_newline_index = token_newline_index(parser);
pm_accepts_block_stack_push(parser, true);
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
pm_parser_scope_push(parser, false);
pm_block_parameters_node_t *block_parameters;
switch (parser->current.type) {
case PM_TOKEN_PARENTHESIS_LEFT: {
- parser->current_scope->explicit_params = true;
pm_token_t opening = parser->current;
parser_lex(parser);
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
} else {
- block_parameters = parse_block_parameters(parser, false, &opening, true);
+ block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
}
accept1(parser, PM_TOKEN_NEWLINE);
@@ -16230,10 +20088,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
break;
}
case PM_CASE_PARAMETER: {
- parser->current_scope->explicit_params = true;
pm_accepts_block_stack_push(parser, false);
pm_token_t opening = not_provided(parser);
- block_parameters = parse_block_parameters(parser, false, &opening, true);
+ block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
break;
}
@@ -16243,12 +20100,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
}
- uint32_t locals_body_index = 0;
-
- if (block_parameters) {
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
- }
-
pm_token_t opening;
pm_node_t *body = NULL;
parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
@@ -16256,121 +20107,230 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
opening = parser->previous;
- if (!accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
- body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
+ if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
+ body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
}
+
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
} else {
expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
opening = parser->previous;
if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
pm_accepts_block_stack_push(parser, true);
- body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END);
+ body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
pm_accepts_block_stack_pop(parser);
}
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
- body = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) body, false);
+ body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
- }
-
- pm_node_t *parameters = (pm_node_t *) block_parameters;
- uint8_t maximum = parser->current_scope->numbered_parameters;
-
- if (parameters == NULL && (maximum > 0)) {
- parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
- locals_body_index = maximum;
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
+ pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
pm_parser_scope_pop(parser);
pm_accepts_block_stack_pop(parser);
- pm_parser_current_param_name_restore(parser, saved_param_name);
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, locals_body_index, &operator, &opening, &parser->previous, parameters, body);
+ return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
}
case PM_TOKEN_UPLUS: {
+ if (binding_power > PM_BINDING_POWER_UNARY) {
+ pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
+ }
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_PLUS);
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_STRING_BEGIN:
- return parse_strings(parser, NULL);
+ return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
- return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
+ return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
}
- default:
- if (context_recoverable(parser, &parser->current)) {
+ default: {
+ pm_context_t recoverable = context_recoverable(parser, &parser->current);
+
+ if (recoverable != PM_CONTEXT_NONE) {
parser->recovering = true;
+
+ // If the given error is not the generic one, then we'll add it
+ // here because it will provide more context in addition to the
+ // recoverable error that we will also add.
+ if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
+ pm_parser_err_prefix(parser, diag_id);
+ }
+
+ // If we get here, then we are assuming this token is closing a
+ // parent context, so we'll indicate that to the user so that
+ // they know how we behaved.
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
+ } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
+ // We're going to make a special case here, because "cannot
+ // parse expression" is pretty generic, and we know here that we
+ // have an unexpected token.
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
+ } else {
+ pm_parser_err_prefix(parser, diag_id);
}
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
+ }
}
}
-static inline pm_node_t *
-parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
- pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
+/**
+ * Parse a value that is going to be written to some kind of variable or method
+ * call. We need to handle this separately because the rescue modifier is
+ * permitted at the end of these expressions, which is a deviation from its
+ * normal binding power.
+ *
+ * Note that this will only be called after an operator write, as in &&=, ||=,
+ * or any of the binary operators that can be written to a variable.
+ */
+static pm_node_t *
+parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
- // Contradicting binding powers, the right-hand-side value of rthe assignment allows the `rescue` modifier.
+ // Contradicting binding powers, the right-hand-side value of the assignment
+ // allows the `rescue` modifier.
if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+ context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
pm_token_t rescue = parser->current;
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ context_pop(parser);
+
+ return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
}
return value;
}
+/**
+ * When a local variable write node is the value being written in a different
+ * write, the local variable is considered "used".
+ */
+static void
+parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_BEGIN_NODE: {
+ const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
+ if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
+ break;
+ }
+ case PM_LOCAL_VARIABLE_WRITE_NODE: {
+ const pm_local_variable_write_node_t *cast = (const pm_local_variable_write_node_t *) node;
+ pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
+ break;
+ }
+ case PM_PARENTHESES_NODE: {
+ const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
+ if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
+ break;
+ }
+ case PM_STATEMENTS_NODE: {
+ const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
+ const pm_node_t *statement;
+
+ PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
+ parse_assignment_value_local(parser, statement);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+/**
+ * Parse the value (or values, through an implicit array) that is going to be
+ * written to some kind of variable or method call. We need to handle this
+ * separately because the rescue modifier is permitted at the end of these
+ * expressions, which is a deviation from its normal binding power.
+ *
+ * Additionally, if the value is a local variable write node (e.g., a = a = 1),
+ * the "a" is marked as being used so the parser should not warn on it.
+ *
+ * Note that this will only be called after an = operator, as that is the only
+ * operator that allows multiple values after it.
+ */
+static pm_node_t *
+parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ bool permitted = true;
+ if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
-static inline pm_node_t *
-parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
- pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
+ pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
+ if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
+
+ parse_assignment_value_local(parser, value);
+ bool single_value = true;
- bool is_single_value = true;
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
- is_single_value = false;
+ single_value = false;
+
pm_token_t opening = not_provided(parser);
pm_array_node_t *array = pm_array_node_create(parser, &opening);
pm_array_node_elements_append(array, value);
- value = (pm_node_t *) array;
+ value = UP(array);
while (accept1(parser, PM_TOKEN_COMMA)) {
- pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT);
+ pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
+
pm_array_node_elements_append(array, element);
if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
+
+ parse_assignment_value_local(parser, element);
}
}
- // Contradicting binding powers, the right-hand-side value of the assignment allows the `rescue` modifier.
- if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+ // Contradicting binding powers, the right-hand-side value of the assignment
+ // allows the `rescue` modifier.
+ if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+ context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
pm_token_t rescue = parser->current;
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
+ bool accepts_command_call_inner = false;
+
+ // RHS can accept command call iff the value is a call with arguments
+ // but without parentheses.
+ if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
+ pm_call_node_t *call_node = (pm_call_node_t *) value;
+ if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
+ accepts_command_call_inner = true;
+ }
+ }
+
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ context_pop(parser);
+
+ return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
}
return value;
}
/**
- * Ensures a call node that is about to become a call operator node does not
+ * Ensure a call node that is about to become a call operator node does not
* have arguments or a block attached. If it does, then we'll need to add an
* error message and destroy the arguments/block. Ideally we would keep the node
* around so that consumers would still have access to it, but we don't have a
@@ -16380,129 +20340,288 @@ static void
parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
if (call_node->arguments != NULL) {
pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
- pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
+ pm_node_unreference(parser, UP(call_node->arguments));
+ pm_node_destroy(parser, UP(call_node->arguments));
call_node->arguments = NULL;
}
if (call_node->block != NULL) {
pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
- pm_node_destroy(parser, (pm_node_t *) call_node->block);
+ pm_node_unreference(parser, UP(call_node->block));
+ pm_node_destroy(parser, UP(call_node->block));
call_node->block = NULL;
}
}
-static bool
-name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) {
- if (length == 0) {
- return false;
+/**
+ * This struct is used to pass information between the regular expression parser
+ * and the named capture callback.
+ */
+typedef struct {
+ /** The parser that is parsing the regular expression. */
+ pm_parser_t *parser;
+
+ /** The call node wrapping the regular expression node. */
+ pm_call_node_t *call;
+
+ /** The match write node that is being created. */
+ pm_match_write_node_t *match;
+
+ /** The list of names that have been parsed. */
+ pm_constant_id_list_t names;
+
+ /**
+ * Whether the content of the regular expression is shared. This impacts
+ * whether we use owned constants or shared constants in the
+ * constant pool for the names of the captures.
+ */
+ bool shared;
+} parse_regular_expression_named_capture_data_t;
+
+static inline const uint8_t *
+pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
+ cursor++;
+
+ if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
+ uint8_t value = escape_hexadecimal_digit(*cursor);
+ cursor++;
+
+ if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
+ value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
+ cursor++;
+ }
+
+ pm_buffer_append_byte(unescaped, value);
+ } else {
+ pm_buffer_append_string(unescaped, "\\x", 2);
}
- size_t width = char_is_identifier_start(parser, source);
- if (!width) {
- return false;
+ return cursor;
+}
+
+static inline const uint8_t *
+pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
+ uint8_t value = (uint8_t) (*cursor - '0');
+ cursor++;
+
+ if (cursor < end && pm_char_is_octal_digit(*cursor)) {
+ value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
+ cursor++;
+
+ if (cursor < end && pm_char_is_octal_digit(*cursor)) {
+ value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
+ cursor++;
+ }
}
- uint8_t *cursor = ((uint8_t *)source) + width;
- while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
- cursor += width;
+ pm_buffer_append_byte(unescaped, value);
+ return cursor;
+}
+
+static inline const uint8_t *
+pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
+ const uint8_t *start = cursor - 1;
+ cursor++;
+
+ if (cursor >= end) {
+ pm_buffer_append_string(unescaped, "\\u", 2);
+ return cursor;
+ }
+
+ if (*cursor != '{') {
+ size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
+ uint32_t value = escape_unicode(parser, cursor, length, error_location);
+
+ if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
+ pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
+ }
+
+ return cursor + length;
+ }
+
+ cursor++;
+ for (;;) {
+ while (cursor < end && *cursor == ' ') cursor++;
+
+ if (cursor >= end) break;
+ if (*cursor == '}') {
+ cursor++;
+ break;
+ }
+
+ size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
+ if (length == 0) {
+ break;
+ }
+ uint32_t value = escape_unicode(parser, cursor, length, error_location);
+
+ (void) pm_buffer_append_unicode_codepoint(unescaped, value);
+ cursor += length;
+ }
+
+ return cursor;
+}
+
+static void
+pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
+ const uint8_t *end = source + length;
+ pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
+
+ for (;;) {
+ if (++cursor >= end) {
+ pm_buffer_append_byte(unescaped, '\\');
+ return;
+ }
+
+ switch (*cursor) {
+ case 'x':
+ cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
+ break;
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+ cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
+ break;
+ case 'u':
+ cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
+ break;
+ default:
+ pm_buffer_append_byte(unescaped, '\\');
+ break;
+ }
+
+ const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
+ if (next_cursor == NULL) break;
+
+ pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
+ cursor = next_cursor;
}
- return cursor == source + length;
+ pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
}
/**
- * Potentially change a =~ with a regular expression with named captures into a
- * match write node.
+ * This callback is called when the regular expression parser encounters a named
+ * capture group.
*/
-static pm_node_t *
-parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
- pm_string_list_t named_captures = { 0 };
- pm_node_t *result;
-
- if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
- // Since we should not create a MatchWriteNode when all capture names
- // are invalid, creating a MatchWriteNode is delayed here.
- pm_match_write_node_t *match = NULL;
- pm_constant_id_list_t names = { 0 };
-
- for (size_t index = 0; index < named_captures.length; index++) {
- pm_string_t *string = &named_captures.strings[index];
-
- const uint8_t *source = pm_string_source(string);
- size_t length = pm_string_length(string);
-
- pm_location_t location;
- pm_constant_id_t name;
-
- // If the name of the capture group isn't a valid identifier, we do
- // not add it to the local table.
- if (!name_is_identifier(parser, source, length)) continue;
-
- if (content->type == PM_STRING_SHARED) {
- // If the unescaped string is a slice of the source, then we can
- // copy the names directly. The pointers will line up.
- location = (pm_location_t) { .start = source, .end = source + length };
- name = pm_parser_constant_id_location(parser, location.start, location.end);
- pm_refute_numbered_parameter(parser, source, source + length);
- } else {
- // Otherwise, the name is a slice of the malloc-ed owned string,
- // in which case we need to copy it out into a new string.
- location = call->receiver->location;
-
- void *memory = malloc(length);
- if (memory == NULL) abort();
+static void
+parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
+ parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
+
+ pm_parser_t *parser = callback_data->parser;
+ pm_call_node_t *call = callback_data->call;
+ pm_constant_id_list_t *names = &callback_data->names;
+
+ const uint8_t *source = pm_string_source(capture);
+ size_t length = pm_string_length(capture);
+ pm_buffer_t unescaped = { 0 };
+
+ // First, we need to handle escapes within the name of the capture group.
+ // This is because regular expressions have three different representations
+ // in prism. The first is the plain source code. The second is the
+ // representation that will be sent to the regular expression engine, which
+ // is the value of the "unescaped" field. This is poorly named, because it
+ // actually still contains escapes, just a subset of them that the regular
+ // expression engine knows how to handle. The third representation is fully
+ // unescaped, which is what we need.
+ const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
+ if (PRISM_UNLIKELY(cursor != NULL)) {
+ pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->shared ? NULL : &call->receiver->location);
+ source = (const uint8_t *) pm_buffer_value(&unescaped);
+ length = pm_buffer_length(&unescaped);
+ }
- memcpy(memory, source, length);
- // This silences clang analyzer warning about leak of memory pointed by `memory`.
- // NOLINTNEXTLINE(clang-analyzer-*)
- name = pm_parser_constant_id_owned(parser, (const uint8_t *) memory, length);
+ pm_location_t location;
+ pm_constant_id_t name;
- if (pm_token_is_numbered_parameter(source, source + length)) {
- const pm_location_t *location = &call->receiver->location;
- PM_PARSER_ERR_LOCATION_FORMAT(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED, location->start);
- }
- }
+ // If the name of the capture group isn't a valid identifier, we do
+ // not add it to the local table.
+ if (!pm_slice_is_valid_local(parser, source, source + length)) {
+ pm_buffer_free(&unescaped);
+ return;
+ }
- if (name != 0) {
- // We dont want to create duplicate targets if the capture name
- // is duplicated.
- if (pm_constant_id_list_includes(&names, name)) continue;
- pm_constant_id_list_append(&names, name);
+ if (callback_data->shared) {
+ // If the unescaped string is a slice of the source, then we can
+ // copy the names directly. The pointers will line up.
+ location = (pm_location_t) { .start = source, .end = source + length };
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
+ } else {
+ // Otherwise, the name is a slice of the malloc-ed owned string,
+ // in which case we need to copy it out into a new string.
+ location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
- // Here we lazily create the MatchWriteNode since we know we're
- // about to add a target.
- if (match == NULL) match = pm_match_write_node_create(parser, call);
+ void *memory = xmalloc(length);
+ if (memory == NULL) abort();
- // First, find the depth of the local that is being assigned.
- int depth;
- if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
- pm_parser_local_add(parser, name);
- }
+ memcpy(memory, source, length);
+ name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
+ }
- // Next, create the local variable target and add it to the
- // list of targets for the match.
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_values(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
- pm_node_list_append(&match->targets, target);
+ // Add this name to the list of constants if it is valid, not duplicated,
+ // and not a keyword.
+ if (name != 0 && !pm_constant_id_list_includes(names, name)) {
+ pm_constant_id_list_append(names, name);
+
+ int depth;
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
+ // If the name is not already a local but it is a keyword, then we
+ // do not want to add a capture for this.
+ if (pm_local_is_keyword((const char *) source, length)) {
+ pm_buffer_free(&unescaped);
+ return;
}
+
+ // If the identifier is not already a local, then we will add it to
+ // the local table.
+ pm_parser_local_add(parser, name, location.start, location.end, 0);
}
- if (match != NULL) {
- result = (pm_node_t *) match;
- } else {
- result = (pm_node_t *) call;
+ // Here we lazily create the MatchWriteNode since we know we're
+ // about to add a target.
+ if (callback_data->match == NULL) {
+ callback_data->match = pm_match_write_node_create(parser, call);
}
- pm_constant_id_list_free(&names);
- } else {
- result = (pm_node_t *) call;
+ // Next, create the local variable target and add it to the list of
+ // targets for the match.
+ pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth));
+ pm_node_list_append(&callback_data->match->targets, target);
}
- pm_string_list_free(&named_captures);
- return result;
+ pm_buffer_free(&unescaped);
+}
+
+/**
+ * Potentially change a =~ with a regular expression with named captures into a
+ * match write node.
+ */
+static pm_node_t *
+parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
+ parse_regular_expression_named_capture_data_t callback_data = {
+ .parser = parser,
+ .call = call,
+ .names = { 0 },
+ .shared = content->type == PM_STRING_SHARED
+ };
+
+ parse_regular_expression_error_data_t error_data = {
+ .parser = parser,
+ .start = call->receiver->location.start,
+ .end = call->receiver->location.end,
+ .shared = content->type == PM_STRING_SHARED
+ };
+
+ pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
+ pm_constant_id_list_free(&callback_data.names);
+
+ if (callback_data.match != NULL) {
+ return UP(callback_data.match);
+ } else {
+ return UP(call);
+ }
}
static inline pm_node_t *
-parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call) {
+parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
pm_token_t token = parser->current;
switch (token.type) {
@@ -16514,14 +20633,26 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// local variable write. This _must_ happen before the value
// is parsed because it could be referenced in the value.
pm_call_node_t *call_node = (pm_call_node_t *) node;
- if (pm_call_node_variable_call_p(call_node)) {
- pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end);
+ if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
+ pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
}
}
- /* fallthrough */
+ PRISM_FALLTHROUGH
case PM_CASE_WRITABLE: {
+ // When we have `it = value`, we need to add `it` as a local
+ // variable before parsing the value, in case the value
+ // references the variable.
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
+ }
+
parser_lex(parser);
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+
+ if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
+ pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
+ }
+
return parse_write(parser, node, &token, value);
}
case PM_SPLAT_NODE: {
@@ -16529,16 +20660,28 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_multi_target_node_targets_append(parser, multi_target, node);
parser_lex(parser);
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
- return parse_write(parser, (pm_node_t *) multi_target, &token, value);
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+ return parse_write(parser, UP(multi_target), &token, value);
+ }
+ case PM_SOURCE_ENCODING_NODE:
+ case PM_FALSE_NODE:
+ case PM_SOURCE_FILE_NODE:
+ case PM_SOURCE_LINE_NODE:
+ case PM_NIL_NODE:
+ case PM_SELF_NODE:
+ case PM_TRUE_NODE: {
+ // In these special cases, we have specific error messages
+ // and we will replace them with local variable writes.
+ parser_lex(parser);
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+ return parse_unwriteable_write(parser, node, &token, value);
}
default:
+ // In this case we have an = sign, but we don't know what
+ // it's for. We need to treat it as an error. We'll mark it
+ // as an error and skip past it.
parser_lex(parser);
-
- // In this case we have an = sign, but we don't know what it's for. We
- // need to treat it as an error. For now, we'll mark it as an error
- // and just skip right past it.
- pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
+ pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
return node;
}
}
@@ -16546,13 +20689,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
switch (PM_NODE_TYPE(node)) {
case PM_BACK_REFERENCE_READ_NODE:
case PM_NUMBERED_REFERENCE_READ_NODE:
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
- /* fallthrough */
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
+ PRISM_FALLTHROUGH
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
pm_node_destroy(parser, node);
return result;
@@ -16560,8 +20703,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
pm_node_destroy(parser, node);
return result;
@@ -16569,74 +20712,97 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- return (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
+
+ return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- pm_node_t *result = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
pm_node_destroy(parser, node);
- return result;
+ return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
+
+ pm_node_destroy(parser, node);
+ return result;
+ }
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+ parser_lex(parser);
+
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
+ pm_node_unreference(parser, node);
pm_node_destroy(parser, node);
return result;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
+ if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+ PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
+ pm_node_unreference(parser, node);
+ }
+
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
pm_node_destroy(parser, node);
return result;
}
case PM_CALL_NODE: {
- parser_lex(parser);
pm_call_node_t *cast = (pm_call_node_t *) node;
// If we have a vcall (a method with no arguments and no
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
- if (pm_call_node_variable_call_p(cast)) {
+ if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
pm_location_t *message_loc = &cast->message_loc;
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ parser_lex(parser);
- pm_node_destroy(parser, (pm_node_t *) cast);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
+
+ pm_node_destroy(parser, UP(cast));
return result;
}
+ // Move past the token here so that we have already added
+ // the local variable by this point.
+ parser_lex(parser);
+
// If there is no call operator and the message is "[]" then
// this is an aref expression, and we can transform it into
// an aset expression.
- if (pm_call_node_index_p(cast)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
+ if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ return UP(pm_index_and_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
- if (pm_call_node_writable_p(cast)) {
+ if (pm_call_node_writable_p(parser, cast)) {
parse_write_name(parser, &cast->name);
} else {
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
- return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ return UP(pm_call_and_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -16657,13 +20823,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
switch (PM_NODE_TYPE(node)) {
case PM_BACK_REFERENCE_READ_NODE:
case PM_NUMBERED_REFERENCE_READ_NODE:
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
- /* fallthrough */
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
+ PRISM_FALLTHROUGH
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
pm_node_destroy(parser, node);
return result;
@@ -16671,8 +20837,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
pm_node_destroy(parser, node);
return result;
@@ -16680,74 +20846,97 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- return (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
+
+ return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- pm_node_t *result = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
pm_node_destroy(parser, node);
- return result;
+ return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
pm_node_destroy(parser, node);
return result;
}
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+ parser_lex(parser);
+
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
+
+ pm_node_unreference(parser, node);
+ pm_node_destroy(parser, node);
+ return result;
+ }
case PM_LOCAL_VARIABLE_READ_NODE: {
+ if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+ PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
+ pm_node_unreference(parser, node);
+ }
+
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
pm_node_destroy(parser, node);
return result;
}
case PM_CALL_NODE: {
- parser_lex(parser);
pm_call_node_t *cast = (pm_call_node_t *) node;
// If we have a vcall (a method with no arguments and no
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
- if (pm_call_node_variable_call_p(cast)) {
+ if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
pm_location_t *message_loc = &cast->message_loc;
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ parser_lex(parser);
+
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
- pm_node_destroy(parser, (pm_node_t *) cast);
+ pm_node_destroy(parser, UP(cast));
return result;
}
+ // Move past the token here so that we have already added
+ // the local variable by this point.
+ parser_lex(parser);
+
// If there is no call operator and the message is "[]" then
// this is an aref expression, and we can transform it into
// an aset expression.
- if (pm_call_node_index_p(cast)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
+ if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ return UP(pm_index_or_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
- if (pm_call_node_writable_p(cast)) {
+ if (pm_call_node_writable_p(parser, cast)) {
parse_write_name(parser, &cast->name);
} else {
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
- return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ return UP(pm_call_or_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -16778,13 +20967,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
switch (PM_NODE_TYPE(node)) {
case PM_BACK_REFERENCE_READ_NODE:
case PM_NUMBERED_REFERENCE_READ_NODE:
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
- /* fallthrough */
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
+ PRISM_FALLTHROUGH
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
pm_node_destroy(parser, node);
return result;
@@ -16792,8 +20981,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
pm_node_destroy(parser, node);
return result;
@@ -16801,33 +20990,51 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- return (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
+
+ return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- pm_node_t *result = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
pm_node_destroy(parser, node);
- return result;
+ return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
+
+ pm_node_destroy(parser, node);
+ return result;
+ }
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+ parser_lex(parser);
+
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
+ pm_node_unreference(parser, node);
pm_node_destroy(parser, node);
return result;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
+ if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+ PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
+ pm_node_unreference(parser, node);
+ }
+
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
pm_node_destroy(parser, node);
return result;
@@ -16839,36 +21046,36 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// If we have a vcall (a method with no arguments and no
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
- if (pm_call_node_variable_call_p(cast)) {
+ if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
pm_location_t *message_loc = &cast->message_loc;
pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
- pm_node_destroy(parser, (pm_node_t *) cast);
+ pm_node_destroy(parser, UP(cast));
return result;
}
// If there is no call operator and the message is "[]" then
// this is an aref expression, and we can transform it into
// an aset expression.
- if (pm_call_node_index_p(cast)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
+ if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
- if (pm_call_node_writable_p(cast)) {
+ if (pm_call_node_writable_p(parser, cast)) {
parse_write_name(parser, &cast->name);
} else {
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -16881,7 +21088,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// In this case we have an operator but we don't know what it's for.
// We need to treat it as an error. For now, we'll mark it as an error
// and just skip right past it.
- pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
return node;
}
}
@@ -16889,15 +21096,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_TOKEN_KEYWORD_AND: {
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
+ pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_and_node_create(parser, node, &token, right));
}
case PM_TOKEN_KEYWORD_OR:
case PM_TOKEN_PIPE_PIPE: {
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
+ pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_or_node_create(parser, node, &token, right));
}
case PM_TOKEN_EQUAL_TILDE: {
// Note that we _must_ parse the value before adding the local
@@ -16908,11 +21115,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
//
// In this case, `foo` should be a method call and not a local yet.
parser_lex(parser);
- pm_node_t *argument = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
+ pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
// By default, we're going to create a call node and then return it.
- pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument);
- pm_node_t *result = (pm_node_t *) call;
+ pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
+ pm_node_t *result = UP(call);
// If the receiver of this =~ is a regular expression node, then we
// need to introduce local variables for it based on its named
@@ -16927,9 +21134,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
bool interpolated = false;
size_t total_length = 0;
- for (size_t index = 0; index < parts->size; index++) {
- pm_node_t *part = parts->nodes[index];
-
+ pm_node_t *part;
+ PM_NODE_LIST_FOREACH(parts, index, part) {
if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
} else {
@@ -16939,12 +21145,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
if (!interpolated && total_length > 0) {
- void *memory = malloc(total_length);
+ void *memory = xmalloc(total_length);
if (!memory) abort();
uint8_t *cursor = memory;
- for (size_t index = 0; index < parts->size; index++) {
- pm_string_t *unescaped = &((pm_string_node_t *) parts->nodes[index])->unescaped;
+ PM_NODE_LIST_FOREACH(parts, index, part) {
+ pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
size_t length = pm_string_length(unescaped);
memcpy(cursor, pm_string_source(unescaped), length);
@@ -16954,14 +21160,14 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_string_t owned;
pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
- result = parse_regular_expression_named_captures(parser, &owned, call);
+ result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
pm_string_free(&owned);
}
} else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
// If we have a regular expression node, then we can just parse
// the named captures directly off the unescaped string.
const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
- result = parse_regular_expression_named_captures(parser, content, call);
+ result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
}
return result;
@@ -16976,10 +21182,6 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_TOKEN_EQUAL_EQUAL:
case PM_TOKEN_EQUAL_EQUAL_EQUAL:
case PM_TOKEN_LESS_EQUAL_GREATER:
- case PM_TOKEN_GREATER:
- case PM_TOKEN_GREATER_EQUAL:
- case PM_TOKEN_LESS:
- case PM_TOKEN_LESS_EQUAL:
case PM_TOKEN_CARET:
case PM_TOKEN_PIPE:
case PM_TOKEN_AMPERSAND:
@@ -16992,9 +21194,47 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_TOKEN_STAR:
case PM_TOKEN_STAR_STAR: {
parser_lex(parser);
+ pm_token_t operator = parser->previous;
+ switch (PM_NODE_TYPE(node)) {
+ case PM_RESCUE_MODIFIER_NODE: {
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ }
+ break;
+ }
+ case PM_AND_NODE: {
+ pm_and_node_t *cast = (pm_and_node_t *) node;
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ }
+ break;
+ }
+ case PM_OR_NODE: {
+ pm_or_node_t *cast = (pm_or_node_t *) node;
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ }
+ break;
+ }
+ default:
+ break;
+ }
- pm_node_t *argument = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument);
+ pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
+ }
+ case PM_TOKEN_GREATER:
+ case PM_TOKEN_GREATER_EQUAL:
+ case PM_TOKEN_LESS:
+ case PM_TOKEN_LESS_EQUAL: {
+ if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
+ }
+
+ parser_lex(parser);
+ pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
}
case PM_TOKEN_AMPERSAND_DOT:
case PM_TOKEN_DOT: {
@@ -17004,8 +21244,34 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// This if statement handles the foo.() syntax.
if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
- parse_arguments_list(parser, &arguments, true, false);
- return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
+ parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
+ return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
+ }
+
+ switch (PM_NODE_TYPE(node)) {
+ case PM_RESCUE_MODIFIER_NODE: {
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ }
+ break;
+ }
+ case PM_AND_NODE: {
+ pm_and_node_t *cast = (pm_and_node_t *) node;
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ }
+ break;
+ }
+ case PM_OR_NODE: {
+ pm_or_node_t *cast = (pm_or_node_t *) node;
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ }
+ break;
+ }
+ default:
+ break;
}
pm_token_t message;
@@ -17021,12 +21287,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
break;
}
default: {
- pm_parser_err_current(parser, PM_ERR_DEF_NAME);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
}
}
- parse_arguments_list(parser, &arguments, true, accepts_command_call);
+ parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
if (
@@ -17035,9 +21301,9 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
arguments.opening_loc.start == NULL &&
match1(parser, PM_TOKEN_COMMA)
) {
- return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX);
+ return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
} else {
- return (pm_node_t *) call;
+ return UP(call);
}
}
case PM_TOKEN_DOT_DOT:
@@ -17046,45 +21312,50 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_node_t *right = NULL;
if (token_begins_expression_p(parser->current.type)) {
- right = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
+ right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
}
- return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
+ return UP(pm_range_node_create(parser, node, &token, right));
}
case PM_TOKEN_KEYWORD_IF_MODIFIER: {
pm_token_t keyword = parser->current;
parser_lex(parser);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
- return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
}
case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
pm_token_t keyword = parser->current;
parser_lex(parser);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
- return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
}
case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
parser_lex(parser);
pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(statements, node);
+ pm_statements_node_body_append(parser, statements, node, true);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
- return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
}
case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
parser_lex(parser);
pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(statements, node);
+ pm_statements_node_body_append(parser, statements, node, true);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
- return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
}
case PM_TOKEN_QUESTION_MARK: {
+ context_push(parser, PM_CONTEXT_TERNARY);
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
pm_token_t qmark = parser->current;
parser_lex(parser);
- pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_TERNARY_EXPRESSION_TRUE);
+
+ pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
if (parser->recovering) {
// If parsing the true expression of this ternary resulted in a syntax
@@ -17094,18 +21365,26 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// accidentally move past a ':' token that occurs after the syntax
// error.
pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
+ pm_node_t *false_expression = UP(pm_missing_node_create(parser, colon.start, colon.end));
+
+ context_pop(parser);
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
+ return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
}
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
pm_token_t colon = parser->previous;
- pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_TERNARY_EXPRESSION_FALSE);
+ pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
+ context_pop(parser);
+ pop_block_exits(parser, previous_block_exits);
+ pm_node_list_free(&current_block_exits);
+
+ return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
}
case PM_TOKEN_COLON_COLON: {
parser_lex(parser);
@@ -17118,7 +21397,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
if (
(parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
- (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
+ (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
) {
// If we have a constant immediately following a '::' operator, then
// this can either be a constant path or a method call, depending on
@@ -17129,17 +21408,16 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_token_t message = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call);
- path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
+ parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
} else {
// Otherwise, this is a constant path. That would look like Foo::Bar.
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
- path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
+ path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
// If this is followed by a comma then it is a multiple assignment.
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX);
+ return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
return path;
@@ -17154,37 +21432,39 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// If we have an identifier following a '::' operator, then it is for
// sure a method call.
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call);
+ parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
// If this is followed by a comma then it is a multiple assignment.
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX);
+ return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
- return (pm_node_t *) call;
+ return UP(call);
}
case PM_TOKEN_PARENTHESIS_LEFT: {
// If we have a parenthesis following a '::' operator, then it is the
// method call shorthand. That would look like Foo::(bar).
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, false);
+ parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
+ return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
}
default: {
- pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
- return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+ return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
}
}
case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
+ context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
parser_lex(parser);
accept1(parser, PM_TOKEN_NEWLINE);
- pm_node_t *value = parse_expression(parser, binding_power, true, PM_ERR_RESCUE_MODIFIER_VALUE);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ context_pop(parser);
+
+ return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
}
case PM_TOKEN_BRACKET_LEFT: {
parser_lex(parser);
@@ -17194,7 +21474,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
pm_accepts_block_stack_push(parser, true);
- parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT);
+ parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
}
@@ -17205,7 +21485,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// assignment and we should parse the targets.
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
- return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX);
+ return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
// If we're at the end of the arguments, we can now check if there is a
@@ -17213,25 +21493,25 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// add it to the arguments.
pm_block_node_t *block = NULL;
if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
- block = parse_block(parser);
+ block = parse_block(parser, (uint16_t) (depth + 1));
pm_arguments_validate_block(parser, &arguments, block);
} else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
- block = parse_block(parser);
+ block = parse_block(parser, (uint16_t) (depth + 1));
}
if (block != NULL) {
if (arguments.block != NULL) {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
if (arguments.arguments == NULL) {
arguments.arguments = pm_arguments_node_create(parser);
}
pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
}
- arguments.block = (pm_node_t *) block;
+ arguments.block = UP(block);
}
- return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
+ return UP(pm_call_node_aref_create(parser, node, &arguments));
}
case PM_TOKEN_KEYWORD_IN: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -17240,13 +21520,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_token_t operator = parser->current;
parser->command_start = false;
lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
-
parser_lex(parser);
- pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
+ pm_constant_id_list_t captures = { 0 };
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
+
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+ pm_constant_id_list_free(&captures);
- return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
+ return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
}
case PM_TOKEN_EQUAL_GREATER: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -17255,13 +21537,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_token_t operator = parser->current;
parser->command_start = false;
lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
-
parser_lex(parser);
- pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
+ pm_constant_id_list_t captures = { 0 };
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
+
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+ pm_constant_id_list_free(&captures);
- return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
+ return UP(pm_match_required_node_create(parser, node, pattern, &operator));
}
default:
assert(false && "unreachable");
@@ -17269,6 +21553,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
}
+#undef PM_PARSE_PATTERN_SINGLE
+#undef PM_PARSE_PATTERN_TOP
+#undef PM_PARSE_PATTERN_MULTI
+
+/**
+ * Determine if a given call node looks like a "command", which means it has
+ * arguments but does not have parentheses.
+ *
+ * All three of the following must hold for this to return true: the call has
+ * no opening location recorded, its block (if any) is a
+ * PM_BLOCK_ARGUMENT_NODE, and at least one of its arguments or its block is
+ * present.
+ */
+static inline bool
+pm_call_node_command_p(const pm_call_node_t *node) {
+ return (
+ (node->opening_loc.start == NULL) && // no opening location on the call
+ (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) && // any other kind of block disqualifies the call
+ (node->arguments != NULL || node->block != NULL) // something must actually be passed
+ );
+}
+
/**
* Parse an expression at the given point of the parser using the given binding
* power to parse subsequent chains. If this function finds a syntax error, it
@@ -17278,34 +21579,46 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
* determine if they need to perform additional cleanup.
*/
static pm_node_t *
-parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
- pm_token_t recovery = parser->previous;
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
+ pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
+ return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
+ }
+
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
switch (PM_NODE_TYPE(node)) {
case PM_MISSING_NODE:
// If we found a syntax error, then the type of node returned by
- // parse_expression_prefix is going to be a missing node. In that
- // case we need to add the error message to the parser's error list.
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
+ // parse_expression_prefix is going to be a missing node.
return node;
case PM_PRE_EXECUTION_NODE:
case PM_POST_EXECUTION_NODE:
case PM_ALIAS_GLOBAL_VARIABLE_NODE:
case PM_ALIAS_METHOD_NODE:
+ case PM_MULTI_WRITE_NODE:
case PM_UNDEF_NODE:
// These expressions are statements, and cannot be followed by
// operators (except modifiers).
- if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE) {
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
+ return node;
+ }
+ break;
+ case PM_CALL_NODE:
+ // If we have a call node, then we need to check if it looks like a
+ // method call without parentheses that contains arguments. If it
+ // does, then it has different rules for parsing infix operators,
+ // namely that it only accepts composition (and/or) and modifiers
+ // (if/unless/etc.).
+ if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
return node;
}
break;
- case PM_RANGE_NODE:
- // Range operators are non-associative, so that it does not
- // associate with other range operators (i.e. `..1..` should be
- // rejected.) For this reason, we check such a case for unary ranges
- // here, and if so, it returns the node immediately,
- if ((((pm_range_node_t *) node)->left == NULL) && pm_binding_powers[parser->current.type].left >= PM_BINDING_POWER_RANGE) {
+ case PM_SYMBOL_NODE:
+ // If we have a symbol node that is being parsed as a label, then we
+ // need to immediately return, because there should never be an
+ // infix operator following this node.
+ if (pm_symbol_node_label_p(node)) {
return node;
}
break;
@@ -17316,25 +21629,85 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
// Otherwise we'll look and see if the next token can be parsed as an infix
// operator. If it can, then we'll parse it using parse_expression_infix.
pm_binding_powers_t current_binding_powers;
+ pm_token_type_t current_token_type;
+
while (
- current_binding_powers = pm_binding_powers[parser->current.type],
+ current_token_type = parser->current.type,
+ current_binding_powers = pm_binding_powers[current_token_type],
binding_power <= current_binding_powers.left &&
current_binding_powers.binary
) {
- node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call);
+ node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
+
+ if (context_terminator(parser->current_context->context, &parser->current)) {
+ // If this token terminates the current context, then we need to
+ // stop parsing the expression, as it has become a statement.
+ return node;
+ }
+
+ switch (PM_NODE_TYPE(node)) {
+ case PM_MULTI_WRITE_NODE:
+ // Multi-write nodes are statements, and cannot be followed by
+ // operators except modifiers.
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
+ return node;
+ }
+ break;
+ case PM_CLASS_VARIABLE_WRITE_NODE:
+ case PM_CONSTANT_PATH_WRITE_NODE:
+ case PM_CONSTANT_WRITE_NODE:
+ case PM_GLOBAL_VARIABLE_WRITE_NODE:
+ case PM_INSTANCE_VARIABLE_WRITE_NODE:
+ case PM_LOCAL_VARIABLE_WRITE_NODE:
+ // These expressions are statements, by virtue of the right-hand
+ // side of their write being an implicit array.
+ if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
+ return node;
+ }
+ break;
+ case PM_CALL_NODE:
+ // These expressions are also statements, by virtue of the
+ // right-hand side of the expression (i.e., the last argument to
+ // the call node) being an implicit array.
+ if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
+ return node;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // If the operator is nonassoc and we should not be able to parse the
+ // upcoming infix operator, break.
if (current_binding_powers.nonassoc) {
- bool endless_range_p = PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL;
- pm_binding_power_t left = endless_range_p ? PM_BINDING_POWER_TERM : current_binding_powers.left;
- if (
- left <= pm_binding_powers[parser->current.type].left ||
- // Exceptionally to operator precedences, '1.. & 2' is rejected.
- // '1.. || 2' is also an exception, but it is handled by the lexer.
- // (Here, parser->current is PM_TOKEN_PIPE, not PM_TOKEN_PIPE_PIPE).
- (endless_range_p && match1(parser, PM_TOKEN_AMPERSAND))
- ) {
+ // If this is a non-assoc operator and we are about to parse the
+ // exact same operator, then we need to add an error.
+ if (match1(parser, current_token_type)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+ break;
+ }
+
+ // If this is an endless range, then we need to reject a couple of
+ // additional operators because it violates the normal operator
+ // precedence rules. Those patterns are:
+ //
+ // 1.. & 2
+ // 1.. * 2
+ //
+ if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
+ if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+ break;
+ }
+
+ if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
+ break;
+ }
+ } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
break;
}
}
+
if (accepts_command_call) {
// A command-style method call is only accepted on method chains.
// Thus, we check whether the parsed node can continue method chains.
@@ -17389,6 +21762,89 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
return node;
}
+/**
+ * ruby -p, ruby -n, ruby -a, and ruby -l options will mutate the AST. We
+ * perform that mutation here.
+ *
+ * When PM_PARSER_COMMAND_LINE_OPTION_P is set, a synthesized `print` call
+ * taking `$_` is appended to the statements. When
+ * PM_PARSER_COMMAND_LINE_OPTION_N is set, the statements are wrapped in a
+ * synthesized while loop whose predicate is a `gets` call taking `$/` (plus a
+ * `chomp: true` keyword when PM_PARSER_COMMAND_LINE_OPTION_L is set); if
+ * PM_PARSER_COMMAND_LINE_OPTION_A is also set, a `$F = $_.split($;)` write is
+ * prepended to the loop body first.
+ *
+ * Returns the resulting statements node: the new list holding the while loop
+ * in the N case, otherwise the (possibly newly created) input statements.
+ */
+static pm_statements_node_t *
+wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
+ if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
+ if (statements == NULL) {
+ statements = pm_statements_node_create(parser); // need a list to append the print call to
+ }
+
+ pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
+ pm_arguments_node_arguments_append(
+ arguments,
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
+ );
+
+ pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
+ parser,
+ arguments,
+ pm_parser_constant_id_constant(parser, "print", 5)
+ )), true); // appended after the existing statements: print($_)
+ }
+
+ if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
+ if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
+ if (statements == NULL) {
+ statements = pm_statements_node_create(parser); // need a list to prepend the $F write to
+ }
+
+ pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
+ pm_arguments_node_arguments_append(
+ arguments,
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
+ );
+
+ pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
+ pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments); // $_.split($;)
+
+ pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
+ parser,
+ pm_parser_constant_id_constant(parser, "$F", 2),
+ UP(call)
+ );
+
+ pm_statements_node_body_prepend(statements, UP(write)); // $F is assigned before the existing statements
+ }
+
+ pm_arguments_node_t *arguments = pm_arguments_node_create(parser); // arguments for the gets call below
+ pm_arguments_node_arguments_append(
+ arguments,
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
+ );
+
+ if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
+ pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
+ pm_keyword_hash_node_elements_append(keywords, UP(pm_assoc_node_create(
+ parser,
+ UP(pm_symbol_node_synthesized_create(parser, "chomp")),
+ &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
+ UP(pm_true_node_synthesized_create(parser))
+ ))); // chomp: true
+
+ pm_arguments_node_arguments_append(arguments, UP(keywords));
+ pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
+ }
+
+ pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
+ pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
+ parser,
+ UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
+ statements
+ )), true); // statements may still be NULL here if neither branch above created it
+
+ statements = wrapped_statements; // the while loop becomes the only statement returned
+ }
+
+ return statements;
+}
+
+/**
+ * Parse the top-level program node.
+ */
static pm_node_t *
parse_program(pm_parser_t *parser) {
// If the current scope is NULL, then we want to push a new top level scope.
@@ -17398,22 +21854,43 @@ parse_program(pm_parser_t *parser) {
pm_parser_scope_push(parser, true);
}
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
parser_lex(parser);
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
- if (!statements) {
- statements = pm_statements_node_create(parser);
+ pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
+
+ if (statements != NULL && !parser->parsing_eval) {
+ // If we have statements, then the top-level statement should be
+ // explicitly checked as well. We have to do this here because
+ // everywhere else we check all but the last statement.
+ assert(statements->body.size > 0);
+ pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
pm_parser_scope_pop(parser);
+ // At the top level, see if we need to wrap the statements in a program
+ // node with a while loop based on the options.
+ if (parser->command_line & (PM_OPTIONS_COMMAND_LINE_P | PM_OPTIONS_COMMAND_LINE_N)) {
+ statements = wrap_statements(parser, statements);
+ } else {
+ flush_block_exits(parser, previous_block_exits);
+ }
+
+ pm_node_list_free(&current_block_exits);
+
// If this is an empty file, then we're still going to parse all of the
// statements in order to gather up all of the comments and such. Here we'll
// correct the location information.
- if (pm_statements_node_body_length(statements) == 0) {
+ if (statements == NULL) {
+ statements = pm_statements_node_create(parser);
pm_statements_node_location_set(statements, parser->start, parser->start);
}
- return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
+ return UP(pm_program_node_create(parser, &locals, statements));
}
/******************************************************************************/
@@ -17421,6 +21898,69 @@ parse_program(pm_parser_t *parser) {
/******************************************************************************/
/**
+ * A vendored version of strnstr that is used to find a substring within a
+ * string with a given length. This function is used to search for the Ruby
+ * engine name within a shebang when the -x option is passed to Ruby.
+ *
+ * The only modification that we made here is that we don't do NULL byte checks
+ * because we know the little parameter will not have a NULL byte and we allow
+ * the big parameter to have them.
+ *
+ * Searches the first big_length bytes of big for the NUL-terminated string
+ * little. Returns a pointer to the first match, or NULL if there is none.
+ *
+ * NOTE(review): the upper bound is computed as big + big_length -
+ * strlen(little), which points before big when big_length is smaller than
+ * strlen(little). Confirm callers never pass a haystack shorter than the
+ * needle, or guard that case before forming the pointer.
+ */
+static const char *
+pm_strnstr(const char *big, const char *little, size_t big_length) {
+ size_t little_length = strlen(little);
+
+ for (const char *max = big + big_length - little_length; big <= max; big++) { // max is the last position where little could still fit
+ if (*big == *little && memcmp(big, little, little_length) == 0) return big; // first-byte check before the full memcmp
+ }
+
+ return NULL;
+}
+
+#ifdef _WIN32
+#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
+#else
+/**
+ * Potentially warn the user if the shebang that has been found to include
+ * "ruby" has a carriage return at the end, as that can cause problems on some
+ * platforms.
+ *
+ * The warning (PM_WARN_SHEBANG_CARRIAGE_RETURN) is added only when length is
+ * greater than 2 and the final two bytes of the span are "\r\n"; it covers
+ * the range from start to start + length. When _WIN32 is defined this name is
+ * instead a macro that expands to a no-op.
+ */
+static void
+pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
+ if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') { // span ends in CRLF
+ pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
+ }
+}
+#endif
+
+/**
+ * Process the shebang when initializing the parser. This function assumes that
+ * the shebang_callback option has already been checked for nullability.
+ *
+ * Looks for the first " -" within the first length bytes of engine and returns
+ * without doing anything if there is none. Otherwise a copy of options is
+ * handed to options->shebang_callback along with the text starting at that
+ * '-'. Afterwards, a non-empty encoding on the copy is passed to
+ * parser_lex_magic_comment_encoding_value, and the copy's command_line and
+ * frozen_string_literal values are stored on the parser.
+ */
+static void
+pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
+ const char *switches = pm_strnstr(engine, " -", length);
+ if (switches == NULL) return; // no switches present in the shebang span
+
+ pm_options_t next_options = *options; // the callback receives this copy, not the const options
+ options->shebang_callback(
+ &next_options,
+ (const uint8_t *) (switches + 1), // skip the space so the text starts at '-'
+ length - ((size_t) (switches - engine)) - 1, // bytes remaining from the '-' to the end of the span
+ options->shebang_callback_data
+ );
+
+ size_t encoding_length;
+ if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) { // only act on a non-empty encoding
+ const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
+ parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
+ }
+
+ parser->command_line = next_options.command_line;
+ parser->frozen_string_literal = next_options.frozen_string_literal;
+}
+
+/**
* Initialize a parser with the given start and end pointers.
*/
PRISM_EXPORTED_FUNCTION void
@@ -17428,6 +21968,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
assert(source != NULL);
*parser = (pm_parser_t) {
+ .node_id = 0,
.lex_state = PM_LEX_STATE_BEG,
.enclosure_nesting = 0,
.lambda_enclosure_nesting = -1,
@@ -17445,6 +21986,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
.next_start = NULL,
.heredoc_end = NULL,
+ .data_loc = { .start = NULL, .end = NULL },
.comment_list = { 0 },
.magic_comment_list = { 0 },
.warning_list = { 0 },
@@ -17455,22 +21997,27 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.encoding_changed_callback = NULL,
.encoding_comment_start = source,
.lex_callback = NULL,
- .filepath_string = { 0 },
+ .filepath = { 0 },
.constant_pool = { 0 },
.newline_list = { 0 },
.integer_base = 0,
.current_string = PM_STRING_EMPTY,
.start_line = 1,
.explicit_encoding = NULL,
+ .command_line = 0,
+ .parsing_eval = false,
+ .partial_script = false,
.command_start = true,
.recovering = false,
+ .encoding_locked = false,
.encoding_changed = false,
.pattern_matching_newlines = false,
.in_keyword_arg = false,
- .current_param_name = 0,
+ .current_block_exits = NULL,
.semantic_token_seen = false,
- .frozen_string_literal = false,
- .suppress_warnings = false
+ .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
+ .current_regular_expression_ascii_only = false,
+ .warn_mismatched_indentation = true
};
// Initialize the constant pool. We're going to completely guess as to the
@@ -17499,7 +22046,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
// If options were provided to this parse, establish them here.
if (options != NULL) {
// filepath option
- parser->filepath_string = options->filepath;
+ parser->filepath = options->filepath;
// line option
parser->start_line = options->line;
@@ -17511,55 +22058,166 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
}
+ // encoding_locked option
+ parser->encoding_locked = options->encoding_locked;
+
// frozen_string_literal option
- if (options->frozen_string_literal) {
- parser->frozen_string_literal = true;
- }
+ parser->frozen_string_literal = options->frozen_string_literal;
- // suppress_warnings option
- if (options->suppress_warnings) {
- parser->suppress_warnings = true;
- }
+ // command_line option
+ parser->command_line = options->command_line;
// version option
parser->version = options->version;
+ // partial_script
+ parser->partial_script = options->partial_script;
+
// scopes option
+ parser->parsing_eval = options->scopes_count > 0;
+ if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
+
for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
pm_parser_scope_push(parser, scope_index == 0);
+ // Scopes given from the outside are not allowed to have numbered
+ // parameters.
+ parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
+
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
const uint8_t *source = pm_string_source(local);
size_t length = pm_string_length(local);
- uint8_t *allocated = malloc(length);
+ void *allocated = xmalloc(length);
if (allocated == NULL) continue;
- memcpy((void *) allocated, source, length);
- pm_parser_local_add_owned(parser, allocated, length);
+ memcpy(allocated, source, length);
+ pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
}
}
}
+ // Now that we have established the user-provided options, check if
+ // a version was given and parse as the latest version otherwise.
+ if (parser->version == PM_OPTIONS_VERSION_UNSET) {
+ parser->version = PM_OPTIONS_VERSION_LATEST;
+ }
+
pm_accepts_block_stack_push(parser, true);
// Skip past the UTF-8 BOM if it exists.
if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
parser->current.end += 3;
parser->encoding_comment_start += 3;
+
+ if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
+ parser->encoding = PM_ENCODING_UTF_8_ENTRY;
+ if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
+ }
}
- // If the first two bytes of the source are a shebang, then we'll indicate
- // that the encoding comment is at the end of the shebang.
- if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
- const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
- if (encoding_comment_start) {
- parser->encoding_comment_start = encoding_comment_start + 1;
+ // If the -x command line flag is set, or the first shebang of the file does
+ // not include "ruby", then we'll search for a shebang that does include
+ // "ruby" and start parsing from there.
+ bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
+
+ // If the first two bytes of the source are a shebang, then we will do a bit
+ // of extra processing.
+ //
+ // First, we'll indicate that the encoding comment is at the end of the
+ // shebang. This means that when a shebang is present the encoding comment
+ // can begin on the second line.
+ //
+ // Second, we will check if the shebang includes "ruby". If it does, then
+ // we will start parsing from there. We will also potentially warn the
+ // user if there is a carriage return at the end of the shebang. We will
+ // also potentially call the shebang callback if this is the main script to
+ // allow the caller to parse the shebang and find any command-line options.
+ // If the shebang does not include "ruby" and this is the main script being
+ // parsed, then we will start searching the file for a shebang that does
+ // contain "ruby" as if -x were passed on the command line.
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+ size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
+
+ if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
+ const char *engine;
+
+ if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
+ if (newline != NULL) {
+ parser->encoding_comment_start = newline + 1;
+
+ if (options == NULL || options->main_script) {
+ pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
+ }
+ }
+
+ if (options != NULL && options->main_script && options->shebang_callback != NULL) {
+ pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
+ }
+
+ search_shebang = false;
+ } else if (options != NULL && options->main_script && !parser->parsing_eval) {
+ search_shebang = true;
}
}
+
+ // Here we're going to find the first shebang that includes "ruby" and start
+ // parsing from there.
+ if (search_shebang) {
+ // If a shebang that includes "ruby" is not found, then we're going to add
+ // a load error to the list of errors on the parser.
+ bool found_shebang = false;
+
+ // This is going to point to the start of each line as we check it.
+ // We'll maintain a moving window looking at each line as they come.
+ const uint8_t *cursor = parser->start;
+
+ // The newline pointer points to the end of the current line that we're
+ // considering. If it is NULL, then we're at the end of the file.
+ const uint8_t *newline = next_newline(cursor, parser->end - cursor);
+
+ while (newline != NULL) {
+ pm_newline_list_append(&parser->newline_list, newline);
+
+ cursor = newline + 1;
+ newline = next_newline(cursor, parser->end - cursor);
+
+ size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
+ if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
+ const char *engine;
+ if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
+ found_shebang = true;
+
+ if (newline != NULL) {
+ pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
+ parser->encoding_comment_start = newline + 1;
+ }
+
+ if (options != NULL && options->shebang_callback != NULL) {
+ pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
+ }
+
+ break;
+ }
+ }
+ }
+
+ if (found_shebang) {
+ parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
+ parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
+ } else {
+ pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
+ pm_newline_list_clear(&parser->newline_list);
+ }
+ }
+
+ // The encoding comment can start after any amount of inline whitespace, so
+ // here we'll advance it to the first non-inline-whitespace character so
+ // that it is ready for future comparisons.
+ parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
}
/**
@@ -17582,7 +22240,7 @@ pm_comment_list_free(pm_list_t *list) {
next = node->next;
pm_comment_t *comment = (pm_comment_t *) node;
- free(comment);
+ xfree(comment);
}
}
@@ -17597,7 +22255,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
next = node->next;
pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
- free(magic_comment);
+ xfree(magic_comment);
}
}
@@ -17606,7 +22264,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
*/
PRISM_EXPORTED_FUNCTION void
pm_parser_free(pm_parser_t *parser) {
- pm_string_free(&parser->filepath_string);
+ pm_string_free(&parser->filepath);
pm_diagnostic_list_free(&parser->error_list);
pm_diagnostic_list_free(&parser->warning_list);
pm_comment_list_free(&parser->comment_list);
@@ -17619,7 +22277,6 @@ pm_parser_free(pm_parser_t *parser) {
// assumed that ownership has transferred to the AST. However if we have
// scopes while we're freeing the parser, it's likely they came from
// eval scopes and we need to free them explicitly here.
- pm_constant_id_list_free(&parser->current_scope->locals);
pm_parser_scope_pop(parser);
}
@@ -17636,6 +22293,140 @@ pm_parse(pm_parser_t *parser) {
return parse_program(parser);
}
+/**
+ * Read lines from the stream and append them to the buffer until the fgets
+ * callback returns NULL or the feof callback reports the end of the stream. If
+ * a line read from the stream matches an __END__ marker, then halt and return
+ * false, otherwise return true.
+ *
+ * @param buffer The buffer that the bytes read are appended to.
+ * @param stream The opaque stream handle passed through to both callbacks.
+ * @param stream_fgets The callback used to read up to one line at a time.
+ * @param stream_feof The callback used to ask whether the stream is at EOF.
+ * @return false if reading stopped at an __END__ marker, true otherwise.
+ */
+static bool
+pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
+#define LINE_SIZE 4096
+ char line[LINE_SIZE];
+
+ // Before every read the scratch buffer is filled with '\n' so that the
+ // number of bytes written by the callback can be recovered afterwards by
+ // scanning backwards over the untouched filler, without relying on strlen.
+ while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
+ // Walk back over the trailing '\n' filler. Assuming stream_fgets
+ // NUL-terminates its output the way fgets does, this stops one past the
+ // terminator, so `length - 1` is the number of bytes that were read.
+ size_t length = LINE_SIZE;
+ while (length > 0 && line[length - 1] == '\n') length--;
+
+ if (length == LINE_SIZE) {
+ // The last byte of the scratch buffer was overwritten, meaning the
+ // read filled the whole buffer. Append what we have (minus the final
+ // byte, presumably the terminator) and keep reading; the __END__ and
+ // EOF checks below are skipped for this chunk.
+ length--;
+ pm_buffer_append_string(buffer, line, length);
+ continue;
+ }
+
+ // Append the line to the buffer.
+ length--;
+ pm_buffer_append_string(buffer, line, length);
+
+ // Check if the line matches the __END__ marker. If it does, then stop
+ // reading and return false. In most circumstances, this means we should
+ // stop reading from the stream so that the DATA constant can pick it
+ // up. The three cases cover the marker with no line ending, with "\n",
+ // and with "\r\n".
+ switch (length) {
+ case 7:
+ if (strncmp(line, "__END__", 7) == 0) return false;
+ break;
+ case 8:
+ if (strncmp(line, "__END__\n", 8) == 0) return false;
+ break;
+ case 9:
+ if (strncmp(line, "__END__\r\n", 9) == 0) return false;
+ break;
+ }
+
+ // Ask the stream whether it has reached EOF. If it has, there is nothing
+ // more to read, so stop here rather than calling the fgets callback
+ // again.
+ if (stream_feof(stream)) {
+ break;
+ }
+ }
+
+ return true;
+#undef LINE_SIZE
+}
+
+/**
+ * Report whether the parser recorded a PM_ERR_HEREDOC_TERM error, which is how
+ * an unterminated heredoc at the end of the input is recognized. When that is
+ * the case the stream is not finished and more input should be read.
+ *
+ * Other lex modes can be detected by checking whether the lex mode is still
+ * open, but a heredoc's lex mode is closed at EOF so that the remainder of the
+ * line following the heredoc declaration can still be parsed. The error list
+ * is therefore the only place left to look.
+ */
+static bool
+pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
+    pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head;
+
+    // Walk the linked list of errors looking for the heredoc terminator error.
+    while (error != NULL) {
+        if (error->diag_id == PM_ERR_HEREDOC_TERM) return true;
+        error = (pm_diagnostic_t *) error->node.next;
+    }
+
+    return false;
+}
+
+/**
+ * Parse Ruby source that is read incrementally from a stream and return the
+ * resulting tree.
+ *
+ * The parser needs the whole source in memory, so the stream is accumulated
+ * into the given buffer. Whenever the reader stopped early (it returned false)
+ * and the parse failed with a lex mode still open or an unterminated heredoc,
+ * more input is read and the accumulated source is parsed again from scratch.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t *
+pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
+    pm_buffer_init(buffer);
+
+    bool finished = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
+    pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
+    pm_node_t *node = pm_parse(parser);
+
+    for (;;) {
+        // Nothing more to do once the reader reported completion or the
+        // parse produced no errors.
+        if (finished || !(parser->error_list.size > 0)) break;
+
+        // Errors only warrant more input when something was left open.
+        if (!(parser->lex_modes.index > 0) && !pm_parse_stream_unterminated_heredoc_p(parser)) break;
+
+        // Discard the previous attempt, pull in more input, and parse the
+        // accumulated source again with a freshly initialized parser.
+        pm_node_destroy(parser, node);
+        finished = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
+
+        pm_parser_free(parser);
+        pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
+        node = pm_parse(parser);
+    }
+
+    return node;
+}
+
+/**
+ * Parse the source and return true if it parses without errors. Only the
+ * parser's error list is consulted when computing the result.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
+    // Read the caller-supplied option data into a zero-initialized options
+    // struct.
+    pm_options_t options = { 0 };
+    pm_options_read(&options, data);
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, source, size, &options);
+
+    // The tree itself is not needed, so release it as soon as parsing is done.
+    pm_node_destroy(&parser, pm_parse(&parser));
+
+    // Capture the outcome before the parser is torn down.
+    const bool success = (parser.error_list.size == 0);
+
+    pm_parser_free(&parser);
+    pm_options_free(&options);
+    return success;
+}
+
+#undef PM_CASE_KEYWORD
+#undef PM_CASE_OPERATOR
+#undef PM_CASE_WRITABLE
+#undef PM_STRING_EMPTY
+
+// We optionally support serializing to a binary string. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_SERIALIZATION define.
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
static inline void
pm_serialize_header(pm_buffer_t *buffer) {
pm_buffer_append_string(buffer, "PRISM", 5);
@@ -17679,6 +22470,28 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
}
/**
+ * Read source from the given stream, parse it, and serialize the resulting AST
+ * into the given buffer.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
+    pm_options_t options = { 0 };
+    pm_options_read(&options, data);
+
+    // The source accumulates in its own buffer, separate from the output
+    // buffer that receives the serialized result.
+    pm_parser_t parser;
+    pm_buffer_t source_buffer;
+    pm_node_t *node = pm_parse_stream(&parser, &source_buffer, stream, stream_fgets, stream_feof, &options);
+
+    // The output is the header, then the content, then a trailing NUL byte.
+    pm_serialize_header(buffer);
+    pm_serialize_content(&parser, node, buffer);
+    pm_buffer_append_byte(buffer, '\0');
+
+    // Release everything acquired above: tree, source buffer, parser, options.
+    pm_node_destroy(&parser, node);
+    pm_buffer_free(&source_buffer);
+    pm_parser_free(&parser);
+    pm_options_free(&options);
+}
+
+/**
* Parse and serialize the comments in the given source to the given buffer.
*/
PRISM_EXPORTED_FUNCTION void
@@ -17700,301 +22513,167 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
pm_options_free(&options);
}
-#undef PM_CASE_KEYWORD
-#undef PM_CASE_OPERATOR
-#undef PM_CASE_WRITABLE
-#undef PM_STRING_EMPTY
-#undef PM_LOCATION_NODE_BASE_VALUE
-#undef PM_LOCATION_NODE_VALUE
-#undef PM_LOCATION_NULL_VALUE
-#undef PM_LOCATION_TOKEN_VALUE
+#endif
-/** An error that is going to be formatted into the output. */
-typedef struct {
- /** A pointer to the diagnostic that was generated during parsing. */
- pm_diagnostic_t *error;
+/******************************************************************************/
+/* Slice queries for the Ruby API */
+/******************************************************************************/
- /** The start line of the diagnostic message. */
- size_t line;
+/** The category of slice returned from pm_slice_type. */
+typedef enum {
+ /** Returned when the given encoding name is invalid. */
+ PM_SLICE_TYPE_ERROR = -1,
- /** The column start of the diagnostic message. */
- size_t column_start;
+ /** Returned when no other types apply to the slice. */
+ PM_SLICE_TYPE_NONE,
- /** The column end of the diagnostic message. */
- size_t column_end;
-} pm_error_t;
+ /** Returned when the slice is a valid local variable name. */
+ PM_SLICE_TYPE_LOCAL,
-/** The format that will be used to format the errors into the output. */
-typedef struct {
- /** The prefix that will be used for line numbers. */
- const char *number_prefix;
-
- /** The prefix that will be used for blank lines. */
- const char *blank_prefix;
-
- /** The divider that will be used between sections of source code. */
- const char *divider;
-
- /** The length of the blank prefix. */
- size_t blank_prefix_length;
-
- /** The length of the divider. */
- size_t divider_length;
-} pm_error_format_t;
-
-#define PM_COLOR_GRAY "\033[38;5;102m"
-#define PM_COLOR_RED "\033[1;31m"
-#define PM_COLOR_RESET "\033[0m"
-
-static inline pm_error_t *
-pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
- pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
-
- for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
- pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
- pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
-
- // We're going to insert this error into the array in sorted order. We
- // do this by finding the first error that has a line number greater
- // than the current error and then inserting the current error before
- // that one.
- size_t index = 0;
- while (
- (index < error_list->size) &&
- (errors[index].error != NULL) &&
- (
- (errors[index].line < start.line) ||
- (errors[index].line == start.line && errors[index].column_start < start.column)
- )
- ) index++;
+ /** Returned when the slice is a valid constant name. */
+ PM_SLICE_TYPE_CONSTANT,
- // Now we're going to shift all of the errors after this one down one
- // index to make room for the new error.
- memcpy(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
+ /** Returned when the slice is a valid method name. */
+ PM_SLICE_TYPE_METHOD_NAME
+} pm_slice_type_t;
- // Finally, we'll insert the error into the array.
- size_t column_end;
- if (start.line == end.line) {
- column_end = end.column;
- } else {
- column_end = newline_list->offsets[start.line + 1] - newline_list->offsets[start.line] - 1;
- }
+/**
+ * Classify the slice as a valid local variable name, constant name, or method
+ * name, or as none of these.
+ */
+pm_slice_type_t
+pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
+ // first, get the right encoding object
+ const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
+ if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
- // Ensure we have at least one column of error.
- if (start.column == column_end) column_end++;
+ // check that there is at least one character
+ if (length == 0) return PM_SLICE_TYPE_NONE;
- errors[index] = (pm_error_t) {
- .error = error,
- .line = start.line,
- .column_start = start.column,
- .column_end = column_end
- };
+ size_t width;
+ if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
+ // valid because alphabetical
+ } else if (*source == '_') {
+ // valid because underscore
+ width = 1;
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
+ // valid because multibyte
+ } else {
+ // invalid because no match
+ return PM_SLICE_TYPE_NONE;
}
- return errors;
-}
-
-static inline void
-pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
- const uint8_t *start = &parser->start[newline_list->offsets[line]];
- const uint8_t *end;
+ // determine the type of the slice based on the first character
+ const uint8_t *end = source + length;
+ pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
+
+ // next, iterate through all of the bytes of the string to ensure that they
+ // are all valid identifier characters
+ source += width;
+
+ while (source < end) {
+ if ((width = encoding->alnum_char(source, end - source)) != 0) {
+ // valid because alphanumeric
+ source += width;
+ } else if (*source == '_') {
+ // valid because underscore
+ source++;
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
+ // valid because multibyte
+ source += width;
+ } else {
+ // invalid because no match
+ break;
+ }
+ }
- if (line + 1 > newline_list->size) {
- end = parser->end;
- } else {
- end = &parser->start[newline_list->offsets[line + 1]];
+ // accept a !, ?, or = at the end of the slice as a method name
+ if (*source == '!' || *source == '?' || *source == '=') {
+ source++;
+ result = PM_SLICE_TYPE_METHOD_NAME;
}
- pm_buffer_append_format(buffer, number_prefix, line + 1);
- pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
+ // valid if we are at the end of the slice
+ return source == end ? result : PM_SLICE_TYPE_NONE;
}
/**
- * Format the errors on the parser into the given buffer.
+ * Check that the slice is a valid local variable name.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
- const pm_list_t *error_list = &parser->error_list;
- assert(error_list->size != 0);
-
- // First, we're going to sort all of the errors by line number using an
- // insertion sort into a newly allocated array.
- const pm_newline_list_t *newline_list = &parser->newline_list;
- pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
-
- // Now we're going to determine how we're going to format line numbers and
- // blank lines based on the maximum number of digits in the line numbers
- // that are going to be displayed.
- pm_error_format_t error_format;
- size_t max_line_number = errors[error_list->size - 1].line + 1;
-
- if (max_line_number < 10) {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%1zu | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%1zu | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~\n"
- };
- }
- } else if (max_line_number < 100) {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%2zu | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%2zu | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~~\n"
- };
- }
- } else if (max_line_number < 1000) {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%3zu | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%3zu | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~~~\n"
- };
- }
- } else if (max_line_number < 10000) {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%4zu | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%4zu | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~~~~\n"
- };
- }
- } else {
- if (colorize) {
- error_format = (pm_error_format_t) {
- .number_prefix = PM_COLOR_GRAY "%5zu | " PM_COLOR_RESET,
- .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
- .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
- };
- } else {
- error_format = (pm_error_format_t) {
- .number_prefix = "%5zu | ",
- .blank_prefix = " | ",
- .divider = " ~~~~~~~~\n"
- };
- }
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
+ switch (pm_slice_type(source, length, encoding_name)) {
+ case PM_SLICE_TYPE_ERROR:
+ return PM_STRING_QUERY_ERROR;
+ case PM_SLICE_TYPE_NONE:
+ case PM_SLICE_TYPE_CONSTANT:
+ case PM_SLICE_TYPE_METHOD_NAME:
+ return PM_STRING_QUERY_FALSE;
+ case PM_SLICE_TYPE_LOCAL:
+ return PM_STRING_QUERY_TRUE;
}
- error_format.blank_prefix_length = strlen(error_format.blank_prefix);
- error_format.divider_length = strlen(error_format.divider);
-
- // Now we're going to iterate through every error in our error list and
- // display it. While we're iterating, we will display some padding lines of
- // the source before the error to give some context. We'll be careful not to
- // display the same line twice in case the errors are close enough in the
- // source.
- size_t last_line = (size_t) -1;
- const pm_encoding_t *encoding = parser->encoding;
-
- for (size_t index = 0; index < error_list->size; index++) {
- pm_error_t *error = &errors[index];
-
- // Here we determine how many lines of padding of the source to display,
- // based on the difference from the last line that was displayed.
- if (error->line - last_line > 1) {
- if (error->line - last_line > 2) {
- if ((index != 0) && (error->line - last_line > 3)) {
- pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
- }
-
- pm_buffer_append_string(buffer, " ", 2);
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
- }
-
- pm_buffer_append_string(buffer, " ", 2);
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
- }
-
- // If this is the first error or we're on a new line, then we'll display
- // the line that has the error in it.
- if ((index == 0) || (error->line != last_line)) {
- if (colorize) {
- pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
- } else {
- pm_buffer_append_string(buffer, "> ", 2);
- }
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
- }
-
- // Now we'll display the actual error message. We'll do this by first
- // putting the prefix to the line, then a bunch of blank spaces
- // depending on the column, then as many carets as we need to display
- // the width of the error, then the error message itself.
- //
- // Note that this doesn't take into account the width of the actual
- // character when displayed in the terminal. For some east-asian
- // languages or emoji, this means it can be thrown off pretty badly. We
- // will need to solve this eventually.
- pm_buffer_append_string(buffer, " ", 2);
- pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
-
- size_t column = 0;
- const uint8_t *start = &parser->start[newline_list->offsets[error->line]];
-
- while (column < error->column_end) {
- if (column < error->column_start) {
- pm_buffer_append_byte(buffer, ' ');
- } else if (colorize) {
- pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
- } else {
- pm_buffer_append_byte(buffer, '^');
- }
-
- size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
- column += (char_width == 0 ? 1 : char_width);
- }
+ assert(false && "unreachable");
+ return PM_STRING_QUERY_FALSE;
+}
- pm_buffer_append_byte(buffer, ' ');
+/**
+ * Check that the slice is a valid constant name.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
+ switch (pm_slice_type(source, length, encoding_name)) {
+ case PM_SLICE_TYPE_ERROR:
+ return PM_STRING_QUERY_ERROR;
+ case PM_SLICE_TYPE_NONE:
+ case PM_SLICE_TYPE_LOCAL:
+ case PM_SLICE_TYPE_METHOD_NAME:
+ return PM_STRING_QUERY_FALSE;
+ case PM_SLICE_TYPE_CONSTANT:
+ return PM_STRING_QUERY_TRUE;
+ }
- const char *message = error->error->message;
- pm_buffer_append_string(buffer, message, strlen(message));
- pm_buffer_append_byte(buffer, '\n');
+ assert(false && "unreachable");
+ return PM_STRING_QUERY_FALSE;
+}
- // Here we determine how many lines of padding to display after the
- // error, depending on where the next error is in source.
- last_line = error->line;
- size_t next_line = (index == error_list->size - 1) ? newline_list->size - 1 : errors[index + 1].line;
+/**
+ * Check that the slice is a valid method name.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
+#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
+#define C1(c) (*source == c)
+#define C2(s) (memcmp(source, s, 2) == 0)
+#define C3(s) (memcmp(source, s, 3) == 0)
- if (next_line - last_line > 1) {
- pm_buffer_append_string(buffer, " ", 2);
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
- }
+ switch (pm_slice_type(source, length, encoding_name)) {
+ case PM_SLICE_TYPE_ERROR:
+ return PM_STRING_QUERY_ERROR;
+ case PM_SLICE_TYPE_NONE:
+ break;
+ case PM_SLICE_TYPE_LOCAL:
+ // numbered parameters are not valid method names
+ return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
+ case PM_SLICE_TYPE_CONSTANT:
+ // all constants are valid method names
+ case PM_SLICE_TYPE_METHOD_NAME:
+ // all method names are valid method names
+ return PM_STRING_QUERY_TRUE;
+ }
- if (next_line - last_line > 1) {
- pm_buffer_append_string(buffer, " ", 2);
- pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
- }
+ switch (length) {
+ case 1:
+ return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
+ case 2:
+ return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
+ case 3:
+ return B(C3("===") || C3("<=>") || C3("[]="));
+ default:
+ return PM_STRING_QUERY_FALSE;
}
- // Finally, we'll free the array of errors that we allocated.
- free(errors);
+#undef B
+#undef C1
+#undef C2
+#undef C3
}
-
-#undef PM_COLOR_GRAY
-#undef PM_COLOR_RED
-#undef PM_COLOR_RESET
diff --git a/prism/prism.h b/prism/prism.h
index 45bfff7a11..c468db18be 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -9,6 +9,7 @@
#include "prism/defines.h"
#include "prism/util/pm_buffer.h"
#include "prism/util/pm_char.h"
+#include "prism/util/pm_integer.h"
#include "prism/util/pm_memchr.h"
#include "prism/util/pm_strncasecmp.h"
#include "prism/util/pm_strpbrk.h"
@@ -20,10 +21,13 @@
#include "prism/parser.h"
#include "prism/prettyprint.h"
#include "prism/regexp.h"
+#include "prism/static_literals.h"
#include "prism/version.h"
#include <assert.h>
#include <errno.h>
+#include <locale.h>
+#include <math.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
@@ -45,10 +49,15 @@ PRISM_EXPORTED_FUNCTION const char * pm_version(void);
/**
* Initialize a parser with the given start and end pointers.
*
+ * The resulting parser must eventually be freed with `pm_parser_free()`.
+ *
* @param parser The parser to initialize.
* @param source The source to parse.
* @param size The size of the source.
- * @param options The optional options to use when parsing.
+ * @param options The optional options to use when parsing. These options must
+ * live for the whole lifetime of this parser.
+ *
+ * \public \memberof pm_parser
*/
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);
@@ -58,13 +67,20 @@ PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *
*
* @param parser The parser to register the callback with.
* @param callback The callback to register.
+ *
+ * \public \memberof pm_parser
*/
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
/**
* Free any memory associated with the given parser.
*
+ * This does not free the `pm_options_t` object that was used to initialize the
+ * parser.
+ *
* @param parser The parser to free.
+ *
+ * \public \memberof pm_parser
*/
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
@@ -73,10 +89,58 @@ PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
*
* @param parser The parser to use.
* @return The AST representing the source.
+ *
+ * \public \memberof pm_parser
*/
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
/**
+ * This function is used in pm_parse_stream() to retrieve a line of input from a
+ * stream. Its signature closely mirrors that of fgets so that fgets can be used
+ * as the default implementation.
+ */
+typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
+
+/**
+ * This function is used in pm_parse_stream() to check whether a stream has
+ * reached EOF. Its signature closely mirrors that of feof so that feof can be
+ * used as the default implementation.
+ */
+typedef int (pm_parse_stream_feof_t)(void *stream);
+
+/**
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * @param parser The parser to use.
+ * @param buffer The buffer to use.
+ * @param stream The stream to parse.
+ * @param stream_fgets The function to use to read from the stream.
+ * @param stream_feof The function to use to determine if the stream has hit eof.
+ * @param options The optional options to use when parsing.
+ * @return The AST representing the source.
+ *
+ * \public \memberof pm_parser
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options);
+
+// We optionally support serializing to a binary string. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_SERIALIZATION define.
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+/**
+ * Parse and serialize the AST represented by the source that is read out of the
+ * given stream into the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param stream The stream to parse.
+ * @param stream_fgets The function to use to read from the stream.
+ * @param stream_feof The function to use to tell if the stream has hit eof.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data);
+
+/**
* Serialize the given list of comments to the given buffer.
*
* @param parser The parser to serialize.
@@ -152,6 +216,8 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t
*/
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
+#endif
+
/**
* Parse the source and return true if it parses without errors or warnings.
*
@@ -168,16 +234,77 @@ PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t si
* @param token_type The token type to convert to a string.
* @return A string representation of the given token type.
*/
-PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
+PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type);
+
+/**
+ * Returns the human name of the given token type.
+ *
+ * @param token_type The token type to convert to a human name.
+ * @return The human name of the given token type.
+ */
+const char * pm_token_type_human(pm_token_type_t token_type);
+
+// We optionally support dumping to JSON. For systems that don't want or need
+// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
+#ifndef PRISM_EXCLUDE_JSON
+
+/**
+ * Dump JSON to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param parser The parser that parsed the node.
+ * @param node The node to serialize.
+ */
+PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
+
+#endif
+
+/**
+ * Represents the results of a slice query.
+ */
+typedef enum {
+ /** Returned if the encoding given to a slice query was invalid. */
+ PM_STRING_QUERY_ERROR = -1,
+
+ /** Returned if the result of the slice query is false. */
+ PM_STRING_QUERY_FALSE,
+
+ /** Returned if the result of the slice query is true. */
+ PM_STRING_QUERY_TRUE
+} pm_string_query_t;
+
+/**
+ * Check that the slice is a valid local variable name.
+ *
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name);
+
+/**
+ * Check that the slice is a valid constant name.
+ *
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name);
/**
- * Format the errors on the parser into the given buffer.
+ * Check that the slice is a valid method name.
*
- * @param parser The parser to format the errors for.
- * @param buffer The buffer to format the errors into.
- * @param colorize Whether or not to colorize the errors with ANSI escape sequences.
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
*/
-PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize);
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name);
/**
* @mainpage
@@ -187,7 +314,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser,
* dependencies. It is currently being integrated into
* [CRuby](https://github.com/ruby/ruby),
* [JRuby](https://github.com/jruby/jruby),
- * [TruffleRuby](https://github.com/oracle/truffleruby),
+ * [TruffleRuby](https://github.com/truffleruby/truffleruby),
* [Sorbet](https://github.com/sorbet/sorbet), and
* [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
*
@@ -205,10 +332,10 @@ PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser,
* to want to use and be aware of are:
*
* * `pm_parser_t` - the main parser structure
- * * `pm_parser_init` - initialize a parser
- * * `pm_parse` - parse and return the root node
- * * `pm_node_destroy` - deallocate the root node returned by `pm_parse`
- * * `pm_parser_free` - free the internal memory of the parser
+ * * `pm_parser_init()` - initialize a parser
+ * * `pm_parse()` - parse and return the root node
+ * * `pm_node_destroy()` - deallocate the root node returned by `pm_parse()`
+ * * `pm_parser_free()` - free the internal memory of the parser
*
* Putting all of this together would look something like:
*
@@ -225,8 +352,8 @@ PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser,
* }
* ```
*
- * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
- * their first member. This means you can downcast and upcast any node in the
+ * All of the nodes "inherit" from `pm_node_t` by embedding those structures
+ * as their first member. This means you can downcast and upcast any node in the
* tree to a `pm_node_t`.
*
* @section serializing Serializing
@@ -238,9 +365,9 @@ PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser,
* use and be aware of are:
*
* * `pm_buffer_t` - a small buffer object that will hold the serialized AST
- * * `pm_buffer_free` - free the memory associated with the buffer
- * * `pm_serialize` - serialize the AST into a buffer
- * * `pm_serialize_parse` - parse and serialize the AST into a buffer
+ * * `pm_buffer_free()` - free the memory associated with the buffer
+ * * `pm_serialize()` - serialize the AST into a buffer
+ * * `pm_serialize_parse()` - parse and serialize the AST into a buffer
*
* Putting all of this together would look something like:
*
@@ -258,7 +385,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser,
* @section inspecting Inspecting
*
* Prism provides the ability to inspect the AST by pretty-printing nodes. You
- * can do this with the `pm_prettyprint` function, which you would use like:
+ * can do this with the `pm_prettyprint()` function, which you would use like:
*
* ```c
* void prettyprint(const uint8_t *source, size_t length) {
diff --git a/prism/regexp.c b/prism/regexp.c
index ba498ecc83..dcc7476244 100644
--- a/prism/regexp.c
+++ b/prism/regexp.c
@@ -1,9 +1,14 @@
#include "prism/regexp.h"
+#define PM_REGEXP_PARSE_DEPTH_MAX 4096
+
/**
* This is the parser that is going to handle parsing regular expressions.
*/
typedef struct {
+ /** The parser that is currently being used. */
+ pm_parser_t *parser;
+
/** A pointer to the start of the source that we are parsing. */
const uint8_t *start;
@@ -13,39 +18,48 @@ typedef struct {
/** A pointer to the end of the source that we are parsing. */
const uint8_t *end;
- /** A list of named captures that we've found. */
- pm_string_list_t *named_captures;
+ /**
+ * Whether or not the regular expression currently being parsed is in
+ * extended mode, wherein whitespace is ignored and comments are allowed.
+ */
+ bool extended_mode;
/** Whether the encoding has changed from the default. */
bool encoding_changed;
/** The encoding of the source. */
const pm_encoding_t *encoding;
+
+ /** The callback to call when a named capture group is found. */
+ pm_regexp_name_callback_t name_callback;
+
+ /** The data to pass to the name callback. */
+ void *name_data;
+
+ /** The callback to call when a parse error is found. */
+ pm_regexp_error_callback_t error_callback;
+
+ /** The data to pass to the error callback. */
+ void *error_data;
} pm_regexp_parser_t;
/**
- * This initializes a new parser with the given source.
+ * Report a parse error through the parser's error callback.
*/
-static void
-pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding) {
- *parser = (pm_regexp_parser_t) {
- .start = start,
- .cursor = start,
- .end = end,
- .named_captures = named_captures,
- .encoding_changed = encoding_changed,
- .encoding = encoding
- };
+static inline void
+pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, const char *message) {
+ parser->error_callback(start, end, message, parser->error_data);
}
/**
- * This appends a new string to the list of named captures.
+ * This passes the name of a named capture to the name callback. This function
+ * assumes the caller has already checked that the name callback is not NULL.
*/
static void
pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
pm_string_t string;
pm_string_shared_init(&string, start, end);
- pm_string_list_append(parser->named_captures, &string);
+ parser->name_callback(&string, parser->name_data);
pm_string_free(&string);
}
@@ -144,6 +158,11 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
} state = PM_REGEXP_RANGE_QUANTIFIER_STATE_START;
while (1) {
+ if (parser->cursor >= parser->end) {
+ parser->cursor = savepoint;
+ return true;
+ }
+
switch (state) {
case PM_REGEXP_RANGE_QUANTIFIER_STATE_START:
switch (*parser->cursor) {
@@ -217,21 +236,24 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
*/
static bool
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
- if (pm_regexp_char_is_eof(parser)) return true;
-
- switch (*parser->cursor) {
- case '*':
- case '+':
- case '?':
- parser->cursor++;
- return true;
- case '{':
- parser->cursor++;
- return pm_regexp_parse_range_quantifier(parser);
- default:
- // In this case there is no quantifier.
- return true;
+ while (!pm_regexp_char_is_eof(parser)) {
+ switch (*parser->cursor) {
+ case '*':
+ case '+':
+ case '?':
+ parser->cursor++;
+ break;
+ case '{':
+ parser->cursor++;
+ if (!pm_regexp_parse_range_quantifier(parser)) return false;
+ break;
+ default:
+ // In this case there is no quantifier.
+ return true;
+ }
}
+
+ return true;
}
/**
@@ -255,20 +277,20 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
// Forward declaration because character sets can be nested.
static bool
-pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
+pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth);
/**
* match-char-set : '[' '^'? (match-range | match-char)* ']'
* ;
*/
static bool
-pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
+pm_regexp_parse_character_set(pm_regexp_parser_t *parser, uint16_t depth) {
pm_regexp_char_accept(parser, '^');
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ']') {
switch (*parser->cursor++) {
case '[':
- pm_regexp_parse_lbracket(parser);
+ pm_regexp_parse_lbracket(parser, (uint16_t) (depth + 1));
break;
case '\\':
if (!pm_regexp_char_is_eof(parser)) {
@@ -288,7 +310,18 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
* A left bracket can either mean a POSIX class or a character set.
*/
static bool
-pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
+pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth) {
+ if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
+ pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
+ return false;
+ }
+
+ if ((parser->cursor < parser->end) && parser->cursor[0] == ']') {
+ parser->cursor++;
+ pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "empty char-class");
+ return true;
+ }
+
const uint8_t *reset = parser->cursor;
if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
@@ -298,13 +331,13 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
parser->cursor = reset;
}
- return pm_regexp_parse_character_set(parser);
+ return pm_regexp_parse_character_set(parser, depth);
}
// Forward declaration here since parsing groups needs to go back up the grammar
// to parse expressions within them.
static bool
-pm_regexp_parse_expression(pm_regexp_parser_t *parser);
+pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth);
/**
* These are the states of the options that are configurable on the regular
@@ -397,6 +430,19 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
}
/**
+ * Return the state recorded for the given key, or false if it is out of range.
+ */
+static uint8_t
+pm_regexp_options_state(pm_regexp_options_t *options, uint8_t key) {
+ if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
+ key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
+ return options->values[key];
+ }
+
+ return false;
+}
+
+/**
* Groups can have quite a few different patterns for syntax. They basically
* just wrap a set of expressions, but they can potentially have options after a
* question mark. If there _isn't_ a question mark, then it's just a set of
@@ -418,17 +464,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
* * (?imxdau-imx:subexp) - turn on and off configuration for an expression
*/
static bool
-pm_regexp_parse_group(pm_regexp_parser_t *parser) {
+pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
+ const uint8_t *group_start = parser->cursor;
+
+ pm_regexp_options_t options;
+ pm_regexp_options_init(&options);
+
// First, parse any options for the group.
if (pm_regexp_char_accept(parser, '?')) {
if (pm_regexp_char_is_eof(parser)) {
+ pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
return false;
}
- pm_regexp_options_t options;
- pm_regexp_options_init(&options);
switch (*parser->cursor) {
case '#': { // inline comments
+ parser->cursor++;
+ if (pm_regexp_char_is_eof(parser)) {
+ pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
+ return false;
+ }
+
if (parser->encoding_changed && parser->encoding->multibyte) {
bool escaped = false;
@@ -472,6 +528,7 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
case '<':
parser->cursor++;
if (pm_regexp_char_is_eof(parser)) {
+ pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
return false;
}
@@ -485,7 +542,15 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
if (!pm_regexp_char_find(parser, '>')) {
return false;
}
- pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
+
+ if (parser->cursor - start == 1) {
+ pm_regexp_parse_error(parser, start, parser->cursor, "group name is empty");
+ }
+
+ if (parser->name_callback != NULL) {
+ pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
+ }
+
break;
}
}
@@ -496,7 +561,10 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
return false;
}
- pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
+ if (parser->name_callback != NULL) {
+ pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
+ }
+
break;
}
case '(': // conditional expression
@@ -516,11 +584,22 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
return false;
}
+ // If we are at the end of the group of options and there is no
+ // subexpression, then we are going to be setting the options
+ // for the parent group. In this case we are safe to return now.
+ if (*parser->cursor == ')') {
+ if (pm_regexp_options_state(&options, 'x') == PM_REGEXP_OPTION_STATE_ADDED) {
+ parser->extended_mode = true;
+ }
+
+ parser->cursor++;
+ return true;
+ }
+
// If we hit a -, then we're done parsing options.
if (*parser->cursor != '-') break;
- // Otherwise, fallthrough to the - case.
- /* fallthrough */
+ PRISM_FALLTHROUGH
case '-':
parser->cursor++;
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
@@ -533,22 +612,57 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
if (pm_regexp_char_is_eof(parser)) {
return false;
}
+
+ // If we are at the end of the group of options and there is no
+ // subexpression, then we are going to be setting the options
+ // for the parent group. In this case we are safe to return now.
+ if (*parser->cursor == ')') {
+ switch (pm_regexp_options_state(&options, 'x')) {
+ case PM_REGEXP_OPTION_STATE_ADDED:
+ parser->extended_mode = true;
+ break;
+ case PM_REGEXP_OPTION_STATE_REMOVED:
+ parser->extended_mode = false;
+ break;
+ }
+
+ parser->cursor++;
+ return true;
+ }
+
break;
default:
- return false;
+ parser->cursor++;
+ pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "undefined group option");
+ break;
}
}
+ bool extended_mode = parser->extended_mode;
+ switch (pm_regexp_options_state(&options, 'x')) {
+ case PM_REGEXP_OPTION_STATE_ADDED:
+ parser->extended_mode = true;
+ break;
+ case PM_REGEXP_OPTION_STATE_REMOVED:
+ parser->extended_mode = false;
+ break;
+ }
+
// Now, parse the expressions within this group.
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
- if (!pm_regexp_parse_expression(parser)) {
+ if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
+ parser->extended_mode = extended_mode;
return false;
}
pm_regexp_char_accept(parser, '|');
}
// Finally, make sure we have a closing parenthesis.
- return pm_regexp_char_expect(parser, ')');
+ parser->extended_mode = extended_mode;
+ if (pm_regexp_char_expect(parser, ')')) return true;
+
+ pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
+ return false;
}
/**
@@ -564,22 +678,53 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
* ;
*/
static bool
-pm_regexp_parse_item(pm_regexp_parser_t *parser) {
- switch (*parser->cursor++) {
+pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
+ switch (*parser->cursor) {
case '^':
case '$':
- return true;
+ parser->cursor++;
+ return pm_regexp_parse_quantifier(parser);
case '\\':
+ parser->cursor++;
if (!pm_regexp_char_is_eof(parser)) {
parser->cursor++;
}
return pm_regexp_parse_quantifier(parser);
case '(':
- return pm_regexp_parse_group(parser) && pm_regexp_parse_quantifier(parser);
+ parser->cursor++;
+ return pm_regexp_parse_group(parser, depth) && pm_regexp_parse_quantifier(parser);
case '[':
- return pm_regexp_parse_lbracket(parser) && pm_regexp_parse_quantifier(parser);
- default:
+ parser->cursor++;
+ return pm_regexp_parse_lbracket(parser, depth) && pm_regexp_parse_quantifier(parser);
+ case '*':
+ case '?':
+ case '+':
+ parser->cursor++;
+ pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "target of repeat operator is not specified");
+ return true;
+ case ')':
+ parser->cursor++;
+ pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "unmatched close parenthesis");
+ return true;
+ case '#':
+ if (parser->extended_mode) {
+ if (!pm_regexp_char_find(parser, '\n')) parser->cursor = parser->end;
+ return true;
+ }
+ PRISM_FALLTHROUGH
+ default: {
+ size_t width;
+ if (!parser->encoding_changed) {
+ width = pm_encoding_utf_8_char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
+ } else {
+ width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
+ }
+
+ if (width == 0) return false; // TODO: add appropriate error
+ parser->cursor += width;
+
return pm_regexp_parse_quantifier(parser);
+ }
}
}
@@ -588,13 +733,18 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
* ;
*/
static bool
-pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
- if (!pm_regexp_parse_item(parser)) {
+pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth) {
+ if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
+ pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
+ return false;
+ }
+
+ if (!pm_regexp_parse_item(parser, depth)) {
return false;
}
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')' && *parser->cursor != '|') {
- if (!pm_regexp_parse_item(parser)) {
+ if (!pm_regexp_parse_item(parser, depth)) {
return false;
}
}
@@ -610,29 +760,31 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
*/
static bool
pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
- return (
- (
- // Exit early if the pattern is empty.
- pm_regexp_char_is_eof(parser) ||
- // Parse the first expression in the pattern.
- pm_regexp_parse_expression(parser)
- ) &&
- (
- // Return now if we've parsed the entire pattern.
- pm_regexp_char_is_eof(parser) ||
- // Otherwise, we should have a pipe character.
- (pm_regexp_char_expect(parser, '|') && pm_regexp_parse_pattern(parser))
- )
- );
+ do {
+ if (pm_regexp_char_is_eof(parser)) return true;
+ if (!pm_regexp_parse_expression(parser, 0)) return false;
+ } while (pm_regexp_char_accept(parser, '|'));
+
+ return pm_regexp_char_is_eof(parser);
}
/**
* Parse a regular expression and extract the names of all of the named capture
* groups.
*/
-PRISM_EXPORTED_FUNCTION bool
-pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding) {
- pm_regexp_parser_t parser;
- pm_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
- return pm_regexp_parse_pattern(&parser);
+PRISM_EXPORTED_FUNCTION void
+pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
+ pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
+ .parser = parser,
+ .start = source,
+ .cursor = source,
+ .end = source + size,
+ .extended_mode = extended_mode,
+ .encoding_changed = parser->encoding_changed,
+ .encoding = parser->encoding,
+ .name_callback = name_callback,
+ .name_data = name_data,
+ .error_callback = error_callback,
+ .error_data = error_data
+ });
}
diff --git a/prism/regexp.h b/prism/regexp.h
index c5ceab11f9..5366b5a5a0 100644
--- a/prism/regexp.h
+++ b/prism/regexp.h
@@ -10,7 +10,6 @@
#include "prism/parser.h"
#include "prism/encoding.h"
#include "prism/util/pm_memchr.h"
-#include "prism/util/pm_string_list.h"
#include "prism/util/pm_string.h"
#include <stdbool.h>
@@ -18,16 +17,27 @@
#include <string.h>
/**
- * Parse a regular expression and extract the names of all of the named capture
- * groups.
+ * This callback is called by pm_regexp_parse() when a named capture group is found.
+ */
+typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
+
+/**
+ * This callback is called by pm_regexp_parse() when a parse error is found.
+ */
+typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
+
+/**
+ * Parse a regular expression.
*
+ * @param parser The parser that is currently being used.
* @param source The source code to parse.
* @param size The size of the source code.
- * @param named_captures The list to add the names of the named capture groups.
- * @param encoding_changed Whether or not the encoding changed from the default.
- * @param encoding The encoding of the source code.
- * @return Whether or not the parsing was successful.
+ * @param extended_mode Whether to parse the regular expression in extended mode.
+ * @param name_callback The optional callback to call when a named capture group is found.
+ * @param name_data The optional data to pass to the name callback.
+ * @param error_callback The callback to call when a parse error is found.
+ * @param error_data The data to pass to the error callback.
*/
-PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
+PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
#endif
diff --git a/prism/srcs.mk b/prism/srcs.mk
new file mode 100644
index 0000000000..022662a00b
--- /dev/null
+++ b/prism/srcs.mk
@@ -0,0 +1,150 @@
+PRISM_TEMPLATES_DIR = $(PRISM_SRCDIR)/templates
+PRISM_TEMPLATE = $(PRISM_TEMPLATES_DIR)/template.rb
+PRISM_CONFIG = $(PRISM_SRCDIR)/config.yml
+
+srcs uncommon.mk: prism/.srcs.mk.time
+
+prism/.srcs.mk.time: $(order_only) $(PRISM_BUILD_DIR)/.time
+prism/$(HAVE_BASERUBY:no=.srcs.mk.time):
+ touch $@
+prism/$(HAVE_BASERUBY:yes=.srcs.mk.time): \
+ $(PRISM_SRCDIR)/templates/template.rb \
+ $(PRISM_SRCDIR)/srcs.mk.in
+ $(BASERUBY) $(tooldir)/generic_erb.rb -c -t$@ -o $(PRISM_SRCDIR)/srcs.mk $(PRISM_SRCDIR)/srcs.mk.in
+
+distclean-prism-srcs::
+ $(RM) prism/.srcs.mk.time
+ $(RMDIRS) prism || $(NULLCMD)
+
+distclean-srcs-local:: distclean-prism-srcs
+
+realclean-prism-srcs:: distclean-prism-srcs
+ $(RM) $(PRISM_SRCDIR)/srcs.mk
+
+realclean-srcs-local:: realclean-prism-srcs
+
+main srcs: $(srcdir)/prism/api_node.c
+$(srcdir)/prism/api_node.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/ext/prism/api_node.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) ext/prism/api_node.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/api_node.c
+
+main incs: $(srcdir)/prism/ast.h
+$(srcdir)/prism/ast.h: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/include/prism/ast.h.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) include/prism/ast.h $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/ast.h
+
+main incs: $(srcdir)/prism/diagnostic.h
+$(srcdir)/prism/diagnostic.h: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/include/prism/diagnostic.h.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) include/prism/diagnostic.h $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/diagnostic.h
+
+main srcs: $(srcdir)/lib/prism/compiler.rb
+$(srcdir)/lib/prism/compiler.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/compiler.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/compiler.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/compiler.rb
+
+main srcs: $(srcdir)/lib/prism/dispatcher.rb
+$(srcdir)/lib/prism/dispatcher.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dispatcher.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dispatcher.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/dispatcher.rb
+
+main srcs: $(srcdir)/lib/prism/dot_visitor.rb
+$(srcdir)/lib/prism/dot_visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dot_visitor.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dot_visitor.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/dot_visitor.rb
+
+main srcs: $(srcdir)/lib/prism/dsl.rb
+$(srcdir)/lib/prism/dsl.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dsl.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dsl.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/dsl.rb
+
+main srcs: $(srcdir)/lib/prism/inspect_visitor.rb
+$(srcdir)/lib/prism/inspect_visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/inspect_visitor.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/inspect_visitor.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/inspect_visitor.rb
+
+main srcs: $(srcdir)/lib/prism/mutation_compiler.rb
+$(srcdir)/lib/prism/mutation_compiler.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/mutation_compiler.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/mutation_compiler.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/mutation_compiler.rb
+
+main srcs: $(srcdir)/lib/prism/node.rb
+$(srcdir)/lib/prism/node.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/node.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/node.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/node.rb
+
+main srcs: $(srcdir)/lib/prism/reflection.rb
+$(srcdir)/lib/prism/reflection.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/reflection.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/reflection.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/reflection.rb
+
+main srcs: $(srcdir)/lib/prism/serialize.rb
+$(srcdir)/lib/prism/serialize.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/serialize.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/serialize.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/serialize.rb
+
+main srcs: $(srcdir)/lib/prism/visitor.rb
+$(srcdir)/lib/prism/visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/visitor.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/visitor.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/visitor.rb
+
+main srcs: $(srcdir)/prism/diagnostic.c
+$(srcdir)/prism/diagnostic.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/diagnostic.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/diagnostic.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/diagnostic.c
+
+main srcs: $(srcdir)/prism/node.c
+$(srcdir)/prism/node.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/node.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/node.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/node.c
+
+main srcs: $(srcdir)/prism/prettyprint.c
+$(srcdir)/prism/prettyprint.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/prettyprint.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/prettyprint.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/prettyprint.c
+
+main srcs: $(srcdir)/prism/serialize.c
+$(srcdir)/prism/serialize.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/serialize.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/serialize.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/serialize.c
+
+main srcs: $(srcdir)/prism/token_type.c
+$(srcdir)/prism/token_type.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/token_type.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/token_type.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/token_type.c
diff --git a/prism/srcs.mk.in b/prism/srcs.mk.in
new file mode 100644
index 0000000000..cc263fd1b4
--- /dev/null
+++ b/prism/srcs.mk.in
@@ -0,0 +1,48 @@
+<% # -*- ruby -*-
+require_relative 'templates/template'
+
+# The file name of this template; it is interpolated into the regeneration rule
+# below.
+script = File.basename(__FILE__)
+# The file name of the makefile fragment being generated. `output` is not
+# defined in this template; presumably it is provided by the script that renders
+# it (the rule below runs generic_erb.rb with -o) -- TODO confirm.
+srcs = output ? File.basename(output) : script.chomp('.in')
+# The makefile that is listed next to `srcs` as depending on the timestamp file.
+mk = 'uncommon.mk'
+
+# %>
+PRISM_TEMPLATES_DIR = $(PRISM_SRCDIR)/templates
+PRISM_TEMPLATE = $(PRISM_TEMPLATES_DIR)/template.rb
+PRISM_CONFIG = $(PRISM_SRCDIR)/config.yml
+
+srcs <%=%><%=mk%>: prism/.srcs.mk.time
+
+prism/.srcs.mk.time: $(order_only) $(PRISM_BUILD_DIR)/.time
+prism/$(HAVE_BASERUBY:no=.srcs.mk.time):
+ touch $@
+prism/$(HAVE_BASERUBY:yes=.srcs.mk.time): \
+ $(PRISM_SRCDIR)/templates/template.rb \
+ $(PRISM_SRCDIR)/<%=%><%=script%>
+ $(BASERUBY) $(tooldir)/generic_erb.rb -c -t$@ -o $(PRISM_SRCDIR)/<%=%><%=srcs%> $(PRISM_SRCDIR)/<%=%><%=script%>
+
+distclean-prism-srcs::
+ $(RM) prism/.srcs.mk.time
+ $(RMDIRS) prism || $(NULLCMD)
+
+distclean-srcs-local:: distclean-prism-srcs
+
+realclean-prism-srcs:: distclean-prism-srcs
+ $(RM) $(PRISM_SRCDIR)/<%=%><%=srcs%>
+
+realclean-srcs-local:: realclean-prism-srcs
+<% Prism::Template::TEMPLATES.map do |t|
+ # Only templates whose names end in .c, .h, or .rb get rules.
+ /\.(?:[ch]|rb)\z/ =~ t or next
+ # Map the template name to the path of the generated file: a leading "src/"
+ # becomes "prism/", while a leading "ext/" or "include/" is dropped.
+ s = '$(srcdir)/' + t.sub(%r[\A(?:(src)|ext|include)/]) {$1 && 'prism/'}
+ # NOTE(review): the parentheses in this pattern are not escaped, so they form
+ # a capture group instead of matching the literal "$(srcdir)". The generated
+ # rules still spell these paths as $(srcdir)/prism/..., which suggests this
+ # substitution never applies -- confirm the intent before changing it.
+ s.sub!(%r[\A\$(srcdir)/prism/], '$(PRISM_SRCDIR)/')
+ # Headers are attached to the "incs" target, everything else to "srcs".
+ target = s.end_with?('.h') ? 'incs' : 'srcs'
+# %>
+
+main <%=%><%=target%>: <%=%><%=s%>
+<%=%><%=s%>: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/<%=%><%=t%>.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) <%=%><%=t%> $@
+
+realclean-prism-srcs::
+ $(RM) <%=%><%=s%>
+<%
+end
+# %>
diff --git a/prism/static_literals.c b/prism/static_literals.c
new file mode 100644
index 0000000000..9fa37b999a
--- /dev/null
+++ b/prism/static_literals.c
@@ -0,0 +1,617 @@
+#include "prism/static_literals.h"
+
+/**
+ * A small struct used for passing around a subset of the information that is
+ * stored on the parser. We use this to avoid having static literals explicitly
+ * depend on the parser struct.
+ */
+typedef struct {
+ /** The list of newline offsets to use to calculate line numbers. */
+ const pm_newline_list_t *newline_list;
+
+ /** The line number that the parser starts on. */
+ int32_t start_line;
+
+ /**
+ * The name of the encoding that the parser is using. This is only read when
+ * inspecting a SourceEncodingNode; pm_static_literals_add leaves it NULL.
+ */
+ const char *encoding_name;
+} pm_static_literals_metadata_t;
+
+/**
+ * The per-word mixing step of the murmur hash: multiply, rotate left by 15
+ * bits, then multiply again.
+ */
+static inline uint32_t
+murmur_scramble(uint32_t value) {
+    const uint32_t multiplied = value * 0xcc9e2d51;
+    const uint32_t rotated = (multiplied << 15) | (multiplied >> 17);
+    return rotated * 0x1b873593;
+}
+
+/**
+ * Murmur hash (https://en.wikipedia.org/wiki/MurmurHash) is a fast,
+ * non-cryptographic, general-purpose hash function. Speed is the property we
+ * care about here.
+ */
+static uint32_t
+murmur_hash(const uint8_t *key, size_t length) {
+    uint32_t hash = 0x9747b28c;
+    const uint8_t *cursor = key;
+
+    // Mix in the input one whole 32-bit word at a time.
+    for (size_t remaining = length / sizeof(uint32_t); remaining > 0; remaining--) {
+        uint32_t word;
+        memcpy(&word, cursor, sizeof(uint32_t));
+        cursor += sizeof(uint32_t);
+
+        hash ^= murmur_scramble(word);
+        hash = (hash << 13) | (hash >> 19);
+        hash = hash * 5 + 0xe6546b64;
+    }
+
+    // Fold the zero to three trailing bytes into one last word, walking from
+    // the final byte back to the first.
+    uint32_t tail = 0;
+    for (size_t remaining = length % sizeof(uint32_t); remaining > 0; remaining--) {
+        tail = (tail << 8) | cursor[remaining - 1];
+    }
+    hash ^= murmur_scramble(tail);
+
+    // Mix in the length and run the final avalanche steps.
+    hash ^= (uint32_t) length;
+    hash ^= hash >> 16;
+    hash *= 0x85ebca6b;
+    hash ^= hash >> 13;
+    hash *= 0xc2b2ae35;
+    hash ^= hash >> 16;
+
+    return hash;
+}
+
+/**
+ * Compute the hash of an integer from its stored word(s) and its negative
+ * flag.
+ */
+static uint32_t
+integer_hash(const pm_integer_t *integer) {
+    // Hash the word array when there is one, and the single inline word
+    // otherwise.
+    const uint8_t *bytes = (const uint8_t *) &integer->value;
+    size_t size = sizeof(uint32_t);
+
+    if (integer->values) {
+        bytes = (const uint8_t *) integer->values;
+        size = sizeof(uint32_t) * integer->length;
+    }
+
+    const uint32_t hash = murmur_hash(bytes, size);
+
+    // Negative integers get an extra term mixed in so that they do not hash
+    // the same as their positive counterparts.
+    return integer->negative ? (hash ^ murmur_scramble((uint32_t) 1)) : hash;
+}
+
+/**
+ * Return the hash of the given node. It is important that nodes that have
+ * equivalent static literal values have the same hash. This is because we use
+ * these hashes to look for duplicates.
+ */
+static uint32_t
+node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_INTEGER_NODE: {
+ // Integers hash their value.
+ const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
+ return integer_hash(&cast->value);
+ }
+ case PM_SOURCE_LINE_NODE: {
+ // Source lines hash their line number.
+ const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
+ const int32_t *value = &line_column.line;
+ return murmur_hash((const uint8_t *) value, sizeof(int32_t));
+ }
+ case PM_FLOAT_NODE: {
+ // Floats hash their value.
+ const double *value = &((const pm_float_node_t *) node)->value;
+ return murmur_hash((const uint8_t *) value, sizeof(double));
+ }
+ case PM_RATIONAL_NODE: {
+ // Rationals hash their numerator and denominator.
+ const pm_rational_node_t *cast = (const pm_rational_node_t *) node;
+ return integer_hash(&cast->numerator) ^ integer_hash(&cast->denominator) ^ murmur_scramble((uint32_t) cast->base.type);
+ }
+ case PM_IMAGINARY_NODE: {
+ // Imaginaries hash their numeric value. Because their numeric value
+ // is stored as a subnode, we hash that node and then mix in the
+ // fact that this is an imaginary node.
+ const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
+ return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
+ }
+ case PM_STRING_NODE: {
+ // Strings hash their value and mix in their flags so that different
+ // encodings are not considered equal. Only the two forced-encoding
+ // flags are kept; every other flag is masked off first.
+ const pm_string_t *value = &((const pm_string_node_t *) node)->unescaped;
+
+ pm_node_flags_t flags = node->flags;
+ flags &= (PM_STRING_FLAGS_FORCED_BINARY_ENCODING | PM_STRING_FLAGS_FORCED_UTF8_ENCODING);
+
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) flags);
+ }
+ case PM_SOURCE_FILE_NODE: {
+ // Source files hash only their filepath. Unlike strings, no flags
+ // are mixed in.
+ const pm_string_t *value = &((const pm_source_file_node_t *) node)->filepath;
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t));
+ }
+ case PM_REGULAR_EXPRESSION_NODE: {
+ // Regular expressions hash their value and mix in all of their
+ // flags so that different encodings are not considered equal.
+ const pm_string_t *value = &((const pm_regular_expression_node_t *) node)->unescaped;
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
+ }
+ case PM_SYMBOL_NODE: {
+ // Symbols hash their value and mix in all of their flags so that
+ // different encodings are not considered equal.
+ const pm_string_t *value = &((const pm_symbol_node_t *) node)->unescaped;
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
+ }
+ default:
+ assert(false && "unreachable");
+ return 0;
+ }
+}
+
+/**
+ * Insert a node into the node hash. It accepts the hash that should hold the
+ * new node, the metadata needed to hash and compare nodes, the node to insert,
+ * whether an equal node that is already present should be replaced, and a
+ * comparison function. The comparison function is used for collision
+ * detection, and must be able to compare all node types that will be stored in
+ * this hash.
+ *
+ * Returns the node that was already in the hash and compared equal to the given
+ * node, or NULL if there was none. NULL is also returned, without inserting
+ * anything, if the hash needed to grow and the allocation failed.
+ */
+static pm_node_t *
+pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, bool replace, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
+    // If we are out of space, we need to resize the hash. This will cause all
+    // of the nodes to be rehashed and reinserted into the new hash.
+    if (hash->size * 2 >= hash->capacity) {
+        // First, allocate space for the new node list.
+        uint32_t new_capacity = hash->capacity == 0 ? 4 : hash->capacity * 2;
+        pm_node_t **new_nodes = xcalloc(new_capacity, sizeof(pm_node_t *));
+        if (new_nodes == NULL) return NULL;
+
+        // It turns out to be more efficient to mask the hash value than to use
+        // the modulo operator. Because our capacities are always powers of two,
+        // we can use a bitwise AND to get the same result as the modulo
+        // operator.
+        uint32_t new_mask = new_capacity - 1;
+
+        // Now, rehash all of the nodes into the new list.
+        for (uint32_t old_index = 0; old_index < hash->capacity; old_index++) {
+            pm_node_t *existing = hash->nodes[old_index];
+            if (existing == NULL) continue;
+
+            // Two nodes can land on the same slot in the new list, so we have
+            // to probe for a free slot here exactly as a regular insert does.
+            // Writing to the home slot unconditionally would silently drop
+            // whichever node got there first. The nodes being moved are
+            // already known to be distinct, so no comparison is needed.
+            uint32_t new_index = node_hash(metadata, existing) & new_mask;
+            while (new_nodes[new_index] != NULL) {
+                new_index = (new_index + 1) & new_mask;
+            }
+
+            new_nodes[new_index] = existing;
+        }
+
+        // Finally, free the old node list and update the hash.
+        xfree(hash->nodes);
+        hash->nodes = new_nodes;
+        hash->capacity = new_capacity;
+    }
+
+    // Now, insert the node into the hash.
+    uint32_t mask = hash->capacity - 1;
+    uint32_t index = node_hash(metadata, node) & mask;
+
+    // We use linear probing to resolve collisions. This means that if the
+    // current index is occupied, we will move to the next index and try again.
+    // We are guaranteed that this will eventually find an empty slot because we
+    // resize the hash when it gets too full.
+    while (hash->nodes[index] != NULL) {
+        if (compare(metadata, hash->nodes[index], node) == 0) break;
+        index = (index + 1) & mask;
+    }
+
+    // If the current index is occupied, we need to return the node that was
+    // already in the hash. Otherwise, we can just increment the size and insert
+    // the new node.
+    pm_node_t *result = hash->nodes[index];
+
+    if (result == NULL) {
+        hash->size++;
+        hash->nodes[index] = node;
+    } else if (replace) {
+        hash->nodes[index] = node;
+    }
+
+    return result;
+}
+
+/**
+ * Release the node list owned by the given node hash. A hash that never
+ * allocated a list (its capacity is zero) is left untouched.
+ */
+static void
+pm_node_hash_free(pm_node_hash_t *hash) {
+    if (hash->capacity == 0) {
+        return;
+    }
+
+    xfree(hash->nodes);
+}
+
+/**
+ * Compare two values that can be compared with a simple numeric comparison.
+ * Evaluates to -1, 0, or 1. Each argument is expanded more than once, so the
+ * arguments must not have side effects.
+ */
+#define PM_NUMERIC_COMPARISON(left, right) (((left) < (right)) ? -1 : ((left) > (right)) ? 1 : 0)
+
+/**
+ * Convert an IntegerNode or SourceLineNode into an int64_t so that the two
+ * kinds of node can be compared with each other.
+ */
+static int64_t
+pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_INTEGER_NODE: {
+            const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
+
+            // Integers that are stored as a list of words are clamped to the
+            // extremes of the int64_t range.
+            if (integer->values) {
+                return integer->negative ? INT64_MIN : INT64_MAX;
+            }
+
+            const int64_t magnitude = (int64_t) integer->value;
+            return integer->negative ? -magnitude : magnitude;
+        }
+        case PM_SOURCE_LINE_NODE: {
+            const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
+            return (int64_t) line_column.line;
+        }
+        default:
+            assert(false && "unreachable");
+            return 0;
+    }
+}
+
+/**
+ * Compare two nodes, each of which is either an IntegerNode or a
+ * SourceLineNode.
+ */
+static int
+pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    const bool involves_source_line = PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) || PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE);
+
+    // Two plain integers can be compared directly.
+    if (!involves_source_line) {
+        return pm_integer_compare(
+            &((const pm_integer_node_t *) left)->value,
+            &((const pm_integer_node_t *) right)->value
+        );
+    }
+
+    // Otherwise at least one side is a source line, so both sides are reduced
+    // to an int64_t before being compared.
+    const int64_t left_value = pm_int64_value(metadata, left);
+    const int64_t right_value = pm_int64_value(metadata, right);
+    return PM_NUMERIC_COMPARISON(left_value, right_value);
+}
+
+/**
+ * Compare two FloatNode instances by their value. The metadata is not used.
+ */
+static int
+pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    const pm_float_node_t *left_float = (const pm_float_node_t *) left;
+    const pm_float_node_t *right_float = (const pm_float_node_t *) right;
+
+    return PM_NUMERIC_COMPARISON(left_float->value, right_float->value);
+}
+
+/**
+ * Compare two nodes that have attached numbers. Nodes of different types are
+ * ordered by their type; nodes of the same type are ordered by their value.
+ */
+static int
+pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
+        return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
+    }
+
+    switch (PM_NODE_TYPE(left)) {
+        case PM_IMAGINARY_NODE: {
+            // Imaginary nodes compare by the numbers that they wrap.
+            const pm_node_t *left_numeric = ((const pm_imaginary_node_t *) left)->numeric;
+            const pm_node_t *right_numeric = ((const pm_imaginary_node_t *) right)->numeric;
+            return pm_compare_number_nodes(metadata, left_numeric, right_numeric);
+        }
+        case PM_RATIONAL_NODE: {
+            const pm_rational_node_t *left_rational = (const pm_rational_node_t *) left;
+            const pm_rational_node_t *right_rational = (const pm_rational_node_t *) right;
+
+            // Denominators are compared first; the numerators only break ties.
+            const int denominators = pm_integer_compare(&left_rational->denominator, &right_rational->denominator);
+            return denominators != 0 ? denominators : pm_integer_compare(&left_rational->numerator, &right_rational->numerator);
+        }
+        case PM_INTEGER_NODE:
+            return pm_compare_integer_nodes(metadata, left, right);
+        case PM_FLOAT_NODE:
+            return pm_compare_float_nodes(metadata, left, right);
+        default:
+            assert(false && "unreachable");
+            return 0;
+    }
+}
+
+/**
+ * Return a pointer to the string attached to a StringNode, SourceFileNode, or
+ * SymbolNode.
+ */
+static const pm_string_t *
+pm_string_value(const pm_node_t *node) {
+    const pm_string_t *value = NULL;
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_STRING_NODE:
+            value = &((const pm_string_node_t *) node)->unescaped;
+            break;
+        case PM_SOURCE_FILE_NODE:
+            value = &((const pm_source_file_node_t *) node)->filepath;
+            break;
+        case PM_SYMBOL_NODE:
+            value = &((const pm_symbol_node_t *) node)->unescaped;
+            break;
+        default:
+            assert(false && "unreachable");
+            break;
+    }
+
+    return value;
+}
+
+/**
+ * Compare two nodes that have attached strings by comparing those strings.
+ * The metadata is not used.
+ */
+static int
+pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    return pm_string_compare(pm_string_value(left), pm_string_value(right));
+}
+
+/**
+ * Compare two RegularExpressionNode instances, first by their unescaped
+ * source and then by their flags. The metadata is not used.
+ */
+static int
+pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left;
+    const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right;
+
+    const int sources = pm_string_compare(&left_regexp->unescaped, &right_regexp->unescaped);
+
+    // The flags only matter when the sources are identical.
+    if (sources == 0) {
+        return PM_NUMERIC_COMPARISON(left_regexp->base.flags, right_regexp->base.flags);
+    }
+
+    return sources;
+}
+
+#undef PM_NUMERIC_COMPARISON
+
+/**
+ * Store a node in one of the single-node slots of the set. The slot is written
+ * when it is empty or when replace is set. Returns the node that the slot held
+ * before the call, which is NULL if it was empty.
+ */
+static pm_node_t *
+pm_static_literals_add_singleton(pm_node_t **slot, pm_node_t *node, bool replace) {
+    pm_node_t *duplicated = *slot;
+    if ((duplicated == NULL) || replace) *slot = node;
+    return duplicated;
+}
+
+/**
+ * Add a node to the set of static literals. Returns the equivalent node that
+ * was already in the set, or NULL if there was none. NULL is also returned for
+ * node types that are not tracked by the set.
+ */
+pm_node_t *
+pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
+    // Every hash insertion below receives the same metadata. The encoding name
+    // is left as NULL here.
+    const pm_static_literals_metadata_t metadata = {
+        .newline_list = newline_list,
+        .start_line = start_line,
+        .encoding_name = NULL
+    };
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_INTEGER_NODE:
+        case PM_SOURCE_LINE_NODE:
+            return pm_node_hash_insert(&literals->integer_nodes, &metadata, node, replace, pm_compare_integer_nodes);
+        case PM_FLOAT_NODE:
+            return pm_node_hash_insert(&literals->float_nodes, &metadata, node, replace, pm_compare_float_nodes);
+        case PM_RATIONAL_NODE:
+        case PM_IMAGINARY_NODE:
+            return pm_node_hash_insert(&literals->number_nodes, &metadata, node, replace, pm_compare_number_nodes);
+        case PM_STRING_NODE:
+        case PM_SOURCE_FILE_NODE:
+            return pm_node_hash_insert(&literals->string_nodes, &metadata, node, replace, pm_compare_string_nodes);
+        case PM_REGULAR_EXPRESSION_NODE:
+            return pm_node_hash_insert(&literals->regexp_nodes, &metadata, node, replace, pm_compare_regular_expression_nodes);
+        case PM_SYMBOL_NODE:
+            return pm_node_hash_insert(&literals->symbol_nodes, &metadata, node, replace, pm_compare_string_nodes);
+        case PM_TRUE_NODE:
+            return pm_static_literals_add_singleton(&literals->true_node, node, replace);
+        case PM_FALSE_NODE:
+            return pm_static_literals_add_singleton(&literals->false_node, node, replace);
+        case PM_NIL_NODE:
+            return pm_static_literals_add_singleton(&literals->nil_node, node, replace);
+        case PM_SOURCE_ENCODING_NODE:
+            return pm_static_literals_add_singleton(&literals->source_encoding_node, node, replace);
+        default:
+            return NULL;
+    }
+}
+
+/**
+ * Free the internal memory associated with the given static literals set,
+ * which is the node list of each of its node hashes.
+ */
+void
+pm_static_literals_free(pm_static_literals_t *literals) {
+    pm_node_hash_t *hashes[] = {
+        &literals->integer_nodes,
+        &literals->float_nodes,
+        &literals->number_nodes,
+        &literals->string_nodes,
+        &literals->regexp_nodes,
+        &literals->symbol_nodes
+    };
+
+    for (size_t index = 0; index < sizeof(hashes) / sizeof(hashes[0]); index++) {
+        pm_node_hash_free(hashes[index]);
+    }
+}
+
+/**
+ * A helper to determine if the given node is a static literal that is positive.
+ * This is used for formatting imaginary nodes.
+ */
+static bool
+pm_static_literal_positive_p(const pm_node_t *node) {
+    // An imaginary node takes its answer from the number that it wraps, so
+    // unwrap until something else is reached.
+    while (PM_NODE_TYPE_P(node, PM_IMAGINARY_NODE)) {
+        node = ((const pm_imaginary_node_t *) node)->numeric;
+    }
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_FLOAT_NODE: {
+            const double value = ((const pm_float_node_t *) node)->value;
+            return value > 0;
+        }
+        case PM_INTEGER_NODE: {
+            const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
+            return !integer->negative;
+        }
+        case PM_RATIONAL_NODE: {
+            const pm_integer_t *numerator = &((const pm_rational_node_t *) node)->numerator;
+            return !numerator->negative;
+        }
+        default:
+            assert(false && "unreachable");
+            return false;
+    }
+}
+
+/**
+ * Create a string-based representation of the given static literal by
+ * appending it to the given buffer. The metadata supplies the newline list and
+ * start line used for source line nodes, and the encoding name used for source
+ * encoding nodes.
+ */
+static inline void
+pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_FALSE_NODE:
+ pm_buffer_append_string(buffer, "false", 5);
+ break;
+ case PM_FLOAT_NODE: {
+ const double value = ((const pm_float_node_t *) node)->value;
+
+ // For infinities and zeroes the sign is taken from the first byte of
+ // the node's source rather than from the value itself.
+ if (PRISM_ISINF(value)) {
+ if (*node->location.start == '-') {
+ pm_buffer_append_byte(buffer, '-');
+ }
+ pm_buffer_append_string(buffer, "Infinity", 8);
+ } else if (value == 0.0) {
+ if (*node->location.start == '-') {
+ pm_buffer_append_byte(buffer, '-');
+ }
+ pm_buffer_append_string(buffer, "0.0", 3);
+ } else {
+ pm_buffer_append_format(buffer, "%g", value);
+
+ // %g will not insert a .0 for 1e100 (we'll get back 1e+100). So
+ // we check for the decimal point and add it in here if it's not
+ // present.
+ if (pm_buffer_index(buffer, '.') == SIZE_MAX) {
+ // The ".0" goes directly before the exponent if there is one,
+ // and at the end of the buffer otherwise.
+ size_t exponent_index = pm_buffer_index(buffer, 'e');
+ size_t index = exponent_index == SIZE_MAX ? pm_buffer_length(buffer) : exponent_index;
+ pm_buffer_insert(buffer, index, ".0", 2);
+ }
+ }
+
+ break;
+ }
+ case PM_IMAGINARY_NODE: {
+ // Imaginary numbers are written as "(0", an explicit "+" when the
+ // wrapped number is positive, the wrapped number itself, and then
+ // "i)". Wrapped rationals additionally get a "*" before the "i".
+ const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
+ pm_buffer_append_string(buffer, "(0", 2);
+ if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
+ pm_static_literal_inspect_node(buffer, metadata, numeric);
+ if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) {
+ pm_buffer_append_byte(buffer, '*');
+ }
+ pm_buffer_append_string(buffer, "i)", 2);
+ break;
+ }
+ case PM_INTEGER_NODE:
+ pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value);
+ break;
+ case PM_NIL_NODE:
+ pm_buffer_append_string(buffer, "nil", 3);
+ break;
+ case PM_RATIONAL_NODE: {
+ // Rationals are written as "(numerator/denominator)".
+ const pm_rational_node_t *rational = (const pm_rational_node_t *) node;
+ pm_buffer_append_byte(buffer, '(');
+ pm_integer_string(buffer, &rational->numerator);
+ pm_buffer_append_byte(buffer, '/');
+ pm_integer_string(buffer, &rational->denominator);
+ pm_buffer_append_byte(buffer, ')');
+ break;
+ }
+ case PM_REGULAR_EXPRESSION_NODE: {
+ const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped;
+ pm_buffer_append_byte(buffer, '/');
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+ pm_buffer_append_byte(buffer, '/');
+
+ // The option letters are always appended in the order m, i, x, n.
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1);
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1);
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1);
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1);
+
+ break;
+ }
+ case PM_SOURCE_ENCODING_NODE:
+ pm_buffer_append_format(buffer, "#<Encoding:%s>", metadata->encoding_name);
+ break;
+ case PM_SOURCE_FILE_NODE: {
+ const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY);
+ pm_buffer_append_byte(buffer, '"');
+ break;
+ }
+ case PM_SOURCE_LINE_NODE:
+ pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line);
+ break;
+ case PM_STRING_NODE: {
+ const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+ pm_buffer_append_byte(buffer, '"');
+ break;
+ }
+ case PM_SYMBOL_NODE: {
+ const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped;
+ pm_buffer_append_byte(buffer, ':');
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+ break;
+ }
+ case PM_TRUE_NODE:
+ pm_buffer_append_string(buffer, "true", 4);
+ break;
+ default:
+ assert(false && "unreachable");
+ break;
+ }
+}
+
+/**
+ * Create a string-based representation of the given static literal. This
+ * bundles the newline list, start line, and encoding name into a metadata
+ * struct and hands off to pm_static_literal_inspect_node.
+ */
+void
+pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
+    const pm_static_literals_metadata_t metadata = {
+        .newline_list = newline_list,
+        .start_line = start_line,
+        .encoding_name = encoding_name
+    };
+
+    pm_static_literal_inspect_node(buffer, &metadata, node);
+}
diff --git a/prism/static_literals.h b/prism/static_literals.h
new file mode 100644
index 0000000000..bd29761899
--- /dev/null
+++ b/prism/static_literals.h
@@ -0,0 +1,121 @@
+/**
+ * @file static_literals.h
+ *
+ * A set of static literal nodes that can be checked for duplicates.
+ */
+#ifndef PRISM_STATIC_LITERALS_H
+#define PRISM_STATIC_LITERALS_H
+
+#include "prism/defines.h"
+#include "prism/ast.h"
+#include "prism/util/pm_newline_list.h"
+
+#include <assert.h>
+#include <stdbool.h>
+
+/**
+ * An internal hash table for a set of nodes.
+ */
+typedef struct {
+ /** The array of nodes in the hash table. */
+ pm_node_t **nodes;
+
+ /** The number of nodes that are currently stored in the hash table. */
+ uint32_t size;
+
+ /** The number of slots that have been allocated in the array of nodes. */
+ uint32_t capacity;
+} pm_node_hash_t;
+
+/**
+ * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
+ * to alert the user of potential issues. To do this, we keep a set of the nodes
+ * that have been seen so far, and compare whenever we find a new node.
+ *
+ * We bucket the nodes based on their type to minimize the number of comparisons
+ * that need to be performed.
+ */
+typedef struct {
+ /**
+ * This is the set of IntegerNode and SourceLineNode instances.
+ */
+ pm_node_hash_t integer_nodes;
+
+ /**
+ * This is the set of FloatNode instances.
+ */
+ pm_node_hash_t float_nodes;
+
+ /**
+ * This is the set of RationalNode and ImaginaryNode instances.
+ */
+ pm_node_hash_t number_nodes;
+
+ /**
+ * This is the set of StringNode and SourceFileNode instances.
+ */
+ pm_node_hash_t string_nodes;
+
+ /**
+ * This is the set of RegularExpressionNode instances.
+ */
+ pm_node_hash_t regexp_nodes;
+
+ /**
+ * This is the set of SymbolNode instances.
+ */
+ pm_node_hash_t symbol_nodes;
+
+ /**
+ * A pointer to the TrueNode instance that is currently stored in the set,
+ * or NULL if none has been added. Later instances only take its place when
+ * they are added with replace set.
+ */
+ pm_node_t *true_node;
+
+ /**
+ * A pointer to the FalseNode instance that is currently stored in the set,
+ * or NULL if none has been added. Later instances only take its place when
+ * they are added with replace set.
+ */
+ pm_node_t *false_node;
+
+ /**
+ * A pointer to the NilNode instance that is currently stored in the set, or
+ * NULL if none has been added. Later instances only take its place when
+ * they are added with replace set.
+ */
+ pm_node_t *nil_node;
+
+ /**
+ * A pointer to the SourceEncodingNode instance that is currently stored in
+ * the set, or NULL if none has been added. Later instances only take its
+ * place when they are added with replace set.
+ */
+ pm_node_t *source_encoding_node;
+} pm_static_literals_t;
+
+/**
+ * Add a node to the set of static literals.
+ *
+ * @param newline_list The list of newline offsets to use to calculate lines.
+ * @param start_line The line number that the parser starts on.
+ * @param literals The set of static literals to add the node to.
+ * @param node The node to add to the set.
+ * @param replace Whether to replace the previous node if one already exists.
+ * @return A pointer to the equivalent node that was already in the set, if
+ *     there is one. That node is only overwritten when replace is true. NULL
+ *     is returned when there is no such node, and also when the given node is
+ *     not of a type that the set tracks.
+ */
+pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
+
+/**
+ * Free the internal memory associated with the given static literals set.
+ *
+ * @param literals The set of static literals to free.
+ */
+void pm_static_literals_free(pm_static_literals_t *literals);
+
+/**
+ * Create a string-based representation of the given static literal.
+ *
+ * @param buffer The buffer to write the string to.
+ * @param newline_list The list of newline offsets to use to calculate lines.
+ * @param start_line The line number that the parser starts on.
+ * @param encoding_name The name of the encoding of the source being parsed.
+ * @param node The node to create a string representation of.
+ */
+void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
+
+#endif
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
index 93f67f6551..23af8886a7 100644
--- a/prism/templates/ext/prism/api_node.c.erb
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -1,4 +1,4 @@
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
#include "prism/extension.h"
extern VALUE rb_cPrism;
@@ -12,88 +12,134 @@ static VALUE rb_cPrism<%= node.name %>;
<%- end -%>
static VALUE
-pm_location_new(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source) {
- VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(end - start) };
- return rb_class_new_instance(3, argv, rb_cPrismLocation);
+pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source, bool freeze) {
+ if (freeze) {
+ VALUE location_argv[] = {
+ source,
+ LONG2FIX(start - parser->start),
+ LONG2FIX(end - start)
+ };
+
+ return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation));
+ } else {
+ uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start)));
+ return ULL2NUM(value);
+ }
}
VALUE
-pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source) {
- ID type = rb_intern(pm_token_type_to_str(token->type));
- VALUE location = pm_location_new(parser, token->start, token->end, source);
+pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) {
+ ID type = rb_intern(pm_token_type_name(token->type));
+ VALUE location = pm_location_new(parser, token->start, token->end, source, freeze);
- VALUE argv[] = {
- ID2SYM(type),
- rb_enc_str_new((const char *) token->start, token->end - token->start, encoding),
- location
- };
+ VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding);
+ if (freeze) rb_obj_freeze(slice);
- return rb_class_new_instance(3, argv, rb_cPrismToken);
+ VALUE argv[] = { source, ID2SYM(type), slice, location };
+ VALUE value = rb_class_new_instance(4, argv, rb_cPrismToken);
+ if (freeze) rb_obj_freeze(value);
+
+ return value;
}
static VALUE
-pm_string_new(pm_string_t *string, rb_encoding *encoding) {
- return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding);
+pm_string_new(const pm_string_t *string, rb_encoding *encoding) {
+ return rb_obj_freeze(rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding));
}
-// Create a Prism::Source object from the given parser.
VALUE
-pm_source_new(pm_parser_t *parser, rb_encoding *encoding) {
- VALUE source = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
- VALUE offsets = rb_ary_new_capa(parser->newline_list.size);
+pm_integer_new(const pm_integer_t *integer) {
+ VALUE result;
+ if (integer->values == NULL) {
+ result = UINT2NUM(integer->value);
+ } else {
+ VALUE string = rb_str_new(NULL, integer->length * 8);
+ unsigned char *bytes = (unsigned char *) RSTRING_PTR(string);
+
+ size_t offset = integer->length * 8;
+ for (size_t value_index = 0; value_index < integer->length; value_index++) {
+ uint32_t value = integer->values[value_index];
+
+ for (int index = 0; index < 8; index++) {
+ int byte = (value >> (4 * index)) & 0xf;
+ bytes[--offset] = byte < 10 ? byte + '0' : byte - 10 + 'a';
+ }
+ }
+
+ result = rb_funcall(string, rb_intern("to_i"), 1, UINT2NUM(16));
+ }
+
+ if (integer->negative) {
+ result = rb_funcall(result, rb_intern("-@"), 0);
+ }
+
+ return result;
+}
+
+// Create a Prism::Source object from the given parser, after pm_parse() was called.
+VALUE
+pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) {
+ VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
+ VALUE offsets = rb_ary_new_capa(parser->newline_list.size);
for (size_t index = 0; index < parser->newline_list.size; index++) {
- rb_ary_push(offsets, INT2FIX(parser->newline_list.offsets[index]));
+ rb_ary_push(offsets, ULONG2NUM(parser->newline_list.offsets[index]));
}
- VALUE source_argv[] = { source, LONG2NUM(parser->start_line), offsets };
- return rb_class_new_instance(3, source_argv, rb_cPrismSource);
+ if (freeze) {
+ rb_obj_freeze(source_string);
+ rb_obj_freeze(offsets);
+ }
+
+ VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets);
+ if (freeze) rb_obj_freeze(source);
+
+ return source;
}
typedef struct pm_node_stack_node {
struct pm_node_stack_node *prev;
- pm_node_t *visit;
+ const pm_node_t *visit;
bool visited;
} pm_node_stack_node_t;
static void
-pm_node_stack_push(pm_node_stack_node_t **stack, pm_node_t *visit) {
- pm_node_stack_node_t *node = malloc(sizeof(pm_node_stack_node_t));
+pm_node_stack_push(pm_node_stack_node_t **stack, const pm_node_t *visit) {
+ pm_node_stack_node_t *node = xmalloc(sizeof(pm_node_stack_node_t));
node->prev = *stack;
node->visit = visit;
node->visited = false;
*stack = node;
}
-static pm_node_t *
+static const pm_node_t *
pm_node_stack_pop(pm_node_stack_node_t **stack) {
pm_node_stack_node_t *current = *stack;
- pm_node_t *visit = current->visit;
+ const pm_node_t *visit = current->visit;
*stack = current->prev;
- free(current);
+ xfree(current);
return visit;
}
VALUE
-pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
- VALUE source = pm_source_new(parser, encoding);
- ID *constants = calloc(parser->constant_pool.size, sizeof(ID));
+pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) {
+ VALUE constants = rb_ary_new_capa(parser->constant_pool.size);
for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
pm_constant_t *constant = &parser->constant_pool.constants[index];
int state = 0;
VALUE string = rb_enc_str_new((const char *) constant->start, constant->length, encoding);
- ID value = rb_protect(rb_intern_str, string, &state);
+ VALUE value = rb_protect(rb_str_intern, string, &state);
if (state != 0) {
- value = rb_intern_const("?");
+ value = ID2SYM(rb_intern_const("?"));
rb_set_errinfo(Qnil);
}
- constants[index] = value;
+ rb_ary_push(constants, value);
}
pm_node_stack_node_t *node_stack = NULL;
@@ -108,20 +154,20 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
continue;
}
- pm_node_t *node = node_stack->visit;
+ const pm_node_t *node = node_stack->visit;
node_stack->visited = true;
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
- <%- if node.fields.any? { |field| [Prism::NodeField, Prism::OptionalNodeField, Prism::NodeListField].include?(field.class) } -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ <%- if node.fields.any? { |field| [Prism::Template::NodeField, Prism::Template::OptionalNodeField, Prism::Template::NodeListField].include?(field.class) } -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
case <%= node.type %>: {
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
<%- node.fields.each do |field| -%>
<%- case field -%>
- <%- when Prism::NodeField, Prism::OptionalNodeField -%>
+ <%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>);
- <%- when Prism::NodeListField -%>
+ <%- when Prism::Template::NodeListField -%>
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
}
@@ -134,71 +180,88 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
default:
break;
}
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
} else {
- pm_node_t *node = pm_node_stack_pop(&node_stack);
+ const pm_node_t *node = pm_node_stack_pop(&node_stack);
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
case <%= node.type %>: {
- <%- if node.fields.any? { |field| ![Prism::NodeField, Prism::OptionalNodeField, Prism::FlagsField].include?(field.class) } -%>
+ <%- if node.fields.any? { |field| ![Prism::Template::NodeField, Prism::Template::OptionalNodeField].include?(field.class) } -%>
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
<%- end -%>
- VALUE argv[<%= node.fields.length + 1 %>];
- <%- node.fields.each_with_index do |field, index| -%>
+ VALUE argv[<%= node.fields.length + 4 %>];
+
+ // source
+ argv[0] = source;
+
+ // node_id
+ argv[1] = ULONG2NUM(node->node_id);
+
+ // location
+ argv[2] = pm_location_new(parser, node->location.start, node->location.end, source, freeze);
+
+ // flags
+ argv[3] = ULONG2NUM(node->flags);
+ <%- node.fields.each.with_index(4) do |field, index| -%>
// <%= field.name %>
<%- case field -%>
- <%- when Prism::NodeField, Prism::OptionalNodeField -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ <%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_pop(value_stack);
- <%- when Prism::NodeListField -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ <%- when Prism::Template::NodeListField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
rb_ary_push(argv[<%= index %>], rb_ary_pop(value_stack));
}
- <%- when Prism::StringField -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ if (freeze) rb_obj_freeze(argv[<%= index %>]);
+ <%- when Prism::Template::StringField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = pm_string_new(&cast-><%= field.name %>, encoding);
- <%- when Prism::ConstantField -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ <%- when Prism::Template::ConstantField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
assert(cast-><%= field.name %> != 0);
- argv[<%= index %>] = rb_id2sym(constants[cast-><%= field.name %> - 1]);
- <%- when Prism::OptionalConstantField -%>
- argv[<%= index %>] = cast-><%= field.name %> == 0 ? Qnil : rb_id2sym(constants[cast-><%= field.name %> - 1]);
- <%- when Prism::ConstantListField -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = RARRAY_AREF(constants, cast-><%= field.name %> - 1);
+ <%- when Prism::Template::OptionalConstantField -%>
+ argv[<%= index %>] = cast-><%= field.name %> == 0 ? Qnil : RARRAY_AREF(constants, cast-><%= field.name %> - 1);
+ <%- when Prism::Template::ConstantListField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
assert(cast-><%= field.name %>.ids[index] != 0);
- rb_ary_push(argv[<%= index %>], rb_id2sym(constants[cast-><%= field.name %>.ids[index] - 1]));
+ rb_ary_push(argv[<%= index %>], RARRAY_AREF(constants, cast-><%= field.name %>.ids[index] - 1));
}
- <%- when Prism::LocationField -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
- argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
- <%- when Prism::OptionalLocationField -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
- argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
- <%- when Prism::UInt8Field -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ if (freeze) rb_obj_freeze(argv[<%= index %>]);
+ <%- when Prism::Template::LocationField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
+ <%- when Prism::Template::OptionalLocationField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
+ <%- when Prism::Template::UInt8Field -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>);
- <%- when Prism::UInt32Field -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ <%- when Prism::Template::UInt32Field -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = ULONG2NUM(cast-><%= field.name %>);
- <%- when Prism::FlagsField -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
- argv[<%= index %>] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
+ <%- when Prism::Template::IntegerField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = pm_integer_new(&cast-><%= field.name %>);
+ <%- when Prism::Template::DoubleField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = DBL2NUM(cast-><%= field.name %>);
<%- else -%>
<%- raise -%>
<%- end -%>
<%- end -%>
- // location
- argv[<%= node.fields.length %>] = pm_location_new(parser, node->location.start, node->location.end, source);
+ VALUE value = rb_class_new_instance(<%= node.fields.length + 4 %>, argv, rb_cPrism<%= node.name %>);
+ if (freeze) rb_obj_freeze(value);
- rb_ary_push(value_stack, rb_class_new_instance(<%= node.fields.length + 1 %>, argv, rb_cPrism<%= node.name %>));
+ rb_ary_push(value_stack, value);
break;
}
<%- end -%>
@@ -208,9 +271,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
}
}
- VALUE result = rb_ary_pop(value_stack);
- free(constants);
- return result;
+ return rb_ary_pop(value_stack);
}
void
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index d0d935c9dc..790cf9ebb8 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -2,12 +2,15 @@
* @file ast.h
*
* The abstract syntax tree.
+ *
+ * --
*/
#ifndef PRISM_AST_H
#define PRISM_AST_H
#include "prism/defines.h"
#include "prism/util/pm_constant_pool.h"
+#include "prism/util/pm_integer.h"
#include "prism/util/pm_string.h"
#include <assert.h>
@@ -99,27 +102,8 @@ typedef uint16_t pm_node_flags_t;
* We store the flags enum in every node in the tree. Some flags are common to
* all nodes (the ones listed below). Others are specific to certain node types.
*/
-#define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
-
-static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
-static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
-static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS - 1)) | (1 << (PM_NODE_FLAG_BITS - 2));
-
-/**
- * Cast the type to an enum to allow the compiler to provide exhaustiveness
- * checking.
- */
-#define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
-
-/**
- * Return true if the type of the given node matches the given type.
- */
-#define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
-
-/**
- * Return true if the given flag is set on the given node.
- */
-#define PM_NODE_FLAG_P(node, flag) ((((pm_node_t *)(node))->flags & (flag)) != 0)
+static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = 0x1;
+static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2;
/**
* This is the base structure that represents a node in the syntax tree. It is
@@ -139,22 +123,57 @@ typedef struct pm_node {
pm_node_flags_t flags;
/**
+ * The unique identifier for this node, which is deterministic based on the
+ * source. It is used to identify unique nodes across parses.
+ */
+ uint32_t node_id;
+
+ /**
* This is the location of the node in the source. It's a range of bytes
* containing a start and an end.
*/
pm_location_t location;
} pm_node_t;
+
+/**
+ * Cast the given node to the base pm_node_t type.
+ */
+#define PM_NODE_UPCAST(node_) ((pm_node_t *) (node_))
+
+/**
+ * Cast the type to an enum to allow the compiler to provide exhaustiveness
+ * checking.
+ */
+#define PM_NODE_TYPE(node_) ((enum pm_node_type) (node_)->type)
+
+/**
+ * Return true if the type of the given node matches the given type.
+ */
+#define PM_NODE_TYPE_P(node_, type_) (PM_NODE_TYPE(node_) == (type_))
+
+/**
+ * Return the flags associated with the given node.
+ */
+#define PM_NODE_FLAGS(node_) (PM_NODE_UPCAST(node_)->flags)
+
+/**
+ * Return true if the given flag is set on the given node.
+ */
+#define PM_NODE_FLAG_P(node_, flag_) ((PM_NODE_FLAGS(node_) & (flag_)) != 0)
<%- nodes.each do |node| -%>
/**
* <%= node.name %>
*
- * Type: <%= node.type %>
-<%- if (node_flags = node.fields.find { |field| field.is_a? Prism::FlagsField }) -%>
- * Flags:
-<%- found = flags.find { |flag| flag.name == node_flags.kind }.tap { |found| raise "Expected to find #{field.kind}" unless found } -%>
-<%- found.values.each do |value| -%>
- * PM_<%= found.human.upcase %>_<%= value.name %>
+<%- node.each_comment_line do |line| -%>
+ *<%= line %>
+<%- end -%>
+ *
+ * Type: ::<%= node.type %>
+<% if (node_flags = node.flags) %>
+ * Flags (#pm_<%= node_flags.human %>):
+<%- node_flags.values.each do |value| -%>
+ * * ::PM_<%= node_flags.human.upcase %>_<%= value.name %>
<%- end -%>
<%- end -%>
*
@@ -163,7 +182,8 @@ typedef struct pm_node {
typedef struct pm_<%= node.human %> {
/** The embedded base node. */
pm_node_t base;
-<%- node.fields.grep_v(Prism::FlagsField).each do |field| -%>
+
+<%- node.fields.each do |field| -%>
/**
* <%= node.name %>#<%= field.name %>
@@ -175,14 +195,16 @@ typedef struct pm_<%= node.human %> {
<%- end -%>
*/
<%= case field
- when Prism::NodeField, Prism::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
- when Prism::NodeListField then "struct pm_node_list #{field.name}"
- when Prism::ConstantField, Prism::OptionalConstantField then "pm_constant_id_t #{field.name}"
- when Prism::ConstantListField then "pm_constant_id_list_t #{field.name}"
- when Prism::StringField then "pm_string_t #{field.name}"
- when Prism::LocationField, Prism::OptionalLocationField then "pm_location_t #{field.name}"
- when Prism::UInt8Field then "uint8_t #{field.name}"
- when Prism::UInt32Field then "uint32_t #{field.name}"
+ when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
+ when Prism::Template::NodeListField then "struct pm_node_list #{field.name}"
+ when Prism::Template::ConstantField, Prism::Template::OptionalConstantField then "pm_constant_id_t #{field.name}"
+ when Prism::Template::ConstantListField then "pm_constant_id_list_t #{field.name}"
+ when Prism::Template::StringField then "pm_string_t #{field.name}"
+ when Prism::Template::LocationField, Prism::Template::OptionalLocationField then "pm_location_t #{field.name}"
+ when Prism::Template::UInt8Field then "uint8_t #{field.name}"
+ when Prism::Template::UInt32Field then "uint32_t #{field.name}"
+ when Prism::Template::IntegerField then "pm_integer_t #{field.name}"
+ when Prism::Template::DoubleField then "double #{field.name}"
else raise field.class.name
end
%>;
@@ -198,8 +220,10 @@ typedef enum pm_<%= flag.human %> {
<%- flag.values.each_with_index do |value, index| -%>
<%= "\n" if index > 0 -%>
/** <%= value.comment %> */
- PM_<%= flag.human.upcase %>_<%= value.name %> = <%= 1 << index %>,
+ PM_<%= flag.human.upcase %>_<%= value.name %> = <%= 1 << (index + Prism::Template::COMMON_FLAGS_COUNT) %>,
<%- end -%>
+
+ PM_<%= flag.human.upcase %>_LAST,
} pm_<%= flag.human %>_t;
<%- end -%>
@@ -209,6 +233,6 @@ typedef enum pm_<%= flag.human %> {
* to specify that through the environment. It will never be true except for in
* those build systems.
*/
-#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
+#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0 %>
#endif
diff --git a/prism/templates/include/prism/diagnostic.h.erb b/prism/templates/include/prism/diagnostic.h.erb
new file mode 100644
index 0000000000..07bbc8fae7
--- /dev/null
+++ b/prism/templates/include/prism/diagnostic.h.erb
@@ -0,0 +1,130 @@
+/**
+ * @file diagnostic.h
+ *
+ * A list of diagnostics generated during parsing.
+ */
+#ifndef PRISM_DIAGNOSTIC_H
+#define PRISM_DIAGNOSTIC_H
+
+#include "prism/ast.h"
+#include "prism/defines.h"
+#include "prism/util/pm_list.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/**
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
+ * of errors between the parser and the user.
+ */
+typedef enum {
+ // These are the error diagnostics.
+ <%- errors.each do |error| -%>
+ PM_ERR_<%= error.name %>,
+ <%- end -%>
+
+ // These are the warning diagnostics.
+ <%- warnings.each do |warning| -%>
+ PM_WARN_<%= warning.name %>,
+ <%- end -%>
+} pm_diagnostic_id_t;
+
+/**
+ * This struct represents a diagnostic generated during parsing.
+ *
+ * @extends pm_list_node_t
+ */
+typedef struct {
+ /** The embedded base node. */
+ pm_list_node_t node;
+
+ /** The location of the diagnostic in the source. */
+ pm_location_t location;
+
+ /** The ID of the diagnostic. */
+ pm_diagnostic_id_t diag_id;
+
+ /** The message associated with the diagnostic. */
+ const char *message;
+
+ /**
+ * Whether or not the memory related to the message of this diagnostic is
+ * owned by this diagnostic. If it is, it needs to be freed when the
+ * diagnostic is freed.
+ */
+ bool owned;
+
+ /**
+ * The level of the diagnostic, see `pm_error_level_t` and
+ * `pm_warning_level_t` for possible values.
+ */
+ uint8_t level;
+} pm_diagnostic_t;
+
+/**
+ * The levels of errors generated during parsing.
+ */
+typedef enum {
+ /** For errors that should raise a syntax error. */
+ PM_ERROR_LEVEL_SYNTAX = 0,
+
+ /** For errors that should raise an argument error. */
+ PM_ERROR_LEVEL_ARGUMENT = 1,
+
+ /** For errors that should raise a load error. */
+ PM_ERROR_LEVEL_LOAD = 2
+} pm_error_level_t;
+
+/**
+ * The levels of warnings generated during parsing.
+ */
+typedef enum {
+ /** For warnings which should be emitted if $VERBOSE != nil. */
+ PM_WARNING_LEVEL_DEFAULT = 0,
+
+ /** For warnings which should be emitted if $VERBOSE == true. */
+ PM_WARNING_LEVEL_VERBOSE = 1
+} pm_warning_level_t;
+
+/**
+ * Get the human-readable name of the given diagnostic ID.
+ *
+ * @param diag_id The diagnostic ID.
+ * @return The human-readable name of the diagnostic ID.
+ */
+const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id);
+
+/**
+ * Append a diagnostic to the given list of diagnostics that is using shared
+ * memory for its message.
+ *
+ * @param list The list to append to.
+ * @param start The start of the diagnostic.
+ * @param end The end of the diagnostic.
+ * @param diag_id The diagnostic ID.
+ * @return Whether the diagnostic was successfully appended.
+ */
+bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
+
+/**
+ * Append a diagnostic to the given list of diagnostics that is using a format
+ * string for its message.
+ *
+ * @param list The list to append to.
+ * @param start The start of the diagnostic.
+ * @param end The end of the diagnostic.
+ * @param diag_id The diagnostic ID.
+ * @param ... The arguments to the format string for the message.
+ * @return Whether the diagnostic was successfully appended.
+ */
+bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
+
+/**
+ * Deallocate the internal state of the given diagnostic list.
+ *
+ * @param list The list to deallocate.
+ */
+void pm_diagnostic_list_free(pm_list_t *list);
+
+#endif
diff --git a/prism/templates/lib/prism/compiler.rb.erb b/prism/templates/lib/prism/compiler.rb.erb
index 2c947f6ed2..66dbe666b9 100644
--- a/prism/templates/lib/prism/compiler.rb.erb
+++ b/prism/templates/lib/prism/compiler.rb.erb
@@ -16,7 +16,7 @@ module Prism
# Prism.parse("1 + 2").value.accept(SExpressions.new)
# # => [:program, [[[:call, [[:integer], [:arguments, [[:integer]]]]]]]]
#
- class Compiler
+ class Compiler < Visitor
# Visit an individual node.
def visit(node)
node&.accept(self)
@@ -29,13 +29,15 @@ module Prism
# Visit the child nodes of the given node.
def visit_child_nodes(node)
- node.compact_child_nodes.map { |node| node.accept(self) }
+ node.each_child_node.map { |node| node.accept(self) }
end
<%- nodes.each_with_index do |node, index| -%>
<%= "\n" if index != 0 -%>
# Compile a <%= node.name %> node
- alias visit_<%= node.human %> visit_child_nodes
+ def visit_<%= node.human %>(node)
+ node.each_child_node.map { |node| node.accept(self) }
+ end
<%- end -%>
end
end
diff --git a/prism/templates/lib/prism/dispatcher.rb.erb b/prism/templates/lib/prism/dispatcher.rb.erb
index 1370ca7636..52478451c9 100644
--- a/prism/templates/lib/prism/dispatcher.rb.erb
+++ b/prism/templates/lib/prism/dispatcher.rb.erb
@@ -14,7 +14,8 @@ module Prism
# end
# end
#
- # dispatcher = Dispatcher.new
+ # listener = OctalListener.new
+ # dispatcher = Prism::Dispatcher.new
# dispatcher.register(listener, :on_integer_node_enter)
#
# Then, you can walk any number of trees and dispatch events to the listeners:
@@ -43,6 +44,19 @@ module Prism
#
# def register: (Listener, *Symbol) -> void
def register(listener, *events)
+ register_events(listener, events)
+ end
+
+ # Register all public methods of a listener that match the pattern
+ # `on_<node_name>_(enter|leave)`.
+ #
+ # def register_public_methods: (Listener) -> void
+ def register_public_methods(listener)
+ register_events(listener, listener.public_methods(false).grep(/\Aon_.+_(?:enter|leave)\z/))
+ end
+
+ # Register a listener for the given events.
+ private def register_events(listener, events)
events.each { |event| (listeners[event] ||= []) << listener }
end
diff --git a/prism/templates/lib/prism/dot_visitor.rb.erb b/prism/templates/lib/prism/dot_visitor.rb.erb
index fc3dd4b223..cd2998fe61 100644
--- a/prism/templates/lib/prism/dot_visitor.rb.erb
+++ b/prism/templates/lib/prism/dot_visitor.rb.erb
@@ -1,4 +1,5 @@
-require "cgi"
+require "cgi/escape"
+require "cgi/util" unless defined?(CGI::EscapeExt)
module Prism
# This visitor provides the ability to call Node#to_dot, which converts a
@@ -17,7 +18,7 @@ module Prism
if port
"<tr><td align=\"left\" colspan=\"2\" port=\"#{name}\">#{name}</td></tr>"
else
- "<tr><td align=\"left\">#{name}</td><td>#{CGI.escapeHTML(value)}</td></tr>"
+ "<tr><td align=\"left\">#{name}</td><td>#{CGI.escapeHTML(value || raise)}</td></tr>"
end
end
end
@@ -109,19 +110,24 @@ module Prism
def visit_<%= node.human %>(node)
table = Table.new("<%= node.name %>")
id = node_id(node)
+ <%- if (node_flags = node.flags) -%>
+
+ # flags
+ table.field("flags", <%= node_flags.human %>_inspect(node))
+ <%- end -%>
<%- node.fields.each do |field| -%>
# <%= field.name %>
<%- case field -%>
- <%- when Prism::NodeField -%>
+ <%- when Prism::Template::NodeField -%>
table.field("<%= field.name %>", port: true)
digraph.edge("#{id}:<%= field.name %> -> #{node_id(node.<%= field.name %>)};")
- <%- when Prism::OptionalNodeField -%>
+ <%- when Prism::Template::OptionalNodeField -%>
unless (<%= field.name %> = node.<%= field.name %>).nil?
table.field("<%= field.name %>", port: true)
digraph.edge("#{id}:<%= field.name %> -> #{node_id(<%= field.name %>)};")
end
- <%- when Prism::NodeListField -%>
+ <%- when Prism::Template::NodeListField -%>
if node.<%= field.name %>.any?
table.field("<%= field.name %>", port: true)
@@ -133,17 +139,14 @@ module Prism
else
table.field("<%= field.name %>", "[]")
end
- <%- when Prism::StringField, Prism::ConstantField, Prism::OptionalConstantField, Prism::UInt8Field, Prism::UInt32Field, Prism::ConstantListField -%>
+ <%- when Prism::Template::StringField, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantListField, Prism::Template::IntegerField, Prism::Template::DoubleField -%>
table.field("<%= field.name %>", node.<%= field.name %>.inspect)
- <%- when Prism::LocationField -%>
+ <%- when Prism::Template::LocationField -%>
table.field("<%= field.name %>", location_inspect(node.<%= field.name %>))
- <%- when Prism::OptionalLocationField -%>
+ <%- when Prism::Template::OptionalLocationField -%>
unless (<%= field.name %> = node.<%= field.name %>).nil?
table.field("<%= field.name %>", location_inspect(<%= field.name %>))
end
- <%- when Prism::FlagsField -%>
- <%- flag = flags.find { |flag| flag.name == field.kind }.tap { |flag| raise "Expected to find #{field.kind}" unless flag } -%>
- table.field("<%= field.name %>", <%= flag.human %>_inspect(node))
<%- else -%>
<%- raise -%>
<%- end -%>
@@ -175,7 +178,7 @@ module Prism
# Inspect a node that has <%= flag.human %> flags to display the flags as a
# comma-separated list.
def <%= flag.human %>_inspect(node)
- flags = []
+ flags = [] #: Array[String]
<%- flag.values.each do |value| -%>
flags << "<%= value.name.downcase %>" if node.<%= value.name.downcase %>?
<%- end -%>
diff --git a/prism/templates/lib/prism/dsl.rb.erb b/prism/templates/lib/prism/dsl.rb.erb
index be18ad45ba..e16ebb7110 100644
--- a/prism/templates/lib/prism/dsl.rb.erb
+++ b/prism/templates/lib/prism/dsl.rb.erb
@@ -2,13 +2,20 @@ module Prism
# The DSL module provides a set of methods that can be used to create prism
# nodes in a more concise manner. For example, instead of writing:
#
- # source = Prism::Source.new("[1]")
+ # source = Prism::Source.for("[1]")
#
# Prism::ArrayNode.new(
+ # source,
+ # 0,
+ # Prism::Location.new(source, 0, 3),
+ # 0,
# [
# Prism::IntegerNode.new(
- # Prism::IntegerBaseFlags::DECIMAL,
+ # source,
+ # 0,
# Prism::Location.new(source, 1, 1),
+ # Prism::IntegerBaseFlags::DECIMAL,
+ # 1
# )
# ],
# Prism::Location.new(source, 0, 1),
@@ -17,29 +24,110 @@ module Prism
#
# you could instead write:
#
- # source = Prism::Source.new("[1]")
+ # class Builder
+ # include Prism::DSL
#
- # ArrayNode(
- # IntegerNode(Prism::IntegerBaseFlags::DECIMAL, Location(source, 1, 1))),
- # Location(source, 0, 1),
- # Location(source, 2, 1)
- # )
+ # attr_reader :default_source
+ #
+ # def initialize
+ # @default_source = source("[1]")
+ # end
+ #
+ # def build
+ # array_node(
+ # location: location(start_offset: 0, length: 3),
+ # elements: [
+ # integer_node(
+ # location: location(start_offset: 1, length: 1),
+ # flags: integer_base_flag(:decimal),
+ # value: 1
+ # )
+ # ],
+ # opening_loc: location(start_offset: 0, length: 1),
+ # closing_loc: location(start_offset: 2, length: 1)
+ # )
+ # end
+ # end
#
- # This is mostly helpful in the context of writing tests, but can also be used
- # to generate trees programmatically.
+ # This is mostly helpful in the context of generating trees programmatically.
module DSL
- private
+ # Provide all of these methods as module methods as well, to allow for
+ # building nodes like Prism::DSL.nil_node.
+ extend self
- # Create a new Location object
- def Location(source = nil, start_offset = 0, length = 0)
+ # Create a new Source object.
+ def source(string)
+ Source.for(string)
+ end
+
+ # Create a new Location object.
+ def location(source: default_source, start_offset: 0, length: 0)
Location.new(source, start_offset, length)
end
<%- nodes.each do |node| -%>
- # Create a new <%= node.name %> node
- def <%= node.name %>(<%= (node.fields.map(&:name) + ["location = Location()"]).join(", ") %>)
- <%= node.name %>.new(<%= (node.fields.map(&:name) + ["location"]).join(", ") %>)
+ # Create a new <%= node.name %> node.
+ def <%= node.human %>(<%= ["source: default_source", "node_id: 0", "location: default_location", "flags: 0", *node.fields.map { |field|
+ case field
+ when Prism::Template::NodeField
+ kind = field.specific_kind || field.union_kind&.first
+ if kind.nil?
+ "#{field.name}: default_node(source, location)"
+ else
+ "#{field.name}: #{kind.gsub(/(?<=.)[A-Z]/, "_\\0").downcase}(source: source)"
+ end
+ when Prism::Template::ConstantField
+ "#{field.name}: :\"\""
+ when Prism::Template::OptionalNodeField, Prism::Template::OptionalConstantField, Prism::Template::OptionalLocationField
+ "#{field.name}: nil"
+ when Prism::Template::NodeListField, Prism::Template::ConstantListField
+ "#{field.name}: []"
+ when Prism::Template::StringField
+ "#{field.name}: \"\""
+ when Prism::Template::LocationField
+ "#{field.name}: location"
+ when Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField
+ "#{field.name}: 0"
+ when Prism::Template::DoubleField
+ "#{field.name}: 0.0"
+ else
+ raise
+ end
+ }].join(", ") %>)
+ <%= node.name %>.new(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
+ end
+ <%- end -%>
+ <%- flags.each do |flag| -%>
+
+ # Retrieve the value of one of the <%= flag.name %> flags.
+ def <%= flag.human.chomp("s") %>(name)
+ case name
+ <%- flag.values.each do |value| -%>
+ when :<%= value.name.downcase %> then <%= flag.name %>::<%= value.name %>
+ <%- end -%>
+ else Kernel.raise ArgumentError, "invalid <%= flag.name %> flag: #{name.inspect}"
+ end
end
<%- end -%>
+
+ private
+
+ # The default source object that gets attached to nodes and locations if no
+ # source is specified.
+ def default_source
+ Source.for("")
+ end
+
+ # The default location object that gets attached to nodes if no location is
+ # specified, which uses the default source.
+ def default_location
+ Location.new(default_source, 0, 0)
+ end
+
+ # The default node that gets attached to nodes if no node is specified for a
+ # required node field.
+ def default_node(source, location)
+ MissingNode.new(source, -1, location, 0)
+ end
end
end
diff --git a/prism/templates/lib/prism/inspect_visitor.rb.erb b/prism/templates/lib/prism/inspect_visitor.rb.erb
new file mode 100644
index 0000000000..3cfe615d85
--- /dev/null
+++ b/prism/templates/lib/prism/inspect_visitor.rb.erb
@@ -0,0 +1,131 @@
+module Prism
+ # This visitor is responsible for composing the strings that get returned by
+ # the various #inspect methods defined on each of the nodes.
+ class InspectVisitor < Visitor
+ # Most of the time, we can simply pass down the indent to the next node.
+ # However, when we are inside a list we want some extra special formatting
+ # when we hit an element in that list. In this case, we have a special
+ # command that replaces the subsequent indent with the given value.
+ class Replace # :nodoc:
+ attr_reader :value
+
+ def initialize(value)
+ @value = value
+ end
+ end
+
+ private_constant :Replace
+
+ # The current prefix string.
+ attr_reader :indent
+
+ # The list of commands that we need to execute in order to compose the
+ # final string.
+ attr_reader :commands
+
+ # Initializes a new instance of the InspectVisitor.
+ def initialize(indent = +"")
+ @indent = indent
+ @commands = []
+ end
+
+ # Compose an inspect string for the given node.
+ def self.compose(node)
+ visitor = new
+ node.accept(visitor)
+ visitor.compose
+ end
+
+ # Compose the final string.
+ def compose
+ buffer = +""
+ replace = nil
+
+ until commands.empty?
+ # @type var command: String | node | Replace
+ # @type var indent: String
+ command, indent = *commands.shift
+
+ case command
+ when String
+ buffer << (replace || indent)
+ buffer << command
+ replace = nil
+ when Node
+ visitor = InspectVisitor.new(indent)
+ command.accept(visitor)
+ @commands = [*visitor.commands, *@commands]
+ when Replace
+ replace = command.value
+ else
+ raise "Unknown command: #{command.inspect}"
+ end
+ end
+
+ buffer
+ end
+ <%- nodes.each do |node| -%>
+
+ # Inspect a <%= node.name %> node.
+ def visit_<%= node.human %>(node)
+ commands << [inspect_node(<%= node.name.inspect %>, node), indent]
+ <%- (fields = [node.flags || Prism::Template::Flags.empty, *node.fields]).each_with_index do |field, index| -%>
+ <%- pointer = index == fields.length - 1 ? "└── " : "├── " -%>
+ <%- preadd = index == fields.length - 1 ? " " : "│ " -%>
+ <%- case field -%>
+ <%- when Prism::Template::Flags -%>
+ flags = [("newline" if node.newline?), ("static_literal" if node.static_literal?), <%= field.values.map { |value| "(\"#{value.name.downcase}\" if node.#{value.name.downcase}?)" }.join(", ") %>].compact
+ commands << ["<%= pointer %>flags: #{flags.empty? ? "∅" : flags.join(", ")}\n", indent]
+ <%- when Prism::Template::NodeListField -%>
+ commands << ["<%= pointer %><%= field.name %>: (length: #{(<%= field.name %> = node.<%= field.name %>).length})\n", indent]
+ if <%= field.name %>.any?
+ <%= field.name %>[0...-1].each do |child|
+ commands << [Replace.new("#{indent}<%= preadd %>├── "), indent]
+ commands << [child, "#{indent}<%= preadd %>│ "]
+ end
+ commands << [Replace.new("#{indent}<%= preadd %>└── "), indent]
+ commands << [<%= field.name %>[-1], "#{indent}<%= preadd %> "]
+ end
+ <%- when Prism::Template::NodeField -%>
+ commands << ["<%= pointer %><%= field.name %>:\n", indent]
+ commands << [node.<%= field.name %>, "#{indent}<%= preadd %>"]
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (<%= field.name %> = node.<%= field.name %>).nil?
+ commands << ["<%= pointer %><%= field.name %>: ∅\n", indent]
+ else
+ commands << ["<%= pointer %><%= field.name %>:\n", indent]
+ commands << [<%= field.name %>, "#{indent}<%= preadd %>"]
+ end
+ <%- when Prism::Template::ConstantField, Prism::Template::ConstantListField, Prism::Template::StringField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField, Prism::Template::DoubleField -%>
+ commands << ["<%= pointer %><%= field.name %>: #{node.<%= field.name %>.inspect}\n", indent]
+ <%- when Prism::Template::OptionalConstantField -%>
+ if (<%= field.name %> = node.<%= field.name %>).nil?
+ commands << ["<%= pointer %><%= field.name %>: ∅\n", indent]
+ else
+ commands << ["<%= pointer %><%= field.name %>: #{<%= field.name %>.inspect}\n", indent]
+ end
+ <%- when Prism::Template::LocationField, Prism::Template::OptionalLocationField -%>
+ commands << ["<%= pointer %><%= field.name %>: #{inspect_location(node.<%= field.name %>)}\n", indent]
+ <%- end -%>
+ <%- end -%>
+ end
+ <%- end -%>
+
+ private
+
+ # Compose a header for the given node.
+ def inspect_node(name, node)
+ location = node.location
+ "@ #{name} (location: (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}))\n"
+ end
+
+ # Compose a string representing the given inner location field.
+ def inspect_location(location)
+ if location
+ "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}) = #{location.slice.inspect}"
+ else
+ "∅"
+ end
+ end
+ end
+end
diff --git a/prism/templates/lib/prism/mutation_compiler.rb.erb b/prism/templates/lib/prism/mutation_compiler.rb.erb
index 9a81b704eb..565ee4e315 100644
--- a/prism/templates/lib/prism/mutation_compiler.rb.erb
+++ b/prism/templates/lib/prism/mutation_compiler.rb.erb
@@ -7,9 +7,9 @@ module Prism
<%= "\n" if index != 0 -%>
# Copy a <%= node.name %> node
def visit_<%= node.human %>(node)
- <%- fields = node.fields.select { |field| [Prism::NodeField, Prism::OptionalNodeField, Prism::NodeListField].include?(field.class) } -%>
+ <%- fields = node.fields.select { |field| [Prism::Template::NodeField, Prism::Template::OptionalNodeField, Prism::Template::NodeListField].include?(field.class) } -%>
<%- if fields.any? -%>
- node.copy(<%= fields.map { |field| "#{field.name}: #{field.is_a?(Prism::NodeListField) ? "visit_all" : "visit"}(node.#{field.name})" }.join(", ") %>)
+ node.copy(<%= fields.map { |field| "#{field.name}: #{field.is_a?(Prism::Template::NodeListField) ? "visit_all" : "visit"}(node.#{field.name})" }.join(", ") %>)
<%- else -%>
node.copy
<%- end -%>
diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb
index d7259c1269..8225bfb328 100644
--- a/prism/templates/lib/prism/node.rb.erb
+++ b/prism/templates/lib/prism/node.rb.erb
@@ -2,27 +2,163 @@ module Prism
# This represents a node in the tree. It is the parent class of all of the
# various node types.
class Node
+ # A pointer to the source that this node was created from.
+ attr_reader :source
+ private :source
+
+ # A unique identifier for this node. This is used in a very specific
+ # use case where you want to keep around a reference to a node without
+ # having to keep around the syntax tree in memory. This unique identifier
+ # will be consistent across multiple parses of the same source code.
+ attr_reader :node_id
+
+ # Save this node using a saved source so that it can be retrieved later.
+ def save(repository)
+ repository.enter(node_id, :itself)
+ end
+
# A Location instance that represents the location of this node in the
# source.
- attr_reader :location
+ def location
+ location = @location
+ return location if location.is_a?(Location)
+ @location = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+ end
- def newline? # :nodoc:
- @newline ? true : false
+ # Save the location using a saved source so that it can be retrieved later.
+ def save_location(repository)
+ repository.enter(node_id, :location)
end
- def set_newline_flag(newline_marked) # :nodoc:
- line = location.start_line
- unless newline_marked[line]
- newline_marked[line] = true
- @newline = true
- end
+ # Delegates to the start_line of the associated location object.
+ def start_line
+ location.start_line
+ end
+
+ # Delegates to the end_line of the associated location object.
+ def end_line
+ location.end_line
+ end
+
+ # The start offset of the node in the source. This method is effectively a
+ # delegate method to the location object.
+ def start_offset
+ location = @location
+ location.is_a?(Location) ? location.start_offset : location >> 32
+ end
+
+ # The end offset of the node in the source. This method is effectively a
+ # delegate method to the location object.
+ def end_offset
+ location = @location
+ location.is_a?(Location) ? location.end_offset : ((location >> 32) + (location & 0xFFFFFFFF))
+ end
+
+ # Delegates to the start_character_offset of the associated location object.
+ def start_character_offset
+ location.start_character_offset
+ end
+
+ # Delegates to the end_character_offset of the associated location object.
+ def end_character_offset
+ location.end_character_offset
+ end
+
+ # Delegates to the cached_start_code_units_offset of the associated location
+ # object.
+ def cached_start_code_units_offset(cache)
+ location.cached_start_code_units_offset(cache)
+ end
+
+ # Delegates to the cached_end_code_units_offset of the associated location
+ # object.
+ def cached_end_code_units_offset(cache)
+ location.cached_end_code_units_offset(cache)
+ end
+
+ # Delegates to the start_column of the associated location object.
+ def start_column
+ location.start_column
+ end
+
+ # Delegates to the end_column of the associated location object.
+ def end_column
+ location.end_column
+ end
+
+ # Delegates to the start_character_column of the associated location object.
+ def start_character_column
+ location.start_character_column
+ end
+
+ # Delegates to the end_character_column of the associated location object.
+ def end_character_column
+ location.end_character_column
+ end
+
+ # Delegates to the cached_start_code_units_column of the associated location
+ # object.
+ def cached_start_code_units_column(cache)
+ location.cached_start_code_units_column(cache)
+ end
+
+ # Delegates to the cached_end_code_units_column of the associated location
+ # object.
+ def cached_end_code_units_column(cache)
+ location.cached_end_code_units_column(cache)
end
+ # Delegates to the leading_comments of the associated location object.
+ def leading_comments
+ location.leading_comments
+ end
+
+ # Delegates to the trailing_comments of the associated location object.
+ def trailing_comments
+ location.trailing_comments
+ end
+
+ # Delegates to the comments of the associated location object.
+ def comments
+ location.comments
+ end
+
+ # Returns all of the lines of the source code associated with this node.
+ def source_lines
+ location.source_lines
+ end
+
+ # An alias for source_lines, used to mimic the API from
+ # RubyVM::AbstractSyntaxTree to make it easier to migrate.
+ alias script_lines source_lines
+
# Slice the location of the node from the source.
def slice
location.slice
end
+ # Slice the location of the node from the source, starting at the beginning
+ # of the line that the location starts on, ending at the end of the line
+ # that the location ends on.
+ def slice_lines
+ location.slice_lines
+ end
+
+ # A bitset of flags for this node. There are certain flags that are common
+ # for all nodes, and then some nodes have specific flags.
+ attr_reader :flags
+ protected :flags
+
+ # Returns true if the node has the newline flag set.
+ def newline?
+ flags.anybits?(NodeFlags::NEWLINE)
+ end
+
+ # Returns true if the node has the static literal flag set.
+ def static_literal?
+ flags.anybits?(NodeFlags::STATIC_LITERAL)
+ end
+
# Similar to inspect, but respects the current level of indentation given by
# the pretty print object.
def pretty_print(q)
@@ -34,9 +170,64 @@ module Prism
# Convert this node into a graphviz dot graph string.
def to_dot
+ # @type self: node
DotVisitor.new.tap { |visitor| accept(visitor) }.to_dot
end
+ # Returns this node followed by the chain of its descendants that contain the
+ # given line and column. This is useful for locating a node that is selected
+ # based on the line and column of the source code.
+ #
+ # Important to note is that the column given to this method should be in
+ # bytes, as opposed to characters or code units.
+ def tunnel(line, column)
+ queue = [self] #: Array[Prism::node]
+ result = [] #: Array[Prism::node]
+
+ search_offset = source.line_to_byte_offset(line) + column
+
+ while (node = queue.shift)
+ result << node
+
+ node.each_child_node do |child_node|
+ if child_node.start_offset <= search_offset && search_offset < child_node.end_offset
+ queue << child_node
+ break
+ end
+ end
+ end
+
+ result
+ end
+
+ # Returns the first node that matches the given block when visited in a
+ # breadth-first search. This is useful for finding a node that matches a
+ # particular condition.
+ #
+ # node.breadth_first_search { |node| node.node_id == node_id }
+ #
+ def breadth_first_search(&block)
+ queue = [self] #: Array[Prism::node]
+
+ while (node = queue.shift)
+ return node if yield node
+ queue.concat(node.compact_child_nodes)
+ end
+
+ nil
+ end
+
+ # Returns a list of the fields that exist for this node class. Fields
+ # describe the structure of the node. This kind of reflection is useful for
+ # things like recursively visiting each node _and_ field in the tree.
+ def self.fields
+ # This method should only be called on subclasses of Node, not Node
+ # itself.
+ raise NoMethodError, "undefined method `fields' for #{inspect}" if self == Node
+
+ Reflection.fields_for(self)
+ end
+
# --------------------------------------------------------------------------
# :section: Node interface
# These methods are effectively abstract methods that must be implemented by
@@ -52,11 +243,18 @@ module Prism
# Returns an array of child nodes, including `nil`s in the place of optional
# nodes that were not present.
def child_nodes
- raise NoMethodError, "undefined method `#{__method__}' for #{inspect}"
+ raise NoMethodError, "undefined method `child_nodes' for #{inspect}"
end
alias deconstruct child_nodes
+ # With a block given, yields each child node. Without a block, returns
+ # an enumerator that contains each child node. Excludes any `nil`s in
+ # the place of optional nodes that were not present.
+ def each_child_node
+ raise NoMethodError, "undefined method `each_child_node' for #{inspect}"
+ end
+
# Returns an array of child nodes, excluding any `nil`s in the place of
# optional nodes that were not present.
def compact_child_nodes
@@ -69,11 +267,34 @@ module Prism
raise NoMethodError, "undefined method `comment_targets' for #{inspect}"
end
- # Returns a symbol symbolizing the type of node that this represents. This
- # is particularly useful for case statements and array comparisons.
+ # Returns a string representation of the node.
+ def inspect
+ raise NoMethodError, "undefined method `inspect' for #{inspect}"
+ end
+
+ # Sometimes you want to check an instance of a node against a list of
+ # classes to see what kind of behavior to perform. Usually this is done by
+ # calling `[cls1, cls2].include?(node.class)` or putting the node into a
+ # case statement and doing `case node; when cls1; when cls2; end`. Both of
+ # these approaches are relatively slow because of the constant lookups,
+ # method calls, and/or array allocations.
+ #
+ # Instead, you can call #type, which will return to you a symbol that you
+ # can use for comparison. This is faster than the other approaches because
+ # it uses a single integer comparison, but also because if you're on CRuby
+ # you can take advantage of the fact that case statements with all symbol
+ # keys will use a jump table.
def type
raise NoMethodError, "undefined method `type' for #{inspect}"
end
+
+ # Similar to #type, this method returns a symbol that you can use for
+ # splitting on the type of the node without having to do a long === chain.
+ # Note that like #type, it will still be slower than using == for a single
+ # class, but should be faster in a case statement or an array comparison.
+ def self.type
+ raise NoMethodError, "undefined method `type' for #{inspect}"
+ end
end
<%- nodes.each do |node| -%>
@@ -81,70 +302,62 @@ module Prism
#<%= line %>
<%- end -%>
class <%= node.name -%> < Node
- <%- node.fields.each do |field| -%>
- <%- if field.comment.nil? -%>
- # <%= "private " if field.is_a?(Prism::FlagsField) %>attr_reader <%= field.name %>: <%= field.rbs_class %>
- <%- else -%>
- <%- field.each_comment_line do |line| -%>
- #<%= line %>
- <%- end -%>
- <%- end -%>
- <%= "private " if field.is_a?(Prism::FlagsField) %>attr_reader :<%= field.name %>
-
- <%- end -%>
- # def initialize: (<%= (node.fields.map { |field| "#{field.rbs_class} #{field.name}" } + ["Location location"]).join(", ") %>) -> void
- def initialize(<%= (node.fields.map(&:name) + ["location"]).join(", ") %>)
+ # Initialize a new <%= node.name %> node.
+ def initialize(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
+ @source = source
+ @node_id = node_id
+ @location = location
+ @flags = flags
<%- node.fields.each do |field| -%>
+ <%- if Prism::Template::CHECK_FIELD_KIND && field.respond_to?(:check_field_kind) -%>
+ raise "<%= node.name %>#<%= field.name %> was of unexpected type:\n#{<%= field.name %>.inspect}" unless <%= field.check_field_kind %>
+ <%- end -%>
@<%= field.name %> = <%= field.name %>
<%- end -%>
- @location = location
end
# def accept: (Visitor visitor) -> void
def accept(visitor)
visitor.visit_<%= node.human %>(self)
end
- <%- if node.newline == false -%>
- def set_newline_flag(newline_marked) # :nodoc:
- # Never mark <%= node.name %> with a newline flag, mark children instead
- end
- <%- elsif node.newline.is_a?(String) -%>
-
- def set_newline_flag(newline_marked) # :nodoc:
- <%- field = node.fields.find { |f| f.name == node.newline } or raise node.newline -%>
- <%- case field -%>
- <%- when Prism::NodeField -%>
- <%= field.name %>.set_newline_flag(newline_marked)
- <%- when Prism::NodeListField -%>
- first = <%= field.name %>.first
- first.set_newline_flag(newline_marked) if first
- <%- else raise field.class.name -%>
- <%- end -%>
- end
- <%- end -%>
-
- # def child_nodes: () -> Array[nil | Node]
+ # def child_nodes: () -> Array[Node?]
def child_nodes
[<%= node.fields.map { |field|
case field
- when Prism::NodeField, Prism::OptionalNodeField then field.name
- when Prism::NodeListField then "*#{field.name}"
+ when Prism::Template::NodeField, Prism::Template::OptionalNodeField then field.name
+ when Prism::Template::NodeListField then "*#{field.name}"
end
}.compact.join(", ") %>]
end
+ # def each_child_node: () { (Prism::node) -> void } -> void | () -> Enumerator[Prism::node]
+ def each_child_node
+ return to_enum(:each_child_node) unless block_given?
+
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ yield <%= field.name %>
+ <%- when Prism::Template::OptionalNodeField -%>
+ yield <%= field.name %> if <%= field.name %>
+ <%- when Prism::Template::NodeListField -%>
+ <%= field.name %>.each { |node| yield node }
+ <%- end -%>
+ <%- end -%>
+ end
+
# def compact_child_nodes: () -> Array[Node]
def compact_child_nodes
- <%- if node.fields.any? { |field| field.is_a?(Prism::OptionalNodeField) } -%>
- compact = []
+ <%- if node.fields.any? { |field| field.is_a?(Prism::Template::OptionalNodeField) } -%>
+ compact = [] #: Array[Prism::node]
<%- node.fields.each do |field| -%>
<%- case field -%>
- <%- when Prism::NodeField -%>
+ <%- when Prism::Template::NodeField -%>
compact << <%= field.name %>
- <%- when Prism::OptionalNodeField -%>
+ <%- when Prism::Template::OptionalNodeField -%>
compact << <%= field.name %> if <%= field.name %>
- <%- when Prism::NodeListField -%>
+ <%- when Prism::Template::NodeListField -%>
compact.concat(<%= field.name %>)
<%- end -%>
<%- end -%>
@@ -152,8 +365,8 @@ module Prism
<%- else -%>
[<%= node.fields.map { |field|
case field
- when Prism::NodeField then field.name
- when Prism::NodeListField then "*#{field.name}"
+ when Prism::Template::NodeField then field.name
+ when Prism::Template::NodeListField then "*#{field.name}"
end
}.compact.join(", ") %>]
<%- end -%>
@@ -163,31 +376,80 @@ module Prism
def comment_targets
[<%= node.fields.map { |field|
case field
- when Prism::NodeField, Prism::LocationField then field.name
- when Prism::OptionalNodeField, Prism::NodeListField, Prism::OptionalLocationField then "*#{field.name}"
+ when Prism::Template::NodeField, Prism::Template::LocationField then field.name
+ when Prism::Template::OptionalNodeField, Prism::Template::NodeListField, Prism::Template::OptionalLocationField then "*#{field.name}"
end
- }.compact.join(", ") %>]
+ }.compact.join(", ") %>] #: Array[Prism::node | Location]
end
- # def copy: (**params) -> <%= node.name %>
- def copy(**params)
- <%= node.name %>.new(
- <%- (node.fields.map(&:name) + ["location"]).map do |name| -%>
- params.fetch(:<%= name %>) { <%= name %> },
- <%- end -%>
- )
+ # def copy: (<%= (["?node_id: Integer", "?location: Location", "?flags: Integer"] + node.fields.map { |field| "?#{field.name}: #{field.rbs_class}" }).join(", ") %>) -> <%= node.name %>
+ def copy(<%= (["node_id", "location", "flags"] + node.fields.map(&:name)).map { |field| "#{field}: self.#{field}" }.join(", ") %>)
+ <%= node.name %>.new(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
end
- # def deconstruct: () -> Array[nil | Node]
+ # def deconstruct: () -> Array[Node?]
alias deconstruct child_nodes
- # def deconstruct_keys: (Array[Symbol] keys) -> { <%= (node.fields.map { |field| "#{field.name}: #{field.rbs_class}" } + ["location: Location"]).join(", ") %> }
+ # def deconstruct_keys: (Array[Symbol] keys) -> { <%= (["node_id: Integer", "location: Location"] + node.fields.map { |field| "#{field.name}: #{field.rbs_class}" }).join(", ") %> }
def deconstruct_keys(keys)
- { <%= (node.fields.map { |field| "#{field.name}: #{field.name}" } + ["location: location"]).join(", ") %> }
+ { <%= (["node_id: node_id", "location: location"] + node.fields.map { |field| "#{field.name}: #{field.name}" }).join(", ") %> }
+ end
+ <%- if (node_flags = node.flags) -%>
+ <%- node_flags.values.each do |value| -%>
+
+ # def <%= value.name.downcase %>?: () -> bool
+ def <%= value.name.downcase %>?
+ flags.anybits?(<%= node_flags.name %>::<%= value.name %>)
end
+ <%- end -%>
+ <%- end -%>
<%- node.fields.each do |field| -%>
+
+ <%- if field.comment.nil? -%>
+ # attr_reader <%= field.name %>: <%= field.rbs_class %>
+ <%- else -%>
+ <%- field.each_comment_line do |line| -%>
+ #<%= line %>
+ <%- end -%>
+ <%- end -%>
<%- case field -%>
- <%- when Prism::LocationField -%>
+ <%- when Prism::Template::LocationField -%>
+ def <%= field.name %>
+ location = @<%= field.name %>
+ return location if location.is_a?(Location)
+ @<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+ end
+
+ # Save the <%= field.name %> location using the given saved source so that
+ # it can be retrieved later.
+ def save_<%= field.name %>(repository)
+ repository.enter(node_id, :<%= field.name %>)
+ end
+ <%- when Prism::Template::OptionalLocationField -%>
+ def <%= field.name %>
+ location = @<%= field.name %>
+ case location
+ when nil
+ nil
+ when Location
+ location
+ else
+ @<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+ end
+ end
+
+ # Save the <%= field.name %> location using the given saved source so that
+ # it can be retrieved later.
+ def save_<%= field.name %>(repository)
+ repository.enter(node_id, :<%= field.name %>) unless @<%= field.name %>.nil?
+ end
+ <%- else -%>
+ attr_reader :<%= field.name %>
+ <%- end -%>
+ <%- end -%>
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::LocationField -%>
<%- raise unless field.name.end_with?("_loc") -%>
<%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
@@ -195,7 +457,7 @@ module Prism
def <%= field.name.delete_suffix("_loc") %>
<%= field.name %>.slice
end
- <%- when Prism::OptionalLocationField -%>
+ <%- when Prism::Template::OptionalLocationField -%>
<%- raise unless field.name.end_with?("_loc") -%>
<%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
@@ -203,96 +465,63 @@ module Prism
def <%= field.name.delete_suffix("_loc") %>
<%= field.name %>&.slice
end
- <%- when Prism::FlagsField -%>
- <%- flags.find { |flag| flag.name == field.kind }.tap { |flag| raise "Expected to find #{field.kind}" unless flag }.values.each do |value| -%>
-
- # def <%= value.name.downcase %>?: () -> bool
- def <%= value.name.downcase %>?
- <%= field.name %>.anybits?(<%= field.kind %>::<%= value.name %>)
- end
- <%- end -%>
<%- end -%>
<%- end -%>
- # def inspect(NodeInspector inspector) -> String
- def inspect(inspector = NodeInspector.new)
- inspector << inspector.header(self)
- <%- node.fields.each_with_index do |field, index| -%>
- <%- pointer, preadd = index == node.fields.length - 1 ? ["└── ", " "] : ["├── ", "│ "] -%>
- <%- case field -%>
- <%- when Prism::NodeListField -%>
- inspector << "<%= pointer %><%= field.name %>: #{inspector.list("#{inspector.prefix}<%= preadd %>", <%= field.name %>)}"
- <%- when Prism::ConstantListField -%>
- inspector << "<%= pointer %><%= field.name %>: #{<%= field.name %>.inspect}\n"
- <%- when Prism::NodeField -%>
- inspector << "<%= pointer %><%= field.name %>:\n"
- inspector << inspector.child_node(<%= field.name %>, "<%= preadd %>")
- <%- when Prism::OptionalNodeField -%>
- if (<%= field.name %> = self.<%= field.name %>).nil?
- inspector << "<%= pointer %><%= field.name %>: ∅\n"
- else
- inspector << "<%= pointer %><%= field.name %>:\n"
- inspector << <%= field.name %>.inspect(inspector.child_inspector("<%= preadd %>")).delete_prefix(inspector.prefix)
- end
- <%- when Prism::ConstantField, Prism::StringField, Prism::UInt8Field, Prism::UInt32Field -%>
- inspector << "<%= pointer %><%= field.name %>: #{<%= field.name %>.inspect}\n"
- <%- when Prism::OptionalConstantField -%>
- if (<%= field.name %> = self.<%= field.name %>).nil?
- inspector << "<%= pointer %><%= field.name %>: ∅\n"
- else
- inspector << "<%= pointer %><%= field.name %>: #{<%= field.name %>.inspect}\n"
- end
- <%- when Prism::FlagsField -%>
- <%- flag = flags.find { |flag| flag.name == field.kind }.tap { |flag| raise unless flag } -%>
- flags = [<%= flag.values.map { |value| "(\"#{value.name.downcase}\" if #{value.name.downcase}?)" }.join(", ") %>].compact
- inspector << "<%= pointer %><%= field.name %>: #{flags.empty? ? "∅" : flags.join(", ")}\n"
- <%- when Prism::LocationField, Prism::OptionalLocationField -%>
- inspector << "<%= pointer %><%= field.name %>: #{inspector.location(<%= field.name %>)}\n"
- <%- else -%>
- <%- raise -%>
- <%- end -%>
- <%- end -%>
- inspector.to_str
+ # def inspect: () -> String
+ def inspect
+ InspectVisitor.compose(self)
end
- # Sometimes you want to check an instance of a node against a list of
- # classes to see what kind of behavior to perform. Usually this is done by
- # calling `[cls1, cls2].include?(node.class)` or putting the node into a
- # case statement and doing `case node; when cls1; when cls2; end`. Both of
- # these approaches are relatively slow because of the constant lookups,
- # method calls, and/or array allocations.
- #
- # Instead, you can call #type, which will return to you a symbol that you
- # can use for comparison. This is faster than the other approaches because
- # it uses a single integer comparison, but also because if you're on CRuby
- # you can take advantage of the fact that case statements with all symbol
- # keys will use a jump table.
- #
- # def type: () -> Symbol
+ # Return a symbol representation of this node type. See `Node#type`.
def type
:<%= node.human %>
end
- # Similar to #type, this method returns a symbol that you can use for
- # splitting on the type of the node without having to do a long === chain.
- # Note that like #type, it will still be slower than using == for a single
- # class, but should be faster in a case statement or an array comparison.
- #
- # def self.type: () -> Symbol
+ # Return a symbol representation of this node type. See `Node::type`.
def self.type
:<%= node.human %>
end
+
+ # Implements case-equality for the node. This is effectively == but without
+ # comparing the value of locations. Locations are checked only for presence.
+ def ===(other)
+ other.is_a?(<%= node.name %>)<%= " &&" if (fields = [*node.flags, *node.fields]).any? %>
+ <%- fields.each_with_index do |field, index| -%>
+ <%- if field.is_a?(Prism::Template::LocationField) || field.is_a?(Prism::Template::OptionalLocationField) -%>
+ (<%= field.name %>.nil? == other.<%= field.name %>.nil?)<%= " &&" if index != fields.length - 1 %>
+ <%- elsif field.is_a?(Prism::Template::NodeListField) || field.is_a?(Prism::Template::ConstantListField) -%>
+ (<%= field.name %>.length == other.<%= field.name %>.length) &&
+ <%= field.name %>.zip(other.<%= field.name %>).all? { |left, right| left === right }<%= " &&" if index != fields.length - 1 %>
+ <%- elsif field.is_a?(Prism::Template::Flags) -%>
+ (flags === other.flags)<%= " &&" if index != fields.length - 1 %>
+ <%- else -%>
+ (<%= field.name %> === other.<%= field.name %>)<%= " &&" if index != fields.length - 1 %>
+ <%- end -%>
+ <%- end -%>
+ end
end
<%- end -%>
- <%- flags.each_with_index do |flag, flag_index| -%>
+ <%- flags.each do |flag| -%>
# <%= flag.comment %>
module <%= flag.name %>
<%- flag.values.each_with_index do |value, index| -%>
# <%= value.comment %>
- <%= value.name %> = 1 << <%= index %>
+ <%= value.name %> = 1 << <%= index + Prism::Template::COMMON_FLAGS_COUNT %>
<%= "\n" if value != flag.values.last -%>
<%- end -%>
end
<%- end -%>
+
+ # The flags that are common to all nodes.
+ module NodeFlags
+ # A flag to indicate that the node is a candidate to emit a :line event
+ # through tracepoint when compiled.
+ NEWLINE = 1
+
+ # A flag to indicate that the value that the node represents is a value that
+ # can be determined at parse-time.
+ STATIC_LITERAL = 2
+ end
end
diff --git a/prism/templates/lib/prism/reflection.rb.erb b/prism/templates/lib/prism/reflection.rb.erb
new file mode 100644
index 0000000000..6c8b2f4d25
--- /dev/null
+++ b/prism/templates/lib/prism/reflection.rb.erb
@@ -0,0 +1,136 @@
+module Prism
+ # The Reflection module provides the ability to reflect on the structure of
+ # the syntax tree itself, as opposed to looking at a single syntax tree. This
+ # is useful in metaprogramming contexts.
+ module Reflection
+ # A field represents a single piece of data on a node. It is the base class
+ # for all other field types.
+ class Field
+ # The name of the field.
+ attr_reader :name
+
+ # Initializes the field with the given name.
+ def initialize(name)
+ @name = name
+ end
+ end
+
+ # A node field represents a single child node in the syntax tree. It
+ # resolves to a Prism::Node in Ruby.
+ class NodeField < Field
+ end
+
+ # An optional node field represents a single child node in the syntax tree
+ # that may or may not be present. It resolves to either a Prism::Node or nil
+ # in Ruby.
+ class OptionalNodeField < Field
+ end
+
+ # A node list field represents a list of child nodes in the syntax tree. It
+ # resolves to an array of Prism::Node instances in Ruby.
+ class NodeListField < Field
+ end
+
+ # A constant field represents a constant value on a node. Effectively, it
+ # represents an identifier found within the source. It resolves to a symbol
+ # in Ruby.
+ class ConstantField < Field
+ end
+
+ # An optional constant field represents a constant value on a node that may
+ # or may not be present. It resolves to either a symbol or nil in Ruby.
+ class OptionalConstantField < Field
+ end
+
+ # A constant list field represents a list of constant values on a node. It
+ # resolves to an array of symbols in Ruby.
+ class ConstantListField < Field
+ end
+
+ # A string field represents a string value on a node. It almost always
+ # represents the unescaped value of a string-like literal. It resolves to a
+ # string in Ruby.
+ class StringField < Field
+ end
+
+ # A location field represents the location of some part of the node in the
+ # source code. For example, the location of a keyword or an operator. It
+ # resolves to a Prism::Location in Ruby.
+ class LocationField < Field
+ end
+
+ # An optional location field represents the location of some part of the
+ # node in the source code that may or may not be present. It resolves to
+ # either a Prism::Location or nil in Ruby.
+ class OptionalLocationField < Field
+ end
+
+ # An integer field represents an integer value. It is used to represent the
+ # value of an integer literal, the depth of local variables, and the number
+ # of a numbered reference. It resolves to an Integer in Ruby.
+ class IntegerField < Field
+ end
+
+ # A float field represents a double-precision floating point value. It is
+ # used exclusively to represent the value of a floating point literal. It
+ # resolves to a Float in Ruby.
+ class FloatField < Field
+ end
+
+ # A flags field represents a bitset of flags on a node. It resolves to an
+ # integer in Ruby. Note that the flags cannot be accessed directly on the
+ # node because the integer is kept private. Instead, the various flags in
+ # the bitset should be accessed through their query methods.
+ class FlagsField < Field
+ # The names of the flags in the bitset.
+ attr_reader :flags
+
+ # Initializes the flags field with the given name and flags.
+ def initialize(name, flags)
+ super(name)
+ @flags = flags
+ end
+ end
+
+ # Returns the fields for the given node.
+ def self.fields_for(node)
+ case node.type
+ <%- nodes.each do |node| -%>
+ when :<%= node.human %>
+ [<%= [*node.flags, *node.fields].map { |field|
+ case field
+ when Prism::Template::NodeField
+ "NodeField.new(:#{field.name})"
+ when Prism::Template::OptionalNodeField
+ "OptionalNodeField.new(:#{field.name})"
+ when Prism::Template::NodeListField
+ "NodeListField.new(:#{field.name})"
+ when Prism::Template::ConstantField
+ "ConstantField.new(:#{field.name})"
+ when Prism::Template::OptionalConstantField
+ "OptionalConstantField.new(:#{field.name})"
+ when Prism::Template::ConstantListField
+ "ConstantListField.new(:#{field.name})"
+ when Prism::Template::StringField
+ "StringField.new(:#{field.name})"
+ when Prism::Template::LocationField
+ "LocationField.new(:#{field.name})"
+ when Prism::Template::OptionalLocationField
+ "OptionalLocationField.new(:#{field.name})"
+ when Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField
+ "IntegerField.new(:#{field.name})"
+ when Prism::Template::DoubleField
+ "FloatField.new(:#{field.name})"
+ when Prism::Template::Flags
+ "FlagsField.new(:flags, [#{field.values.map { |value| ":#{value.name.downcase}?" }.join(", ")}])"
+ else
+ raise field.class.name
+ end
+ }.join(", ") %>]
+ <%- end -%>
+ else
+ raise "Unknown node type: #{node.type.inspect}"
+ end
+ end
+ end
+end
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
index 7cbbfb63c5..6902df5c01 100644
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -1,149 +1,422 @@
require "stringio"
-
-# Polyfill for String#unpack1 with the offset parameter.
-if String.instance_method(:unpack1).parameters.none? { |_, name| name == :offset }
- String.prepend(
- Module.new {
- def unpack1(format, offset: 0) # :nodoc:
- offset == 0 ? super(format) : self[offset..].unpack1(format)
- end
- }
- )
-end
+require_relative "polyfill/unpack1"
module Prism
# A module responsible for deserializing parse results.
module Serialize
# The major version of prism that we are expecting to find in the serialized
# strings.
- MAJOR_VERSION = 0
+ MAJOR_VERSION = 1
# The minor version of prism that we are expecting to find in the serialized
# strings.
- MINOR_VERSION = 19
+ MINOR_VERSION = 8
# The patch version of prism that we are expecting to find in the serialized
# strings.
PATCH_VERSION = 0
- # Deserialize the AST represented by the given string into a parse result.
- def self.load(input, serialized)
+ # Deserialize the dumped output from a request to parse or parse_file.
+ #
+ # The formatting of the source of this method is purposeful to illustrate
+ # the structure of the serialized data.
+ def self.load_parse(input, serialized, freeze)
input = input.dup
- source = Source.new(input)
+ source = Source.for(input)
loader = Loader.new(source, serialized)
- result = loader.load_result
- input.force_encoding(loader.encoding)
+ loader.load_header
+ encoding = loader.load_encoding
+ start_line = loader.load_varsint
+ offsets = loader.load_line_offsets(freeze)
+
+ source.replace_start_line(start_line)
+ source.replace_offsets(offsets)
+
+ comments = loader.load_comments(freeze)
+ magic_comments = loader.load_magic_comments(freeze)
+ data_loc = loader.load_optional_location_object(freeze)
+ errors = loader.load_errors(encoding, freeze)
+ warnings = loader.load_warnings(encoding, freeze)
+ cpool_base = loader.load_uint32
+ cpool_size = loader.load_varuint
+
+ constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)
+
+ node = loader.load_node(constant_pool, encoding, freeze)
+ loader.load_constant_pool(constant_pool)
+ raise unless loader.eof?
+
+ result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, source)
+ result.freeze if freeze
+
+ input.force_encoding(encoding)
+
+ # This is an extremely niche use-case where the file was marked as binary
+ # but it contained UTF-8-encoded characters. In that case we will actually
+ # put it back to UTF-8 to give the location APIs the best chance of being
+ # correct.
+ if !input.ascii_only? && input.encoding == Encoding::BINARY
+ input.force_encoding(Encoding::UTF_8)
+ input.force_encoding(Encoding::BINARY) unless input.valid_encoding?
+ end
+
+ if freeze
+ input.freeze
+ source.deep_freeze
+ end
+
result
end
- # Deserialize the tokens represented by the given string into a parse
- # result.
- def self.load_tokens(source, serialized)
- Loader.new(source, serialized).load_tokens_result
+ # Deserialize the dumped output from a request to lex or lex_file.
+ #
+ # The formatting of the source of this method is purposeful to illustrate
+ # the structure of the serialized data.
+ def self.load_lex(input, serialized, freeze)
+ source = Source.for(input)
+ loader = Loader.new(source, serialized)
+
+ tokens = loader.load_tokens
+ encoding = loader.load_encoding
+ start_line = loader.load_varsint
+ offsets = loader.load_line_offsets(freeze)
+
+ source.replace_start_line(start_line)
+ source.replace_offsets(offsets)
+
+ comments = loader.load_comments(freeze)
+ magic_comments = loader.load_magic_comments(freeze)
+ data_loc = loader.load_optional_location_object(freeze)
+ errors = loader.load_errors(encoding, freeze)
+ warnings = loader.load_warnings(encoding, freeze)
+ raise unless loader.eof?
+
+ result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, source)
+
+ tokens.each do |token|
+ token[0].value.force_encoding(encoding)
+
+ if freeze
+ token[0].deep_freeze
+ token.freeze
+ end
+ end
+
+ if freeze
+ source.deep_freeze
+ tokens.freeze
+ result.freeze
+ end
+
+ result
end
- class Loader # :nodoc:
- attr_reader :encoding, :input, :serialized, :io
- attr_reader :constant_pool_offset, :constant_pool, :source
- attr_reader :start_line
+ # Deserialize the dumped output from a request to parse_comments or
+ # parse_file_comments.
+ #
+ # The formatting of the source of this method is purposeful to illustrate
+ # the structure of the serialized data.
+ def self.load_parse_comments(input, serialized, freeze)
+ source = Source.for(input)
+ loader = Loader.new(source, serialized)
- def initialize(source, serialized)
- @encoding = Encoding::UTF_8
+ loader.load_header
+ loader.load_encoding
+ start_line = loader.load_varsint
- @input = source.source.dup
+ source.replace_start_line(start_line)
+
+ result = loader.load_comments(freeze)
+ raise unless loader.eof?
+
+ source.deep_freeze if freeze
+ result
+ end
+
+ # Deserialize the dumped output from a request to parse_lex or
+ # parse_lex_file.
+ #
+ # The formatting of the source of this method is purposeful to illustrate
+ # the structure of the serialized data.
+ def self.load_parse_lex(input, serialized, freeze)
+ source = Source.for(input)
+ loader = Loader.new(source, serialized)
+
+ tokens = loader.load_tokens
+ loader.load_header
+ encoding = loader.load_encoding
+ start_line = loader.load_varsint
+ offsets = loader.load_line_offsets(freeze)
+
+ source.replace_start_line(start_line)
+ source.replace_offsets(offsets)
+
+ comments = loader.load_comments(freeze)
+ magic_comments = loader.load_magic_comments(freeze)
+ data_loc = loader.load_optional_location_object(freeze)
+ errors = loader.load_errors(encoding, freeze)
+ warnings = loader.load_warnings(encoding, freeze)
+ cpool_base = loader.load_uint32
+ cpool_size = loader.load_varuint
+
+ constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)
+
+ node = loader.load_node(constant_pool, encoding, freeze)
+ loader.load_constant_pool(constant_pool)
+ raise unless loader.eof?
+
+ value = [node, tokens]
+ result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source)
+
+ tokens.each do |token|
+ token[0].value.force_encoding(encoding)
+
+ if freeze
+ token[0].deep_freeze
+ token.freeze
+ end
+ end
+
+ if freeze
+ source.deep_freeze
+ tokens.freeze
+ value.freeze
+ result.freeze
+ end
+
+ result
+ end
+
+ class ConstantPool # :nodoc:
+ attr_reader :size
+
+ def initialize(input, serialized, base, size)
+ @input = input
@serialized = serialized
- @io = StringIO.new(serialized)
- @io.set_encoding(Encoding::BINARY)
+ @base = base
+ @size = size
+ @pool = Array.new(size, nil)
+ end
+
+ def get(index, encoding)
+ @pool[index] ||=
+ begin
+ offset = @base + index * 8
+ start = @serialized.unpack1("L", offset: offset)
+ length = @serialized.unpack1("L", offset: offset + 4)
+
+ if start.nobits?(1 << 31)
+ @input.byteslice(start, length).force_encoding(encoding).to_sym
+ else
+ @serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(encoding).to_sym
+ end
+ end
+ end
+ end
+
+ if RUBY_ENGINE == "truffleruby"
+ # StringIO is synchronized and that adds a high overhead on TruffleRuby.
+ class FastStringIO # :nodoc:
+ attr_accessor :pos
+
+ def initialize(string)
+ @string = string
+ @pos = 0
+ end
+
+ def getbyte
+ byte = @string.getbyte(@pos)
+ @pos += 1
+ byte
+ end
+
+ def read(n)
+ slice = @string.byteslice(@pos, n)
+ @pos += n
+ slice
+ end
+
+ def eof?
+ @pos >= @string.bytesize
+ end
+ end
+ else
+ FastStringIO = ::StringIO # :nodoc:
+ end
- @constant_pool_offset = nil
- @constant_pool = nil
+ class Loader # :nodoc:
+ attr_reader :input, :io, :source
+ def initialize(source, serialized)
+ @input = source.source.dup
+ raise unless serialized.encoding == Encoding::BINARY
+ @io = FastStringIO.new(serialized)
@source = source
- define_load_node_lambdas unless RUBY_ENGINE == "ruby"
+ define_load_node_lambdas if RUBY_ENGINE != "ruby"
+ end
+
+ def eof?
+ io.getbyte
+ io.eof?
+ end
+
+ def load_constant_pool(constant_pool)
+ trailer = 0
+
+ constant_pool.size.times do |index|
+ start, length = io.read(8).unpack("L2")
+ trailer += length if start.anybits?(1 << 31)
+ end
+
+ io.read(trailer)
end
def load_header
raise "Invalid serialization" if io.read(5) != "PRISM"
raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
- only_semantic_fields = io.read(1).unpack1("C")
- unless only_semantic_fields == 0
- raise "Invalid serialization (location fields must be included but are not)"
- end
+ raise "Invalid serialization (location fields must be included but are not)" if io.getbyte != 0
end
def load_encoding
- @encoding = Encoding.find(io.read(load_varuint))
- @input = input.force_encoding(@encoding).freeze
- @encoding
+ encoding = Encoding.find(io.read(load_varuint))
+ @input = input.force_encoding(encoding).freeze
+ encoding
end
- def load_start_line
- source.start_line = load_varsint
+ def load_line_offsets(freeze)
+ offsets = Array.new(load_varuint) { load_varuint }
+ offsets.freeze if freeze
+ offsets
end
- def load_comments
- load_varuint.times.map do
- case load_varuint
- when 0 then InlineComment.new(load_location)
- when 1 then EmbDocComment.new(load_location)
- when 2 then DATAComment.new(load_location)
+ def load_comments(freeze)
+ comments =
+ Array.new(load_varuint) do
+ comment =
+ case load_varuint
+ when 0 then InlineComment.new(load_location_object(freeze))
+ when 1 then EmbDocComment.new(load_location_object(freeze))
+ end
+
+ comment.freeze if freeze
+ comment
end
- end
- end
- def load_metadata
- comments = load_comments
- magic_comments = load_varuint.times.map { MagicComment.new(load_location, load_location) }
- data_loc = load_optional_location
- errors = load_varuint.times.map { ParseError.new(load_embedded_string, load_location) }
- warnings = load_varuint.times.map { ParseWarning.new(load_embedded_string, load_location) }
- [comments, magic_comments, data_loc, errors, warnings]
+ comments.freeze if freeze
+ comments
end
- def load_tokens
- tokens = []
- while type = TOKEN_TYPES.fetch(load_varuint)
- start = load_varuint
- length = load_varuint
- lex_state = load_varuint
- location = Location.new(@source, start, length)
- tokens << [Prism::Token.new(type, location.slice, location), lex_state]
- end
+ def load_magic_comments(freeze)
+ magic_comments =
+ Array.new(load_varuint) do
+ magic_comment =
+ MagicComment.new(
+ load_location_object(freeze),
+ load_location_object(freeze)
+ )
+
+ magic_comment.freeze if freeze
+ magic_comment
+ end
- tokens
+ magic_comments.freeze if freeze
+ magic_comments
end
- def load_tokens_result
- tokens = load_tokens
- encoding = load_encoding
- load_start_line
- comments, magic_comments, data_loc, errors, warnings = load_metadata
- tokens.each { |token,| token.value.force_encoding(encoding) }
+ DIAGNOSTIC_TYPES = [
+ <%- errors.each do |error| -%>
+ <%= error.name.downcase.to_sym.inspect %>,
+ <%- end -%>
+ <%- warnings.each do |warning| -%>
+ <%= warning.name.downcase.to_sym.inspect %>,
+ <%- end -%>
+ ].freeze
+
+ private_constant :DIAGNOSTIC_TYPES
- raise "Expected to consume all bytes while deserializing" unless @io.eof?
- Prism::ParseResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
+ def load_error_level
+ level = io.getbyte
+
+ case level
+ when 0
+ :syntax
+ when 1
+ :argument
+ when 2
+ :load
+ else
+ raise "Unknown level: #{level}"
+ end
end
- def load_nodes
- load_header
- load_encoding
- load_start_line
+ def load_errors(encoding, freeze)
+ errors =
+ Array.new(load_varuint) do
+ error =
+ ParseError.new(
+ DIAGNOSTIC_TYPES.fetch(load_varuint),
+ load_embedded_string(encoding),
+ load_location_object(freeze),
+ load_error_level
+ )
+
+ error.freeze if freeze
+ error
+ end
- comments, magic_comments, data_loc, errors, warnings = load_metadata
+ errors.freeze if freeze
+ errors
+ end
- @constant_pool_offset = io.read(4).unpack1("L")
- @constant_pool = Array.new(load_varuint, nil)
+ def load_warning_level
+ level = io.getbyte
- [load_node, comments, magic_comments, data_loc, errors, warnings]
+ case level
+ when 0
+ :default
+ when 1
+ :verbose
+ else
+ raise "Unknown level: #{level}"
+ end
end
- def load_result
- node, comments, magic_comments, data_loc, errors, warnings = load_nodes
- Prism::ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
+ def load_warnings(encoding, freeze)
+ warnings =
+ Array.new(load_varuint) do
+ warning =
+ ParseWarning.new(
+ DIAGNOSTIC_TYPES.fetch(load_varuint),
+ load_embedded_string(encoding),
+ load_location_object(freeze),
+ load_warning_level
+ )
+
+ warning.freeze if freeze
+ warning
+ end
+
+ warnings.freeze if freeze
+ warnings
end
- private
+ def load_tokens
+ tokens = []
+
+ while (type = TOKEN_TYPES.fetch(load_varuint))
+ start = load_varuint
+ length = load_varuint
+ lex_state = load_varuint
+
+ location = Location.new(@source, start, length)
+ token = Token.new(@source, type, location.slice, location)
+
+ tokens << [token, lex_state]
+ end
+
+ tokens
+ end
# variable-length integer using https://en.wikipedia.org/wiki/LEB128
# This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints
@@ -166,132 +439,146 @@ module Prism
(n >> 1) ^ (-(n & 1))
end
- def load_serialized_length
+ def load_integer
+ negative = io.getbyte != 0
+ length = load_varuint
+
+ value = 0
+ length.times { |index| value |= (load_varuint << (index * 32)) }
+
+ value = -value if negative
+ value
+ end
+
+ def load_double
+ io.read(8).unpack1("D")
+ end
+
+ def load_uint32
io.read(4).unpack1("L")
end
- def load_optional_node
+ def load_optional_node(constant_pool, encoding, freeze)
if io.getbyte != 0
io.pos -= 1
- load_node
+ load_node(constant_pool, encoding, freeze)
end
end
- def load_embedded_string
- io.read(load_varuint).force_encoding(encoding)
+ def load_embedded_string(encoding)
+ io.read(load_varuint).force_encoding(encoding).freeze
end
- def load_string
- type = io.getbyte
- case type
+ def load_string(encoding)
+ case (type = io.getbyte)
when 1
- input.byteslice(load_varuint, load_varuint).force_encoding(encoding)
+ input.byteslice(load_varuint, load_varuint).force_encoding(encoding).freeze
when 2
- load_embedded_string
+ load_embedded_string(encoding)
else
raise "Unknown serialized string type: #{type}"
end
end
- def load_location
- Location.new(source, load_varuint, load_varuint)
+ def load_location_object(freeze)
+ location = Location.new(source, load_varuint, load_varuint)
+ location.freeze if freeze
+ location
end
- def load_optional_location
- load_location if io.getbyte != 0
+ def load_location(freeze)
+ return load_location_object(freeze) if freeze
+ (load_varuint << 32) | load_varuint
end
- def load_constant(index)
- constant = constant_pool[index]
-
- unless constant
- offset = constant_pool_offset + index * 8
- start = serialized.unpack1("L", offset: offset)
- length = serialized.unpack1("L", offset: offset + 4)
-
- constant =
- if start.nobits?(1 << 31)
- input.byteslice(start, length).to_sym
- else
- serialized.byteslice(start & ((1 << 31) - 1), length).to_sym
- end
-
- constant_pool[index] = constant
- end
+ def load_optional_location(freeze)
+ load_location(freeze) if io.getbyte != 0
+ end
- constant
+ def load_optional_location_object(freeze)
+ load_location_object(freeze) if io.getbyte != 0
end
- def load_required_constant
- load_constant(load_varuint - 1)
+ def load_constant(constant_pool, encoding)
+ index = load_varuint
+ constant_pool.get(index - 1, encoding)
end
- def load_optional_constant
+ def load_optional_constant(constant_pool, encoding)
index = load_varuint
- load_constant(index - 1) if index != 0
+ constant_pool.get(index - 1, encoding) if index != 0
end
- if RUBY_ENGINE == 'ruby'
- def load_node
+ if RUBY_ENGINE == "ruby"
+ def load_node(constant_pool, encoding, freeze)
type = io.getbyte
- location = load_location
-
- case type
+ node_id = load_varuint
+ location = load_location(freeze)
+ value = case type
<%- nodes.each_with_index do |node, index| -%>
when <%= index + 1 %> then
<%- if node.needs_serialized_length? -%>
- load_serialized_length
+ load_uint32
<%- end -%>
- <%= node.name %>.new(<%= (node.fields.map { |field|
+ <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
case field
- when Prism::NodeField then "load_node"
- when Prism::OptionalNodeField then "load_optional_node"
- when Prism::StringField then "load_string"
- when Prism::NodeListField then "Array.new(load_varuint) { load_node }"
- when Prism::ConstantField then "load_required_constant"
- when Prism::OptionalConstantField then "load_optional_constant"
- when Prism::ConstantListField then "Array.new(load_varuint) { load_required_constant }"
- when Prism::LocationField then "load_location"
- when Prism::OptionalLocationField then "load_optional_location"
- when Prism::UInt8Field then "io.getbyte"
- when Prism::UInt32Field, Prism::FlagsField then "load_varuint"
+ when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
+ when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
+ when Prism::Template::StringField then "load_string(encoding)"
+ when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }.tap { |nodes| nodes.freeze if freeze }"
+ when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
+ when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
+ when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze }"
+ when Prism::Template::LocationField then "load_location(freeze)"
+ when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
+ when Prism::Template::UInt8Field then "io.getbyte"
+ when Prism::Template::UInt32Field then "load_varuint"
+ when Prism::Template::IntegerField then "load_integer"
+ when Prism::Template::DoubleField then "load_double"
else raise
end
- } + ["location"]).join(", ") -%>)
+ }].join(", ") -%>)
<%- end -%>
end
+
+ value.freeze if freeze
+ value
end
else
- def load_node
- type = io.getbyte
- @load_node_lambdas[type].call
+ def load_node(constant_pool, encoding, freeze)
+ @load_node_lambdas[io.getbyte].call(constant_pool, encoding, freeze)
end
def define_load_node_lambdas
@load_node_lambdas = [
nil,
<%- nodes.each do |node| -%>
- -> {
- location = load_location
+ -> (constant_pool, encoding, freeze) {
+ node_id = load_varuint
+ location = load_location(freeze)
<%- if node.needs_serialized_length? -%>
- load_serialized_length
+ load_uint32
<%- end -%>
- <%= node.name %>.new(<%= (node.fields.map { |field|
+ value = <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
case field
- when Prism::NodeField then "load_node"
- when Prism::OptionalNodeField then "load_optional_node"
- when Prism::StringField then "load_string"
- when Prism::NodeListField then "Array.new(load_varuint) { load_node }"
- when Prism::ConstantField then "load_required_constant"
- when Prism::OptionalConstantField then "load_optional_constant"
- when Prism::ConstantListField then "Array.new(load_varuint) { load_required_constant }"
- when Prism::LocationField then "load_location"
- when Prism::OptionalLocationField then "load_optional_location"
- when Prism::UInt8Field then "io.getbyte"
- when Prism::UInt32Field, Prism::FlagsField then "load_varuint"
+ when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
+ when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
+ when Prism::Template::StringField then "load_string(encoding)"
+ when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }"
+ when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
+ when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
+ when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }"
+ when Prism::Template::LocationField then "load_location(freeze)"
+ when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
+ when Prism::Template::UInt8Field then "io.getbyte"
+ when Prism::Template::UInt32Field then "load_varuint"
+ when Prism::Template::IntegerField then "load_integer"
+ when Prism::Template::DoubleField then "load_double"
else raise
end
- } + ["location"]).join(", ") -%>)
+ }].join(", ") -%>)
+ value.freeze if freeze
+ value
},
<%- end -%>
]
@@ -305,6 +592,11 @@ module Prism
<%- tokens.each do |token| -%>
<%= token.name.to_sym.inspect %>,
<%- end -%>
- ]
+ ].freeze
+
+ private_constant :MAJOR_VERSION, :MINOR_VERSION, :PATCH_VERSION
+ private_constant :ConstantPool, :FastStringIO, :Loader, :TOKEN_TYPES
end
+
+ private_constant :Serialize
end
diff --git a/prism/templates/lib/prism/visitor.rb.erb b/prism/templates/lib/prism/visitor.rb.erb
index 04156cc7a9..76f907724f 100644
--- a/prism/templates/lib/prism/visitor.rb.erb
+++ b/prism/templates/lib/prism/visitor.rb.erb
@@ -7,17 +7,20 @@ module Prism
# Calls `accept` on the given node if it is not `nil`, which in turn should
# call back into this visitor by calling the appropriate `visit_*` method.
def visit(node)
+ # @type self: _Visitor
node&.accept(self)
end
# Visits each node in `nodes` by calling `accept` on each one.
def visit_all(nodes)
+ # @type self: _Visitor
nodes.each { |node| node&.accept(self) }
end
# Visits the child nodes of `node` by calling `accept` on each one.
def visit_child_nodes(node)
- node.compact_child_nodes.each { |node| node.accept(self) }
+ # @type self: _Visitor
+ node.each_child_node { |node| node.accept(self) }
end
end
@@ -31,7 +34,7 @@ module Prism
#
# class FooCalls < Prism::Visitor
# def visit_call_node(node)
- # if node.name == "foo"
+ # if node.name == :foo
# # Do something with the node
# end
#
@@ -44,7 +47,9 @@ module Prism
<%- nodes.each_with_index do |node, index| -%>
<%= "\n" if index != 0 -%>
# Visit a <%= node.name %> node
- alias visit_<%= node.human %> visit_child_nodes
+ def visit_<%= node.human %>(node)
+ node.each_child_node { |node| node.accept(self) }
+ end
<%- end -%>
end
end
diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb
new file mode 100644
index 0000000000..121dd4b2b6
--- /dev/null
+++ b/prism/templates/src/diagnostic.c.erb
@@ -0,0 +1,526 @@
+#include "prism/diagnostic.h"
+
+#define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %>
+
+/** One entry of the diagnostic table: the message text and the level for a single diagnostic ID. */
+typedef struct {
+ /** The message text for the diagnostic; entries may contain printf-style conversion specifiers such as `%s` or `%.*s`. */
+ const char* message;
+
+ /** The level of the diagnostic, stored as one of the `PM_ERROR_LEVEL_*` or `PM_WARNING_LEVEL_*` values. */
+ uint8_t level;
+} pm_diagnostic_data_t;
+
+/**
+ * ## Message composition
+ *
+ * When composing an error message, use sentence fragments.
+ *
+ * Try describing the property of the code that caused the error, rather than
+ * the rule that is being violated. It may help to use a fragment that completes
+ * a sentence beginning, "the parser encountered (a) ...". If appropriate, add a
+ * description of the rule violation (or other helpful context) after a
+ * semicolon.
+ *
+ * For example, instead of "control escape sequence cannot be doubled", prefer:
+ *
+ * > "invalid control escape sequence; control cannot be repeated"
+ *
+ * In some cases, where the failure is more general or syntax expectations are
+ * violated, it may make more sense to use a fragment that completes a sentence
+ * beginning, "the parser ...".
+ *
+ * For example:
+ *
+ * > "expected an expression after `(`"
+ * > "cannot parse the expression"
+ *
+ * ## Message style guide
+ *
+ * - Use articles like "a", "an", and "the" when appropriate.
+ * - e.g., prefer "cannot parse the expression" to "cannot parse expression".
+ * - Use the common name for tokens and nodes.
+ * - e.g., prefer "keyword splat" to "assoc splat"
+ * - e.g., prefer "embedded document" to "embdoc"
+ * - Do not capitalize the initial word of the message.
+ * - Use back ticks around token literals
+ * - e.g., "expected a `=>` between the hash key and value"
+ * - Do not use `.` or other punctuation at the end of the message.
+ * - Do not use contractions like "can't". Prefer "cannot" to "can not".
+ * - For tokens that can have multiple meanings, reference the token and its meaning.
+ * - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
+ *
+ * ## Error names (PM_ERR_*)
+ *
+ * - When appropriate, prefer node name to token name.
+ * - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
+ * - Prefer token name to common name.
+ * - e.g., prefer "STAR" to "ASTERISK".
+ * - Try to order the words in the name from more general to more specific.
+ * - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
+ * - When in doubt, look for similar patterns and name them so that they are grouped when lexically
+ * sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
+ *
+ * ## Level
+ *
+ * For errors, the possible levels are:
+ *
+ * * `PM_ERROR_LEVEL_SYNTAX` - Errors that should raise SyntaxError.
+ * * `PM_ERROR_LEVEL_ARGUMENT` - Errors that should raise ArgumentError.
+ * * `PM_ERROR_LEVEL_LOAD` - Errors that should raise LoadError.
+ *
+ * For warnings, the possible levels are:
+ *
+ * * `PM_WARNING_LEVEL_DEFAULT` - Warnings that appear for `ruby -c -e 'code'`.
+ * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
+ */
+static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
+ // Special error that can be replaced
+ [PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_SYNTAX },
+
+ // Errors that should raise argument errors
+ [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_ARGUMENT },
+
+ // Errors that should raise load errors
+ [PM_ERR_SCRIPT_NOT_FOUND] = { "no Ruby script found in input", PM_ERROR_LEVEL_LOAD },
+
+ // Errors that should raise syntax errors
+ [PM_ERR_ALIAS_ARGUMENT] = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE] = { "invalid argument being passed to `alias`; can't make alias for the number variables", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_AFTER_BLOCK] = { "unexpected argument after a block argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES] = { "unexpected argument after `...`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_BARE_HASH] = { "unexpected bare hash argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_BLOCK_MULTI] = { "both block arg and actual block given; only one block is allowed", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_CONFLICT_AMPERSAND] = { "unexpected `&`; anonymous block parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_CONFLICT_STAR] = { "unexpected `*`; anonymous rest parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_CONFLICT_STAR_STAR] = { "unexpected `**`; anonymous keyword rest parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORMAL_CLASS] = { "invalid formal argument; formal argument cannot be a class variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORMAL_CONSTANT] = { "invalid formal argument; formal argument cannot be a constant", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORMAL_GLOBAL] = { "invalid formal argument; formal argument cannot be a global variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORMAL_IVAR] = { "invalid formal argument; formal argument cannot be an instance variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORWARDING_UNBOUND] = { "unexpected `...` in an non-parenthesized call", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND] = { "unexpected `&`; no anonymous block parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES] = { "unexpected ... when the parent method is not forwarding", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_STAR] = { "unexpected `*`; no anonymous rest parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR] = { "unexpected `**`; no anonymous keyword rest parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT] = { "unexpected `*` splat argument after a `**` keyword splat argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT] = { "unexpected `*` splat argument after a `*` splat argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_TERM_PAREN] = { "unexpected %s; expected a `)` to close the arguments", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_UNEXPECTED_BLOCK] = { "unexpected '{' after a method call without parenthesis", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_ELEMENT] = { "expected an element for the array", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_EXPRESSION] = { "expected an expression for the array element", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_EXPRESSION_AFTER_STAR] = { "expected an expression after `*` in the array", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_SEPARATOR] = { "unexpected %s; expected a `,` separator for the array elements", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_TERM] = { "unexpected %s; expected a `]` to close the array", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_LONELY_ELSE] = { "unexpected `else` in `begin` block; else without rescue is useless", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_TERM] = { "expected an `end` to close the `begin` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_UPCASE_BRACE] = { "expected a `{` after `BEGIN`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_UPCASE_TERM] = { "expected a `}` to close the `BEGIN` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_UPCASE_TOPLEVEL] = { "BEGIN is permitted only at toplevel", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE] = { "expected a local variable name in the block parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BLOCK_PARAM_PIPE_TERM] = { "expected the block parameters to end with `|`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BLOCK_TERM_BRACE] = { "expected a block beginning with `{` to end with `}`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BLOCK_TERM_END] = { "expected a block beginning with `do` to end with `end`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CANNOT_PARSE_STRING_PART] = { "cannot parse the string part", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_EXPRESSION_AFTER_CASE] = { "expected an expression after `case`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_EXPRESSION_AFTER_WHEN] = { "expected an expression after `when`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_MATCH_MISSING_PREDICATE] = { "expected a predicate for a case matching statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_MISSING_CONDITIONS] = { "expected a `when` or `in` clause after `case`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_TERM] = { "expected an `end` to close the `case` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_IN_METHOD] = { "unexpected class definition in method body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_NAME] = { "unexpected constant path after `class`; class/module name must be CONSTANT", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_SUPERCLASS] = { "expected a superclass after `<`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_TERM] = { "expected an `end` to close the `class` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_UNEXPECTED_END] = { "unexpected `end`, expecting ';' or '\\n'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_VARIABLE_BARE] = { "'@@' without identifiers is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_ELSIF_PREDICATE] = { "expected a predicate expression for the `elsif` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_IF_PREDICATE] = { "expected a predicate expression for the `if` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_PREDICATE_TERM] = { "expected `then` or `;` or '\\n'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_TERM] = { "expected an `end` to close the conditional clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_TERM_ELSE] = { "expected an `end` to close the `else` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_UNLESS_PREDICATE] = { "expected a predicate expression for the `unless` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_UNTIL_PREDICATE] = { "expected a predicate expression for the `until` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_WHILE_PREDICATE] = { "expected a predicate expression for the `while` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = { "expected a constant after the `::` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS] = { "could not parse the endless method body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS_PARAMETERS] = { "could not parse the endless method parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS_SETTER] = { "invalid method name; a setter method cannot be defined in an endless method definition", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_NAME] = { "unexpected %s; expected a method name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_PARAMS_TERM] = { "expected a delimiter to close the parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_PARAMS_TERM_PAREN] = { "unexpected %s; expected a `)` to close the parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_RECEIVER] = { "expected a receiver for the method definition", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_RECEIVER_TERM] = { "expected a `.` or `::` after the receiver in a method definition", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_TERM] = { "expected an `end` to close the `def` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEFINED_EXPRESSION] = { "expected an expression after `defined?`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EMBDOC_TERM] = { "embedded document meets end of file", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EMBEXPR_END] = { "expected a `}` to close the embedded expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EMBVAR_INVALID] = { "invalid embedded variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_END_UPCASE_BRACE] = { "expected a `{` after `END`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_END_UPCASE_TERM] = { "expected a `}` to close the `END` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_CONTROL] = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT] = { "invalid control escape sequence; control cannot be repeated", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_HEXADECIMAL] = { "invalid hex escape sequence", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_META] = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_META_REPEAT] = { "invalid meta escape sequence; meta cannot be repeated", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE] = { "invalid Unicode escape sequence", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_LIST] = { "invalid Unicode list: %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_SHORT] = { "too short escape sequence: %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "unterminated Unicode escape", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_ARGUMENT] = { "unexpected %s; expected an argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = { "expected an expression after `&&=`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = { "expected an expression after `||=`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = { "expected an expression after `,`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL] = { "expected an expression after `=`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS] = { "expected an expression after `<<`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN] = { "expected an expression after `(`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR] = { "unexpected %s; expected an expression after the operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT] = { "expected an expression after `*` splat in an argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH] = { "expected an expression after `**` in a hash", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_STAR] = { "expected an expression after `*`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_FOR_DELIMITER] = { "unexpected %s; expected a 'do', newline, or ';' after the 'for' loop collection", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = { "expected an identifier for the required parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_IN_DELIMITER] = { "expected a delimiter after the patterns of an `in` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN] = { "expected a `(` immediately after `not`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER] = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = { "expected a `(` to start a required parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_MESSAGE] = { "unexpected %s; expecting a message to send to the receiver", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_RBRACKET] = { "expected a matching `]`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_RPAREN] = { "expected a matching `)`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_RPAREN_AFTER_MULTI] = { "expected a `)` after multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_RPAREN_REQ_PARAMETER] = { "expected a `)` to end a required parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER] = { "unexpected %s; expected a newline or a ';' after the singleton class", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_STRING_CONTENT] = { "expected string content after opening string delimiter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_WHEN_DELIMITER] = { "expected a delimiter after the predicates of a `when` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_BARE_HASH] = { "unexpected bare hash in expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE] = { "unexpected '='; target cannot be written", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING] = { "Can't assign to __ENCODING__", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE] = { "Can't assign to false", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_FILE] = { "Can't assign to __FILE__", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_LINE] = { "Can't assign to __LINE__", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_NIL] = { "Can't assign to nil", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED] = { "Can't assign to numbered parameter %.2s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_SELF] = { "Can't change the value of self", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE] = { "Can't assign to true", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FLOAT_PARSE] = { "could not parse the float '%.*s'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FOR_COLLECTION] = { "expected a collection after the `in` in a `for` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FOR_INDEX] = { "expected an index after `for`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FOR_IN] = { "expected an `in` after the index in a `for` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FOR_TERM] = { "expected an `end` to close the `for` loop", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_GLOBAL_VARIABLE_BARE] = { "'$' without identifiers is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_EXPRESSION_AFTER_LABEL] = { "expected an expression after the label in a hash", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_KEY] = { "unexpected %s, expecting '}' or a key in the hash literal", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_ROCKET] = { "expected a `=>` between the hash key and value", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_TERM] = { "expected a `}` to close the hash literal", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_VALUE] = { "unexpected %s; expected a value in the hash literal", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HEREDOC_IDENTIFIER] = { "unterminated here document identifier", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HEREDOC_TERM] = { "unterminated heredoc; can't find string \"%.*s\" anywhere before EOF", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_QUESTION_MARK] = { "incomplete expression at `?`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3] = { "`%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_VARIABLE_CLASS] = { "'%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3] = { "`%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = { "'%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INSTANCE_VARIABLE_BARE] = { "'@' without identifiers is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_BLOCK_EXIT] = { "Invalid %s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_COMMA] = { "invalid comma", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_ESCAPE_CHARACTER] = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_FLOAT_EXPONENT] = { "invalid exponent", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_LOCAL_VARIABLE_READ] = { "identifier %.*s is not valid to get", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_LOCAL_VARIABLE_WRITE] = { "identifier %.*s is not valid to set", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_BINARY] = { "invalid binary number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_DECIMAL] = { "invalid decimal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_FRACTION] = { "unexpected fraction part after numeric literal", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_HEXADECIMAL] = { "invalid hexadecimal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_OCTAL] = { "invalid octal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER] = { "invalid underscore placement in number", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING] = { "trailing '_' in number", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_CHARACTER] = { "Invalid char '\\x%02X' in expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_MULTIBYTE_CHAR] = { "invalid multibyte char (%s)", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_MULTIBYTE_CHARACTER] = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_MULTIBYTE_ESCAPE] = { "invalid multibyte escape: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_PRINTABLE_CHARACTER] = { "invalid character `%c`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_PERCENT] = { "unknown type of %string", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_PERCENT_EOF] = { "unterminated quoted string meets end of file", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_RETRY_AFTER_ELSE] = { "Invalid retry after else", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_RETRY_AFTER_ENSURE] = { "Invalid retry after ensure", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_RETRY_WITHOUT_RESCUE] = { "Invalid retry without rescue", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_SYMBOL] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_VARIABLE_GLOBAL_3_3] = { "`%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_VARIABLE_GLOBAL] = { "'%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_YIELD] = { "Invalid yield", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_IT_NOT_ALLOWED_NUMBERED] = { "'it' is not allowed when a numbered parameter is already used", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_IT_NOT_ALLOWED_ORDINARY] = { "'it' is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LAMBDA_OPEN] = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LAMBDA_TERM_BRACE] = { "expected a lambda block beginning with `{` to end with `}`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LAMBDA_TERM_END] = { "expected a lambda block beginning with `do` to end with `end`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_I_LOWER_ELEMENT] = { "expected a symbol in a `%i` list", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_I_LOWER_TERM] = { "unterminated list; expected a closing delimiter for the `%i`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_I_UPPER_ELEMENT] = { "expected a symbol in a `%I` list", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_I_UPPER_TERM] = { "unterminated list; expected a closing delimiter for the `%I`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_W_LOWER_ELEMENT] = { "expected a string in a `%w` list", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_W_LOWER_TERM] = { "unterminated list; expected a closing delimiter for the `%w`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_W_UPPER_ELEMENT] = { "expected a string in a `%W` list", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_W_UPPER_TERM] = { "unterminated list; expected a closing delimiter for the `%W`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MALLOC_FAILED] = { "failed to allocate memory", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MIXED_ENCODING] = { "UTF-8 mixed within %s source", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MODULE_IN_METHOD] = { "unexpected module definition in method body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MODULE_NAME] = { "unexpected constant path after `module`; class/module name must be CONSTANT", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MODULE_TERM] = { "expected an `end` to close the `module` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MULTI_ASSIGN_MULTI_SPLATS] = { "multiple splats in multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST] = { "unexpected '%.*s' resulting in multiple splats in multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NESTING_TOO_DEEP] = { "nesting too deep", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NO_LOCAL_VARIABLE] = { "%.*s: no such local variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NON_ASSOCIATIVE_OPERATOR] = { "unexpected %s; %s is a non-associative operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NOT_EXPRESSION] = { "expected an expression after `not`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBER_LITERAL_UNDERSCORE] = { "number literal ending with a `_`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK] = { "numbered parameter is already used in inner block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBERED_PARAMETER_IT] = { "numbered parameters are not allowed when 'it' is already used", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBERED_PARAMETER_ORDINARY] = { "numbered parameters are not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK] = { "numbered parameter is already used in outer block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_OPERATOR_MULTI_ASSIGN] = { "unexpected operator for a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_OPERATOR_WRITE_ARGUMENTS] = { "unexpected operator after a call with arguments", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_OPERATOR_WRITE_BLOCK] = { "unexpected operator after a call with a block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI] = { "unexpected multiple `**` splat parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_BLOCK_MULTI] = { "multiple block parameters; only one block is allowed", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_CIRCULAR] = { "circular argument reference - %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_FORWARDING_AFTER_REST] = { "... after rest argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_METHOD_NAME] = { "unexpected name for a parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_NAME_DUPLICATED] = { "duplicated argument name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_NO_DEFAULT] = { "expected a default value for the parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_NO_DEFAULT_KW] = { "expected a default value for the keyword parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_NUMBERED_RESERVED] = { "%.2s is reserved for numbered parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_ORDER] = { "unexpected parameter order", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_SPLAT_MULTI] = { "unexpected multiple `*` splat parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_STAR] = { "unexpected parameter `*`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_UNEXPECTED_FWD] = { "unexpected `...` in parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_WILD_LOOSE_COMMA] = { "unexpected `,` in parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_UNEXPECTED_NO_KW] = { "unexpected **nil; no keywords marker disallowed after keywords", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS] = { "unexpected multiple '*' rest patterns in an array pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_CAPTURE_DUPLICATE] = { "duplicated variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE] = { "variable capture in alternative pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = { "expected a pattern expression after the `[` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = { "expected a pattern expression after `,`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = { "expected a pattern expression after `=>`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_IN] = { "expected a pattern expression after the `in` keyword", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_KEY] = { "expected a pattern expression after the key", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN] = { "expected a pattern expression after the `(` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PIN] = { "expected a pattern expression after the `^` pin operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE] = { "expected a pattern expression after the `|` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE] = { "expected a pattern expression after the range operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_REST] = { "unexpected pattern expression after the `**` expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_FIND_MISSING_INNER] = { "find patterns need at least one required inner pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_IMPLICIT] = { "unexpected implicit hash in pattern; use '{' to delineate", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY] = { "unexpected %s; expected a key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY_DUPLICATE] = { "duplicated key name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY_INTERPOLATED] = { "symbol literal with interpolation is not allowed", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY_LABEL] = { "expected a label as the key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY_LOCALS] = { "key must be valid as local variables", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_IDENT_AFTER_HROCKET] = { "expected an identifier after the `=>` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_LABEL_AFTER_COMMA] = { "expected a label after the `,` in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_REST] = { "unexpected rest pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_TERM_BRACE] = { "expected a `}` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_TERM_BRACKET] = { "expected a `]` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_TERM_PAREN] = { "expected a `)` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN] = { "unexpected `||=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH] = { "regexp encoding option '%c' differs from source encoding '%s'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING] = { "incompatible character encoding: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_NON_ESCAPED_MBC] = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_INVALID_UNICODE_RANGE] = { "invalid Unicode range: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_PARSE_ERROR] = { "%s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_UNKNOWN_OPTIONS] = { "unknown regexp %s - %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_TERM] = { "unterminated regexp meets end of file; expected a closing delimiter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP] = { "UTF-8 character in non UTF-8 regexp: /%s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RESCUE_EXPRESSION] = { "expected a rescued expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RESCUE_MODIFIER_VALUE] = { "expected a value after the `rescue` modifier", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RESCUE_TERM] = { "expected a closing delimiter for the `rescue` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RESCUE_VARIABLE] = { "expected an exception variable after `=>` in a rescue statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RETURN_INVALID] = { "Invalid return in class/module body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_SINGLETON_FOR_LITERALS] = { "cannot define singleton method for literals", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STATEMENT_ALIAS] = { "unexpected an `alias` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STATEMENT_POSTEXE_END] = { "unexpected an `END` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STATEMENT_PREEXE_BEGIN] = { "unexpected a `BEGIN` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STATEMENT_UNDEF] = { "unexpected an `undef` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STRING_CONCATENATION] = { "expected a string for concatenation", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STRING_INTERPOLATED_TERM] = { "unterminated string; expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719
+ [PM_ERR_SYMBOL_TERM_DYNAMIC] = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "unterminated symbol; expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_TERNARY_COLON] = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_TERNARY_EXPRESSION_FALSE] = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_TERNARY_EXPRESSION_TRUE] = { "expected an expression after `?` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNARY_RECEIVER] = { "unexpected %s, expected a receiver for unary `%c`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNARY_DISALLOWED] = { "unexpected %s; unary calls are not allowed in this context", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNDEF_ARGUMENT] = { "invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_BLOCK_ARGUMENT] = { "block argument should not be given", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_INDEX_BLOCK] = { "unexpected block arg given in index assignment; blocks are not allowed in index assignment expressions", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_INDEX_KEYWORDS] = { "unexpected keyword arg given in index assignment; keywords are not allowed in index assignment expressions", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_LABEL] = { "unexpected label", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_MULTI_WRITE] = { "unexpected multiple assignment; multiple assignment is not allowed in this context", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE] = { "unexpected %s; expected a default value for a parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_RANGE_OPERATOR] = { "unexpected range operator; .. and ... are non-associative and cannot be chained", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_SAFE_NAVIGATION] = { "&. inside multiple assignment destination", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT] = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_TOKEN_IGNORE] = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNTIL_TERM] = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_VOID_EXPRESSION] = { "unexpected void value expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_WHILE_TERM] = { "expected an `end` to close the `while` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_WRITE_TARGET_IN_METHOD] = { "dynamic constant assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_WRITE_TARGET_READONLY] = { "Can't set variable %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_WRITE_TARGET_UNEXPECTED] = { "unexpected write target", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_XSTRING_TERM] = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_SYNTAX },
+
+ // Warnings
+ [PM_WARN_AMBIGUOUS_BINARY_OPERATOR] = { "'%s' after local variable or literal is interpreted as binary operator even though it seems like %s", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = { "ambiguous first argument; put parentheses or a space even after `-` operator", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND] = { "ambiguous `&` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_PREFIX_STAR] = { "ambiguous `*` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR] = { "ambiguous `**` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_SLASH] = { "ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_COMPARISON_AFTER_COMPARISON] = { "comparison '%.*s' after comparison", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_DOT_DOT_DOT_EOL] = { "... at EOL, should be parenthesized?", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_DUPLICATED_HASH_KEY] = { "key %.*s is duplicated and overwritten on line %" PRIi32, PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_DUPLICATED_WHEN_CLAUSE] = { "'when' clause on line %" PRIi32 " duplicates 'when' clause on line %" PRIi32 " and is ignored", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_EQUAL_IN_CONDITIONAL_3_3] = { "found `= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_EQUAL_IN_CONDITIONAL] = { "found '= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_END_IN_METHOD] = { "END in method; use at_exit", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_FLOAT_OUT_OF_RANGE] = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_IGNORED_FROZEN_STRING_LITERAL] = { "'frozen_string_literal' is ignored after any tokens", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_INDENTATION_MISMATCH] = { "mismatched indentations at '%.*s' with '%.*s' at %" PRIi32, PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_INTEGER_IN_FLIP_FLOP] = { "integer literal in flip-flop", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_INVALID_CHARACTER] = { "invalid character syntax; use %s%s%s", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_INVALID_MAGIC_COMMENT_VALUE] = { "invalid value for %.*s: %.*s", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_INVALID_NUMBERED_REFERENCE] = { "'%.*s' is too big for a number variable, always nil", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_KEYWORD_EOL] = { "`%.*s` at the end of line without an expression", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_LITERAL_IN_CONDITION_DEFAULT] = { "%sliteral in %s", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_LITERAL_IN_CONDITION_VERBOSE] = { "%sliteral in %s", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE] = { "'shareable_constant_value' is ignored unless in comment-only line", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_SHEBANG_CARRIAGE_RETURN] = { "shebang line ending with \\r may cause problems", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_UNEXPECTED_CARRIAGE_RETURN] = { "encountered \\r in middle of line, treated as a mere space", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_UNREACHABLE_STATEMENT] = { "statement not reached", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_UNUSED_LOCAL_VARIABLE] = { "assigned but unused variable - %.*s", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_VOID_STATEMENT] = { "possibly useless use of %.*s in void context", PM_WARNING_LEVEL_VERBOSE }
+};
+
+/**
+ * Get the human-readable name of the given diagnostic ID.
+ *
+ * The switch below is generated by the template: one case is emitted for every
+ * entry of the errors collection (prefixed PM_ERR_) and one for every entry of
+ * the warnings collection (prefixed PM_WARN_). Each case returns the entry's
+ * name in lower case as a string literal, so the caller must not free it.
+ *
+ * If diag_id matches none of the generated cases the assertion fires; when
+ * assertions are compiled out an empty string is returned instead.
+ */
+const char *
+pm_diagnostic_id_human(pm_diagnostic_id_t diag_id) {
+ switch (diag_id) {
+ <%- errors.each do |error| -%>
+ case PM_ERR_<%= error.name %>: return "<%= error.name.downcase %>";
+ <%- end -%>
+ <%- warnings.each do |warning| -%>
+ case PM_WARN_<%= warning.name %>: return "<%= warning.name.downcase %>";
+ <%- end -%>
+ }
+
+ assert(false && "unreachable");
+ return "";
+}
+
+/**
+ * Look up the message text registered for the given diagnostic ID in the
+ * diagnostic_messages table. The ID must be below PM_DIAGNOSTIC_ID_MAX and the
+ * table entry must have a non-NULL message; both are checked with assertions.
+ */
+static inline const char *
+pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
+    assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
+
+    const char *text = diagnostic_messages[diag_id].message;
+    assert(text != NULL);
+    return text;
+}
+
+/**
+ * Look up the level registered for the given diagnostic ID in the
+ * diagnostic_messages table and return it narrowed to a uint8_t. The ID must
+ * be below PM_DIAGNOSTIC_ID_MAX, which is checked with an assertion.
+ */
+static inline uint8_t
+pm_diagnostic_level(pm_diagnostic_id_t diag_id) {
+    assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
+
+    uint8_t level = (uint8_t) diagnostic_messages[diag_id].level;
+    return level;
+}
+
+/**
+ * Append a diagnostic with a fixed message to the given list of diagnostics.
+ *
+ * The message and level are looked up from the diagnostic ID. The message is
+ * not copied, so the new entry is marked as not owning it. Returns false if the
+ * entry could not be allocated (nothing is appended), otherwise true.
+ */
+bool
+pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_t *entry = (pm_diagnostic_t *) xcalloc(1, sizeof(*entry));
+    if (entry == NULL) {
+        return false;
+    }
+
+    // Build the value first, then copy it into the freshly allocated entry.
+    // Members that are not named here are zero-initialized.
+    const pm_diagnostic_t contents = {
+        .location = { start, end },
+        .diag_id = diag_id,
+        .message = pm_diagnostic_message(diag_id),
+        .owned = false,
+        .level = pm_diagnostic_level(diag_id)
+    };
+    *entry = contents;
+
+    pm_list_append(list, (pm_list_node_t *) entry);
+    return true;
+}
+
+/**
+ * Append a diagnostic to the given list of diagnostics that is using a format
+ * string for its message.
+ *
+ * The message registered for diag_id is used as a printf-style format and the
+ * variadic arguments supply its values. The formatted text is written into a
+ * newly allocated buffer that the diagnostic owns (owned is set to true), so it
+ * is released together with the diagnostic.
+ *
+ * Returns true when the diagnostic was appended. Returns false, leaving the
+ * list untouched and releasing anything allocated here, if formatting fails or
+ * if either allocation fails.
+ */
+bool
+pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...) {
+    const char *format = pm_diagnostic_message(diag_id);
+
+    // First pass: measure the formatted message without writing anything.
+    va_list arguments;
+    va_start(arguments, diag_id);
+    int result = vsnprintf(NULL, 0, format, arguments);
+    va_end(arguments);
+
+    if (result < 0) {
+        return false;
+    }
+
+    // Widen before adding room for the terminator so that a measured length of
+    // INT_MAX cannot overflow a signed addition.
+    size_t length = ((size_t) result) + 1;
+
+    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
+    if (diagnostic == NULL) {
+        return false;
+    }
+
+    char *message = (char *) xmalloc(length);
+    if (message == NULL) {
+        xfree(diagnostic);
+        return false;
+    }
+
+    // Second pass: the argument list has to be restarted because the first
+    // pass consumed it.
+    va_start(arguments, diag_id);
+    int written = vsnprintf(message, length, format, arguments);
+    va_end(arguments);
+
+    // On a formatting error the buffer contents are not usable as a message,
+    // so do not publish them.
+    if (written < 0) {
+        xfree(message);
+        xfree(diagnostic);
+        return false;
+    }
+
+    *diagnostic = (pm_diagnostic_t) {
+        .location = { start, end },
+        .diag_id = diag_id,
+        .message = message,
+        .owned = true,
+        .level = pm_diagnostic_level(diag_id)
+    };
+
+    pm_list_append(list, (pm_list_node_t *) diagnostic);
+    return true;
+}
+
+/**
+ * Release every diagnostic held by the given list. For each entry, the message
+ * is freed first when the entry owns it, and then the entry itself is freed.
+ * The list structure passed in is only read, never written.
+ */
+void
+pm_diagnostic_list_free(pm_list_t *list) {
+    pm_diagnostic_t *following = NULL;
+
+    for (pm_diagnostic_t *current = (pm_diagnostic_t *) list->head; current != NULL; current = following) {
+        // Fetch the successor before the current entry is released.
+        following = (pm_diagnostic_t *) current->node.next;
+
+        if (current->owned) {
+            xfree((void *) current->message);
+        }
+
+        xfree(current);
+    }
+}
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
index 0552767f4d..2357e55200 100644
--- a/prism/templates/src/node.c.erb
+++ b/prism/templates/src/node.c.erb
@@ -1,19 +1,43 @@
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
#include "prism/node.h"
-static void
-pm_node_memsize_node(pm_node_t *node, pm_memsize_t *memsize);
-
/**
- * Calculate the size of the node list in bytes.
+ * Ensures the node list has room for `size` more entries beyond its current
+ * size. If the requested size is strictly below the current capacity, nothing
+ * is reallocated. Otherwise the new capacity starts at 4 (when the capacity is
+ * zero) or at twice the current capacity, and keeps doubling until it covers
+ * the requested size. Returns false, without modifying the list, if the size
+ * or capacity arithmetic wraps around or if the reallocation fails; otherwise
+ * it returns true.
*/
-static size_t
-pm_node_list_memsize(pm_node_list_t *node_list, pm_memsize_t *memsize) {
- size_t size = sizeof(pm_node_list_t) + (node_list->capacity * sizeof(pm_node_t *));
- for (size_t index = 0; index < node_list->size; index++) {
- pm_node_memsize_node(node_list->nodes[index], memsize);
+static bool
+pm_node_list_grow(pm_node_list_t *list, size_t size) {
+ size_t requested_size = list->size + size;
+
+ // If the requested size wrapped around (unsigned overflow), return false.
+ if (requested_size < list->size) return false;
+
+ // If the requested size is strictly below the existing capacity, return true.
+ if (requested_size < list->capacity) return true;
+
+ // Otherwise, start from 4 for an empty list or from twice the old capacity.
+ size_t next_capacity = list->capacity == 0 ? 4 : list->capacity * 2;
+
+ // If multiplying by 2 caused overflow, return false.
+ if (next_capacity < list->capacity) return false;
+
+ // If we didn't get enough by doubling, keep doubling until we do.
+ while (requested_size > next_capacity) {
+ size_t double_capacity = next_capacity * 2;
+
+ // Ensure we didn't overflow by multiplying by 2.
+ if (double_capacity < next_capacity) return false;
+ next_capacity = double_capacity;
}
- return size;
+
+ pm_node_t **nodes = (pm_node_t **) xrealloc(list->nodes, sizeof(pm_node_t *) * next_capacity);
+ if (nodes == NULL) return false;
+
+ list->nodes = nodes;
+ list->capacity = next_capacity;
+ return true;
}
/**
@@ -21,28 +45,56 @@ pm_node_list_memsize(pm_node_list_t *node_list, pm_memsize_t *memsize) {
*/
void
pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
- if (list->size == list->capacity) {
- list->capacity = list->capacity == 0 ? 4 : list->capacity * 2;
- list->nodes = (pm_node_t **) realloc(list->nodes, sizeof(pm_node_t *) * list->capacity);
+ if (pm_node_list_grow(list, 1)) { // if growing fails the node is not appended and no error is reported
+ list->nodes[list->size++] = node;
+ }
+}
+
+/**
+ * Insert a node at the front of the node list, shifting every existing entry
+ * up by one slot. If the list cannot be grown, the list is left as it was and
+ * the node is not inserted.
+ */
+void
+pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node) {
+    if (!pm_node_list_grow(list, 1)) return;
+
+    // Source and destination overlap, hence memmove rather than memcpy.
+    size_t existing_bytes = list->size * sizeof(pm_node_t *);
+    memmove(&list->nodes[1], &list->nodes[0], existing_bytes);
+
+    list->nodes[0] = node;
+    list->size += 1;
+}
+
+/**
+ * Copy all node pointers of `other` onto the end of `list`. Nothing happens
+ * when `other` is empty or when `list` cannot be grown to hold the extra
+ * entries. `other` itself is not modified.
+ */
+void
+pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other) {
+    size_t count = other->size;
+    if (count == 0) return;
+    if (!pm_node_list_grow(list, count)) return;
+
+    memcpy(&list->nodes[list->size], other->nodes, count * sizeof(pm_node_t *));
+    list->size += count;
+}
+
+/**
+ * Free the internal memory associated with the given node list.
+ *
+ * Only the array of node pointers is released; the nodes it points to are not
+ * destroyed here. Afterwards the list is reset to all zeroes. A list whose
+ * capacity is zero is left untouched.
+ */
+void
+pm_node_list_free(pm_node_list_t *list) {
+ if (list->capacity > 0) {
+ xfree(list->nodes);
+ *list = (pm_node_list_t) { 0 };
}
- list->nodes[list->size++] = node;
}
PRISM_EXPORTED_FUNCTION void
pm_node_destroy(pm_parser_t *parser, pm_node_t *node);
/**
- * Deallocate the inner memory of a list of nodes. The parser argument is not
- * used, but is here for the future possibility of pre-allocating memory pools.
+ * Destroy the nodes that are contained within the given node list.
+ * Every node in the list is passed to pm_node_destroy, and then the list's own
+ * storage is released through pm_node_list_free.
*/
static void
-pm_node_list_free(pm_parser_t *parser, pm_node_list_t *list) {
- if (list->capacity > 0) {
- for (size_t index = 0; index < list->size; index++) {
- pm_node_destroy(parser, list->nodes[index]);
- }
- free(list->nodes);
- }
+pm_node_list_destroy(pm_parser_t *parser, pm_node_list_t *list) {
+ pm_node_t *node;
+ PM_NODE_LIST_FOREACH(list, index, node) pm_node_destroy(parser, node);
+ pm_node_list_free(list);
}
/**
@@ -54,26 +106,28 @@ PRISM_EXPORTED_FUNCTION void
pm_node_destroy(pm_parser_t *parser, pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
case <%= node.type %>: {
- <%- if node.fields.any? { |field| ![Prism::LocationField, Prism::OptionalLocationField, Prism::UInt8Field, Prism::UInt32Field, Prism::FlagsField, Prism::ConstantField, Prism::OptionalConstantField].include?(field.class) } -%>
+ <%- if node.fields.any? { |field| ![Prism::Template::LocationField, Prism::Template::OptionalLocationField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::DoubleField].include?(field.class) } -%>
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
<%- end -%>
<%- node.fields.each do |field| -%>
<%- case field -%>
- <%- when Prism::LocationField, Prism::OptionalLocationField, Prism::UInt8Field, Prism::UInt32Field, Prism::FlagsField, Prism::ConstantField, Prism::OptionalConstantField -%>
- <%- when Prism::NodeField -%>
+ <%- when Prism::Template::LocationField, Prism::Template::OptionalLocationField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::DoubleField -%>
+ <%- when Prism::Template::NodeField -%>
pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
- <%- when Prism::OptionalNodeField -%>
+ <%- when Prism::Template::OptionalNodeField -%>
if (cast-><%= field.name %> != NULL) {
pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
}
- <%- when Prism::StringField -%>
+ <%- when Prism::Template::StringField -%>
pm_string_free(&cast-><%= field.name %>);
- <%- when Prism::NodeListField -%>
- pm_node_list_free(parser, &cast-><%= field.name %>);
- <%- when Prism::ConstantListField -%>
+ <%- when Prism::Template::NodeListField -%>
+ pm_node_list_destroy(parser, &cast-><%= field.name %>);
+ <%- when Prism::Template::ConstantListField -%>
pm_constant_id_list_free(&cast-><%= field.name %>);
+ <%- when Prism::Template::IntegerField -%>
+ pm_integer_free(&cast-><%= field.name %>);
<%- else -%>
<%- raise -%>
<%- end -%>
@@ -81,82 +135,199 @@ pm_node_destroy(pm_parser_t *parser, pm_node_t *node) {
break;
}
<%- end -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
default:
assert(false && "unreachable");
break;
}
- free(node);
+ xfree(node);
}
-static void
-pm_node_memsize_node(pm_node_t *node, pm_memsize_t *memsize) {
- memsize->node_count++;
+/**
+ * Returns a string representation of the given node type.
+ *
+ * The switch is generated by the template with one case per entry of the nodes
+ * collection; each case returns that node's type identifier as a string
+ * literal. An empty string is returned for any value that has no case.
+ */
+PRISM_EXPORTED_FUNCTION const char *
+pm_node_type_to_str(pm_node_type_t node_type)
+{
+ switch (node_type) {
+<%- nodes.each do |node| -%>
+ case <%= node.type %>:
+ return "<%= node.type %>";
+<%- end -%>
+ }
+ return "";
+}
+
+/**
+ * Walk the subtree rooted at the given node, invoking the visitor callback on
+ * each node that is reached. The callback's return value controls descent:
+ * true means the node's children are visited next, false means they are
+ * skipped. The data pointer is never inspected here; it is handed to the
+ * callback unchanged so that consumers can carry their own state.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
+    bool descend = visitor(node, data);
+    if (!descend) return;
+
+    pm_visit_child_nodes(node, visitor, data);
+}
+/**
+ * Visit the children of the given node with the given callback. This is the
+ * default behavior for walking the tree that is called from pm_visit_node if
+ * the callback returns true.
+ *
+ * The switch is generated by the template. Only required node fields, optional
+ * node fields and node list fields are traversed, each through pm_visit_node;
+ * optional node fields that are NULL are skipped. Node types that have no such
+ * fields, as well as PM_SCOPE_NODE, get a case that does nothing.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
switch (PM_NODE_TYPE(node)) {
- // We do not calculate memsize of a ScopeNode
- // as it should never be generated
- case PM_SCOPE_NODE:
- return;
<%- nodes.each do |node| -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ <%- if (fields = node.fields.select { |field| field.is_a?(Prism::Template::NodeField) || field.is_a?(Prism::Template::OptionalNodeField) || field.is_a?(Prism::Template::NodeListField) }).any? -%>
case <%= node.type %>: {
- pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
- memsize->memsize += sizeof(*cast);
- <%- if node.fields.any? { |f| f.is_a?(Prism::NodeListField) } -%>
- // Node lists will add in their own sizes below.
- memsize->memsize -= sizeof(pm_node_list_t) * <%= node.fields.count { |f| f.is_a?(Prism::NodeListField) } %>;
- <%- end -%>
- <%- if node.fields.any? { |f| f.is_a?(Prism::ConstantListField) } -%>
- // Constant id lists will add in their own sizes below.
- memsize->memsize -= sizeof(pm_constant_id_list_t) * <%= node.fields.count { |f| f.is_a?(Prism::ConstantListField) } %>;
- <%- end -%>
- <%- node.fields.each do |field| -%>
+ const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
+ <%- fields.each do |field| -%>
+
+ // Visit the <%= field.name %> field
<%- case field -%>
- <%- when Prism::ConstantField, Prism::OptionalConstantField, Prism::UInt8Field, Prism::UInt32Field, Prism::FlagsField, Prism::LocationField, Prism::OptionalLocationField -%>
- <%- when Prism::NodeField -%>
- pm_node_memsize_node((pm_node_t *)cast-><%= field.name %>, memsize);
- <%- when Prism::OptionalNodeField -%>
+ <%- when Prism::Template::NodeField -%>
+ pm_visit_node((const pm_node_t *) cast-><%= field.name %>, visitor, data);
+ <%- when Prism::Template::OptionalNodeField -%>
if (cast-><%= field.name %> != NULL) {
- pm_node_memsize_node((pm_node_t *)cast-><%= field.name %>, memsize);
+ pm_visit_node((const pm_node_t *) cast-><%= field.name %>, visitor, data);
+ }
+ <%- when Prism::Template::NodeListField -%>
+ const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ pm_visit_node(<%= field.name %>->nodes[index], visitor, data);
}
- <%- when Prism::StringField -%>
- memsize->memsize += pm_string_memsize(&cast-><%= field.name %>);
- <%- when Prism::NodeListField -%>
- memsize->memsize += pm_node_list_memsize(&cast-><%= field.name %>, memsize);
- <%- when Prism::ConstantListField -%>
- memsize->memsize += pm_constant_id_list_memsize(&cast-><%= field.name %>);
- <%- else -%>
- <%- raise -%>
<%- end -%>
<%- end -%>
+
break;
}
+ <%- else -%>
+ case <%= node.type %>:
+ break;
+ <%- end -%>
<%- end -%>
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ case PM_SCOPE_NODE:
+ break;
}
}
-/**
- * Calculates the memory footprint of a given node.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize) {
- *memsize = (pm_memsize_t) { .memsize = 0, .node_count = 0 };
- pm_node_memsize_node(node, memsize);
+// We optionally support dumping to JSON. For systems that don't want or need
+// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
+#ifndef PRISM_EXCLUDE_JSON
+
+/**
+ * Append the constant identified by constant_id to the buffer as a JSON
+ * string: an opening quote, the constant's bytes with JSON escaping applied,
+ * and a closing quote. The constant is looked up in the parser's constant pool.
+ */
+static void
+pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
+    const pm_constant_t *entry = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
+
+    pm_buffer_append_byte(buffer, '"');
+    pm_buffer_append_source(buffer, entry->start, entry->length, PM_BUFFER_ESCAPING_JSON);
+    pm_buffer_append_byte(buffer, '"');
+}
+
+/**
+ * Append the given location to the buffer as a JSON object with "start" and
+ * "end" members. Both values are offsets measured from the parser's start
+ * pointer, converted to 32-bit unsigned integers.
+ */
+static void
+pm_dump_json_location(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_location_t *location) {
+    uint32_t start_offset = (uint32_t) (location->start - parser->start);
+    uint32_t end_offset = (uint32_t) (location->end - parser->start);
+
+    pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"end\":%" PRIu32 "}", start_offset, end_offset);
}
/**
- * Returns a string representation of the given node type.
+ * Dump JSON to the given buffer.
+ *
+ * The switch is generated by the template with one case per node type. Each
+ * node is written as a JSON object holding its "type", its "location" and one
+ * member for the node's flags (when it has any) and for each of its fields.
+ * Child nodes are dumped recursively, and optional nodes, constants and
+ * locations that are absent are written as null. Nothing is written for
+ * PM_SCOPE_NODE.
+ *
+ * The explicit lengths passed with the literal strings count the bytes of the
+ * surrounding literal text: 22 bytes around the node name in the type/location
+ * prefix, 3 bytes around a field name, and 2 bytes around a flag value name.
*/
-PRISM_EXPORTED_FUNCTION const char *
-pm_node_type_to_str(pm_node_type_t node_type)
-{
- switch (node_type) {
-<%- nodes.each do |node| -%>
- case <%= node.type %>:
- return "<%= node.type %>";
-<%- end -%>
+PRISM_EXPORTED_FUNCTION void
+pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ <%- nodes.each do |node| -%>
+ case <%= node.type %>: {
+ pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
+
+ const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
+ pm_dump_json_location(buffer, parser, &cast->base.location);
+ <%- [*node.flags, *node.fields].each_with_index do |field, index| -%>
+
+ // Dump the <%= field.name %> field
+ pm_buffer_append_byte(buffer, ',');
+ pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (cast-><%= field.name %> != NULL) {
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::NodeListField -%>
+ const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '[');
+
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ if (index != 0) pm_buffer_append_byte(buffer, ',');
+ pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
+ }
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::StringField -%>
+ const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
+ pm_buffer_append_byte(buffer, '"');
+ <%- when Prism::Template::ConstantField -%>
+ pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalConstantField -%>
+ if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
+ pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::ConstantListField -%>
+ const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '[');
+
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ if (index != 0) pm_buffer_append_byte(buffer, ',');
+ pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
+ }
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::LocationField -%>
+ pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalLocationField -%>
+ if (cast-><%= field.name %>.start != NULL) {
+ pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::UInt8Field -%>
+ pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
+ <%- when Prism::Template::UInt32Field -%>
+ pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
+ <%- when Prism::Template::Flags -%>
+ size_t flags = 0;
+ pm_buffer_append_byte(buffer, '[');
+ <%- node.flags.values.each_with_index do |value, index| -%>
+ if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) {
+ if (flags != 0) pm_buffer_append_byte(buffer, ',');
+ pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
+ flags++;
+ }
+ <%- end -%>
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::IntegerField -%>
+ pm_integer_string(buffer, &cast-><%= field.name %>);
+ <%- when Prism::Template::DoubleField -%>
+ pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>);
+ <%- else -%>
+ <%- raise %>
+ <%- end -%>
+ <%- end -%>
+
+ pm_buffer_append_byte(buffer, '}');
+ break;
+ }
+ <%- end -%>
+ case PM_SCOPE_NODE:
+ break;
}
- return "";
}
+
+#endif
diff --git a/prism/templates/src/prettyprint.c.erb b/prism/templates/src/prettyprint.c.erb
index 61831ce59b..639c2fecf3 100644
--- a/prism/templates/src/prettyprint.c.erb
+++ b/prism/templates/src/prettyprint.c.erb
@@ -1,46 +1,20 @@
<%# encoding: ASCII -%>
#include "prism/prettyprint.h"
-static void
-prettyprint_source(pm_buffer_t *output_buffer, const uint8_t *source, size_t length) {
- for (size_t index = 0; index < length; index++) {
- const uint8_t byte = source[index];
+// We optionally support pretty printing nodes. For systems that don't want or
+// need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_PRETTYPRINT define.
+#ifdef PRISM_EXCLUDE_PRETTYPRINT
- if ((byte <= 0x06) || (byte >= 0x0E && byte <= 0x1F) || (byte >= 0x7F)) {
- pm_buffer_append_format(output_buffer, "\\x%02X", byte);
- } else {
- switch (byte) {
- case '\a': pm_buffer_append_string(output_buffer, "\\a", 2); break;
- case '\b': pm_buffer_append_string(output_buffer, "\\b", 2); break;
- case '\t': pm_buffer_append_string(output_buffer, "\\t", 2); break;
- case '\n': pm_buffer_append_string(output_buffer, "\\n", 2); break;
- case '\v': pm_buffer_append_string(output_buffer, "\\v", 2); break;
- case '\f': pm_buffer_append_string(output_buffer, "\\f", 2); break;
- case '\r': pm_buffer_append_string(output_buffer, "\\r", 2); break;
- case '"': pm_buffer_append_string(output_buffer, "\\\"", 2); break;
- case '#': {
- if (index + 1 < length) {
- const uint8_t next_byte = source[index + 1];
- if (next_byte == '{' || next_byte == '@' || next_byte == '$') {
- pm_buffer_append_byte(output_buffer, '\\');
- }
- }
+void pm_prettyprint(void) {} // no-op stand-in, compiled only when PRISM_EXCLUDE_PRETTYPRINT is defined
- pm_buffer_append_byte(output_buffer, '#');
- break;
- }
- case '\\': pm_buffer_append_string(output_buffer, "\\\\", 2); break;
- default: pm_buffer_append_byte(output_buffer, byte); break;
- }
- }
- }
-}
+#else
static inline void
prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) {
- pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start);
- pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->end);
- pm_buffer_append_format(output_buffer, "(%lu,%lu)-(%lu,%lu)", (unsigned long) (start.line + 1), (unsigned long) start.column, (unsigned long) (end.line + 1), (unsigned long) end.column);
+ pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line); // line/column of the location's first byte
+ pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->end, parser->start_line); // line/column of the location's end
+ pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", start.line, start.column, end.line, end.column); // written as (line,column)-(line,column)
}
static inline void
@@ -57,115 +31,116 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
return;
<%- nodes.each do |node| -%>
case <%= node.type %>: {
- <%- if node.fields.any? -%>
+ <%- if !node.flags.nil? || node.fields.any? -%>
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
<%- end -%>
pm_buffer_append_string(output_buffer, "@ <%= node.name %> (location: ", <%= node.name.length + 14 %>);
prettyprint_location(output_buffer, parser, &node->location);
pm_buffer_append_string(output_buffer, ")\n", 2);
- <%- node.fields.each_with_index do |field, index| -%>
- <%- pointer, preadd, preadd_bytesize = index == node.fields.length - 1 ? ["\\xe2\\x94\\x94\\xe2\\x94\\x80\\xe2\\x94\\x80 ", " ", 4] : ["\\xe2\\x94\\x9c\\xe2\\x94\\x80\\xe2\\x94\\x80 ", "\\xe2\\x94\\x82 ", 6] -%>
+ <%- (fields = [*node.flags, *node.fields]).each_with_index do |field, index| -%>
+ <%- preadd = index == fields.length - 1 ? " " : "| " -%>
// <%= field.name %>
{
pm_buffer_concat(output_buffer, prefix_buffer);
- pm_buffer_append_string(output_buffer, "<%= pointer %><%= field.name %>:", <%= 10 + field.name.length + 1 %>);
+ pm_buffer_append_string(output_buffer, "+-- <%= field.name %>:", <%= 4 + field.name.length + 1 %>);
<%- case field -%>
- <%- when Prism::NodeField -%>
+ <%- when Prism::Template::NodeField -%>
pm_buffer_append_byte(output_buffer, '\n');
size_t prefix_length = prefix_buffer->length;
- pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd_bytesize %>);
+ pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4);
pm_buffer_concat(output_buffer, prefix_buffer);
prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>, prefix_buffer);
prefix_buffer->length = prefix_length;
- <%- when Prism::OptionalNodeField -%>
+ <%- when Prism::Template::OptionalNodeField -%>
if (cast-><%= field.name %> == NULL) {
- pm_buffer_append_string(output_buffer, " \xe2\x88\x85\n", 5);
+ pm_buffer_append_string(output_buffer, " nil\n", 5);
} else {
pm_buffer_append_byte(output_buffer, '\n');
size_t prefix_length = prefix_buffer->length;
- pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd_bytesize %>);
+ pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4);
pm_buffer_concat(output_buffer, prefix_buffer);
prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>, prefix_buffer);
prefix_buffer->length = prefix_length;
}
- <%- when Prism::StringField -%>
+ <%- when Prism::Template::StringField -%>
pm_buffer_append_string(output_buffer, " \"", 2);
- prettyprint_source(output_buffer, pm_string_source(&cast-><%= field.name %>), pm_string_length(&cast-><%= field.name %>));
+ pm_buffer_append_source(output_buffer, pm_string_source(&cast-><%= field.name %>), pm_string_length(&cast-><%= field.name %>), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
- <%- when Prism::NodeListField -%>
+ <%- when Prism::Template::NodeListField -%>
pm_buffer_append_format(output_buffer, " (length: %lu)\n", (unsigned long) (cast-><%= field.name %>.size));
size_t last_index = cast-><%= field.name %>.size;
for (uint32_t index = 0; index < last_index; index++) {
size_t prefix_length = prefix_buffer->length;
- pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd_bytesize %>);
+ pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4);
pm_buffer_concat(output_buffer, prefix_buffer);
-
- if (index == last_index - 1) {
- pm_buffer_append_string(output_buffer, "\xe2\x94\x94\xe2\x94\x80\xe2\x94\x80 ", 10);
- pm_buffer_append_string(prefix_buffer, " ", 4);
- } else {
- pm_buffer_append_string(output_buffer, "\xe2\x94\x9c\xe2\x94\x80\xe2\x94\x80 ", 10);
- pm_buffer_append_string(prefix_buffer, "\xe2\x94\x82 ", 6);
- }
-
+ pm_buffer_append_string(output_buffer, "+-- ", 4);
+ pm_buffer_append_string(prefix_buffer, (index == last_index - 1) ? " " : "| ", 4);
prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>.nodes[index], prefix_buffer);
prefix_buffer->length = prefix_length;
}
- <%- when Prism::ConstantField -%>
+ <%- when Prism::Template::ConstantField -%>
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_constant(output_buffer, parser, cast-><%= field.name %>);
pm_buffer_append_byte(output_buffer, '\n');
- <%- when Prism::OptionalConstantField -%>
+ <%- when Prism::Template::OptionalConstantField -%>
if (cast-><%= field.name %> == 0) {
- pm_buffer_append_string(output_buffer, " \xe2\x88\x85\n", 5);
+ pm_buffer_append_string(output_buffer, " nil\n", 5);
} else {
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_constant(output_buffer, parser, cast-><%= field.name %>);
pm_buffer_append_byte(output_buffer, '\n');
}
- <%- when Prism::ConstantListField -%>
+ <%- when Prism::Template::ConstantListField -%>
pm_buffer_append_string(output_buffer, " [", 2);
for (uint32_t index = 0; index < cast-><%= field.name %>.size; index++) {
if (index != 0) pm_buffer_append_string(output_buffer, ", ", 2);
prettyprint_constant(output_buffer, parser, cast-><%= field.name %>.ids[index]);
}
pm_buffer_append_string(output_buffer, "]\n", 2);
- <%- when Prism::LocationField -%>
+ <%- when Prism::Template::LocationField -%>
pm_location_t *location = &cast-><%= field.name %>;
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_location(output_buffer, parser, location);
pm_buffer_append_string(output_buffer, " = \"", 4);
- prettyprint_source(output_buffer, location->start, (size_t) (location->end - location->start));
+ pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
- <%- when Prism::OptionalLocationField -%>
+ <%- when Prism::Template::OptionalLocationField -%>
pm_location_t *location = &cast-><%= field.name %>;
if (location->start == NULL) {
- pm_buffer_append_string(output_buffer, " \xe2\x88\x85\n", 5);
+ pm_buffer_append_string(output_buffer, " nil\n", 5);
} else {
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_location(output_buffer, parser, location);
pm_buffer_append_string(output_buffer, " = \"", 4);
- prettyprint_source(output_buffer, location->start, (size_t) (location->end - location->start));
+ pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
}
- <%- when Prism::UInt8Field, Prism::UInt32Field -%>
- pm_buffer_append_format(output_buffer, " %d\n", cast-><%= field.name %>);
- <%- when Prism::FlagsField -%>
+ <%- when Prism::Template::UInt8Field -%>
+ pm_buffer_append_format(output_buffer, " %" PRIu8 "\n", cast-><%= field.name %>);
+ <%- when Prism::Template::UInt32Field -%>
+ pm_buffer_append_format(output_buffer, " %" PRIu32 "\n", cast-><%= field.name %>);
+ <%- when Prism::Template::Flags -%>
bool found = false;
- <%- found = flags.find { |flag| flag.name == field.kind }.tap { |found| raise "Expected to find #{field.kind}" unless found } -%>
- <%- found.values.each do |value| -%>
- if (cast->base.<%= field.name %> & PM_<%= found.human.upcase %>_<%= value.name %>) {
+ <%- field.values.each do |value| -%>
+ if (cast->base.flags & PM_<%= field.human.upcase %>_<%= value.name %>) {
if (found) pm_buffer_append_byte(output_buffer, ',');
pm_buffer_append_string(output_buffer, " <%= value.name.downcase %>", <%= value.name.bytesize + 1 %>);
found = true;
}
<%- end -%>
- if (!found) pm_buffer_append_string(output_buffer, " \xe2\x88\x85", 4);
+ if (!found) pm_buffer_append_string(output_buffer, " nil", 4);
pm_buffer_append_byte(output_buffer, '\n');
+ <%- when Prism::Template::IntegerField -%>
+ const pm_integer_t *integer = &cast-><%= field.name %>;
+ pm_buffer_append_byte(output_buffer, ' ');
+ pm_integer_string(output_buffer, integer);
+ pm_buffer_append_byte(output_buffer, '\n');
+ <%- when Prism::Template::DoubleField -%>
+ pm_buffer_append_format(output_buffer, " %f\n", cast-><%= field.name %>);
<%- else -%>
<%- raise -%>
<%- end -%>
@@ -187,3 +162,5 @@ pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_n
prettyprint_node(output_buffer, parser, node, &prefix_buffer);
pm_buffer_free(&prefix_buffer);
}
+
+#endif
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index e9cdd1e82c..0f0aace445 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -1,5 +1,10 @@
#include "prism.h"
+// We optionally support serializing to a binary string. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_SERIALIZATION define.
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
#include <stdio.h>
static inline uint32_t
@@ -25,7 +30,7 @@ pm_serialize_location(const pm_parser_t *parser, const pm_location_t *location,
}
static void
-pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffer) {
+pm_serialize_string(const pm_parser_t *parser, const pm_string_t *string, pm_buffer_t *buffer) {
switch (string->type) {
case PM_STRING_SHARED: {
pm_buffer_append_byte(buffer, 1);
@@ -41,9 +46,25 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe
pm_buffer_append_bytes(buffer, pm_string_source(string), length);
break;
}
+#ifdef PRISM_HAS_MMAP
case PM_STRING_MAPPED:
assert(false && "Cannot serialize mapped strings.");
break;
+#endif
+ }
+}
+
+static void
+pm_serialize_integer(const pm_integer_t *integer, pm_buffer_t *buffer) {
+ pm_buffer_append_byte(buffer, integer->negative ? 1 : 0);
+ if (integer->values == NULL) {
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(1));
+ pm_buffer_append_varuint(buffer, integer->value);
+ } else {
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(integer->length));
+ for (size_t i = 0; i < integer->length; i++) {
+ pm_buffer_append_varuint(buffer, integer->values[i]);
+ }
}
}
@@ -53,6 +74,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
size_t offset = buffer->length;
+ <%- if Prism::Template::INCLUDE_NODE_ID -%>
+ pm_buffer_append_varuint(buffer, node->node_id);
+ <%- end -%>
pm_serialize_location(parser, &node->location, buffer);
switch (PM_NODE_TYPE(node)) {
@@ -68,37 +92,40 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
size_t length_offset = buffer->length;
pm_buffer_append_string(buffer, "\0\0\0\0", 4); /* consume 4 bytes, updated below */
<%- end -%>
+ <%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS && !node.flags -%>
+ pm_buffer_append_varuint(buffer, (uint32_t) node->flags);
+ <%- end -%>
<%- node.fields.each do |field| -%>
<%- case field -%>
- <%- when Prism::NodeField -%>
+ <%- when Prism::Template::NodeField -%>
pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
- <%- when Prism::OptionalNodeField -%>
+ <%- when Prism::Template::OptionalNodeField -%>
if (((pm_<%= node.human %>_t *)node)-><%= field.name %> == NULL) {
pm_buffer_append_byte(buffer, 0);
} else {
pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
}
- <%- when Prism::StringField -%>
+ <%- when Prism::Template::StringField -%>
pm_serialize_string(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
- <%- when Prism::NodeListField -%>
+ <%- when Prism::Template::NodeListField -%>
uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
pm_buffer_append_varuint(buffer, <%= field.name %>_size);
for (uint32_t index = 0; index < <%= field.name %>_size; index++) {
pm_serialize_node(parser, (pm_node_t *) ((pm_<%= node.human %>_t *)node)-><%= field.name %>.nodes[index], buffer);
}
- <%- when Prism::ConstantField, Prism::OptionalConstantField -%>
+ <%- when Prism::Template::ConstantField, Prism::Template::OptionalConstantField -%>
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>));
- <%- when Prism::ConstantListField -%>
+ <%- when Prism::Template::ConstantListField -%>
uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
pm_buffer_append_varuint(buffer, <%= field.name %>_size);
for (uint32_t index = 0; index < <%= field.name %>_size; index++) {
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]));
}
- <%- when Prism::LocationField -%>
+ <%- when Prism::Template::LocationField -%>
<%- if field.should_be_serialized? -%>
pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- end -%>
- <%- when Prism::OptionalLocationField -%>
+ <%- when Prism::Template::OptionalLocationField -%>
<%- if field.should_be_serialized? -%>
if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
pm_buffer_append_byte(buffer, 0);
@@ -107,12 +134,14 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
}
<%- end -%>
- <%- when Prism::UInt8Field -%>
+ <%- when Prism::Template::UInt8Field -%>
pm_buffer_append_byte(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
- <%- when Prism::UInt32Field -%>
+ <%- when Prism::Template::UInt32Field -%>
pm_buffer_append_varuint(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
- <%- when Prism::FlagsField -%>
- pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
+ <%- when Prism::Template::IntegerField -%>
+ pm_serialize_integer(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ <%- when Prism::Template::DoubleField -%>
+ pm_buffer_append_double(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
<%- else -%>
<%- raise -%>
<%- end -%>
@@ -129,6 +158,17 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
}
static void
+pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
+ uint32_t size = pm_sizet_to_u32(list->size);
+ pm_buffer_append_varuint(buffer, size);
+
+ for (uint32_t i = 0; i < size; i++) {
+ uint32_t offset = pm_sizet_to_u32(list->offsets[i]);
+ pm_buffer_append_varuint(buffer, offset);
+ }
+}
+
+static void
pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
// serialize type
pm_buffer_append_byte(buffer, (uint8_t) comment->type);
@@ -183,6 +223,9 @@ pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) {
static void
pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
+ // serialize the type
+ pm_buffer_append_varuint(buffer, (uint32_t) diagnostic->diag_id);
+
// serialize message
size_t message_length = strlen(diagnostic->message);
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(message_length));
@@ -190,6 +233,8 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
// serialize location
pm_serialize_location(parser, &diagnostic->location, buffer);
+
+ pm_buffer_append_byte(buffer, diagnostic->level);
}
static void
@@ -212,21 +257,27 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
pm_buffer_append_string(buffer, encoding->name, encoding_length);
}
-#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
-/**
- * Serialize the encoding, metadata, nodes, and constant pool.
- */
-void
-pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
+static void
+pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
pm_serialize_encoding(parser->encoding, buffer);
pm_buffer_append_varsint(buffer, parser->start_line);
-<%- unless Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
+ pm_serialize_newline_list(&parser->newline_list, buffer);
+<%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
<%- end -%>
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
pm_serialize_data_loc(parser, buffer);
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
+}
+
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
+/**
+ * Serialize the metadata, nodes, and constant pool.
+ */
+void
+pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
+ pm_serialize_metadata(parser, buffer);
// Here we're going to leave space for the offset of the constant pool in
// the buffer.
@@ -264,7 +315,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
// buffer offset. We will add a leading 1 to indicate that this
// is a buffer offset.
uint32_t content_offset = pm_sizet_to_u32(buffer->length);
- uint32_t owned_mask = (uint32_t) (1 << 31);
+ uint32_t owned_mask = 1U << 31;
assert(content_offset < owned_mask);
content_offset |= owned_mask;
@@ -317,13 +368,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
// Append 0 to mark end of tokens.
pm_buffer_append_byte(buffer, 0);
- pm_serialize_encoding(parser.encoding, buffer);
- pm_buffer_append_varsint(buffer, parser.start_line);
- pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
- pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
- pm_serialize_data_loc(&parser, buffer);
- pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
- pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
+ pm_serialize_metadata(&parser, buffer);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@@ -358,23 +403,4 @@ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size,
pm_options_free(&options);
}
-/**
- * Parse the source and return true if it parses without errors or warnings.
- */
-PRISM_EXPORTED_FUNCTION bool
-pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
- pm_options_t options = { 0 };
- pm_options_read(&options, data);
-
- pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
-
- pm_node_t *node = pm_parse(&parser);
- pm_node_destroy(&parser, node);
-
- bool result = parser.error_list.size == 0 && parser.warning_list.size == 0;
- pm_parser_free(&parser);
- pm_options_free(&options);
-
- return result;
-}
+#endif
diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb
index d3c1c3f1b8..f196393ee1 100644
--- a/prism/templates/src/token_type.c.erb
+++ b/prism/templates/src/token_type.c.erb
@@ -6,15 +6,364 @@
* Returns a string representation of the given token type.
*/
PRISM_EXPORTED_FUNCTION const char *
-pm_token_type_to_str(pm_token_type_t token_type)
-{
+pm_token_type_name(pm_token_type_t token_type) {
switch (token_type) {
<%- tokens.each do |token| -%>
case PM_TOKEN_<%= token.name %>:
return "<%= token.name %>";
<%- end -%>
case PM_TOKEN_MAXIMUM:
- return "MAXIMUM";
+ assert(false && "unreachable");
+ return "";
}
- return "\0";
+
+ // Provide a default, because some compilers can't determine that the above
+ // switch is exhaustive.
+ assert(false && "unreachable");
+ return "";
+}
+
+/**
+ * Returns the human name of the given token type.
+ */
+const char *
+pm_token_type_human(pm_token_type_t token_type) {
+ switch (token_type) {
+ case PM_TOKEN_EOF:
+ return "end-of-input";
+ case PM_TOKEN_MISSING:
+ return "missing token";
+ case PM_TOKEN_NOT_PROVIDED:
+ return "not provided token";
+ case PM_TOKEN_AMPERSAND:
+ return "'&'";
+ case PM_TOKEN_AMPERSAND_AMPERSAND:
+ return "'&&'";
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
+ return "'&&='";
+ case PM_TOKEN_AMPERSAND_DOT:
+ return "'&.'";
+ case PM_TOKEN_AMPERSAND_EQUAL:
+ return "'&='";
+ case PM_TOKEN_BACKTICK:
+ return "'`'";
+ case PM_TOKEN_BACK_REFERENCE:
+ return "back reference";
+ case PM_TOKEN_BANG:
+ return "'!'";
+ case PM_TOKEN_BANG_EQUAL:
+ return "'!='";
+ case PM_TOKEN_BANG_TILDE:
+ return "'!~'";
+ case PM_TOKEN_BRACE_LEFT:
+ return "'{'";
+ case PM_TOKEN_BRACE_RIGHT:
+ return "'}'";
+ case PM_TOKEN_BRACKET_LEFT:
+ return "'['";
+ case PM_TOKEN_BRACKET_LEFT_ARRAY:
+ return "'['";
+ case PM_TOKEN_BRACKET_LEFT_RIGHT:
+ return "'[]'";
+ case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
+ return "'[]='";
+ case PM_TOKEN_BRACKET_RIGHT:
+ return "']'";
+ case PM_TOKEN_CARET:
+ return "'^'";
+ case PM_TOKEN_CARET_EQUAL:
+ return "'^='";
+ case PM_TOKEN_CHARACTER_LITERAL:
+ return "character literal";
+ case PM_TOKEN_CLASS_VARIABLE:
+ return "class variable";
+ case PM_TOKEN_COLON:
+ return "':'";
+ case PM_TOKEN_COLON_COLON:
+ return "'::'";
+ case PM_TOKEN_COMMA:
+ return "','";
+ case PM_TOKEN_COMMENT:
+ return "comment";
+ case PM_TOKEN_CONSTANT:
+ return "constant";
+ case PM_TOKEN_DOT:
+ return "'.'";
+ case PM_TOKEN_DOT_DOT:
+ return "..";
+ case PM_TOKEN_DOT_DOT_DOT:
+ return "...";
+ case PM_TOKEN_EMBDOC_BEGIN:
+ return "'=begin'";
+ case PM_TOKEN_EMBDOC_END:
+ return "'=end'";
+ case PM_TOKEN_EMBDOC_LINE:
+ return "embedded documentation line";
+ case PM_TOKEN_EMBEXPR_BEGIN:
+ return "'#{'";
+ case PM_TOKEN_EMBEXPR_END:
+ return "'}'";
+ case PM_TOKEN_EMBVAR:
+ return "'#'";
+ case PM_TOKEN_EQUAL:
+ return "'='";
+ case PM_TOKEN_EQUAL_EQUAL:
+ return "'=='";
+ case PM_TOKEN_EQUAL_EQUAL_EQUAL:
+ return "'==='";
+ case PM_TOKEN_EQUAL_GREATER:
+ return "'=>'";
+ case PM_TOKEN_EQUAL_TILDE:
+ return "'=~'";
+ case PM_TOKEN_FLOAT:
+ return "float";
+ case PM_TOKEN_FLOAT_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_FLOAT_RATIONAL:
+ return "rational";
+ case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_GLOBAL_VARIABLE:
+ return "global variable";
+ case PM_TOKEN_GREATER:
+ return "'>'";
+ case PM_TOKEN_GREATER_EQUAL:
+ return "'>='";
+ case PM_TOKEN_GREATER_GREATER:
+ return ">>";
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
+ return ">>=";
+ case PM_TOKEN_HEREDOC_END:
+ return "heredoc ending";
+ case PM_TOKEN_HEREDOC_START:
+ return "heredoc beginning";
+ case PM_TOKEN_IDENTIFIER:
+ return "local variable or method";
+ case PM_TOKEN_IGNORED_NEWLINE:
+ return "ignored newline";
+ case PM_TOKEN_INSTANCE_VARIABLE:
+ return "instance variable";
+ case PM_TOKEN_INTEGER:
+ return "integer";
+ case PM_TOKEN_INTEGER_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_INTEGER_RATIONAL:
+ return "rational";
+ case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_KEYWORD_ALIAS:
+ return "'alias'";
+ case PM_TOKEN_KEYWORD_AND:
+ return "'and'";
+ case PM_TOKEN_KEYWORD_BEGIN:
+ return "'begin'";
+ case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
+ return "'BEGIN'";
+ case PM_TOKEN_KEYWORD_BREAK:
+ return "'break'";
+ case PM_TOKEN_KEYWORD_CASE:
+ return "'case'";
+ case PM_TOKEN_KEYWORD_CLASS:
+ return "'class'";
+ case PM_TOKEN_KEYWORD_DEF:
+ return "'def'";
+ case PM_TOKEN_KEYWORD_DEFINED:
+ return "'defined?'";
+ case PM_TOKEN_KEYWORD_DO:
+ return "'do'";
+ case PM_TOKEN_KEYWORD_DO_LOOP:
+ return "'do'";
+ case PM_TOKEN_KEYWORD_ELSE:
+ return "'else'";
+ case PM_TOKEN_KEYWORD_ELSIF:
+ return "'elsif'";
+ case PM_TOKEN_KEYWORD_END:
+ return "'end'";
+ case PM_TOKEN_KEYWORD_END_UPCASE:
+ return "'END'";
+ case PM_TOKEN_KEYWORD_ENSURE:
+ return "'ensure'";
+ case PM_TOKEN_KEYWORD_FALSE:
+ return "'false'";
+ case PM_TOKEN_KEYWORD_FOR:
+ return "'for'";
+ case PM_TOKEN_KEYWORD_IF:
+ return "'if'";
+ case PM_TOKEN_KEYWORD_IF_MODIFIER:
+ return "'if'";
+ case PM_TOKEN_KEYWORD_IN:
+ return "'in'";
+ case PM_TOKEN_KEYWORD_MODULE:
+ return "'module'";
+ case PM_TOKEN_KEYWORD_NEXT:
+ return "'next'";
+ case PM_TOKEN_KEYWORD_NIL:
+ return "'nil'";
+ case PM_TOKEN_KEYWORD_NOT:
+ return "'not'";
+ case PM_TOKEN_KEYWORD_OR:
+ return "'or'";
+ case PM_TOKEN_KEYWORD_REDO:
+ return "'redo'";
+ case PM_TOKEN_KEYWORD_RESCUE:
+ return "'rescue'";
+ case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
+ return "'rescue' modifier";
+ case PM_TOKEN_KEYWORD_RETRY:
+ return "'retry'";
+ case PM_TOKEN_KEYWORD_RETURN:
+ return "'return'";
+ case PM_TOKEN_KEYWORD_SELF:
+ return "'self'";
+ case PM_TOKEN_KEYWORD_SUPER:
+ return "'super'";
+ case PM_TOKEN_KEYWORD_THEN:
+ return "'then'";
+ case PM_TOKEN_KEYWORD_TRUE:
+ return "'true'";
+ case PM_TOKEN_KEYWORD_UNDEF:
+ return "'undef'";
+ case PM_TOKEN_KEYWORD_UNLESS:
+ return "'unless'";
+ case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
+ return "'unless'";
+ case PM_TOKEN_KEYWORD_UNTIL:
+ return "'until'";
+ case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
+ return "'until'";
+ case PM_TOKEN_KEYWORD_WHEN:
+ return "'when'";
+ case PM_TOKEN_KEYWORD_WHILE:
+ return "'while'";
+ case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
+ return "'while'";
+ case PM_TOKEN_KEYWORD_YIELD:
+ return "'yield'";
+ case PM_TOKEN_KEYWORD___ENCODING__:
+ return "'__ENCODING__'";
+ case PM_TOKEN_KEYWORD___FILE__:
+ return "'__FILE__'";
+ case PM_TOKEN_KEYWORD___LINE__:
+ return "'__LINE__'";
+ case PM_TOKEN_LABEL:
+ return "label";
+ case PM_TOKEN_LABEL_END:
+ return "label terminator";
+ case PM_TOKEN_LAMBDA_BEGIN:
+ return "'{'";
+ case PM_TOKEN_LESS:
+ return "'<'";
+ case PM_TOKEN_LESS_EQUAL:
+ return "'<='";
+ case PM_TOKEN_LESS_EQUAL_GREATER:
+ return "'<=>'";
+ case PM_TOKEN_LESS_LESS:
+ return "<<";
+ case PM_TOKEN_LESS_LESS_EQUAL:
+ return "<<=";
+ case PM_TOKEN_METHOD_NAME:
+ return "method name";
+ case PM_TOKEN_MINUS:
+ return "'-'";
+ case PM_TOKEN_MINUS_EQUAL:
+ return "'-='";
+ case PM_TOKEN_MINUS_GREATER:
+ return "'->'";
+ case PM_TOKEN_NEWLINE:
+ return "newline";
+ case PM_TOKEN_NUMBERED_REFERENCE:
+ return "numbered reference";
+ case PM_TOKEN_PARENTHESIS_LEFT:
+ return "'('";
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+ return "'('";
+ case PM_TOKEN_PARENTHESIS_RIGHT:
+ return "')'";
+ case PM_TOKEN_PERCENT:
+ return "'%'";
+ case PM_TOKEN_PERCENT_EQUAL:
+ return "'%='";
+ case PM_TOKEN_PERCENT_LOWER_I:
+ return "'%i'";
+ case PM_TOKEN_PERCENT_LOWER_W:
+ return "'%w'";
+ case PM_TOKEN_PERCENT_LOWER_X:
+ return "'%x'";
+ case PM_TOKEN_PERCENT_UPPER_I:
+ return "'%I'";
+ case PM_TOKEN_PERCENT_UPPER_W:
+ return "'%W'";
+ case PM_TOKEN_PIPE:
+ return "'|'";
+ case PM_TOKEN_PIPE_EQUAL:
+ return "'|='";
+ case PM_TOKEN_PIPE_PIPE:
+ return "'||'";
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
+ return "'||='";
+ case PM_TOKEN_PLUS:
+ return "'+'";
+ case PM_TOKEN_PLUS_EQUAL:
+ return "'+='";
+ case PM_TOKEN_QUESTION_MARK:
+ return "'?'";
+ case PM_TOKEN_REGEXP_BEGIN:
+ return "regular expression beginning";
+ case PM_TOKEN_REGEXP_END:
+ return "regular expression ending";
+ case PM_TOKEN_SEMICOLON:
+ return "';'";
+ case PM_TOKEN_SLASH:
+ return "'/'";
+ case PM_TOKEN_SLASH_EQUAL:
+ return "'/='";
+ case PM_TOKEN_STAR:
+ return "'*'";
+ case PM_TOKEN_STAR_EQUAL:
+ return "'*='";
+ case PM_TOKEN_STAR_STAR:
+ return "'**'";
+ case PM_TOKEN_STAR_STAR_EQUAL:
+ return "'**='";
+ case PM_TOKEN_STRING_BEGIN:
+ return "string literal";
+ case PM_TOKEN_STRING_CONTENT:
+ return "string content";
+ case PM_TOKEN_STRING_END:
+ return "string ending";
+ case PM_TOKEN_SYMBOL_BEGIN:
+ return "symbol literal";
+ case PM_TOKEN_TILDE:
+ return "'~'";
+ case PM_TOKEN_UAMPERSAND:
+ return "'&'";
+ case PM_TOKEN_UCOLON_COLON:
+ return "'::'";
+ case PM_TOKEN_UDOT_DOT:
+ return "'..'";
+ case PM_TOKEN_UDOT_DOT_DOT:
+ return "'...'";
+ case PM_TOKEN_UMINUS:
+ return "'-'";
+ case PM_TOKEN_UMINUS_NUM:
+ return "'-'";
+ case PM_TOKEN_UPLUS:
+ return "'+'";
+ case PM_TOKEN_USTAR:
+ return "*";
+ case PM_TOKEN_USTAR_STAR:
+ return "**";
+ case PM_TOKEN_WORDS_SEP:
+ return "string separator";
+ case PM_TOKEN___END__:
+ return "'__END__'";
+ case PM_TOKEN_MAXIMUM:
+ assert(false && "unreachable");
+ return "";
+ }
+
+ // Provide a default, because some compilers can't determine that the above
+ // switch is exhaustive.
+ assert(false && "unreachable");
+ return "";
}
diff --git a/prism/templates/template.rb b/prism/templates/template.rb
index 4576191701..6c3efd7e6c 100755
--- a/prism/templates/template.rb
+++ b/prism/templates/template.rb
@@ -1,487 +1,689 @@
#!/usr/bin/env ruby
+# typed: ignore
require "erb"
require "fileutils"
require "yaml"
module Prism
- SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
+ module Template
+ SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
+ REMOVE_ON_ERROR_TYPES = SERIALIZE_ONLY_SEMANTICS_FIELDS
+ CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false)
- JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby"
- JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String"
+ JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby"
+ JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String"
+ INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby"
- # This represents a field on a node. It contains all of the necessary
- # information to template out the code for that field.
- class Field
- attr_reader :name, :comment, :options
+ COMMON_FLAGS_COUNT = 2
- def initialize(name:, comment: nil, **options)
- @name = name
- @comment = comment
- @options = options
- end
-
- def each_comment_line
- comment.each_line { |line| yield line.prepend(" ").rstrip } if comment
- end
+ class Error
+ attr_reader :name
- def semantic_field?
- true
+ def initialize(name)
+ @name = name
+ end
end
- def should_be_serialized?
- SERIALIZE_ONLY_SEMANTICS_FIELDS ? semantic_field? : true
- end
- end
+ class Warning
+ attr_reader :name
- # Some node fields can be specialized if they point to a specific kind of
- # node and not just a generic node.
- class NodeKindField < Field
- def c_type
- if options[:kind]
- "pm_#{options[:kind].gsub(/(?<=.)[A-Z]/, "_\\0").downcase}"
- else
- "pm_node"
+ def initialize(name)
+ @name = name
end
end
- def ruby_type
- options[:kind] || "Node"
- end
+ # This module contains methods for escaping characters in JavaDoc comments.
+ module JavaDoc
+ ESCAPES = {
+ "'" => "&#39;",
+ "\"" => "&quot;",
+ "@" => "&#64;",
+ "&" => "&amp;",
+ "<" => "&lt;",
+ ">" => "&gt;"
+ }.freeze
- def java_type
- options[:kind] || "Node"
+ def self.escape(value)
+ value.gsub(/['&"<>@]/, ESCAPES)
+ end
end
- def java_cast
- if options[:kind]
- "(Nodes.#{options[:kind]}) "
- else
- ""
+ # A comment attached to a field or node.
+ class ConfigComment
+ attr_reader :value
+
+ def initialize(value)
+ @value = value
end
- end
- end
- # This represents a field on a node that is itself a node. We pass them as
- # references and store them as references.
- class NodeField < NodeKindField
- def rbs_class
- ruby_type
- end
+ def each_line(&block)
+ value.each_line { |line| yield line.prepend(" ").rstrip }
+ end
- def rbi_class
- "Prism::#{ruby_type}"
+ def each_java_line(&block)
+ ConfigComment.new(JavaDoc.escape(value)).each_line(&block)
+ end
end
- end
- # This represents a field on a node that is itself a node and can be
- # optionally null. We pass them as references and store them as references.
- class OptionalNodeField < NodeKindField
- def rbs_class
- "#{ruby_type}?"
- end
+ # This represents a field on a node. It contains all of the necessary
+ # information to template out the code for that field.
+ class Field
+ attr_reader :name, :comment, :options
- def rbi_class
- "T.nilable(Prism::#{ruby_type})"
- end
- end
+ def initialize(name:, comment: nil, **options)
+ @name = name
+ @comment = comment
+ @options = options
+ end
- # This represents a field on a node that is a list of nodes. We pass them as
- # references and store them directly on the struct.
- class NodeListField < Field
- def rbs_class
- "Array[Node]"
- end
+ def each_comment_line(&block)
+ ConfigComment.new(comment).each_line(&block) if comment
+ end
- def rbi_class
- "T::Array[Prism::Node]"
- end
+ def each_comment_java_line(&block)
+ ConfigComment.new(comment).each_java_line(&block) if comment
+ end
- def java_type
- "Node[]"
- end
- end
+ def semantic_field?
+ true
+ end
- # This represents a field on a node that is the ID of a string interned
- # through the parser's constant pool.
- class ConstantField < Field
- def rbs_class
- "Symbol"
+ def should_be_serialized?
+ SERIALIZE_ONLY_SEMANTICS_FIELDS ? semantic_field? : true
+ end
end
- def rbi_class
- "Symbol"
- end
+ # Some node fields can be specialized if they point to a specific kind of
+ # node and not just a generic node.
+ class NodeKindField < Field
+ def initialize(kind:, **options)
+ @kind = kind
+ super(**options)
+ end
- def java_type
- JAVA_STRING_TYPE
- end
- end
+ def c_type
+ if specific_kind
+ "pm_#{specific_kind.gsub(/(?<=.)[A-Z]/, "_\\0").downcase}"
+ else
+ "pm_node"
+ end
+ end
- # This represents a field on a node that is the ID of a string interned
- # through the parser's constant pool and can be optionally null.
- class OptionalConstantField < Field
- def rbs_class
- "Symbol?"
- end
+ def ruby_type
+ specific_kind || "Node"
+ end
- def rbi_class
- "T.nilable(Symbol)"
- end
+ def java_type
+ specific_kind || "Node"
+ end
- def java_type
- JAVA_STRING_TYPE
- end
- end
+ def java_cast
+ if specific_kind
+ "(Nodes.#{@kind}) "
+ else
+ ""
+ end
+ end
- # This represents a field on a node that is a list of IDs that are associated
- # with strings interned through the parser's constant pool.
- class ConstantListField < Field
- def rbs_class
- "Array[Symbol]"
- end
+ def specific_kind
+ @kind unless @kind.is_a?(Array)
+ end
- def rbi_class
- "T::Array[Symbol]"
+ def union_kind
+ @kind if @kind.is_a?(Array)
+ end
end
- def java_type
- "#{JAVA_STRING_TYPE}[]"
- end
- end
+ # This represents a field on a node that is itself a node. We pass them as
+ # references and store them as references.
+ class NodeField < NodeKindField
+ def rbs_class
+ if specific_kind
+ specific_kind
+ elsif union_kind
+ union_kind.join(" | ")
+ else
+ "Prism::node"
+ end
+ end
- # This represents a field on a node that is a string.
- class StringField < Field
- def rbs_class
- "String"
- end
+ def rbi_class
+ if specific_kind
+ "Prism::#{specific_kind}"
+ elsif union_kind
+ "T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")})"
+ else
+ "Prism::Node"
+ end
+ end
- def rbi_class
- "String"
+ def check_field_kind
+ if union_kind
+ "[#{union_kind.join(', ')}].include?(#{name}.class)"
+ else
+ "#{name}.is_a?(#{ruby_type})"
+ end
+ end
end
- def java_type
- "byte[]"
- end
- end
+ # This represents a field on a node that is itself a node and can be
+ # optionally null. We pass them as references and store them as references.
+ class OptionalNodeField < NodeKindField
+ def rbs_class
+ if specific_kind
+ "#{specific_kind}?"
+ elsif union_kind
+ [*union_kind, "nil"].join(" | ")
+ else
+ "Prism::node?"
+ end
+ end
- # This represents a field on a node that is a location.
- class LocationField < Field
- def semantic_field?
- false
- end
+ def rbi_class
+ if specific_kind
+ "T.nilable(Prism::#{specific_kind})"
+ elsif union_kind
+ "T.nilable(T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")}))"
+ else
+ "T.nilable(Prism::Node)"
+ end
+ end
- def rbs_class
- "Location"
+ def check_field_kind
+ if union_kind
+ "[#{union_kind.join(', ')}, NilClass].include?(#{name}.class)"
+ else
+ "#{name}.nil? || #{name}.is_a?(#{ruby_type})"
+ end
+ end
end
- def rbi_class
- "Prism::Location"
- end
+ # This represents a field on a node that is a list of nodes. We pass them as
+ # references and store them directly on the struct.
+ class NodeListField < NodeKindField
+ def rbs_class
+ if specific_kind
+ "Array[#{specific_kind}]"
+ elsif union_kind
+ "Array[#{union_kind.join(" | ")}]"
+ else
+ "Array[Prism::node]"
+ end
+ end
- def java_type
- "Location"
- end
- end
+ def rbi_class
+ if specific_kind
+ "T::Array[Prism::#{specific_kind}]"
+ elsif union_kind
+ "T::Array[T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")})]"
+ else
+ "T::Array[Prism::Node]"
+ end
+ end
- # This represents a field on a node that is a location that is optional.
- class OptionalLocationField < Field
- def semantic_field?
- false
- end
+ def java_type
+ "#{super}[]"
+ end
- def rbs_class
- "Location?"
+ def check_field_kind
+ if union_kind
+ "#{name}.all? { |n| [#{union_kind.join(', ')}].include?(n.class) }"
+ else
+ "#{name}.all? { |n| n.is_a?(#{ruby_type}) }"
+ end
+ end
end
- def rbi_class
- "T.nilable(Prism::Location)"
- end
+ # This represents a field on a node that is the ID of a string interned
+ # through the parser's constant pool.
+ class ConstantField < Field
+ def rbs_class
+ "Symbol"
+ end
- def java_type
- "Location"
- end
- end
+ def rbi_class
+ "Symbol"
+ end
- # This represents an integer field.
- class UInt8Field < Field
- def rbs_class
- "Integer"
+ def java_type
+ JAVA_STRING_TYPE
+ end
end
- def rbi_class
- "Integer"
- end
+ # This represents a field on a node that is the ID of a string interned
+ # through the parser's constant pool and can be optionally null.
+ class OptionalConstantField < Field
+ def rbs_class
+ "Symbol?"
+ end
- def java_type
- "int"
- end
- end
+ def rbi_class
+ "T.nilable(Symbol)"
+ end
- # This represents an integer field.
- class UInt32Field < Field
- def rbs_class
- "Integer"
+ def java_type
+ JAVA_STRING_TYPE
+ end
end
- def rbi_class
- "Integer"
- end
+ # This represents a field on a node that is a list of IDs that are associated
+ # with strings interned through the parser's constant pool.
+ class ConstantListField < Field
+ def rbs_class
+ "Array[Symbol]"
+ end
- def java_type
- "int"
- end
- end
+ def rbi_class
+ "T::Array[Symbol]"
+ end
- # This represents a set of flags. It is very similar to the UInt32Field, but
- # can be directly embedded into the flags field on the struct and provides
- # convenient methods for checking if a flag is set.
- class FlagsField < Field
- def rbs_class
- "Integer"
+ def java_type
+ "#{JAVA_STRING_TYPE}[]"
+ end
end
- def rbi_class
- "Integer"
- end
+ # This represents a field on a node that is a string.
+ class StringField < Field
+ def rbs_class
+ "String"
+ end
- def java_type
- "short"
- end
+ def rbi_class
+ "String"
+ end
- def kind
- options.fetch(:kind)
+ def java_type
+ "byte[]"
+ end
end
- end
- # This class represents a node in the tree, configured by the config.yml file
- # in YAML format. It contains information about the name of the node and the
- # various child nodes it contains.
- class NodeType
- attr_reader :name, :type, :human, :fields, :newline, :comment
+ # This represents a field on a node that is a location.
+ class LocationField < Field
+ def semantic_field?
+ false
+ end
- def initialize(config)
- @name = config.fetch("name")
+ def rbs_class
+ "Location"
+ end
- type = @name.gsub(/(?<=.)[A-Z]/, "_\\0")
- @type = "PM_#{type.upcase}"
- @human = type.downcase
+ def rbi_class
+ "Prism::Location"
+ end
- @fields =
- config.fetch("fields", []).map do |field|
- type = field_type_for(field.fetch("type"))
+ def java_type
+ "Location"
+ end
+ end
- options = field.transform_keys(&:to_sym)
- options.delete(:type)
+ # This represents a field on a node that is a location that is optional.
+ class OptionalLocationField < Field
+ def semantic_field?
+ false
+ end
- # If/when we have documentation on every field, this should be changed
- # to use fetch instead of delete.
- comment = options.delete(:comment)
+ def rbs_class
+ "Location?"
+ end
- type.new(comment: comment, **options)
- end
+ def rbi_class
+ "T.nilable(Prism::Location)"
+ end
- @newline = config.fetch("newline", true)
- @comment = config.fetch("comment")
+ def java_type
+ "Location"
+ end
end
- def each_comment_line
- comment.each_line { |line| yield line.prepend(" ").rstrip }
- end
+ # This represents an integer field.
+ class UInt8Field < Field
+ def rbs_class
+ "Integer"
+ end
- def semantic_fields
- @semantic_fields ||= @fields.select(&:semantic_field?)
+ def rbi_class
+ "Integer"
+ end
+
+ def java_type
+ "int"
+ end
end
- # Should emit serialized length of node so implementations can skip
- # the node to enable lazy parsing.
- def needs_serialized_length?
- name == "DefNode"
+ # This represents an integer field.
+ class UInt32Field < Field
+ def rbs_class
+ "Integer"
+ end
+
+ def rbi_class
+ "Integer"
+ end
+
+ def java_type
+ "int"
+ end
end
- private
-
- def field_type_for(name)
- case name
- when "node" then NodeField
- when "node?" then OptionalNodeField
- when "node[]" then NodeListField
- when "string" then StringField
- when "constant" then ConstantField
- when "constant?" then OptionalConstantField
- when "constant[]" then ConstantListField
- when "location" then LocationField
- when "location?" then OptionalLocationField
- when "uint8" then UInt8Field
- when "uint32" then UInt32Field
- when "flags" then FlagsField
- else raise("Unknown field type: #{name.inspect}")
+ # This represents an arbitrarily-sized integer. When it gets to Ruby it will
+ # be an Integer.
+ class IntegerField < Field
+ def rbs_class
+ "Integer"
+ end
+
+ def rbi_class
+ "Integer"
+ end
+
+ def java_type
+ "Object"
end
end
- end
- # This represents a token in the lexer.
- class Token
- attr_reader :name, :value, :comment
+ # This represents a double-precision floating point number. When it gets to
+ # Ruby it will be a Float.
+ class DoubleField < Field
+ def rbs_class
+ "Float"
+ end
+
+ def rbi_class
+ "Float"
+ end
- def initialize(config)
- @name = config.fetch("name")
- @value = config["value"]
- @comment = config.fetch("comment")
+ def java_type
+ "double"
+ end
end
- end
- # Represents a set of flags that should be internally represented with an enum.
- class Flags
- # Represents an individual flag within a set of flags.
- class Flag
- attr_reader :name, :camelcase, :comment
+ # This class represents a node in the tree, configured by the config.yml file
+ # in YAML format. It contains information about the name of the node and the
+ # various child nodes it contains.
+ class NodeType
+ attr_reader :name, :type, :human, :flags, :fields, :newline, :comment
- def initialize(config)
+ def initialize(config, flags)
@name = config.fetch("name")
- @camelcase = @name.split("_").map(&:capitalize).join
+
+ type = @name.gsub(/(?<=.)[A-Z]/, "_\\0")
+ @type = "PM_#{type.upcase}"
+ @human = type.downcase
+
+ @fields =
+ config.fetch("fields", []).map do |field|
+ type = field_type_for(field.fetch("type"))
+
+ options = field.transform_keys(&:to_sym)
+ options.delete(:type)
+
+ # If/when we have documentation on every field, this should be
+ # changed to use fetch instead of delete.
+ comment = options.delete(:comment)
+
+ if kinds = options[:kind]
+ kinds = [kinds] unless kinds.is_a?(Array)
+ kinds = kinds.map do |kind|
+ case kind
+ when "non-void expression"
+ # the actual list of types would be way too long
+ "Node"
+ when "pattern expression"
+ # the list of all possible types is too long with 37+ different classes
+ "Node"
+ when Hash
+ kind = kind.fetch("on error")
+ REMOVE_ON_ERROR_TYPES ? nil : kind
+ else
+ kind
+ end
+ end.compact
+ if kinds.size == 1
+ kinds = kinds.first
+ kinds = nil if kinds == "Node"
+ end
+ options[:kind] = kinds
+ else
+ if type < NodeKindField
+ raise "Missing kind in config.yml for field #{@name}##{options.fetch(:name)}"
+ end
+ end
+
+ type.new(comment: comment, **options)
+ end
+
+ @flags = config.key?("flags") ? flags.fetch(config.fetch("flags")) : nil
+ @newline = config.fetch("newline", true)
@comment = config.fetch("comment")
end
+
+ def each_comment_line(&block)
+ ConfigComment.new(comment).each_line(&block)
+ end
+
+ def each_comment_java_line(&block)
+ ConfigComment.new(comment).each_java_line(&block)
+ end
+
+ def semantic_fields
+ @semantic_fields ||= @fields.select(&:semantic_field?)
+ end
+
+ # Should emit serialized length of node so implementations can skip
+ # the node to enable lazy parsing.
+ def needs_serialized_length?
+ name == "DefNode"
+ end
+
+ private
+
+ def field_type_for(name)
+ case name
+ when "node" then NodeField
+ when "node?" then OptionalNodeField
+ when "node[]" then NodeListField
+ when "string" then StringField
+ when "constant" then ConstantField
+ when "constant?" then OptionalConstantField
+ when "constant[]" then ConstantListField
+ when "location" then LocationField
+ when "location?" then OptionalLocationField
+ when "uint8" then UInt8Field
+ when "uint32" then UInt32Field
+ when "integer" then IntegerField
+ when "double" then DoubleField
+ else raise("Unknown field type: #{name.inspect}")
+ end
+ end
end
- attr_reader :name, :human, :values, :comment
+ # This represents a token in the lexer.
+ class Token
+ attr_reader :name, :value, :comment
- def initialize(config)
- @name = config.fetch("name")
- @human = @name.gsub(/(?<=.)[A-Z]/, "_\\0").downcase
- @values = config.fetch("values").map { |flag| Flag.new(flag) }
- @comment = config.fetch("comment")
+ def initialize(config)
+ @name = config.fetch("name")
+ @value = config["value"]
+ @comment = config.fetch("comment")
+ end
end
- end
- class << self
- # This templates out a file using ERB with the given locals. The locals are
- # derived from the config.yml file.
- def template(name, write_to: nil)
- filepath = "templates/#{name}.erb"
- template = File.expand_path("../#{filepath}", __dir__)
-
- erb = read_template(template)
- extension = File.extname(filepath.gsub(".erb", ""))
-
- heading = case extension
- when ".rb"
- <<~HEADING
- # frozen_string_literal: true
- =begin
- This file is generated by the templates/template.rb script and should not be
- modified manually. See #{filepath}
- if you are looking to modify the template
- =end
-
- HEADING
- when ".rbs"
- <<~HEADING
- # This file is generated by the templates/template.rb script and should not be
- # modified manually. See #{filepath}
- # if you are looking to modify the template
-
- HEADING
- when ".rbi"
- <<~HEADING
- =begin
- This file is generated by the templates/template.rb script and should not be
- modified manually. See #{filepath}
- if you are looking to modify the template
- =end
- HEADING
- else
- <<~HEADING
- /******************************************************************************/
- /* This file is generated by the templates/template.rb script and should not */
- /* be modified manually. See */
- /* #{filepath + " " * (74 - filepath.size) } */
- /* if you are looking to modify the */
- /* template */
- /******************************************************************************/
- HEADING
- end
-
- write_to ||= File.expand_path("../#{name}", __dir__)
- contents = heading + erb.result_with_hash(locals)
-
- if (extension == ".c" || extension == ".h") && !contents.ascii_only?
- # Enforce that we only have ASCII characters here. This is necessary
- # for non-UTF-8 locales that only allow ASCII characters in C source
- # files.
- contents.each_line.with_index(1) do |line, line_number|
- raise "Non-ASCII character on line #{line_number} of #{write_to}" unless line.ascii_only?
+ # Represents a set of flags that should be internally represented with an enum.
+ class Flags
+ # Represents an individual flag within a set of flags.
+ class Flag
+ attr_reader :name, :camelcase, :comment
+
+ def initialize(config)
+ @name = config.fetch("name")
+ @camelcase = @name.split("_").map(&:capitalize).join
+ @comment = config.fetch("comment")
end
end
- FileUtils.mkdir_p(File.dirname(write_to))
- File.write(write_to, contents)
- end
+ attr_reader :name, :human, :values, :comment
- private
+ def initialize(config)
+ @name = config.fetch("name")
+ @human = @name.gsub(/(?<=.)[A-Z]/, "_\\0").downcase
+ @values = config.fetch("values").map { |flag| Flag.new(flag) }
+ @comment = config.fetch("comment")
+ end
- def read_template(filepath)
- template = File.read(filepath, encoding: Encoding::UTF_8)
- erb = erb(template)
- erb.filename = filepath
- erb
+ def self.empty
+ new("name" => "", "values" => [], "comment" => "")
+ end
end
- if ERB.instance_method(:initialize).parameters.assoc(:key) # Ruby 2.6+
+ class << self
+ # This templates out a file using ERB with the given locals. The locals are
+ # derived from the config.yml file.
+ def render(name, write_to: nil)
+ filepath = "templates/#{name}.erb"
+ template = File.expand_path("../#{filepath}", __dir__)
+
+ erb = read_template(template)
+ extension = File.extname(filepath.gsub(".erb", ""))
+
+ heading =
+ case extension
+ when ".rb"
+ <<~HEADING
+ # frozen_string_literal: true
+ # :markup: markdown
+
+ =begin
+ --
+ This file is generated by the templates/template.rb script and should not be
+ modified manually. See #{filepath}
+ if you are looking to modify the template
+ ++
+ =end
+
+ HEADING
+ when ".rbs"
+ <<~HEADING
+ # This file is generated by the templates/template.rb script and should not be
+ # modified manually. See #{filepath}
+ # if you are looking to modify the template
+
+ HEADING
+ when ".rbi"
+ <<~HEADING
+ # typed: strict
+
+ =begin
+ This file is generated by the templates/template.rb script and should not be
+ modified manually. See #{filepath}
+ if you are looking to modify the template
+ =end
+
+ HEADING
+ else
+ <<~HEADING
+ /* :markup: markdown */
+
+ /*----------------------------------------------------------------------------*/
+ /* This file is generated by the templates/template.rb script and should not */
+ /* be modified manually. See */
+ /* #{filepath.ljust(74)} */
+ /* if you are looking to modify the */
+ /* template */
+ /*----------------------------------------------------------------------------*/
+
+ HEADING
+ end
+
+ write_to ||= File.expand_path("../#{name}", __dir__)
+ contents = heading + erb.result_with_hash(locals)
+
+ if (extension == ".c" || extension == ".h") && !contents.ascii_only?
+ # Enforce that we only have ASCII characters here. This is necessary
+ # for non-UTF-8 locales that only allow ASCII characters in C source
+ # files.
+ contents.each_line.with_index(1) do |line, line_number|
+ raise "Non-ASCII character on line #{line_number} of #{write_to}" unless line.ascii_only?
+ end
+ end
+
+ FileUtils.mkdir_p(File.dirname(write_to))
+ File.write(write_to, contents)
+ end
+
+ private
+
+ def read_template(filepath)
+ template = File.read(filepath, encoding: Encoding::UTF_8)
+ erb = erb(template)
+ erb.filename = filepath
+ erb
+ end
+
def erb(template)
ERB.new(template, trim_mode: "-")
end
- else
- def erb(template)
- ERB.new(template, nil, "-")
+
+ def locals
+ @locals ||=
+ begin
+ config = YAML.load_file(File.expand_path("../config.yml", __dir__))
+ flags = config.fetch("flags").to_h { |flags| [flags["name"], Flags.new(flags)] }
+
+ {
+ errors: config.fetch("errors").map { |name| Error.new(name) },
+ warnings: config.fetch("warnings").map { |name| Warning.new(name) },
+ nodes: config.fetch("nodes").map { |node| NodeType.new(node, flags) }.sort_by(&:name),
+ tokens: config.fetch("tokens").map { |token| Token.new(token) },
+ flags: flags.values
+ }
+ end
end
end
- def locals
- @locals ||=
- begin
- config = YAML.load_file(File.expand_path("../config.yml", __dir__))
-
- {
- nodes: config.fetch("nodes").map { |node| NodeType.new(node) }.sort_by(&:name),
- tokens: config.fetch("tokens").map { |token| Token.new(token) },
- flags: config.fetch("flags").map { |flags| Flags.new(flags) }
- }
- end
- end
+ TEMPLATES = [
+ "ext/prism/api_node.c",
+ "include/prism/ast.h",
+ "include/prism/diagnostic.h",
+ "javascript/src/deserialize.js",
+ "javascript/src/nodes.js",
+ "javascript/src/visitor.js",
+ "java/org/prism/Loader.java",
+ "java/org/prism/Nodes.java",
+ "java/org/prism/AbstractNodeVisitor.java",
+ "lib/prism/compiler.rb",
+ "lib/prism/dispatcher.rb",
+ "lib/prism/dot_visitor.rb",
+ "lib/prism/dsl.rb",
+ "lib/prism/inspect_visitor.rb",
+ "lib/prism/mutation_compiler.rb",
+ "lib/prism/node.rb",
+ "lib/prism/reflection.rb",
+ "lib/prism/serialize.rb",
+ "lib/prism/visitor.rb",
+ "src/diagnostic.c",
+ "src/node.c",
+ "src/prettyprint.c",
+ "src/serialize.c",
+ "src/token_type.c",
+ "rbi/prism/dsl.rbi",
+ "rbi/prism/node.rbi",
+ "rbi/prism/visitor.rbi",
+ "sig/prism.rbs",
+ "sig/prism/dsl.rbs",
+ "sig/prism/mutation_compiler.rbs",
+ "sig/prism/node.rbs",
+ "sig/prism/visitor.rbs",
+ "sig/prism/_private/dot_visitor.rbs"
+ ]
end
-
- TEMPLATES = [
- "ext/prism/api_node.c",
- "include/prism/ast.h",
- "javascript/src/deserialize.js",
- "javascript/src/nodes.js",
- "javascript/src/visitor.js",
- "java/org/prism/Loader.java",
- "java/org/prism/Nodes.java",
- "java/org/prism/AbstractNodeVisitor.java",
- "lib/prism/compiler.rb",
- "lib/prism/dispatcher.rb",
- "lib/prism/dot_visitor.rb",
- "lib/prism/dsl.rb",
- "lib/prism/mutation_compiler.rb",
- "lib/prism/node.rb",
- "lib/prism/serialize.rb",
- "lib/prism/visitor.rb",
- "src/node.c",
- "src/prettyprint.c",
- "src/serialize.c",
- "src/token_type.c",
- "rbi/prism.rbi",
- "sig/prism.rbs",
- ]
end
if __FILE__ == $0
if ARGV.empty?
- Prism::TEMPLATES.each { |filepath| Prism.template(filepath) }
+ Prism::Template::TEMPLATES.each { |filepath| Prism::Template.render(filepath) }
else # ruby/ruby
name, write_to = ARGV
- Prism.template(name, write_to: write_to)
+ Prism::Template.render(name, write_to: write_to)
end
end
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
index 0ae9445428..2136a7c43e 100644
--- a/prism/util/pm_buffer.c
+++ b/prism/util/pm_buffer.c
@@ -16,7 +16,7 @@ pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) {
buffer->length = 0;
buffer->capacity = capacity;
- buffer->value = (char *) malloc(capacity);
+ buffer->value = (char *) xmalloc(capacity);
return buffer->value != NULL;
}
@@ -32,7 +32,7 @@ pm_buffer_init(pm_buffer_t *buffer) {
* Return the value of the buffer.
*/
char *
-pm_buffer_value(pm_buffer_t *buffer) {
+pm_buffer_value(const pm_buffer_t *buffer) {
return buffer->value;
}
@@ -40,14 +40,14 @@ pm_buffer_value(pm_buffer_t *buffer) {
* Return the length of the buffer.
*/
size_t
-pm_buffer_length(pm_buffer_t *buffer) {
+pm_buffer_length(const pm_buffer_t *buffer) {
return buffer->length;
}
/**
* Append the given amount of space to the buffer.
*/
-static inline void
+static inline bool
pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
size_t next_length = buffer->length + length;
@@ -60,10 +60,12 @@ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
buffer->capacity *= 2;
}
- buffer->value = realloc(buffer->value, buffer->capacity);
+ buffer->value = xrealloc(buffer->value, buffer->capacity);
+ if (buffer->value == NULL) return false;
}
buffer->length = next_length;
+ return true;
}
/**
@@ -72,8 +74,9 @@ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
static inline void
pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
size_t cursor = buffer->length;
- pm_buffer_append_length(buffer, length);
- memcpy(buffer->value + cursor, source, length);
+ if (pm_buffer_append_length(buffer, length)) {
+ memcpy(buffer->value + cursor, source, length);
+ }
}
/**
@@ -82,8 +85,9 @@ pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
void
pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
size_t cursor = buffer->length;
- pm_buffer_append_length(buffer, length);
- memset(buffer->value + cursor, 0, length);
+ if (pm_buffer_append_length(buffer, length)) {
+ memset(buffer->value + cursor, 0, length);
+ }
}
/**
@@ -100,13 +104,12 @@ pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) {
size_t length = (size_t) (result + 1);
size_t cursor = buffer->length;
- pm_buffer_append_length(buffer, length);
-
- va_start(arguments, format);
- vsnprintf(buffer->value + cursor, length, format, arguments);
- va_end(arguments);
-
- buffer->length--;
+ if (pm_buffer_append_length(buffer, length)) {
+ va_start(arguments, format);
+ vsnprintf(buffer->value + cursor, length, format, arguments);
+ va_end(arguments);
+ buffer->length--;
+ }
}
/**
@@ -161,14 +164,134 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
}
/**
+ * Append a double to the buffer.
+ */
+void
+pm_buffer_append_double(pm_buffer_t *buffer, double value) {
+ const void *source = &value;
+ pm_buffer_append(buffer, source, sizeof(double));
+}
+
+/**
+ * Append a unicode codepoint to the buffer.
+ */
+bool
+pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value) {
+ if (value <= 0x7F) {
+ pm_buffer_append_byte(buffer, (uint8_t) value); // 0xxxxxxx
+ return true;
+ } else if (value <= 0x7FF) {
+ uint8_t bytes[] = {
+ (uint8_t) (0xC0 | ((value >> 6) & 0x3F)), // 110xxxxx
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
+ };
+
+ pm_buffer_append_bytes(buffer, bytes, 2);
+ return true;
+ } else if (value <= 0xFFFF) {
+ uint8_t bytes[] = {
+ (uint8_t) (0xE0 | ((value >> 12) & 0x3F)), // 1110xxxx
+ (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
+ };
+
+ pm_buffer_append_bytes(buffer, bytes, 3);
+ return true;
+ } else if (value <= 0x10FFFF) {
+ uint8_t bytes[] = {
+ (uint8_t) (0xF0 | ((value >> 18) & 0x3F)), // 11110xxx
+ (uint8_t) (0x80 | ((value >> 12) & 0x3F)), // 10xxxxxx
+ (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx
+ (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
+ };
+
+ pm_buffer_append_bytes(buffer, bytes, 4);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/**
+ * Append a slice of source code to the buffer.
+ */
+void
+pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping) {
+ for (size_t index = 0; index < length; index++) {
+ const uint8_t byte = source[index];
+
+ if ((byte <= 0x06) || (byte >= 0x0E && byte <= 0x1F) || (byte >= 0x7F)) {
+ if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+ pm_buffer_append_format(buffer, "\\x%02X", byte);
+ } else {
+ pm_buffer_append_format(buffer, "\\u%04X", byte);
+ }
+ } else {
+ switch (byte) {
+ case '\a':
+ if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+ pm_buffer_append_string(buffer, "\\a", 2);
+ } else {
+ pm_buffer_append_format(buffer, "\\u%04X", byte);
+ }
+ break;
+ case '\b':
+ pm_buffer_append_string(buffer, "\\b", 2);
+ break;
+ case '\t':
+ pm_buffer_append_string(buffer, "\\t", 2);
+ break;
+ case '\n':
+ pm_buffer_append_string(buffer, "\\n", 2);
+ break;
+ case '\v':
+ if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+ pm_buffer_append_string(buffer, "\\v", 2);
+ } else {
+ pm_buffer_append_format(buffer, "\\u%04X", byte);
+ }
+ break;
+ case '\f':
+ pm_buffer_append_string(buffer, "\\f", 2);
+ break;
+ case '\r':
+ pm_buffer_append_string(buffer, "\\r", 2);
+ break;
+ case '"':
+ pm_buffer_append_string(buffer, "\\\"", 2);
+ break;
+ case '#': {
+ if (escaping == PM_BUFFER_ESCAPING_RUBY && index + 1 < length) {
+ const uint8_t next_byte = source[index + 1];
+ if (next_byte == '{' || next_byte == '@' || next_byte == '$') {
+ pm_buffer_append_byte(buffer, '\\');
+ }
+ }
+
+ pm_buffer_append_byte(buffer, '#');
+ break;
+ }
+ case '\\':
+ pm_buffer_append_string(buffer, "\\\\", 2);
+ break;
+ default:
+ pm_buffer_append_byte(buffer, byte);
+ break;
+ }
+ }
+ }
+}
+
+/**
* Prepend the given string to the buffer.
*/
void
pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
size_t cursor = buffer->length;
- pm_buffer_append_length(buffer, length);
- memmove(buffer->value + length, buffer->value, cursor);
- memcpy(buffer->value, value, length);
+ if (pm_buffer_append_length(buffer, length)) {
+ memmove(buffer->value + length, buffer->value, cursor);
+ memcpy(buffer->value, value, length);
+ }
}
/**
@@ -182,9 +305,53 @@ pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source) {
}
/**
+ * Clear the buffer by reducing its size to 0. This does not free the allocated
+ * memory, but it does allow the buffer to be reused.
+ */
+void
+pm_buffer_clear(pm_buffer_t *buffer) {
+ buffer->length = 0;
+}
+
+/**
+ * Strip the whitespace from the end of the buffer.
+ */
+void
+pm_buffer_rstrip(pm_buffer_t *buffer) {
+ while (buffer->length > 0 && pm_char_is_whitespace((uint8_t) buffer->value[buffer->length - 1])) {
+ buffer->length--;
+ }
+}
+
+/**
+ * Checks if the buffer includes the given value.
+ */
+size_t
+pm_buffer_index(const pm_buffer_t *buffer, char value) {
+ const char *first = memchr(buffer->value, value, buffer->length);
+ return (first == NULL) ? SIZE_MAX : (size_t) (first - buffer->value);
+}
+
+/**
+ * Insert the given string into the buffer at the given index.
+ */
+void
+pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length) {
+ assert(index <= buffer->length);
+
+ if (index == buffer->length) {
+ pm_buffer_append_string(buffer, value, length);
+ } else {
+ pm_buffer_append_zeroes(buffer, length);
+ memmove(buffer->value + index + length, buffer->value + index, buffer->length - length - index);
+ memcpy(buffer->value + index, value, length);
+ }
+}
+
+/**
* Free the memory associated with the buffer.
*/
void
pm_buffer_free(pm_buffer_t *buffer) {
- free(buffer->value);
+ xfree(buffer->value);
}
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
index f0cca84af5..cb80f8b3ce 100644
--- a/prism/util/pm_buffer.h
+++ b/prism/util/pm_buffer.h
@@ -7,6 +7,7 @@
#define PRISM_BUFFER_H
#include "prism/defines.h"
+#include "prism/util/pm_char.h"
#include <assert.h>
#include <stdbool.h>
@@ -50,6 +51,8 @@ bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
*
* @param buffer The buffer to initialize.
* @returns True if the buffer was initialized successfully, false otherwise.
+ *
+ * \public \memberof pm_buffer_t
*/
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
@@ -58,16 +61,20 @@ PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
*
* @param buffer The buffer to get the value of.
* @returns The value of the buffer.
+ *
+ * \public \memberof pm_buffer_t
*/
-PRISM_EXPORTED_FUNCTION char * pm_buffer_value(pm_buffer_t *buffer);
+PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer);
/**
* Return the length of the buffer.
*
* @param buffer The buffer to get the length of.
* @returns The length of the buffer.
+ *
+ * \public \memberof pm_buffer_t
*/
-PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(pm_buffer_t *buffer);
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer);
/**
* Append the given amount of space as zeroes to the buffer.
@@ -129,6 +136,43 @@ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
/**
+ * Append a double to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The double to append.
+ */
+void pm_buffer_append_double(pm_buffer_t *buffer, double value);
+
+/**
+ * Append a unicode codepoint to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The character to append.
+ * @returns True if the codepoint was valid and appended successfully, false
+ * otherwise.
+ */
+bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value);
+
+/**
+ * The different types of escaping that can be performed by the buffer when
+ * appending a slice of Ruby source code.
+ */
+typedef enum {
+ PM_BUFFER_ESCAPING_RUBY,
+ PM_BUFFER_ESCAPING_JSON
+} pm_buffer_escaping_t;
+
+/**
+ * Append a slice of source code to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param source The source code to append.
+ * @param length The length of the source code to append.
+ * @param escaping The type of escaping to perform.
+ */
+void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
+
+/**
* Prepend the given string to the buffer.
*
* @param buffer The buffer to prepend to.
@@ -146,9 +190,46 @@ void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t len
void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source);
/**
+ * Clear the buffer by reducing its size to 0. This does not free the allocated
+ * memory, but it does allow the buffer to be reused.
+ *
+ * @param buffer The buffer to clear.
+ */
+void pm_buffer_clear(pm_buffer_t *buffer);
+
+/**
+ * Strip the whitespace from the end of the buffer.
+ *
+ * @param buffer The buffer to strip.
+ */
+void pm_buffer_rstrip(pm_buffer_t *buffer);
+
+/**
+ * Checks if the buffer includes the given value.
+ *
+ * @param buffer The buffer to check.
+ * @param value The value to check for.
+ * @returns The index of the first occurrence of the value in the buffer, or
+ * SIZE_MAX if the value is not found.
+ */
+size_t pm_buffer_index(const pm_buffer_t *buffer, char value);
+
+/**
+ * Insert the given string into the buffer at the given index.
+ *
+ * @param buffer The buffer to insert into.
+ * @param index The index to insert at.
+ * @param value The string to insert.
+ * @param length The length of the string to insert.
+ */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
+
+/**
* Free the memory associated with the buffer.
*
* @param buffer The buffer to free.
+ *
+ * \public \memberof pm_buffer_t
*/
PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c
index 13eddbba48..a51dc11645 100644
--- a/prism/util/pm_char.c
+++ b/prism/util/pm_char.c
@@ -19,10 +19,10 @@ static const uint8_t pm_byte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5x
- 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 4, 4, // 6x
- 0, 0, 0, 4, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, // 7x
+ 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x
+ 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
@@ -185,7 +185,7 @@ pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const
size++;
}
- if (string[size - 1] == '_') *invalid = string + size - 1;
+ if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
return size;
}
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
index 32f698a42b..deeafd6321 100644
--- a/prism/util/pm_char.h
+++ b/prism/util/pm_char.h
@@ -34,8 +34,7 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
* @return The number of characters at the start of the string that are
* whitespace.
*/
-size_t
-pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
+size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
/**
* Returns the number of characters at the start of the string that are inline
diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c
index 7b8ed0654d..922ce6a18c 100644
--- a/prism/util/pm_constant_pool.c
+++ b/prism/util/pm_constant_pool.c
@@ -11,6 +11,22 @@ pm_constant_id_list_init(pm_constant_id_list_t *list) {
}
/**
+ * Initialize a list of constant ids with a given capacity.
+ */
+void
+pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
+ if (capacity) {
+ list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
+ if (list->ids == NULL) abort();
+ } else {
+ list->ids = NULL;
+ }
+
+ list->size = 0;
+ list->capacity = capacity;
+}
+
+/**
* Append a constant id to a list of constant ids. Returns false if any
* potential reallocations fail.
*/
@@ -18,7 +34,7 @@ bool
pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
if (list->size >= list->capacity) {
list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
- list->ids = (pm_constant_id_t *) realloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
+ list->ids = (pm_constant_id_t *) xrealloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
if (list->ids == NULL) return false;
}
@@ -27,6 +43,18 @@ pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
}
/**
+ * Insert a constant id into a list of constant ids at the specified index.
+ */
+void
+pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id) {
+ assert(index < list->capacity);
+ assert(list->ids[index] == PM_CONSTANT_ID_UNSET);
+
+ list->ids[index] = id;
+ list->size++;
+}
+
+/**
* Checks if the current constant id list includes the given constant id.
*/
bool
@@ -38,20 +66,12 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
}
/**
- * Get the memory size of a list of constant ids.
- */
-size_t
-pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
- return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
-}
-
-/**
* Free the memory associated with a list of constant ids.
*/
void
pm_constant_id_list_free(pm_constant_id_list_t *list) {
if (list->ids != NULL) {
- free(list->ids);
+ xfree(list->ids);
}
}
@@ -111,7 +131,7 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
const uint32_t mask = next_capacity - 1;
const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
- void *next = calloc(next_capacity, element_size);
+ void *next = xcalloc(next_capacity, element_size);
if (next == NULL) return false;
pm_constant_pool_bucket_t *next_buckets = next;
@@ -145,7 +165,7 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
// pool->constants and pool->buckets are allocated out of the same chunk
// of memory, with the buckets coming first.
- free(pool->buckets);
+ xfree(pool->buckets);
pool->constants = next_constants;
pool->buckets = next_buckets;
pool->capacity = next_capacity;
@@ -162,7 +182,7 @@ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
capacity = next_power_of_two(capacity);
const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
- void *memory = calloc(capacity, element_size);
+ void *memory = xcalloc(capacity, element_size);
if (memory == NULL) return false;
pool->buckets = memory;
@@ -182,6 +202,31 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
}
/**
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ */
+pm_constant_id_t
+pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+ assert(is_power_of_two(pool->capacity));
+ const uint32_t mask = pool->capacity - 1;
+
+ uint32_t hash = pm_constant_pool_hash(start, length);
+ uint32_t index = hash & mask;
+ pm_constant_pool_bucket_t *bucket;
+
+ while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
+ pm_constant_t *constant = &pool->constants[bucket->id - 1];
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+ return bucket->id;
+ }
+
+ index = (index + 1) & mask;
+ }
+
+ return PM_CONSTANT_ID_UNSET;
+}
+
+/**
* Insert a constant into a constant pool and return its index in the pool.
*/
static inline pm_constant_id_t
@@ -212,14 +257,14 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
// an existing constant, then either way we don't want the given
// memory. Either it's duplicated with the existing constant or
// it's not necessary because we have a shared version.
- free((void *) start);
+ xfree((void *) start);
} else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
// If we're attempting to insert a shared constant and the
// existing constant is owned, then we can free the owned
// constant and replace it with the shared constant.
- free((void *) constant->start);
+ xfree((void *) constant->start);
constant->start = start;
- bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
+ bucket->type = (unsigned int) (type & 0x3);
}
return bucket->id;
@@ -228,7 +273,7 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
index = (index + 1) & mask;
}
- // IDs are allocated starting at 1, since the value 0 denotes a non-existant
+ // IDs are allocated starting at 1, since the value 0 denotes a non-existent
// constant.
uint32_t id = ++pool->size;
assert(pool->size < ((uint32_t) (1 << 30)));
@@ -262,7 +307,7 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
* potential calls to resize fail.
*/
pm_constant_id_t
-pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) {
return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
}
@@ -289,9 +334,9 @@ pm_constant_pool_free(pm_constant_pool_t *pool) {
// If an id is set on this constant, then we know we have content here.
if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
pm_constant_t *constant = &pool->constants[bucket->id - 1];
- free((void *) constant->start);
+ xfree((void *) constant->start);
}
}
- free(pool->buckets);
+ xfree(pool->buckets);
}
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
index 3743d9f58d..6df23f8f50 100644
--- a/prism/util/pm_constant_pool.h
+++ b/prism/util/pm_constant_pool.h
@@ -52,6 +52,14 @@ typedef struct {
void pm_constant_id_list_init(pm_constant_id_list_t *list);
/**
+ * Initialize a list of constant ids with a given capacity.
+ *
+ * @param list The list to initialize.
+ * @param capacity The initial capacity of the list.
+ */
+void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity);
+
+/**
* Append a constant id to a list of constant ids. Returns false if any
* potential reallocations fail.
*
@@ -62,6 +70,15 @@ void pm_constant_id_list_init(pm_constant_id_list_t *list);
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
/**
+ * Insert a constant id into a list of constant ids at the specified index.
+ *
+ * @param list The list to insert into.
+ * @param index The index at which to insert.
+ * @param id The id to insert.
+ */
+void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
+
+/**
* Checks if the current constant id list includes the given constant id.
*
* @param list The list to check.
@@ -71,14 +88,6 @@ bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id
bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
/**
- * Get the memory size of a list of constant ids.
- *
- * @param list The list to get the memory size of.
- * @return The memory size of the list.
- */
-size_t pm_constant_id_list_memsize(pm_constant_id_list_t *list);
-
-/**
* Free the memory associated with a list of constant ids.
*
* @param list The list to free.
@@ -155,6 +164,17 @@ bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
/**
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ *
+ * @param pool The pool to find the constant in.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/**
* Insert a constant into a constant pool that is a slice of a source string.
* Returns the id of the constant, or 0 if any potential calls to resize fail.
*
@@ -175,7 +195,7 @@ pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const
* @param length The length of the constant.
* @return The id of the constant.
*/
-pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length);
/**
* Insert a constant into a constant pool from memory that is constant. Returns
diff --git a/prism/util/pm_integer.c b/prism/util/pm_integer.c
new file mode 100644
index 0000000000..4170ecc58d
--- /dev/null
+++ b/prism/util/pm_integer.c
@@ -0,0 +1,670 @@
+#include "prism/util/pm_integer.h"
+
+/**
+ * Pull out the length and values from the integer, regardless of the form in
+ * which the length/values are stored.
+ */
+#define INTEGER_EXTRACT(integer, length_variable, values_variable) \
+ if ((integer)->values == NULL) { \
+ length_variable = 1; \
+ values_variable = &(integer)->value; \
+ } else { \
+ length_variable = (integer)->length; \
+ values_variable = (integer)->values; \
+ }
+
+/**
+ * Adds two positive pm_integer_t values using the given base.
+ * Stores a pm_integer_t with allocated values in destination. Not normalized.
+ */
+static void
+big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
+ size_t left_length;
+ uint32_t *left_values;
+ INTEGER_EXTRACT(left, left_length, left_values)
+
+ size_t right_length;
+ uint32_t *right_values;
+ INTEGER_EXTRACT(right, right_length, right_values)
+
+ size_t length = left_length < right_length ? right_length : left_length;
+ uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * (length + 1));
+ if (values == NULL) return;
+
+ uint64_t carry = 0;
+ for (size_t index = 0; index < length; index++) {
+ uint64_t sum = carry + (index < left_length ? left_values[index] : 0) + (index < right_length ? right_values[index] : 0);
+ values[index] = (uint32_t) (sum % base);
+ carry = sum / base;
+ }
+
+ if (carry > 0) {
+ values[length] = (uint32_t) carry;
+ length++;
+ }
+
+ *destination = (pm_integer_t) { length, values, 0, false };
+}
+
+/**
+ * Internal helper for karatsuba_multiply. Calculates `a - b - c` in the given
+ * base, assuming that a, b, c, and a - b - c are all positive.
+ * Stores a pm_integer_t with allocated values in destination. Not normalized.
+ */
+static void
+big_sub2(pm_integer_t *destination, pm_integer_t *a, pm_integer_t *b, pm_integer_t *c, uint64_t base) {
+ size_t a_length;
+ uint32_t *a_values;
+ INTEGER_EXTRACT(a, a_length, a_values)
+
+ size_t b_length;
+ uint32_t *b_values;
+ INTEGER_EXTRACT(b, b_length, b_values)
+
+ size_t c_length;
+ uint32_t *c_values;
+ INTEGER_EXTRACT(c, c_length, c_values)
+
+ uint32_t *values = (uint32_t*) xmalloc(sizeof(uint32_t) * a_length);
+ int64_t carry = 0;
+
+ for (size_t index = 0; index < a_length; index++) {
+ int64_t sub = (
+ carry +
+ a_values[index] -
+ (index < b_length ? b_values[index] : 0) -
+ (index < c_length ? c_values[index] : 0)
+ );
+
+ if (sub >= 0) {
+ values[index] = (uint32_t) sub;
+ carry = 0;
+ } else {
+ sub += 2 * (int64_t) base;
+ values[index] = (uint32_t) ((uint64_t) sub % base);
+ carry = sub / (int64_t) base - 2;
+ }
+ }
+
+ while (a_length > 1 && values[a_length - 1] == 0) a_length--;
+ *destination = (pm_integer_t) { a_length, values, 0, false };
+}
+
+/**
+ * Multiply two positive integers in the given base using the Karatsuba algorithm.
+ * Stores a pm_integer_t with allocated values in destination. Not normalized.
+ */
+static void
+karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
+ size_t left_length;
+ uint32_t *left_values;
+ INTEGER_EXTRACT(left, left_length, left_values)
+
+ size_t right_length;
+ uint32_t *right_values;
+ INTEGER_EXTRACT(right, right_length, right_values)
+
+ if (left_length > right_length) {
+ size_t temporary_length = left_length;
+ left_length = right_length;
+ right_length = temporary_length;
+
+ uint32_t *temporary_values = left_values;
+ left_values = right_values;
+ right_values = temporary_values;
+ }
+
+ if (left_length <= 10) {
+ size_t length = left_length + right_length;
+ uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+ if (values == NULL) return;
+
+ for (size_t left_index = 0; left_index < left_length; left_index++) {
+ uint32_t carry = 0;
+ for (size_t right_index = 0; right_index < right_length; right_index++) {
+ uint64_t product = (uint64_t) left_values[left_index] * right_values[right_index] + values[left_index + right_index] + carry;
+ values[left_index + right_index] = (uint32_t) (product % base);
+ carry = (uint32_t) (product / base);
+ }
+ values[left_index + right_length] = carry;
+ }
+
+ while (length > 1 && values[length - 1] == 0) length--;
+ *destination = (pm_integer_t) { length, values, 0, false };
+ return;
+ }
+
+ if (left_length * 2 <= right_length) {
+ uint32_t *values = (uint32_t *) xcalloc(left_length + right_length, sizeof(uint32_t));
+
+ for (size_t start_offset = 0; start_offset < right_length; start_offset += left_length) {
+ size_t end_offset = start_offset + left_length;
+ if (end_offset > right_length) end_offset = right_length;
+
+ pm_integer_t sliced_left = {
+ .length = left_length,
+ .values = left_values,
+ .value = 0,
+ .negative = false
+ };
+
+ pm_integer_t sliced_right = {
+ .length = end_offset - start_offset,
+ .values = right_values + start_offset,
+ .value = 0,
+ .negative = false
+ };
+
+ pm_integer_t product;
+ karatsuba_multiply(&product, &sliced_left, &sliced_right, base);
+
+ uint32_t carry = 0;
+ for (size_t index = 0; index < product.length; index++) {
+ uint64_t sum = (uint64_t) values[start_offset + index] + product.values[index] + carry;
+ values[start_offset + index] = (uint32_t) (sum % base);
+ carry = (uint32_t) (sum / base);
+ }
+
+ if (carry > 0) values[start_offset + product.length] += carry;
+ pm_integer_free(&product);
+ }
+
+ *destination = (pm_integer_t) { left_length + right_length, values, 0, false };
+ return;
+ }
+
+ size_t half = left_length / 2;
+ pm_integer_t x0 = { half, left_values, 0, false };
+ pm_integer_t x1 = { left_length - half, left_values + half, 0, false };
+ pm_integer_t y0 = { half, right_values, 0, false };
+ pm_integer_t y1 = { right_length - half, right_values + half, 0, false };
+
+ pm_integer_t z0 = { 0 };
+ karatsuba_multiply(&z0, &x0, &y0, base);
+
+ pm_integer_t z2 = { 0 };
+ karatsuba_multiply(&z2, &x1, &y1, base);
+
+ // To avoid having to handle negative values, use
+ // `z1 = (x0 + x1) * (y0 + y1) - z0 - z2` instead of the original Karatsuba formulation.
+ pm_integer_t x01 = { 0 };
+ big_add(&x01, &x0, &x1, base);
+
+ pm_integer_t y01 = { 0 };
+ big_add(&y01, &y0, &y1, base);
+
+ pm_integer_t xy = { 0 };
+ karatsuba_multiply(&xy, &x01, &y01, base);
+
+ pm_integer_t z1;
+ big_sub2(&z1, &xy, &z0, &z2, base);
+
+ size_t length = left_length + right_length;
+ uint32_t *values = (uint32_t*) xcalloc(length, sizeof(uint32_t));
+
+ assert(z0.values != NULL);
+ memcpy(values, z0.values, sizeof(uint32_t) * z0.length);
+
+ assert(z2.values != NULL);
+ memcpy(values + 2 * half, z2.values, sizeof(uint32_t) * z2.length);
+
+ uint32_t carry = 0;
+ for(size_t index = 0; index < z1.length; index++) {
+ uint64_t sum = (uint64_t) carry + values[index + half] + z1.values[index];
+ values[index + half] = (uint32_t) (sum % base);
+ carry = (uint32_t) (sum / base);
+ }
+
+ for(size_t index = half + z1.length; carry > 0; index++) {
+ uint64_t sum = (uint64_t) carry + values[index];
+ values[index] = (uint32_t) (sum % base);
+ carry = (uint32_t) (sum / base);
+ }
+
+ while (length > 1 && values[length - 1] == 0) length--;
+ pm_integer_free(&z0);
+ pm_integer_free(&z1);
+ pm_integer_free(&z2);
+ pm_integer_free(&x01);
+ pm_integer_free(&y01);
+ pm_integer_free(&xy);
+
+ *destination = (pm_integer_t) { length, values, 0, false };
+}
+
+/**
+ * The values of a hexadecimal digit, where the index is the ASCII character.
+ * Note that there's an odd exception here where _ is mapped to 0. This is
+ * because it's possible for us to end up trying to parse a number that has
+ * already had an error attached to it, and we want to provide _something_ to
+ * the user.
+ */
+static const int8_t pm_integer_parse_digit_values[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 3x
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, // 5x
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Fx
+};
+
+/**
+ * Return the value of a hexadecimal digit in a uint8_t.
+ */
+static uint8_t
+pm_integer_parse_digit(const uint8_t character) {
+ int8_t value = pm_integer_parse_digit_values[character];
+ assert(value != -1 && "invalid digit");
+
+ return (uint8_t) value;
+}
+
+/**
+ * Create a pm_integer_t from a uint64_t with the given base. It is assumed that
+ * the pm_integer_t that the pointer refers to has already been zeroed.
+ */
+static void
+pm_integer_from_uint64(pm_integer_t *integer, uint64_t value, uint64_t base) {
+ if (value < base) {
+ integer->value = (uint32_t) value;
+ return;
+ }
+
+ size_t length = 0;
+ uint64_t length_value = value;
+ while (length_value > 0) {
+ length++;
+ length_value /= base;
+ }
+
+ uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * length);
+ if (values == NULL) return;
+
+ for (size_t value_index = 0; value_index < length; value_index++) {
+ values[value_index] = (uint32_t) (value % base);
+ value /= base;
+ }
+
+ integer->length = length;
+ integer->values = values;
+}
+
+/**
+ * Normalize a pm_integer_t.
+ * Leading (most significant) zero values will be removed. If the integer fits
+ * into a uint32_t, values is set to NULL, length is set to 0, and the value field will be used.
+ */
+static void
+pm_integer_normalize(pm_integer_t *integer) {
+ if (integer->values == NULL) {
+ return;
+ }
+
+ while (integer->length > 1 && integer->values[integer->length - 1] == 0) {
+ integer->length--;
+ }
+
+ if (integer->length > 1) {
+ return;
+ }
+
+ uint32_t value = integer->values[0];
+ bool negative = integer->negative && value != 0;
+
+ pm_integer_free(integer);
+ *integer = (pm_integer_t) { .values = NULL, .value = value, .length = 0, .negative = negative };
+}
+
+/**
+ * Convert the base of the integer.
+ * In practice, it converts from base 10**9 to base 1<<32 or from base 1<<32 to base 10**9.
+ */
+static void
+pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, uint64_t base_from, uint64_t base_to) {
+ size_t source_length;
+ const uint32_t *source_values;
+ INTEGER_EXTRACT(source, source_length, source_values)
+
+ size_t bigints_length = (source_length + 1) / 2;
+ assert(bigints_length > 0);
+
+ pm_integer_t *bigints = (pm_integer_t *) xcalloc(bigints_length, sizeof(pm_integer_t));
+ if (bigints == NULL) return;
+
+ for (size_t index = 0; index < source_length; index += 2) {
+ uint64_t value = source_values[index] + base_from * (index + 1 < source_length ? source_values[index + 1] : 0);
+ pm_integer_from_uint64(&bigints[index / 2], value, base_to);
+ }
+
+ pm_integer_t base = { 0 };
+ pm_integer_from_uint64(&base, base_from, base_to);
+
+ while (bigints_length > 1) {
+ pm_integer_t next_base;
+ karatsuba_multiply(&next_base, &base, &base, base_to);
+
+ pm_integer_free(&base);
+ base = next_base;
+
+ size_t next_length = (bigints_length + 1) / 2;
+ pm_integer_t *next_bigints = (pm_integer_t *) xcalloc(next_length, sizeof(pm_integer_t));
+
+ for (size_t bigints_index = 0; bigints_index < bigints_length; bigints_index += 2) {
+ if (bigints_index + 1 == bigints_length) {
+ next_bigints[bigints_index / 2] = bigints[bigints_index];
+ } else {
+ pm_integer_t multiplied = { 0 };
+ karatsuba_multiply(&multiplied, &base, &bigints[bigints_index + 1], base_to);
+
+ big_add(&next_bigints[bigints_index / 2], &bigints[bigints_index], &multiplied, base_to);
+ pm_integer_free(&bigints[bigints_index]);
+ pm_integer_free(&bigints[bigints_index + 1]);
+ pm_integer_free(&multiplied);
+ }
+ }
+
+ xfree(bigints);
+ bigints = next_bigints;
+ bigints_length = next_length;
+ }
+
+ *destination = bigints[0];
+ destination->negative = source->negative;
+ pm_integer_normalize(destination);
+
+ xfree(bigints);
+ pm_integer_free(&base);
+}
+
+#undef INTEGER_EXTRACT
+
+/**
+ * Convert digits to integer with the given power-of-two base.
+ */
+static void
+pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *digits, size_t digits_length) {
+ size_t bit = 1;
+ while (base > (uint32_t) (1 << bit)) bit++;
+
+ size_t length = (digits_length * bit + 31) / 32;
+ uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+
+ for (size_t digit_index = 0; digit_index < digits_length; digit_index++) {
+ size_t bit_position = bit * (digits_length - digit_index - 1);
+ uint32_t value = digits[digit_index];
+
+ size_t index = bit_position / 32;
+ size_t shift = bit_position % 32;
+
+ values[index] |= value << shift;
+ if (32 - shift < bit) values[index + 1] |= value >> (32 - shift);
+ }
+
+ while (length > 1 && values[length - 1] == 0) length--;
+ *integer = (pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false };
+ pm_integer_normalize(integer);
+}
+
+/**
+ * Convert decimal digits to pm_integer_t.
+ */
+static void
+pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) {
+ const size_t batch = 9;
+ size_t length = (digits_length + batch - 1) / batch;
+
+ uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+ uint32_t value = 0;
+
+ for (size_t digits_index = 0; digits_index < digits_length; digits_index++) {
+ value = value * 10 + digits[digits_index];
+
+ size_t reverse_index = digits_length - digits_index - 1;
+ if (reverse_index % batch == 0) {
+ values[reverse_index / batch] = value;
+ value = 0;
+ }
+ }
+
+ // Convert base from 10**9 to 1<<32.
+ pm_integer_convert_base(integer, &((pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
+ xfree(values);
+}
+
+/**
+ * Parse a large integer, one that does not fit into a uint32_t, from a string.
+ */
+static void
+pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
+ // Allocate an array to store digits.
+ uint8_t *digits = xmalloc(sizeof(uint8_t) * (size_t) (end - start));
+ size_t digits_length = 0;
+
+ for (; start < end; start++) {
+ if (*start == '_') continue;
+ digits[digits_length++] = pm_integer_parse_digit(*start);
+ }
+
+ // Construct pm_integer_t from the digits.
+ if (multiplier == 10) {
+ pm_integer_parse_decimal(integer, digits, digits_length);
+ } else {
+ pm_integer_parse_powof2(integer, multiplier, digits, digits_length);
+ }
+
+ xfree(digits);
+}
+
+/**
+ * Parse an integer from a string. This assumes that the format of the integer
+ * has already been validated, as internal validation checks are not performed
+ * here.
+ */
+void
+pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
+ // Ignore unary +. Unary - is parsed differently and will not end up here.
+ // Instead, it will modify the parsed integer later.
+ if (*start == '+') start++;
+
+ // Determine the multiplier from the base, and skip past any prefixes.
+ uint32_t multiplier = 10;
+ switch (base) {
+ case PM_INTEGER_BASE_DEFAULT:
+ while (*start == '0') start++; // 01 -> 1
+ break;
+ case PM_INTEGER_BASE_BINARY:
+ start += 2; // 0b
+ multiplier = 2;
+ break;
+ case PM_INTEGER_BASE_OCTAL:
+ start++; // 0
+ if (*start == '_' || *start == 'o' || *start == 'O') start++; // o
+ multiplier = 8;
+ break;
+ case PM_INTEGER_BASE_DECIMAL:
+ if (*start == '0' && (end - start) > 1) start += 2; // 0d
+ break;
+ case PM_INTEGER_BASE_HEXADECIMAL:
+ start += 2; // 0x
+ multiplier = 16;
+ break;
+ case PM_INTEGER_BASE_UNKNOWN:
+ if (*start == '0' && (end - start) > 1) {
+ switch (start[1]) {
+ case '_': start += 2; multiplier = 8; break;
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': start++; multiplier = 8; break;
+ case 'b': case 'B': start += 2; multiplier = 2; break;
+ case 'o': case 'O': start += 2; multiplier = 8; break;
+ case 'd': case 'D': start += 2; break;
+ case 'x': case 'X': start += 2; multiplier = 16; break;
+ default: assert(false && "unreachable"); break;
+ }
+ }
+ break;
+ }
+
+ // It's possible that we've consumed everything at this point if there is an
+ // invalid integer. If this is the case, we'll just return 0.
+ if (start >= end) return;
+
+ const uint8_t *cursor = start;
+ uint64_t value = (uint64_t) pm_integer_parse_digit(*cursor++);
+
+ for (; cursor < end; cursor++) {
+ if (*cursor == '_') continue;
+ value = value * multiplier + (uint64_t) pm_integer_parse_digit(*cursor);
+
+ if (value > UINT32_MAX) {
+ // If the integer is too large to fit into a single uint32_t, then
+ // we'll parse it as a big integer.
+ pm_integer_parse_big(integer, multiplier, start, end);
+ return;
+ }
+ }
+
+ integer->value = (uint32_t) value;
+}
+
+/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ */
+int
+pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
+ if (left->negative != right->negative) return left->negative ? -1 : 1;
+ int negative = left->negative ? -1 : 1;
+
+ if (left->values == NULL && right->values == NULL) {
+ if (left->value < right->value) return -1 * negative;
+ if (left->value > right->value) return 1 * negative;
+ return 0;
+ }
+
+ if (left->values == NULL || left->length < right->length) return -1 * negative;
+ if (right->values == NULL || left->length > right->length) return 1 * negative;
+
+ for (size_t index = 0; index < left->length; index++) {
+ size_t value_index = left->length - index - 1;
+ uint32_t left_value = left->values[value_index];
+ uint32_t right_value = right->values[value_index];
+
+ if (left_value < right_value) return -1 * negative;
+ if (left_value > right_value) return 1 * negative;
+ }
+
+ return 0;
+}
+
+/**
+ * Reduce a ratio of integers to its simplest form.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
+ // If either the numerator or denominator does not fit into a 32-bit integer,
+ // then this function is a no-op. In the future, we may consider reducing
+ // even the larger numbers, but for now we're going to keep it simple.
+ if (
+ // If the numerator doesn't fit into a 32-bit integer, return early.
+ numerator->length != 0 ||
+ // If the denominator doesn't fit into a 32-bit integer, return early.
+ denominator->length != 0 ||
+ // If the numerator is 0, then return early.
+ numerator->value == 0 ||
+ // If the denominator is 1, then return early.
+ denominator->value == 1
+ ) return;
+
+ // Find the greatest common divisor of the numerator and denominator.
+ uint32_t divisor = numerator->value;
+ uint32_t remainder = denominator->value;
+
+ while (remainder != 0) {
+ uint32_t temporary = remainder;
+ remainder = divisor % remainder;
+ divisor = temporary;
+ }
+
+ // Divide the numerator and denominator by the greatest common divisor.
+ numerator->value /= divisor;
+ denominator->value /= divisor;
+}
+
+/**
+ * Convert an integer to a decimal string.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
+ if (integer->negative) {
+ pm_buffer_append_byte(buffer, '-');
+ }
+
+ // If the integer fits into a single uint32_t, then we can just append the
+ // value directly to the buffer.
+ if (integer->values == NULL) {
+ pm_buffer_append_format(buffer, "%" PRIu32, integer->value);
+ return;
+ }
+
+ // If the integer is two uint32_t values, then we can | them together and
+ // append the result to the buffer.
+ if (integer->length == 2) {
+ const uint64_t value = ((uint64_t) integer->values[0]) | ((uint64_t) integer->values[1] << 32);
+ pm_buffer_append_format(buffer, "%" PRIu64, value);
+ return;
+ }
+
+ // Otherwise, first we'll convert the base from 1<<32 to 10**9.
+ pm_integer_t converted = { 0 };
+ pm_integer_convert_base(&converted, integer, (uint64_t) 1 << 32, 1000000000);
+
+ if (converted.values == NULL) {
+ pm_buffer_append_format(buffer, "%" PRIu32, converted.value);
+ pm_integer_free(&converted);
+ return;
+ }
+
+ // Allocate a buffer that we'll copy the decimal digits into.
+ size_t digits_length = converted.length * 9;
+ char *digits = xcalloc(digits_length, sizeof(char));
+ if (digits == NULL) return;
+
+ // Unpack each base 10**9 value into its nine decimal digits.
+ for (size_t value_index = 0; value_index < converted.length; value_index++) {
+ uint32_t value = converted.values[value_index];
+
+ for (size_t digit_index = 0; digit_index < 9; digit_index++) {
+ digits[digits_length - 9 * value_index - digit_index - 1] = (char) ('0' + value % 10);
+ value /= 10;
+ }
+ }
+
+ size_t start_offset = 0;
+ while (start_offset < digits_length - 1 && digits[start_offset] == '0') start_offset++;
+
+ // Finally, append the string to the buffer and free the digits.
+ pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset);
+ xfree(digits);
+ pm_integer_free(&converted);
+}
+
+/**
+ * Free the internal memory of an integer. This memory will only be allocated if
+ * the integer exceeds the size of a single uint32_t.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_integer_free(pm_integer_t *integer) {
+ if (integer->values) {
+ xfree(integer->values);
+ }
+}
diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h
new file mode 100644
index 0000000000..304665e620
--- /dev/null
+++ b/prism/util/pm_integer.h
@@ -0,0 +1,130 @@
+/**
+ * @file pm_integer.h
+ *
+ * This module provides functions for working with arbitrary-sized integers.
+ */
+#ifndef PRISM_NUMBER_H
+#define PRISM_NUMBER_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_buffer.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * A structure that represents an arbitrary-sized integer. An integer that fits
+ * into a uint32_t is stored inline in the value field; a larger integer is
+ * stored as a list of 32-bit words in the values field.
+ */
+typedef struct {
+ /**
+ * The number of allocated values. length is set to 0 if the integer fits
+ * into uint32_t.
+ */
+ size_t length;
+
+ /**
+ * List of 32-bit integers. Set to NULL if the integer fits into uint32_t.
+ * In the two-word case values[0] holds the low 32 bits and values[1] the
+ * high 32 bits; longer lists presumably follow the same
+ * least-significant-first order.
+ */
+ uint32_t *values;
+
+ /**
+ * Embedded value for a small integer. This value is set to 0 if the value
+ * does not fit into uint32_t.
+ */
+ uint32_t value;
+
+ /**
+ * Whether or not the integer is negative. It is stored this way so that a
+ * zeroed pm_integer_t is always positive zero.
+ */
+ bool negative;
+} pm_integer_t;
+
+/**
+ * An enum controlling the base of an integer. It is expected that the base is
+ * already known before parsing the integer, even though it could be derived
+ * from the string itself.
+ */
+typedef enum {
+ /** The default decimal base, with no prefix. Leading 0s will be ignored. */
+ PM_INTEGER_BASE_DEFAULT,
+
+ /** The binary base, indicated by a 0b or 0B prefix. */
+ PM_INTEGER_BASE_BINARY,
+
+ /** The octal base, indicated by a 0, 0o, or 0O prefix. */
+ PM_INTEGER_BASE_OCTAL,
+
+ /** The decimal base, indicated by a 0d, 0D, or empty prefix. */
+ PM_INTEGER_BASE_DECIMAL,
+
+ /** The hexadecimal base, indicated by a 0x or 0X prefix. */
+ PM_INTEGER_BASE_HEXADECIMAL,
+
+ /**
+ * An unknown base, in which case pm_integer_parse will derive it based on
+ * the content of the string. This is less efficient and does more
+ * comparisons, so if callers know the base ahead of time, they should use
+ * that instead.
+ */
+ PM_INTEGER_BASE_UNKNOWN
+} pm_integer_base_t;
+
+/**
+ * Parse an integer from a string. This assumes that the format of the integer
+ * has already been validated, as internal validation checks are not performed
+ * here.
+ *
+ * @param integer The integer to parse into.
+ * @param base The base of the integer.
+ * @param start The start of the string.
+ * @param end The end of the string.
+ */
+void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end);
+
+/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ *
+ * @param left The left integer to compare.
+ * @param right The right integer to compare.
+ * @return The result of the comparison.
+ */
+int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
+
+/**
+ * Reduce a ratio of integers to its simplest form.
+ *
+ * If either the numerator or denominator do not fit into a 32-bit integer, then
+ * this function is a no-op. In the future, we may consider reducing even the
+ * larger numbers, but for now we're going to keep it simple.
+ *
+ * @param numerator The numerator of the ratio.
+ * @param denominator The denominator of the ratio.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);
+
+/**
+ * Convert an integer to a decimal string.
+ *
+ * @param buffer The buffer to append the string to.
+ * @param integer The integer to convert to a string.
+ *
+ * \public \memberof pm_integer_t
+ */
+PRISM_EXPORTED_FUNCTION void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer);
+
+/**
+ * Free the internal memory of an integer. This memory will only be allocated if
+ * the integer exceeds the size of a single uint32_t.
+ *
+ * @param integer The integer to free.
+ *
+ * \public \memberof pm_integer_t
+ */
+PRISM_EXPORTED_FUNCTION void pm_integer_free(pm_integer_t *integer);
+
+#endif
diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c
index 62cfe47cfa..ad2294cd60 100644
--- a/prism/util/pm_list.c
+++ b/prism/util/pm_list.c
@@ -41,7 +41,7 @@ pm_list_free(pm_list_t *list) {
while (node != NULL) {
next = node->next;
- free(node);
+ xfree(node);
node = next;
}
diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h
index d29fe07c52..f544bb2943 100644
--- a/prism/util/pm_list.h
+++ b/prism/util/pm_list.h
@@ -33,7 +33,7 @@
* } pm_int_node_t;
*
* pm_list_t list = { 0 };
- * pm_int_node_t *node = malloc(sizeof(pm_int_node_t));
+ * pm_int_node_t *node = xmalloc(sizeof(pm_int_node_t));
* node->value = 5;
*
* pm_list_append(&list, &node->node);
@@ -68,6 +68,8 @@ typedef struct {
*
* @param list The list to check.
* @return True if the given list is empty, otherwise false.
+ *
+ * \public \memberof pm_list_t
*/
PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
@@ -76,6 +78,8 @@ PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
*
* @param list The list to check.
* @return The size of the list.
+ *
+ * \public \memberof pm_list_t
*/
PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list);
@@ -91,6 +95,8 @@ void pm_list_append(pm_list_t *list, pm_list_node_t *node);
* Deallocate the internal state of the given list.
*
* @param list The list to free.
+ *
+ * \public \memberof pm_list_t
*/
PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list);
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
index 32a4a050fe..8331618f54 100644
--- a/prism/util/pm_newline_list.c
+++ b/prism/util/pm_newline_list.c
@@ -6,7 +6,7 @@
*/
bool
pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
- list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
+ list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
if (list->offsets == NULL) return false;
list->start = start;
@@ -20,6 +20,14 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac
}
/**
+ * Clear out the newlines that have been appended to the list.
+ *
+ * The size is reset to 1 rather than 0, so the first entry of the offsets
+ * array is kept (presumably the offset of the start of the source -- confirm
+ * against pm_newline_list_init). The allocated offsets array and its capacity
+ * are left untouched.
+ */
+void
+pm_newline_list_clear(pm_newline_list_t *list) {
+ list->size = 1;
+}
+
+/**
* Append a new offset to the newline list. Returns true if the reallocation of
* the offsets succeeds (if one was necessary), otherwise returns false.
*/
@@ -29,10 +37,11 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
size_t *original_offsets = list->offsets;
list->capacity = (list->capacity * 3) / 2;
- list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
- memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
- free(original_offsets);
+ list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t));
if (list->offsets == NULL) return false;
+
+ memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
+ xfree(original_offsets);
}
assert(*cursor == '\n');
@@ -46,12 +55,41 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
}
/**
+ * Find the line that contains the given cursor by binary searching the
+ * recorded newline offsets. If the cursor does not sit exactly on a recorded
+ * offset, the line of the closest offset less than the cursor is returned.
+ * Lines are numbered starting from start_line.
+ */
+int32_t
+pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
+    assert(cursor >= list->start);
+    const size_t target = (size_t) (cursor - list->start);
+
+    size_t low = 0;
+    size_t high = list->size - 1;
+
+    while (low <= high) {
+        const size_t middle = low + (high - low) / 2;
+        const size_t candidate = list->offsets[middle];
+
+        if (candidate < target) {
+            low = middle + 1;
+        } else if (candidate > target) {
+            high = middle - 1;
+        } else {
+            // The cursor is exactly on a recorded offset.
+            return ((int32_t) middle) + start_line;
+        }
+    }
+
+    // No exact match: low is now one past the closest offset below the cursor.
+    return ((int32_t) low) + start_line - 1;
+}
+
+/**
* Returns the line and column of the given offset. If the offset is not in the
* list, the line and column of the closest offset less than the given offset
* are returned.
*/
pm_line_column_t
-pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor) {
+pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
assert(cursor >= list->start);
size_t offset = (size_t) (cursor - list->start);
@@ -62,7 +100,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
size_t mid = left + (right - left) / 2;
if (list->offsets[mid] == offset) {
- return ((pm_line_column_t) { mid, 0 });
+ return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 });
}
if (list->offsets[mid] < offset) {
@@ -72,7 +110,10 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
}
}
- return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
+ return ((pm_line_column_t) {
+ .line = ((int32_t) left) + start_line - 1,
+ .column = (uint32_t) (offset - list->offsets[left - 1])
+ });
}
/**
@@ -80,5 +121,5 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
*/
void
pm_newline_list_free(pm_newline_list_t *list) {
- free(list->offsets);
+ xfree(list->offsets);
}
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
index 181283644f..406abe8ba5 100644
--- a/prism/util/pm_newline_list.h
+++ b/prism/util/pm_newline_list.h
@@ -44,10 +44,10 @@ typedef struct {
*/
typedef struct {
/** The line number. */
- size_t line;
+ int32_t line;
/** The column number. */
- size_t column;
+ uint32_t column;
} pm_line_column_t;
/**
@@ -62,6 +62,14 @@ typedef struct {
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
/**
+ * Clear out the newlines that have been appended to the list.
+ *
+ * @param list The list to clear.
+ */
+void
+pm_newline_list_clear(pm_newline_list_t *list);
+
+/**
* Append a new offset to the newline list. Returns true if the reallocation of
* the offsets succeeds (if one was necessary), otherwise returns false.
*
@@ -73,15 +81,27 @@ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
/**
+ * Returns the line of the given offset. If the offset is not in the list, the
+ * line of the closest offset less than the given offset is returned.
+ *
+ * @param list The list to search.
+ * @param cursor A pointer to the offset to search for.
+ * @param start_line The line to start counting from.
+ * @return The line of the given offset.
+ */
+int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
+
+/**
* Returns the line and column of the given offset. If the offset is not in the
* list, the line and column of the closest offset less than the given offset
* are returned.
*
* @param list The list to search.
* @param cursor A pointer to the offset to search for.
+ * @param start_line The line to start counting from.
* @return The line and column of the given offset.
*/
-pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor);
+pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
/**
* Free the internal memory allocated for the newline list.
diff --git a/prism/util/pm_state_stack.c b/prism/util/pm_state_stack.c
deleted file mode 100644
index 2a424b4c03..0000000000
--- a/prism/util/pm_state_stack.c
+++ /dev/null
@@ -1,25 +0,0 @@
-#include "prism/util/pm_state_stack.h"
-
-/**
- * Pushes a value onto the stack.
- */
-void
-pm_state_stack_push(pm_state_stack_t *stack, bool value) {
- *stack = (*stack << 1) | (value & 1);
-}
-
-/**
- * Pops a value off the stack.
- */
-void
-pm_state_stack_pop(pm_state_stack_t *stack) {
- *stack >>= 1;
-}
-
-/**
- * Returns the value at the top of the stack.
- */
-bool
-pm_state_stack_p(pm_state_stack_t *stack) {
- return *stack & 1;
-}
diff --git a/prism/util/pm_state_stack.h b/prism/util/pm_state_stack.h
deleted file mode 100644
index 1ce57a2209..0000000000
--- a/prism/util/pm_state_stack.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * @file pm_state_stack.h
- *
- * A stack of boolean values.
- */
-#ifndef PRISM_STATE_STACK_H
-#define PRISM_STATE_STACK_H
-
-#include "prism/defines.h"
-
-#include <stdbool.h>
-#include <stdint.h>
-
-/**
- * A struct that represents a stack of boolean values.
- */
-typedef uint32_t pm_state_stack_t;
-
-/**
- * Pushes a value onto the stack.
- *
- * @param stack The stack to push the value onto.
- * @param value The value to push onto the stack.
- */
-void pm_state_stack_push(pm_state_stack_t *stack, bool value);
-
-/**
- * Pops a value off the stack.
- *
- * @param stack The stack to pop the value off of.
- */
-void pm_state_stack_pop(pm_state_stack_t *stack);
-
-/**
- * Returns the value at the top of the stack.
- *
- * @param stack The stack to get the value from.
- * @return The value at the top of the stack.
- */
-bool pm_state_stack_p(pm_state_stack_t *stack);
-
-#endif
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
index f4d3033a1b..a7493c468b 100644
--- a/prism/util/pm_string.c
+++ b/prism/util/pm_string.c
@@ -1,5 +1,7 @@
#include "prism/util/pm_string.h"
+static const uint8_t empty_source[] = "";
+
/**
* Returns the size of the pm_string_t struct. This is necessary to allocate the
* correct amount of memory in the FFI backend.
@@ -47,6 +49,62 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
};
}
+#ifdef _WIN32
+/**
+ * Represents a file handle on Windows, where the path will need to be freed
+ * when the file is closed.
+ */
+typedef struct {
+ /**
+ * The path to the file, which will become allocated memory. It holds the
+ * wide-character conversion of the filepath made by
+ * pm_string_file_handle_open, and is freed either by that function on
+ * failure or by pm_string_file_handle_close.
+ */
+ WCHAR *path;
+
+ /**
+ * The handle to the file, which will start as uninitialized memory. It is
+ * only valid after pm_string_file_handle_open has returned successfully.
+ */
+ HANDLE file;
+} pm_string_file_handle_t;
+
+/**
+ * Open the file named by the UTF-8 filepath parameter for reading on Windows.
+ * The path is first converted to wide characters so that it can be handed to
+ * the wide-character file APIs. On success the handle owns both the converted
+ * path and the open file. On failure the converted path has already been freed
+ * and the handle must not be closed.
+ */
+static pm_string_init_result_t
+pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) {
+    // Ask how many wide characters the converted path needs (including the
+    // terminator, since the input length is given as -1).
+    const int wide_length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
+    if (wide_length == 0) {
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Allocate the wide path and perform the actual conversion.
+    handle->path = xmalloc(sizeof(WCHAR) * ((size_t) wide_length));
+    const bool converted = (handle->path != NULL) && (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, wide_length) != 0);
+
+    if (!converted) {
+        xfree(handle->path);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
+    if (handle->file != INVALID_HANDLE_VALUE) {
+        return PM_STRING_INIT_SUCCESS;
+    }
+
+    // Opening failed. An access-denied error on a path that turns out to be a
+    // directory is reported as a directory error; anything else is generic.
+    pm_string_init_result_t failure = PM_STRING_INIT_ERROR_GENERIC;
+
+    if (GetLastError() == ERROR_ACCESS_DENIED) {
+        const DWORD attributes = GetFileAttributesW(handle->path);
+
+        if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+            failure = PM_STRING_INIT_ERROR_DIRECTORY;
+        }
+    }
+
+    xfree(handle->path);
+    return failure;
+}
+
+/**
+ * Close the file handle and free the path.
+ *
+ * This must only be called on a handle for which pm_string_file_handle_open
+ * returned PM_STRING_INIT_SUCCESS: on failure that function has already freed
+ * the path, and the file member does not hold an open file.
+ */
+static void
+pm_string_file_handle_close(pm_string_file_handle_t *handle) {
+ xfree(handle->path);
+ CloseHandle(handle->file);
+}
+#endif
+
/**
* Read the file indicated by the filepath parameter into source and load its
* contents and size into the given `pm_string_t`. The given `pm_string_t`
@@ -58,68 +116,65 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
* `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
* `mmap`, and on other POSIX systems we'll use `read`.
*/
-bool
+PRISM_EXPORTED_FUNCTION pm_string_init_result_t
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
#ifdef _WIN32
// Open the file for reading.
- HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-
- if (file == INVALID_HANDLE_VALUE) {
- perror("CreateFile failed");
- return false;
- }
+ pm_string_file_handle_t handle;
+ pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
+ if (result != PM_STRING_INIT_SUCCESS) return result;
// Get the file size.
- DWORD file_size = GetFileSize(file, NULL);
+ DWORD file_size = GetFileSize(handle.file, NULL);
if (file_size == INVALID_FILE_SIZE) {
- CloseHandle(file);
- perror("GetFileSize failed");
- return false;
+ pm_string_file_handle_close(&handle);
+ return PM_STRING_INIT_ERROR_GENERIC;
}
// If the file is empty, then we don't need to do anything else, we'll set
// the source to a constant empty string and return.
if (file_size == 0) {
- CloseHandle(file);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return true;
+ pm_string_file_handle_close(&handle);
+ *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
+ return PM_STRING_INIT_SUCCESS;
}
// Create a mapping of the file.
- HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
+ HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
if (mapping == NULL) {
- CloseHandle(file);
- perror("CreateFileMapping failed");
- return false;
+ pm_string_file_handle_close(&handle);
+ return PM_STRING_INIT_ERROR_GENERIC;
}
// Map the file into memory.
uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
CloseHandle(mapping);
- CloseHandle(file);
+ pm_string_file_handle_close(&handle);
if (source == NULL) {
- perror("MapViewOfFile failed");
- return false;
+ return PM_STRING_INIT_ERROR_GENERIC;
}
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
- return true;
-#else
+ return PM_STRING_INIT_SUCCESS;
+#elif defined(_POSIX_MAPPED_FILES)
// Open the file for reading
int fd = open(filepath, O_RDONLY);
if (fd == -1) {
- perror("open");
- return false;
+ return PM_STRING_INIT_ERROR_GENERIC;
}
// Stat the file to get the file size
struct stat sb;
if (fstat(fd, &sb) == -1) {
close(fd);
- perror("fstat");
- return false;
+ return PM_STRING_INIT_ERROR_GENERIC;
+ }
+
+ // Ensure it is a file and not a directory
+ if (S_ISDIR(sb.st_mode)) {
+ close(fd);
+ return PM_STRING_INIT_ERROR_DIRECTORY;
}
// mmap the file descriptor to virtually get the contents
@@ -128,33 +183,127 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
if (size == 0) {
close(fd);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return true;
+ *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
+ return PM_STRING_INIT_SUCCESS;
}
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
if (source == MAP_FAILED) {
- perror("Map failed");
- return false;
+ close(fd);
+ return PM_STRING_INIT_ERROR_GENERIC;
}
close(fd);
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
- return true;
+ return PM_STRING_INIT_SUCCESS;
+#else
+ return pm_string_file_init(string, filepath);
#endif
}
/**
- * Returns the memory size associated with the string.
+ * Read the file indicated by the filepath parameter into newly allocated
+ * memory and load its contents and size into the given `pm_string_t`. The
+ * given `pm_string_t` should be freed using `pm_string_free` when it is no
+ * longer used.
+ *
+ * Returns PM_STRING_INIT_SUCCESS on success, PM_STRING_INIT_ERROR_DIRECTORY
+ * when the path is detected to be a directory, and
+ * PM_STRING_INIT_ERROR_GENERIC for every other failure (including platforms
+ * without filesystem support). On failure the given string is not written to
+ * and every resource acquired by this function has been released.
*/
-size_t
-pm_string_memsize(const pm_string_t *string) {
- size_t size = sizeof(pm_string_t);
- if (string->type == PM_STRING_OWNED) {
- size += string->length;
+PRISM_EXPORTED_FUNCTION pm_string_init_result_t
+pm_string_file_init(pm_string_t *string, const char *filepath) {
+#ifdef _WIN32
+    // Open the file for reading.
+    pm_string_file_handle_t handle;
+    pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
+    if (result != PM_STRING_INIT_SUCCESS) return result;
+
+    // Get the file size.
+    DWORD file_size = GetFileSize(handle.file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
}
- return size;
+
+    // If the file is empty, then we don't need to do anything else, we'll set
+    // the source to a constant empty string and return.
+    if (file_size == 0) {
+        pm_string_file_handle_close(&handle);
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
+        return PM_STRING_INIT_SUCCESS;
+    }
+
+    // Create a buffer to read the file into.
+    uint8_t *source = xmalloc(file_size);
+    if (source == NULL) {
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Read the contents of the file. The buffer has to be released on failure,
+    // because ownership has not been handed to the string yet.
+    DWORD bytes_read;
+    if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
+        xfree(source);
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Check the number of bytes read
+    if (bytes_read != file_size) {
+        xfree(source);
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    pm_string_file_handle_close(&handle);
+    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
+    return PM_STRING_INIT_SUCCESS;
+#elif defined(PRISM_HAS_FILESYSTEM)
+    // Open the file for reading
+    int fd = open(filepath, O_RDONLY);
+    if (fd == -1) {
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Stat the file to get the file size
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Ensure it is a file and not a directory
+    if (S_ISDIR(sb.st_mode)) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_DIRECTORY;
+    }
+
+    // Check the size to see if it's empty
+    size_t length = (size_t) sb.st_size;
+    if (length == 0) {
+        close(fd);
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
+        return PM_STRING_INIT_SUCCESS;
+    }
+
+    uint8_t *source = xmalloc(length);
+    if (source == NULL) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // A single read() is allowed to return fewer bytes than requested, so keep
+    // reading until the buffer is full, the end of the file is reached, or an
+    // error occurs.
+    size_t total = 0;
+    while (total < length) {
+        long bytes_read = (long) read(fd, source + total, length - total);
+
+        if (bytes_read == -1) {
+            close(fd);
+            xfree(source);
+            return PM_STRING_INIT_ERROR_GENERIC;
+        }
+
+        // Reaching the end of the file early means that the file shrank after
+        // it was measured by fstat.
+        if (bytes_read == 0) break;
+
+        total += (size_t) bytes_read;
+    }
+
+    close(fd);
+
+    // Only expose the bytes that were actually read, so that the string never
+    // covers uninitialized memory.
+    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = total };
+    return PM_STRING_INIT_SUCCESS;
+#else
+    (void) string;
+    (void) filepath;
+    perror("pm_string_file_init is not implemented for this platform");
+    return PM_STRING_INIT_ERROR_GENERIC;
+#endif
}
/**
@@ -168,7 +317,7 @@ pm_string_ensure_owned(pm_string_t *string) {
size_t length = pm_string_length(string);
const uint8_t *source = pm_string_source(string);
- uint8_t *memory = malloc(length);
+ uint8_t *memory = xmalloc(length);
if (!memory) return;
pm_string_owned_init(string, memory, length);
@@ -176,6 +325,26 @@ pm_string_ensure_owned(pm_string_t *string) {
}
/**
+ * Order two strings by length first and by content second. A shorter string
+ * always sorts before a longer one, whatever the bytes are; strings of equal
+ * length are ordered by comparing their bytes. Returns 0 if the strings are
+ * equal, a negative number if the left string is less than the right string,
+ * and a positive number if the left string is greater than the right string.
+ */
+int
+pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
+    const size_t left_length = pm_string_length(left);
+    const size_t right_length = pm_string_length(right);
+
+    // Different lengths decide the ordering without looking at the bytes.
+    if (left_length != right_length) {
+        return (left_length < right_length) ? -1 : 1;
+    }
+
+    return memcmp(pm_string_source(left), pm_string_source(right), left_length);
+}
+
+/**
* Returns the length associated with the string.
*/
PRISM_EXPORTED_FUNCTION size_t
@@ -199,12 +368,14 @@ pm_string_free(pm_string_t *string) {
void *memory = (void *) string->source;
if (string->type == PM_STRING_OWNED) {
- free(memory);
+ xfree(memory);
+#ifdef PRISM_HAS_MMAP
} else if (string->type == PM_STRING_MAPPED && string->length) {
#if defined(_WIN32)
UnmapViewOfFile(memory);
-#else
+#elif defined(_POSIX_MAPPED_FILES)
munmap(memory, string->length);
#endif
+#endif /* PRISM_HAS_MMAP */
}
}
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
index ddb153784f..d8456ff294 100644
--- a/prism/util/pm_string.h
+++ b/prism/util/pm_string.h
@@ -9,6 +9,7 @@
#include "prism/defines.h"
#include <assert.h>
+#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
@@ -17,11 +18,13 @@
// The following headers are necessary to read files using demand paging.
#ifdef _WIN32
#include <windows.h>
-#else
+#elif defined(_POSIX_MAPPED_FILES)
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
-#include <unistd.h>
+#elif defined(PRISM_HAS_FILESYSTEM)
+#include <fcntl.h>
+#include <sys/stat.h>
#endif
/**
@@ -42,11 +45,13 @@ typedef struct {
/** This is a slice of another string, and should not be freed. */
PM_STRING_SHARED,
- /** This string owns its memory, and should be freed using `pm_string_free`. */
+ /** This string owns its memory, and should be freed using `pm_string_free()`. */
PM_STRING_OWNED,
- /** This string is a memory-mapped file, and should be freed using `pm_string_free`. */
+#ifdef PRISM_HAS_MMAP
+ /** This string is a memory-mapped file, and should be freed using `pm_string_free()`. */
PM_STRING_MAPPED
+#endif
} type;
} pm_string_t;
@@ -92,6 +97,26 @@ void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
/**
+ * Represents the result of calling pm_string_mapped_init or
+ * pm_string_file_init. We need this additional information because there is
+ * not a platform-agnostic way to indicate that the file that was attempted to
+ * be opened was a directory.
+ */
+typedef enum {
+ /** Indicates that the string was successfully initialized. */
+ PM_STRING_INIT_SUCCESS = 0,
+ /**
+ * Indicates a generic error from a pm_string_*_init function, where the
+ * type of error should be read from `errno` or `GetLastError()`.
+ */
+ PM_STRING_INIT_ERROR_GENERIC = 1,
+ /**
+ * Indicates that the file that was attempted to be opened was a directory.
+ */
+ PM_STRING_INIT_ERROR_DIRECTORY = 2
+} pm_string_init_result_t;
+
+/**
* Read the file indicated by the filepath parameter into source and load its
* contents and size into the given `pm_string_t`. The given `pm_string_t`
* should be freed using `pm_string_free` when it is no longer used.
@@ -104,17 +129,24 @@ void pm_string_constant_init(pm_string_t *string, const char *source, size_t len
*
* @param string The string to initialize.
* @param filepath The filepath to read.
- * @return Whether or not the file was successfully mapped.
+ * @return The success of the read, indicated by the value of the enum.
+ *
+ * \public \memberof pm_string_t
*/
-PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
+PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath);
/**
- * Returns the memory size associated with the string.
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
*
- * @param string The string to get the memory size of.
- * @return The size of the memory associated with the string.
+ * @param string The string to initialize.
+ * @param filepath The filepath to read.
+ * @return The success of the read, indicated by the value of the enum.
+ *
+ * \public \memberof pm_string_t
*/
-size_t pm_string_memsize(const pm_string_t *string);
+PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath);
/**
* Ensure the string is owned. If it is not, then reinitialize it as owned and
@@ -125,10 +157,24 @@ size_t pm_string_memsize(const pm_string_t *string);
void pm_string_ensure_owned(pm_string_t *string);
/**
+ * Compare the underlying lengths and bytes of two strings. Returns 0 if the
+ * strings are equal, a negative number if the left string is less than the
+ * right string, and a positive number if the left string is greater than the
+ * right string.
+ *
+ * @param left The left string to compare.
+ * @param right The right string to compare.
+ * @return The comparison result.
+ */
+int pm_string_compare(const pm_string_t *left, const pm_string_t *right);
+
+/**
* Returns the length associated with the string.
*
* @param string The string to get the length of.
* @return The length of the string.
+ *
+ * \public \memberof pm_string_t
*/
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
@@ -137,6 +183,8 @@ PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
*
* @param string The string to get the start pointer of.
* @return The start pointer of the string.
+ *
+ * \public \memberof pm_string_t
*/
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
@@ -144,6 +192,8 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *stri
* Free the associated memory of the given string.
*
* @param string The string to free.
+ *
+ * \public \memberof pm_string_t
*/
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
diff --git a/prism/util/pm_string_list.c b/prism/util/pm_string_list.c
deleted file mode 100644
index d49e4ed734..0000000000
--- a/prism/util/pm_string_list.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include "prism/util/pm_string_list.h"
-
-/**
- * Append a pm_string_t to the given string list.
- */
-void
-pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string) {
- if (string_list->length + 1 > string_list->capacity) {
- if (string_list->capacity == 0) {
- string_list->capacity = 1;
- } else {
- string_list->capacity *= 2;
- }
-
- string_list->strings = realloc(string_list->strings, string_list->capacity * sizeof(pm_string_t));
- if (string_list->strings == NULL) abort();
- }
-
- string_list->strings[string_list->length++] = *string;
-}
-
-/**
- * Free the memory associated with the string list
- */
-void
-pm_string_list_free(pm_string_list_t *string_list) {
- free(string_list->strings);
-}
diff --git a/prism/util/pm_string_list.h b/prism/util/pm_string_list.h
deleted file mode 100644
index 0d406cc5d8..0000000000
--- a/prism/util/pm_string_list.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * @file pm_string_list.h
- *
- * A list of strings.
- */
-#ifndef PRISM_STRING_LIST_H
-#define PRISM_STRING_LIST_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_string.h"
-
-#include <stddef.h>
-#include <stdlib.h>
-
-/**
- * A list of strings.
- */
-typedef struct {
- /** The length of the string list. */
- size_t length;
-
- /** The capacity of the string list that has been allocated. */
- size_t capacity;
-
- /** A pointer to the start of the string list. */
- pm_string_t *strings;
-} pm_string_list_t;
-
-/**
- * Append a pm_string_t to the given string list.
- *
- * @param string_list The string list to append to.
- * @param string The string to append.
- */
-void pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string);
-
-/**
- * Free the memory associated with the string list.
- *
- * @param string_list The string list to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_string_list_free(pm_string_list_t *string_list);
-
-#endif
diff --git a/prism/util/pm_strncasecmp.c b/prism/util/pm_strncasecmp.c
index 2240bf8110..3f58421554 100644
--- a/prism/util/pm_strncasecmp.c
+++ b/prism/util/pm_strncasecmp.c
@@ -1,6 +1,18 @@
#include "prism/util/pm_strncasecmp.h"
/**
+ * Lowercase a character without consulting the locale, unlike `tolower(3)`.
+ * Only the ASCII letters 'A' through 'Z' are mapped to their lowercase
+ * counterparts; every other value is returned unchanged.
+ */
+static inline int
+pm_tolower(int c)
+{
+    // ASCII uppercase and lowercase letters differ only in the 0x20 bit.
+    return ('A' <= c && c <= 'Z') ? (c | 0x20) : c;
+}
+
+/**
* Compare two strings, ignoring case, up to the given length. Returns 0 if the
* strings are equal, a negative number if string1 is less than string2, or a
* positive number if string1 is greater than string2.
@@ -16,7 +28,7 @@ pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
while (offset < length && string1[offset] != '\0') {
if (string2[offset] == '\0') return string1[offset];
- if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
+ if ((difference = pm_tolower(string1[offset]) - pm_tolower(string2[offset])) != 0) return difference;
offset++;
}
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
index 115eba1fd2..916a4cc3fd 100644
--- a/prism/util/pm_strpbrk.c
+++ b/prism/util/pm_strpbrk.c
@@ -1,10 +1,39 @@
#include "prism/util/pm_strpbrk.h"
/**
- * This is the slow path that does care about the encoding.
+ * Add an invalid multibyte character error to the parser.
+ *
+ * @param parser The parser whose error list receives the diagnostic.
+ * @param start The start of the range covered by the diagnostic. The byte at
+ * this position is also passed as the format argument of the message.
+ * @param end The end of the range covered by the diagnostic.
+ */
+static inline void
+pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+ pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
+}
+
+/**
+ * Set the explicit encoding for the parser to the current encoding.
+ *
+ * If an explicit encoding has already been recorded and it is UTF-8 while the
+ * parser's current encoding is a different one, a mixed encoding error is
+ * added covering the bytes from `source` up to `source + width`. Afterwards
+ * the explicit encoding is set to the parser's current encoding.
+ *
+ * @param parser The parser whose explicit encoding is updated.
+ * @param source The start of the range reported if the encodings conflict.
+ * @param width The length in bytes of the range reported if the encodings
+ * conflict.
+ */
+static inline void
+pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
+ if (parser->explicit_encoding != NULL) {
+ if (parser->explicit_encoding == parser->encoding) {
+ // Okay, we already locked to this encoding.
+ } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ // Not okay, we already found a Unicode escape sequence and this
+ // conflicts.
+ pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+ } else {
+ // Should not be anything else.
+ assert(false && "unreachable");
+ }
+ }
+
+ parser->explicit_encoding = parser->encoding;
+}
+
+/**
+ * This is the default path, used while the parser's encoding has not been
+ * changed, so the source is scanned as UTF-8.
*/
static inline const uint8_t *
-pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
+pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
size_t index = 0;
while (index < maximum) {
@@ -12,22 +41,39 @@ pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const ui
return source + index;
}
- size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
- if (width == 0) {
- return NULL;
- }
+ if (source[index] < 0x80) {
+ index++;
+ } else {
+ size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
+
+ if (width > 0) {
+ index += width;
+ } else if (!validate) {
+ index++;
+ } else {
+ // At this point we know we have an invalid multibyte character.
+ // We'll walk forward as far as we can until we find the next
+ // valid character so that we don't spam the user with a ton of
+ // the same kind of error.
+ const size_t start = index;
- index += width;
+ do {
+ index++;
+ } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+ pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+ }
+ }
}
return NULL;
}
/**
- * This is the fast path that does not care about the encoding.
+ * This is the path when the encoding is ASCII-8BIT.
*/
static inline const uint8_t *
-pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
+pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
size_t index = 0;
while (index < maximum) {
@@ -35,6 +81,7 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
return source + index;
}
+ if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
index++;
}
@@ -42,6 +89,89 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
}
/**
+ * This is the slow path that does care about the encoding.
+ *
+ * Bytes below 0x80 are stepped over one at a time. Any other byte is measured
+ * with the parser's encoding so that a whole multibyte character is skipped
+ * at once; this keeps a trailing byte that happens to be in the charset from
+ * being reported as a match. Note that because `strchr` also matches the
+ * terminating NUL of `charset`, a NUL byte in `source` always counts as a
+ * match.
+ *
+ * @param parser The parser providing the encoding and receiving diagnostics.
+ * @param source The bytes to search.
+ * @param charset The NUL-terminated set of bytes to search for.
+ * @param maximum The maximum number of bytes to search.
+ * @param validate Whether to record the explicit encoding and report invalid
+ * multibyte characters while scanning.
+ * @return A pointer to the first byte of `source` that is in `charset`, or
+ * NULL if there is none within `maximum` bytes.
+ */
+static inline const uint8_t *
+pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+
+            // Report against the character that was just measured rather
+            // than the start of the buffer, so that a mixed encoding error
+            // points at the bytes that caused it.
+            if (validate) pm_strpbrk_explicit_encoding_set(parser, source + index, width);
+
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * This is the fast path that does not care about the encoding because we know
+ * the encoding only supports single-byte characters.
+ *
+ * When `validate` is false every non-matching byte is simply stepped over.
+ * When it is true, bytes of 0x80 and above are additionally checked against
+ * the parser's encoding so that the explicit encoding can be recorded and
+ * invalid bytes can be reported. Note that because `strchr` also matches the
+ * terminating NUL of `charset`, a NUL byte in `source` always counts as a
+ * match.
+ *
+ * @param parser The parser providing the encoding and receiving diagnostics.
+ * @param source The bytes to search.
+ * @param charset The NUL-terminated set of bytes to search for.
+ * @param maximum The maximum number of bytes to search.
+ * @param validate Whether to record the explicit encoding and report invalid
+ * characters while scanning.
+ * @return A pointer to the first byte of `source` that is in `charset`, or
+ * NULL if there is none within `maximum` bytes.
+ */
+static inline const uint8_t *
+pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80 || !validate) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+
+            // Report against the character that was just measured rather
+            // than the start of the buffer, so that a mixed encoding error
+            // points at the bytes that caused it.
+            pm_strpbrk_explicit_encoding_set(parser, source + index, width);
+
+            if (width > 0) {
+                index += width;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
* Here we have rolled our own version of strpbrk. The standard library strpbrk
* has undefined behavior when the source string is not null-terminated. We want
* to support strings that are not null-terminated because pm_parse does not
@@ -57,16 +187,20 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
*
* Finally, we want to support encodings wherein the charset could contain
* characters that are trailing bytes of multi-byte characters. For example, in
- * Shift-JIS, the backslash character can be a trailing byte. In that case we
+ * Shift_JIS, the backslash character can be a trailing byte. In that case we
* need to take a slower path and iterate one multi-byte character at a time.
*/
const uint8_t *
-pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
+pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
if (length <= 0) {
return NULL;
- } else if (parser->encoding_changed && parser->encoding->multibyte) {
- return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
+ } else if (!parser->encoding_changed) {
+ return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
+ } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+ return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
+ } else if (parser->encoding->multibyte) {
+ return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
} else {
- return pm_strpbrk_single_byte(source, charset, (size_t) length);
+ return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
}
}
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
index c1cf0d54db..f387bd5782 100644
--- a/prism/util/pm_strpbrk.h
+++ b/prism/util/pm_strpbrk.h
@@ -7,6 +7,7 @@
#define PRISM_STRPBRK_H
#include "prism/defines.h"
+#include "prism/diagnostic.h"
#include "prism/parser.h"
#include <stddef.h>
@@ -35,9 +36,11 @@
* @param source The source to search.
* @param charset The charset to search for.
* @param length The maximum number of bytes to search.
+ * @param validate Whether to validate that the source string is valid in the
+ * current encoding of the parser.
* @return A pointer to the first character in the source string that is in the
* charset, or NULL if no such character exists.
*/
-const uint8_t * pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
+const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
#endif
diff --git a/prism/version.h b/prism/version.h
index 1472c58be6..0ef7435c17 100644
--- a/prism/version.h
+++ b/prism/version.h
@@ -9,12 +9,12 @@
/**
* The major version of the Prism library as an int.
*/
-#define PRISM_VERSION_MAJOR 0
+#define PRISM_VERSION_MAJOR 1
/**
* The minor version of the Prism library as an int.
*/
-#define PRISM_VERSION_MINOR 19
+#define PRISM_VERSION_MINOR 8
/**
* The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
/**
* The version of the Prism library as a constant string.
*/
-#define PRISM_VERSION "0.19.0"
+#define PRISM_VERSION "1.8.0"
#endif