diff options
author | Kevin Newton <kddnewton@gmail.com> | 2024-03-04 09:27:18 -0500 |
---|---|---|
committer | git <svn-admin@ruby-lang.org> | 2024-03-04 14:39:52 +0000 |
commit | 5856ea3fd16437279cb0a2e137e4a2b58fc655cb (patch) | |
tree | f2da33982d983ea8a616fbf8569bf72beed725f0 /lib/prism/translation/parser | |
parent | 2c787bf90ff4176970f6dea331401e8246fdde95 (diff) |
[ruby/prism] Fix up some minor parser incompatibilities
https://github.com/ruby/prism/commit/c6c771d1fa
Diffstat (limited to 'lib/prism/translation/parser')
-rw-r--r-- | lib/prism/translation/parser/lexer.rb | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 5c993cfab8..b710b1981b 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -177,12 +177,23 @@ module Prism WORDS_SEP: :tSPACE } - private_constant :TYPES + # These constants represent flags in our lex state. We really, really + # don't want to be using them and we really, really don't want to be + # exposing them as part of our public API. Unfortunately, we don't have + # another way of matching the exact tokens that the parser gem expects + # without them. We should find another way to do this, but in the + # meantime we'll hide them from the documentation and mark them as + # private constants. + EXPR_BEG = 0x1 # :nodoc: + EXPR_LABEL = 0x400 # :nodoc: + + private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL # The Parser::Source::Buffer that the tokens were lexed from. attr_reader :source_buffer - # An array of prism tokens that we lexed. + # An array of tuples that contain prism tokens and their associated lex + # state when they were lexed. attr_reader :lexed # A hash that maps offsets in bytes to offsets in characters. @@ -205,9 +216,9 @@ module Prism index = 0 while index < lexed.length - token, = lexed[index] + token, state = lexed[index] index += 1 - next if token.type == :IGNORED_NEWLINE || token.type == :EOF + next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type) type = TYPES.fetch(token.type) value = token.value @@ -218,13 +229,13 @@ module Prism value.delete_prefix!("?") when :tCOMMENT if token.type == :EMBDOC_BEGIN - until (next_token = lexed[index]) && next_token.type == :EMBDOC_END + until (next_token = lexed[index][0]) && next_token.type == :EMBDOC_END value += next_token.value index += 1 end value += next_token.value - location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset]) + location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset]) index += 1 else value.chomp! @@ -247,6 +258,8 @@ module Prism value.chomp!(":") when :tLABEL_END value.chomp!(":") + when :tLCURLY + type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL when :tNTH_REF value = Integer(value.delete_prefix("$")) when :tOP_ASGN @@ -256,13 +269,13 @@ module Prism when :tSPACE value = nil when :tSTRING_BEG - if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END + if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END next_location = token.location.join(next_token.location) type = :tSTRING value = "" location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) index += 1 - elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END + elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END next_location = token.location.join(next_next_token.location) type = :tSTRING value = next_token.value @@ -280,7 +293,7 @@ module Prism location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1]) end when :tSYMBEG - if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR + if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR next_location = token.location.join(next_token.location) type = :tSYMBOL value = next_token.value |