diff options
| author | Earlopain <14981592+Earlopain@users.noreply.github.com> | 2025-03-11 09:05:21 +0100 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2025-03-13 13:52:45 +0000 |
| commit | 67e6ccb23fd910b70b0d690adcb56395778c9f2a (patch) | |
| tree | 5dc120eb6ed80fc080e27aa8bbae5237e64e4bcf | |
| parent | ee1f39ef882e7ce175794e6286c0dcafba0bfa35 (diff) | |
[ruby/prism] Optimize array inclusion checks in the parser translator
`Array#include?` shows up as 2.4% of runtime, probably because of `LPAREN_CONVERSION_TOKEN_TYPES`, but
the other inclusion checks will be faster as well.
Also remove some inline array checks. Ruby has specifically optimized these since 3.4, but for now prism supports Ruby >= 2.7.
https://github.com/ruby/prism/commit/ca9500a3fc
| -rw-r--r-- | lib/prism/translation/parser/lexer.rb | 18 |
1 files changed, 10 insertions, 8 deletions
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 1fa2723f03..0d247be117 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "set" require "strscan" module Prism @@ -8,16 +9,17 @@ module Prism # Accepts a list of prism tokens and converts them into the expected # format for the parser gem. class Lexer + # These tokens are always skipped + TYPES_ALWAYS_SKIP = %i[IGNORED_NEWLINE __END__ EOF].to_set + private_constant :TYPES_ALWAYS_SKIP + # The direct translating of types between the two lexers. TYPES = { # These tokens should never appear in the output of the lexer. - EOF: nil, MISSING: nil, NOT_PROVIDED: nil, - IGNORED_NEWLINE: nil, EMBDOC_END: nil, EMBDOC_LINE: nil, - __END__: nil, # These tokens have more or less direct mappings. AMPERSAND: :tAMPER2, @@ -193,18 +195,18 @@ module Prism # # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046 - LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG] + LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG].to_set # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem. # The following token types are listed as those classified as `tLPAREN`. LPAREN_CONVERSION_TOKEN_TYPES = [ :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3, :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS - ] + ].to_set # Types of tokens that are allowed to continue a method call with comments in-between. # For these, the parser gem doesn't emit a newline token after the last comment. 
- COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT] + COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT].to_set private_constant :COMMENT_CONTINUATION_TYPES # Heredocs are complex and require us to keep track of a bit of info to refer to later @@ -251,7 +253,7 @@ module Prism while index < length token, state = lexed[index] index += 1 - next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type) + next if TYPES_ALWAYS_SKIP.include?(token.type) type = TYPES.fetch(token.type) value = token.value @@ -342,7 +344,7 @@ module Prism when :tSTRING_BEG next_token = lexed[index][0] next_next_token = lexed[index + 1][0] - basic_quotes = ["\"", "'"].include?(value) + basic_quotes = value == '"' || value == "'" if basic_quotes && next_token&.type == :STRING_END next_location = token.location.join(next_token.location) |
