summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEarlopain <14981592+Earlopain@users.noreply.github.com>2025-03-11 09:05:21 +0100
committergit <svn-admin@ruby-lang.org>2025-03-13 13:52:45 +0000
commit67e6ccb23fd910b70b0d690adcb56395778c9f2a (patch)
tree5dc120eb6ed80fc080e27aa8bbae5237e64e4bcf
parentee1f39ef882e7ce175794e6286c0dcafba0bfa35 (diff)
[ruby/prism] Optimize array inclusion checks in the parser translator
I see `Array.include?` as 2.4% runtime. Probably because of `LPAREN_CONVERSION_TOKEN_TYPES` but the others will be faster as well. Also remove some inline array checks. They are specifically optimized in Ruby since 3.4, but for now prism is for >= 2.7 https://github.com/ruby/prism/commit/ca9500a3fc
-rw-r--r--lib/prism/translation/parser/lexer.rb18
1 files changed, 10 insertions, 8 deletions
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 1fa2723f03..0d247be117 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -1,5 +1,6 @@
# frozen_string_literal: true
+require "set"
require "strscan"
module Prism
@@ -8,16 +9,17 @@ module Prism
# Accepts a list of prism tokens and converts them into the expected
# format for the parser gem.
class Lexer
+ # These tokens are always skipped
+ TYPES_ALWAYS_SKIP = %i[IGNORED_NEWLINE __END__ EOF].to_set
+ private_constant :TYPES_ALWAYS_SKIP
+
# The direct translating of types between the two lexers.
TYPES = {
# These tokens should never appear in the output of the lexer.
- EOF: nil,
MISSING: nil,
NOT_PROVIDED: nil,
- IGNORED_NEWLINE: nil,
EMBDOC_END: nil,
EMBDOC_LINE: nil,
- __END__: nil,
# These tokens have more or less direct mappings.
AMPERSAND: :tAMPER2,
@@ -193,18 +195,18 @@ module Prism
#
# NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
# instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
- LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]
+ LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG].to_set
# The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
# The following token types are listed as those classified as `tLPAREN`.
LPAREN_CONVERSION_TOKEN_TYPES = [
:kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
:tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
- ]
+ ].to_set
# Types of tokens that are allowed to continue a method call with comments in-between.
# For these, the parser gem doesn't emit a newline token after the last comment.
- COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT]
+ COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT].to_set
private_constant :COMMENT_CONTINUATION_TYPES
# Heredocs are complex and require us to keep track of a bit of info to refer to later
@@ -251,7 +253,7 @@ module Prism
while index < length
token, state = lexed[index]
index += 1
- next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
+ next if TYPES_ALWAYS_SKIP.include?(token.type)
type = TYPES.fetch(token.type)
value = token.value
@@ -342,7 +344,7 @@ module Prism
when :tSTRING_BEG
next_token = lexed[index][0]
next_next_token = lexed[index + 1][0]
- basic_quotes = ["\"", "'"].include?(value)
+ basic_quotes = value == '"' || value == "'"
if basic_quotes && next_token&.type == :STRING_END
next_location = token.location.join(next_token.location)