From d8601621edcf29e3323b90dcf04b774edd9fb45e Mon Sep 17 00:00:00 2001 From: yui-knk Date: Fri, 23 Sep 2022 22:40:02 +0900 Subject: Enhance keep_tokens option for RubyVM::AbstractSyntaxTree parsing methods Implementation for Language Server Protocol (LSP) sometimes needs token information. For example both `m(1)` and `m(1, )` has same AST structure other than node locations then it's impossible to check the existence of `,` from AST. However in later case, it might be better to suggest variables list for the second argument. Token information is important for such case. This commit adds these methods. * Add `keep_tokens` option for `RubyVM::AbstractSyntaxTree.parse`, `.parse_file` and `.of` * Add `RubyVM::AbstractSyntaxTree::Node#tokens` which returns tokens for the node including tokens for descendants nodes. * Add `RubyVM::AbstractSyntaxTree::Node#all_tokens` which returns all tokens for the input script regardless the receiver node. [Feature #19070] Impacts on memory usage and performance are below: Memory usage: ``` $ cat test.rb root = RubyVM::AbstractSyntaxTree.parse_file(File.expand_path('../test/ruby/test_keyword.rb', __FILE__), keep_tokens: true) $ /usr/bin/time -f %Mkb /usr/local/bin/ruby -v ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux] 11408kb # keep_tokens :false $ /usr/bin/time -f %Mkb /usr/local/bin/ruby test.rb 17508kb # keep_tokens :true $ /usr/bin/time -f %Mkb /usr/local/bin/ruby test.rb 30960kb ``` Performance: ``` $ cat ../ast_keep_tokens.yml prelude: | src = <<~SRC module M class C def m1(a, b) 1 + a + b end end end SRC benchmark: without_keep_tokens: | RubyVM::AbstractSyntaxTree.parse(src, keep_tokens: false) with_keep_tokens: | RubyVM::AbstractSyntaxTree.parse(src, keep_tokens: true) $ make benchmark COMPARE_RUBY="./ruby" ARGS=../ast_keep_tokens.yml /home/kaneko.y/.rbenv/shims/ruby --disable=gems -rrubygems -I../benchmark/lib ../benchmark/benchmark-driver/exe/benchmark-driver \ --executables="compare-ruby::./ruby -I.ext/common --disable-gem" \ --executables="built-ruby::./miniruby -I../lib -I. -I.ext/common ../tool/runruby.rb --extout=.ext -- --disable-gems --disable-gem" \ --output=markdown --output-compare -v ../ast_keep_tokens.yml compare-ruby: ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux] built-ruby: ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux] warming up.. | |compare-ruby|built-ruby| |:--------------------|-----------:|---------:| |without_keep_tokens | 21.659k| 21.303k| | | 1.02x| -| |with_keep_tokens | 6.220k| 5.691k| | | 1.09x| -| ``` --- test/ruby/test_ast.rb | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'test/ruby') diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb index acf6722bb7..c3de36b4fb 100644 --- a/test/ruby/test_ast.rb +++ b/test/ruby/test_ast.rb @@ -132,6 +132,34 @@ class TestAst < Test::Unit::TestCase end end + Dir.glob("test/**/*.rb", base: SRCDIR).each do |path| + define_method("test_all_tokens:#{path}") do + node = RubyVM::AbstractSyntaxTree.parse_file("#{SRCDIR}/#{path}", keep_tokens: true) + tokens = node.all_tokens.sort_by { [_1.last[0], _1.last[1]] } + tokens_bytes = tokens.map { _1[2]}.join.bytes + source_bytes = File.read("#{SRCDIR}/#{path}").bytes + + assert_equal(source_bytes, tokens_bytes) + + (tokens.count - 1).times do |i| + token_0 = tokens[i] + token_1 = tokens[i + 1] + end_pos = token_0.last[2..3] + beg_pos = token_1.last[0..1] + + if end_pos[0] == beg_pos[0] + # When both tokens are same line, column should be consecutives + assert_equal(beg_pos[1], end_pos[1], "#{token_0}. #{token_1}") + else + # Line should be next + assert_equal(beg_pos[0], end_pos[0] + 1, "#{token_0}. #{token_1}") + # It should be on the beginning of the line + assert_equal(0, beg_pos[1], "#{token_0}. #{token_1}") + end + end + end + end + private def parse(src) EnvUtil.suppress_warning { RubyVM::AbstractSyntaxTree.parse(src) @@ -705,11 +733,11 @@ dummy a = 1 else STR - (SCOPE@1:0-3:5 + (SCOPE@1:0-3:4 tbl: [:a] args: nil body: - (IF@1:0-3:5 (VCALL@1:3-1:7 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1)) + (IF@1:0-3:4 (VCALL@1:3-1:7 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1)) (BEGIN@3:4-3:4 nil))) EXP end @@ -732,11 +760,11 @@ dummy a = 1 else STR - (SCOPE@1:0-3:5 + (SCOPE@1:0-3:4 tbl: [:a] args: nil body: - (UNLESS@1:0-3:5 (VCALL@1:7-1:11 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1)) + (UNLESS@1:0-3:4 (VCALL@1:7-1:11 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1)) (BEGIN@3:4-3:4 nil))) EXP end -- cgit v1.2.3