diff options
author | yui-knk <spiketeika@gmail.com> | 2022-09-23 22:40:02 +0900 |
---|---|---|
committer | Yuichiro Kaneko <spiketeika@gmail.com> | 2022-11-21 09:01:34 +0900 |
commit | d8601621edcf29e3323b90dcf04b774edd9fb45e (patch) | |
tree | fd9b08ca7008ce5ad7f8acafd83accff2347697f /ast.c | |
parent | bbc4cf5f76c0e17e645ba81ae0b3625c8cbfc4c4 (diff) |
Enhance keep_tokens option for RubyVM::AbstractSyntaxTree parsing methods
Implementation for Language Server Protocol (LSP) sometimes needs token information.
For example both `m(1)` and `m(1, )` has same AST structure other than node locations
then it's impossible to check the existence of `,` from AST. However in later case,
it might be better to suggest variables list for the second argument.
Token information is important for such case.
This commit adds these methods.
* Add `keep_tokens` option for `RubyVM::AbstractSyntaxTree.parse`, `.parse_file` and `.of`
* Add `RubyVM::AbstractSyntaxTree::Node#tokens` which returns tokens for the node including tokens for descendants nodes.
* Add `RubyVM::AbstractSyntaxTree::Node#all_tokens` which returns all tokens for the input script regardless the receiver node.
[Feature #19070]
Impacts on memory usage and performance are below:
Memory usage:
```
$ cat test.rb
root = RubyVM::AbstractSyntaxTree.parse_file(File.expand_path('../test/ruby/test_keyword.rb', __FILE__), keep_tokens: true)
$ /usr/bin/time -f %Mkb /usr/local/bin/ruby -v
ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux]
11408kb
# keep_tokens :false
$ /usr/bin/time -f %Mkb /usr/local/bin/ruby test.rb
17508kb
# keep_tokens :true
$ /usr/bin/time -f %Mkb /usr/local/bin/ruby test.rb
30960kb
```
Performance:
```
$ cat ../ast_keep_tokens.yml
prelude: |
src = <<~SRC
module M
class C
def m1(a, b)
1 + a + b
end
end
end
SRC
benchmark:
without_keep_tokens: |
RubyVM::AbstractSyntaxTree.parse(src, keep_tokens: false)
with_keep_tokens: |
RubyVM::AbstractSyntaxTree.parse(src, keep_tokens: true)
$ make benchmark COMPARE_RUBY="./ruby" ARGS=../ast_keep_tokens.yml
/home/kaneko.y/.rbenv/shims/ruby --disable=gems -rrubygems -I../benchmark/lib ../benchmark/benchmark-driver/exe/benchmark-driver \
--executables="compare-ruby::./ruby -I.ext/common --disable-gem" \
--executables="built-ruby::./miniruby -I../lib -I. -I.ext/common ../tool/runruby.rb --extout=.ext -- --disable-gems --disable-gem" \
--output=markdown --output-compare -v ../ast_keep_tokens.yml
compare-ruby: ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux]
built-ruby: ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux]
warming up..
| |compare-ruby|built-ruby|
|:--------------------|-----------:|---------:|
|without_keep_tokens | 21.659k| 21.303k|
| | 1.02x| -|
|with_keep_tokens | 6.220k| 5.691k|
| | 1.09x| -|
```
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/6770
Diffstat (limited to 'ast.c')
-rw-r--r-- | ast.c | 38 |
1 files changed, 25 insertions, 13 deletions
@@ -64,8 +64,8 @@ ast_new_internal(rb_ast_t *ast, const NODE *node) return obj; } -static VALUE rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant); -static VALUE rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant); +static VALUE rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens); +static VALUE rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens); static VALUE ast_parse_new(void) @@ -85,13 +85,13 @@ ast_parse_done(rb_ast_t *ast) } static VALUE -ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE keep_script_lines, VALUE error_tolerant) +ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) { - return rb_ast_parse_str(str, keep_script_lines, error_tolerant); + return rb_ast_parse_str(str, keep_script_lines, error_tolerant, keep_tokens); } static VALUE -rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant) +rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) { rb_ast_t *ast = 0; @@ -99,18 +99,19 @@ rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant) VALUE vparser = ast_parse_new(); if (RTEST(keep_script_lines)) rb_parser_keep_script_lines(vparser); if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser); + if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser); ast = rb_parser_compile_string_path(vparser, Qnil, str, 1); return ast_parse_done(ast); } static VALUE -ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE keep_script_lines, VALUE error_tolerant) +ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) { - return rb_ast_parse_file(path, keep_script_lines, error_tolerant); + return rb_ast_parse_file(path, keep_script_lines, error_tolerant, keep_tokens); } static VALUE -rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant) +rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) { VALUE f; rb_ast_t *ast = 0; @@ -122,6 +123,7 @@ rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant) VALUE vparser = ast_parse_new(); if (RTEST(keep_script_lines)) rb_parser_keep_script_lines(vparser); if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser); + if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser); ast = rb_parser_compile_file_path(vparser, Qnil, f, 1); rb_io_close(f); return ast_parse_done(ast); @@ -141,7 +143,7 @@ lex_array(VALUE array, int index) } static VALUE -rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant) +rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) { rb_ast_t *ast = 0; @@ -149,6 +151,7 @@ rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant) VALUE vparser = ast_parse_new(); if (RTEST(keep_script_lines)) rb_parser_keep_script_lines(vparser); if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser); + if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser); ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1); return ast_parse_done(ast); } @@ -208,7 +211,7 @@ node_id_for_backtrace_location(rb_execution_context_t *ec, VALUE module, VALUE l } static VALUE -ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE keep_script_lines, VALUE error_tolerant) +ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) { VALUE node, lines = Qnil; const rb_iseq_t *iseq; @@ -247,13 +250,13 @@ ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE keep_script } if (!NIL_P(lines) || !NIL_P(lines = script_lines(path))) { - node = rb_ast_parse_array(lines, keep_script_lines, error_tolerant); + node = rb_ast_parse_array(lines, keep_script_lines, error_tolerant, keep_tokens); } else if (e_option) { - node = rb_ast_parse_str(rb_e_script, keep_script_lines, error_tolerant); + node = rb_ast_parse_str(rb_e_script, keep_script_lines, error_tolerant, keep_tokens); } else { - node = rb_ast_parse_file(path, keep_script_lines, error_tolerant); + node = rb_ast_parse_file(path, keep_script_lines, error_tolerant, keep_tokens); } return node_find(node, node_id); @@ -716,6 +719,15 @@ ast_node_last_column(rb_execution_context_t *ec, VALUE self) } static VALUE +ast_node_all_tokens(rb_execution_context_t *ec, VALUE self) +{ + struct ASTNodeData *data; + TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data); + + return rb_ast_tokens(data->ast); +} + +static VALUE ast_node_inspect(rb_execution_context_t *ec, VALUE self) { VALUE str; |