diff options
| author | Benoit Daloze <eregontp@gmail.com> | 2024-02-08 16:26:27 +0100 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2024-02-14 15:48:32 +0000 |
| commit | f0f6ffef4252fcc899fe2f039b910fc7613d00aa (patch) | |
| tree | 1fd688e83b79528d07cd38e55917057f4a9d43dd | |
| parent | 65f54355406a25f352241406967d038fc72d4737 (diff) | |
[ruby/prism] Serialize the newline_list to avoid recomputing it again later
* Fixes https://github.com/ruby/prism/issues/2380
https://github.com/ruby/prism/commit/4eaaa90114

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/prism/parse_result.rb | 20 |
| -rw-r--r-- | prism/extension.c | 23 |
| -rw-r--r-- | prism/extension.h | 3 |
| -rw-r--r-- | prism/templates/ext/prism/api_node.c.erb | 25 |
| -rw-r--r-- | prism/templates/lib/prism/serialize.rb.erb | 6 |
| -rw-r--r-- | prism/templates/src/serialize.c.erb | 37 |

6 files changed, 65 insertions, 49 deletions
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 4b0c57ed4b..785d2acf35 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -12,15 +12,13 @@ module Prism attr_accessor :start_line # The list of newline byte offsets in the source code. - attr_reader :offsets + attr_accessor :offsets - # Create a new source object with the given source code and newline byte - # offsets. If no newline byte offsets are given, they will be computed from - # the source code. - def initialize(source, start_line = 1, offsets = compute_offsets(source)) + # Create a new source object with the given source code. + def initialize(source) @source = source - @start_line = start_line - @offsets = offsets + @start_line = 1 # set after parsing is done + @offsets = [] # set after parsing is done end # Perform a byteslice on the source code using the given byte offset and @@ -94,14 +92,6 @@ module Prism left - 1 end - - # Find all of the newlines in the source code and return their byte offsets - # from the start of the string an array. - def compute_offsets(code) - offsets = [0] - code.b.scan("\n") { offsets << $~.end(0) } - offsets - end end # This represents a location in the source. 
diff --git a/prism/extension.c b/prism/extension.c index c20ce5b525..5ea6f57ca5 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -542,9 +542,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback); - VALUE offsets = rb_ary_new(); - VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets }; - VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource); + VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input)); + VALUE source_argv[] = { source_string }; + VALUE source = rb_class_new_instance(1, source_argv, rb_cPrismSource); parse_lex_data_t parse_lex_data = { .source = source, @@ -561,17 +561,18 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod parser.lex_callback = &lex_callback; pm_node_t *node = pm_parse(&parser); - // Here we need to update the source range to have the correct newline - // offsets. We do it here because we've already created the object and given - // it over to all of the tokens. - for (size_t index = 0; index < parser.newline_list.size; index++) { - rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index])); - } + // Here we need to update the Source object to have the correct + // encoding for the source string and the correct newline offsets. + // We do it here because we've already created the Source object and given + // it over to all of the tokens, and both of these are only set after pm_parse(). 
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name); + rb_enc_associate(source_string, encoding); + pm_source_init(source, &parser); VALUE value; if (return_nodes) { value = rb_ary_new_capa(2); - rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding)); + rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source)); rb_ary_push(value, parse_lex_data.tokens); } else { value = parse_lex_data.tokens; @@ -650,7 +651,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) { VALUE source = pm_source_new(&parser, encoding); VALUE result_argv[] = { - pm_ast_new(&parser, node, encoding), + pm_ast_new(&parser, node, encoding, source), parser_comments(&parser, source), parser_magic_comments(&parser, source), parser_data_loc(&parser, source), diff --git a/prism/extension.h b/prism/extension.h index 20538d133b..83cb9f3942 100644 --- a/prism/extension.h +++ b/prism/extension.h @@ -8,8 +8,9 @@ #include "prism.h" VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding); +void pm_source_init(VALUE source, pm_parser_t *parser); VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source); -VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding); +VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source); void Init_prism_api_node(void); void Init_prism_pack(void); diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb index 20b3810715..8bcd4f402b 100644 --- a/prism/templates/ext/prism/api_node.c.erb +++ b/prism/templates/ext/prism/api_node.c.erb @@ -36,18 +36,26 @@ pm_string_new(pm_string_t *string, rb_encoding *encoding) { return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding); } -// Create a Prism::Source object from the given parser. +// Create a Prism::Source object from the given parser, after pm_parse() was called. 
VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding) { - VALUE source = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding); - VALUE offsets = rb_ary_new_capa(parser->newline_list.size); + VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding); + VALUE source_argv[] = { source_string }; + VALUE source = rb_class_new_instance(1, source_argv, rb_cPrismSource); + + pm_source_init(source, parser); + return source; +} +void +pm_source_init(VALUE source, pm_parser_t *parser) { + rb_funcall(source, rb_intern("start_line="), 1, LONG2NUM(parser->start_line)); + + VALUE offsets = rb_ary_new_capa(parser->newline_list.size); for (size_t index = 0; index < parser->newline_list.size; index++) { - rb_ary_push(offsets, INT2FIX(parser->newline_list.offsets[index])); + rb_ary_push(offsets, ULONG2NUM(parser->newline_list.offsets[index])); } - - VALUE source_argv[] = { source, LONG2NUM(parser->start_line), offsets }; - return rb_class_new_instance(3, source_argv, rb_cPrismSource); + rb_funcall(source, rb_intern("offsets="), 1, offsets); } typedef struct pm_node_stack_node { @@ -77,8 +85,7 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) { } VALUE -pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) { - VALUE source = pm_source_new(parser, encoding); +pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source) { ID *constants = calloc(parser->constant_pool.size, sizeof(ID)); for (uint32_t index = 0; index < parser->constant_pool.size; index++) { diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index e5681007b3..500a980341 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -82,6 +82,10 @@ module Prism source.start_line = load_varsint end + def load_line_offsets + source.offsets = load_varuint.times.map { load_varuint } + end + def 
load_comments load_varuint.times.map do case load_varuint @@ -118,6 +122,7 @@ module Prism tokens = load_tokens encoding = load_encoding load_start_line + load_line_offsets comments, magic_comments, data_loc, errors, warnings = load_metadata tokens.each { |token,| token.value.force_encoding(encoding) } @@ -129,6 +134,7 @@ module Prism load_header load_encoding load_start_line + load_line_offsets comments, magic_comments, data_loc, errors, warnings = load_metadata diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb index deda01f29c..c9511bbfca 100644 --- a/prism/templates/src/serialize.c.erb +++ b/prism/templates/src/serialize.c.erb @@ -129,6 +129,17 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { } static void +pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) { + uint32_t size = pm_sizet_to_u32(list->size); + pm_buffer_append_varuint(buffer, size); + + for (uint32_t i = 0; i < size; i++) { + uint32_t offset = pm_sizet_to_u32(list->offsets[i]); + pm_buffer_append_varuint(buffer, offset); + } +} + +static void pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) { // serialize type pm_buffer_append_byte(buffer, (uint8_t) comment->type); @@ -214,14 +225,11 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) { pm_buffer_append_string(buffer, encoding->name, encoding_length); } -#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>" -/** - * Serialize the encoding, metadata, nodes, and constant pool. 
- */ -void -pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { +static void +pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) { pm_serialize_encoding(parser->encoding, buffer); pm_buffer_append_varsint(buffer, parser->start_line); + pm_serialize_newline_list(&parser->newline_list, buffer); <%- unless Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS -%> pm_serialize_comment_list(parser, &parser->comment_list, buffer); <%- end -%> @@ -229,6 +237,15 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) pm_serialize_data_loc(parser, buffer); pm_serialize_diagnostic_list(parser, &parser->error_list, buffer); pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer); +} + +#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>" +/** + * Serialize the metadata, nodes, and constant pool. + */ +void +pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { + pm_serialize_metadata(parser, buffer); // Here we're going to leave space for the offset of the constant pool in // the buffer. @@ -319,13 +336,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const // Append 0 to mark end of tokens. pm_buffer_append_byte(buffer, 0); - pm_serialize_encoding(parser.encoding, buffer); - pm_buffer_append_varsint(buffer, parser.start_line); - pm_serialize_comment_list(&parser, &parser.comment_list, buffer); - pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer); - pm_serialize_data_loc(&parser, buffer); - pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer); - pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer); + pm_serialize_metadata(&parser, buffer); pm_node_destroy(&parser, node); pm_parser_free(&parser); |
