diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2023-10-13 12:16:11 -0400 |
|---|---|---|
| committer | Jemma Issroff <jemmaissroff@gmail.com> | 2023-10-16 15:40:19 -0700 |
| commit | 5523a23469987f92e38d52d4332bde09bdd8896c (patch) | |
| tree | 01c4fcef6dea113e55ab1eedb59742a5ebad2b36 | |
| parent | 39dd3343d8672a70ebb0990c166d99a8b29ee19e (diff) | |
[ruby/prism] Attach magic comments to the parse result
https://github.com/ruby/prism/commit/c7ef25a79a
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/prism/ffi.rb | 4 |
| -rw-r--r-- | lib/prism/lex_compat.rb | 2 |
| -rw-r--r-- | lib/prism/parse_result.rb | 35 |
| -rw-r--r-- | prism/extension.c | 37 |
| -rw-r--r-- | prism/parser.h | 11 |
| -rw-r--r-- | prism/prism.c | 25 |
| -rw-r--r-- | prism/templates/lib/prism/serialize.rb.erb | 15 |
| -rw-r--r-- | prism/templates/src/serialize.c.erb | 23 |
8 files changed, 136 insertions, 16 deletions
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 69b2b35bbb..cc7d94fb3f 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -234,11 +234,11 @@ module Prism loader = Serialize::Loader.new(source, buffer.read) tokens = loader.load_tokens - node, comments, errors, warnings = loader.load_nodes + node, comments, magic_comments, errors, warnings = loader.load_nodes tokens.each { |token,| token.value.force_encoding(loader.encoding) } - ParseResult.new([node, tokens], comments, errors, warnings, source) + ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source) end end diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index d0679db669..a17d4eaadd 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -818,7 +818,7 @@ module Prism # We sort by location to compare against Ripper's output tokens.sort_by!(&:location) - ParseResult.new(tokens, result.comments, result.errors, result.warnings, []) + ParseResult.new(tokens, result.comments, result.magic_comments, result.errors, result.warnings, []) end end diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index b5e6643e6d..fd2737b20c 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -137,7 +137,7 @@ module Prism end def self.null - new(0, 0) + new(nil, 0, 0) end end @@ -166,6 +166,32 @@ module Prism end end + # This represents a magic comment that was encountered during parsing. + class MagicComment + attr_reader :key_loc, :value_loc + + def initialize(key_loc, value_loc) + @key_loc = key_loc + @value_loc = value_loc + end + + def key + key_loc.slice + end + + def value + value_loc.slice + end + + def deconstruct_keys(keys) + { key_loc: key_loc, value_loc: value_loc } + end + + def inspect + "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>" + end + end + # This represents an error that was encountered during parsing. 
class ParseError attr_reader :message, :location @@ -206,18 +232,19 @@ module Prism # the AST, any comments that were encounters, and any errors that were # encountered. class ParseResult - attr_reader :value, :comments, :errors, :warnings, :source + attr_reader :value, :comments, :magic_comments, :errors, :warnings, :source - def initialize(value, comments, errors, warnings, source) + def initialize(value, comments, magic_comments, errors, warnings, source) @value = value @comments = comments + @magic_comments = magic_comments @errors = errors @warnings = warnings @source = source end def deconstruct_keys(keys) - { value: value, comments: comments, errors: errors, warnings: warnings } + { value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings } end def success? diff --git a/prism/extension.c b/prism/extension.c index d6e335f4f1..77d9881714 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -10,6 +10,7 @@ VALUE rb_cPrismToken; VALUE rb_cPrismLocation; VALUE rb_cPrismComment; +VALUE rb_cPrismMagicComment; VALUE rb_cPrismParseError; VALUE rb_cPrismParseWarning; VALUE rb_cPrismParseResult; @@ -153,6 +154,35 @@ parser_comments(pm_parser_t *parser, VALUE source) { return comments; } +// Extract the magic comments out of the parser into an array. 
+static VALUE +parser_magic_comments(pm_parser_t *parser, VALUE source) { + VALUE magic_comments = rb_ary_new(); + + for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) { + VALUE key_loc_argv[] = { + source, + LONG2FIX(magic_comment->key_start - parser->start), + LONG2FIX(magic_comment->key_length) + }; + + VALUE value_loc_argv[] = { + source, + LONG2FIX(magic_comment->value_start - parser->start), + LONG2FIX(magic_comment->value_length) + }; + + VALUE magic_comment_argv[] = { + rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation), + rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation) + }; + + rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment)); + } + + return magic_comments; +} + // Extract the errors out of the parser into an array. static VALUE parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { @@ -297,6 +327,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) { VALUE result_argv[] = { value, parser_comments(&parser, source), + parser_magic_comments(&parser, source), parser_errors(&parser, parse_lex_data.encoding, source), parser_warnings(&parser, parse_lex_data.encoding, source), source @@ -304,7 +335,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) { pm_node_destroy(&parser, node); pm_parser_free(&parser); - return rb_class_new_instance(5, result_argv, rb_cPrismParseResult); + return rb_class_new_instance(6, result_argv, rb_cPrismParseResult); } // Return an array of tokens corresponding to the given string. 
@@ -351,12 +382,13 @@ parse_input(pm_string_t *input, const char *filepath) { VALUE result_argv[] = { pm_ast_new(&parser, node, encoding), parser_comments(&parser, source), + parser_magic_comments(&parser, source), parser_errors(&parser, encoding, source), parser_warnings(&parser, encoding, source), source }; - VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult); + VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult); pm_node_destroy(&parser, node); pm_parser_free(&parser); @@ -547,6 +579,7 @@ Init_prism(void) { rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject); rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject); rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject); + rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject); rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject); rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject); rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject); diff --git a/prism/parser.h b/prism/parser.h index 027c3a92b5..f77d8818aa 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -250,6 +250,16 @@ typedef struct pm_comment { pm_comment_type_t type; } pm_comment_t; +// This is a node in the linked list of magic comments that we've found while +// parsing. +typedef struct { + pm_list_node_t node; + const uint8_t *key_start; + const uint8_t *value_start; + uint32_t key_length; + uint32_t value_length; +} pm_magic_comment_t; + // When the encoding that is being used to parse the source is changed by prism, // we provide the ability here to call out to a user-defined function. 
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser); @@ -353,6 +363,7 @@ struct pm_parser { const uint8_t *heredoc_end; pm_list_t comment_list; // the list of comments that have been found while parsing + pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing. pm_list_t warning_list; // the list of warnings that have been found while parsing pm_list_t error_list; // the list of errors that have been found while parsing pm_scope_t *current_scope; // the current local scope diff --git a/prism/prism.c b/prism/prism.c index b7d4101e06..c0f726e796 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -5448,6 +5448,16 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { // When we're done, we want to free the string in case we had to // allocate memory for it. pm_string_free(&key); + + // Allocate a new magic comment node to append to the parser's list. + pm_magic_comment_t *magic_comment; + if ((magic_comment = (pm_magic_comment_t *) malloc(sizeof(pm_magic_comment_t))) != NULL) { + magic_comment->key_start = key_start; + magic_comment->value_start = value_start; + magic_comment->key_length = (uint32_t) key_length; + magic_comment->value_length = (uint32_t) (value_end - value_start); + pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment); + } } } @@ -15262,6 +15272,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch .next_start = NULL, .heredoc_end = NULL, .comment_list = PM_LIST_EMPTY, + .magic_comment_list = PM_LIST_EMPTY, .warning_list = PM_LIST_EMPTY, .error_list = PM_LIST_EMPTY, .current_scope = NULL, @@ -15356,6 +15367,19 @@ pm_comment_list_free(pm_list_t *list) { } } +// Free all of the memory associated with the magic comment list. 
+static inline void +pm_magic_comment_list_free(pm_list_t *list) { + pm_list_node_t *node, *next; + + for (node = list->head; node != NULL; node = next) { + next = node->next; + + pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node; + free(magic_comment); + } +} + // Free any memory associated with the given parser. PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) { @@ -15363,6 +15387,7 @@ pm_parser_free(pm_parser_t *parser) { pm_diagnostic_list_free(&parser->error_list); pm_diagnostic_list_free(&parser->warning_list); pm_comment_list_free(&parser->comment_list); + pm_magic_comment_list_free(&parser->magic_comment_list); pm_constant_pool_free(&parser->constant_pool); pm_newline_list_free(&parser->newline_list); diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index 047bd3d99d..ef39b7f1e5 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -55,9 +55,10 @@ module Prism def load_metadata comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) } + magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) } errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) } warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) } - [comments, errors, warnings] + [comments, magic_comments, errors, warnings] end def load_tokens @@ -76,14 +77,14 @@ module Prism def load_tokens_result tokens = load_tokens encoding = load_encoding - comments, errors, warnings = load_metadata + comments, magic_comments, errors, warnings = load_metadata if encoding != @encoding tokens.each { |token,| token.value.force_encoding(encoding) } end raise "Expected to consume all bytes while deserializing" unless @io.eof? 
- Prism::ParseResult.new(tokens, comments, errors, warnings, @source) + Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source) end def load_nodes @@ -97,17 +98,17 @@ module Prism @encoding = load_encoding @input = input.force_encoding(@encoding).freeze - comments, errors, warnings = load_metadata + comments, magic_comments, errors, warnings = load_metadata @constant_pool_offset = io.read(4).unpack1("L") @constant_pool = Array.new(load_varint, nil) - [load_node, comments, errors, warnings] + [load_node, comments, magic_comments, errors, warnings] end def load_result - node, comments, errors, warnings = load_nodes - Prism::ParseResult.new(node, comments, errors, warnings, @source) + node, comments, magic_comments, errors, warnings = load_nodes + Prism::ParseResult.new(node, comments, magic_comments, errors, warnings, @source) end private diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb index e9ebc31590..9ee179af7f 100644 --- a/prism/templates/src/serialize.c.erb +++ b/prism/templates/src/serialize.c.erb @@ -147,6 +147,27 @@ pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buf } static void +pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) { + // serialize key location + pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start)); + pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_length)); + + // serialize value location + pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start)); + pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_length)); +} + +static void +pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { + pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_list_size(list))); + + pm_magic_comment_t *magic_comment; + for (magic_comment = (pm_magic_comment_t *) list->head; 
magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) { + pm_serialize_magic_comment(parser, magic_comment, buffer); + } +} + +static void pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) { // serialize message size_t message_length = strlen(diagnostic->message); @@ -180,6 +201,7 @@ void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_serialize_encoding(&parser->encoding, buffer); pm_serialize_comment_list(parser, &parser->comment_list, buffer); + pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer); pm_serialize_diagnostic_list(parser, &parser->error_list, buffer); pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer); @@ -268,6 +290,7 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu pm_serialize_encoding(&parser.encoding, buffer); pm_serialize_comment_list(&parser, &parser.comment_list, buffer); + pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer); pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer); pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer); |
