[ruby/prism] Attach magic comments to the parse result

https://github.com/ruby/prism/commit/c7ef25a79a
author: Kevin Newton <kddnewton@gmail.com> 2023-10-13 12:16:11 -0400
committer: Jemma Issroff <jemmaissroff@gmail.com> 2023-10-16 15:40:19 -0700
commit: 5523a23469987f92e38d52d4332bde09bdd8896c (patch)
tree: 01c4fcef6dea113e55ab1eedb59742a5ebad2b36
parent: 39dd3343d8672a70ebb0990c166d99a8b29ee19e (diff)
8 files changed, 136 insertions, 16 deletions
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index 69b2b35bbb..cc7d94fb3f 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -234,11 +234,11 @@ module Prism
       loader = Serialize::Loader.new(source, buffer.read)
 
       tokens = loader.load_tokens
-      node, comments, errors, warnings = loader.load_nodes
+      node, comments, magic_comments, errors, warnings = loader.load_nodes
 
       tokens.each { |token,| token.value.force_encoding(loader.encoding) }
 
-      ParseResult.new([node, tokens], comments, errors, warnings, source)
+      ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source)
     end
   end
 
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index d0679db669..a17d4eaadd 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -818,7 +818,7 @@ module Prism
       # We sort by location to compare against Ripper's output
       tokens.sort_by!(&:location)
 
-      ParseResult.new(tokens, result.comments, result.errors, result.warnings, [])
+      ParseResult.new(tokens, result.comments, result.magic_comments, result.errors, result.warnings, [])
     end
   end
 
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index b5e6643e6d..fd2737b20c 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -137,7 +137,7 @@ module Prism
     end
 
     def self.null
-      new(0, 0)
+      new(nil, 0, 0)
     end
   end
 
@@ -166,6 +166,32 @@ module Prism
     end
   end
 
+  # This represents a magic comment found during parsing, held as a key location and a value location.
+  class MagicComment
+    attr_reader :key_loc, :value_loc
+
+    def initialize(key_loc, value_loc)
+      @key_loc = key_loc
+      @value_loc = value_loc
+    end
+
+    def key # delegates to key_loc.slice
+      key_loc.slice
+    end
+
+    def value # delegates to value_loc.slice
+      value_loc.slice
+    end
+
+    def deconstruct_keys(keys) # the keys argument is not consulted; both locations are always returned
+      { key_loc: key_loc, value_loc: value_loc }
+    end
+
+    def inspect
+      "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
+    end
+  end
+
   # This represents an error that was encountered during parsing.
   class ParseError
     attr_reader :message, :location
@@ -206,18 +232,19 @@ module Prism
   # the AST, any comments that were encounters, and any errors that were
   # encountered.
   class ParseResult
-    attr_reader :value, :comments, :errors, :warnings, :source
+    attr_reader :value, :comments, :magic_comments, :errors, :warnings, :source
 
-    def initialize(value, comments, errors, warnings, source)
+    def initialize(value, comments, magic_comments, errors, warnings, source)
       @value = value
       @comments = comments
+      @magic_comments = magic_comments
       @errors = errors
       @warnings = warnings
       @source = source
     end
 
     def deconstruct_keys(keys)
-      { value: value, comments: comments, errors: errors, warnings: warnings }
+      { value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings }
     end
 
     def success?
diff --git a/prism/extension.c b/prism/extension.c
index d6e335f4f1..77d9881714 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -10,6 +10,7 @@ VALUE rb_cPrismToken;
 VALUE rb_cPrismLocation;
 
 VALUE rb_cPrismComment;
+VALUE rb_cPrismMagicComment;
 VALUE rb_cPrismParseError;
 VALUE rb_cPrismParseWarning;
 VALUE rb_cPrismParseResult;
@@ -153,6 +154,35 @@ parser_comments(pm_parser_t *parser, VALUE source) {
     return comments;
 }
 
+// Extract the magic comments out of the parser into an array of MagicComment objects.
+static VALUE
+parser_magic_comments(pm_parser_t *parser, VALUE source) {
+    VALUE magic_comments = rb_ary_new();
+
+    for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
+        VALUE key_loc_argv[] = {
+            source,
+            LONG2FIX(magic_comment->key_start - parser->start), // key offset relative to parser->start
+            LONG2FIX(magic_comment->key_length)
+        };
+
+        VALUE value_loc_argv[] = {
+            source,
+            LONG2FIX(magic_comment->value_start - parser->start), // value offset relative to parser->start
+            LONG2FIX(magic_comment->value_length)
+        };
+
+        VALUE magic_comment_argv[] = { // the key location first, then the value location
+            rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
+            rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
+        };
+
+        rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
+    }
+
+    return magic_comments;
+}
+
 // Extract the errors out of the parser into an array.
 static VALUE
 parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
@@ -297,6 +327,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
     VALUE result_argv[] = {
         value,
         parser_comments(&parser, source),
+        parser_magic_comments(&parser, source),
         parser_errors(&parser, parse_lex_data.encoding, source),
         parser_warnings(&parser, parse_lex_data.encoding, source),
         source
@@ -304,7 +335,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
 
     pm_node_destroy(&parser, node);
     pm_parser_free(&parser);
-    return rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
+    return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
 }
 
 // Return an array of tokens corresponding to the given string.
@@ -351,12 +382,13 @@ parse_input(pm_string_t *input, const char *filepath) {
     VALUE result_argv[] = {
         pm_ast_new(&parser, node, encoding),
         parser_comments(&parser, source),
+        parser_magic_comments(&parser, source),
         parser_errors(&parser, encoding, source),
         parser_warnings(&parser, encoding, source),
         source
     };
 
-    VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
+    VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
 
     pm_node_destroy(&parser, node);
     pm_parser_free(&parser);
@@ -547,6 +579,7 @@ Init_prism(void) {
     rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
     rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
     rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
+    rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
     rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
     rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
     rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
diff --git a/prism/parser.h b/prism/parser.h
index 027c3a92b5..f77d8818aa 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -250,6 +250,16 @@ typedef struct pm_comment {
     pm_comment_type_t type;
 } pm_comment_t;
 
+// This is a node in the linked list of magic comments that we've found while
+// parsing. The key and the value are each held as a start pointer and a length.
+typedef struct {
+    pm_list_node_t node; // kept first: nodes are cast to and from pm_list_node_t *
+    const uint8_t *key_start;
+    const uint8_t *value_start;
+    uint32_t key_length;
+    uint32_t value_length;
+} pm_magic_comment_t;
+
 // When the encoding that is being used to parse the source is changed by prism,
 // we provide the ability here to call out to a user-defined function.
 typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
@@ -353,6 +363,7 @@ struct pm_parser {
     const uint8_t *heredoc_end;
 
     pm_list_t comment_list;             // the list of comments that have been found while parsing
+    pm_list_t magic_comment_list;       // the list of magic comments that have been found while parsing.
     pm_list_t warning_list;             // the list of warnings that have been found while parsing
     pm_list_t error_list;               // the list of errors that have been found while parsing
     pm_scope_t *current_scope;          // the current local scope
diff --git a/prism/prism.c b/prism/prism.c
index b7d4101e06..c0f726e796 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -5448,6 +5448,16 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
         // When we're done, we want to free the string in case we had to
         // allocate memory for it.
         pm_string_free(&key);
+
+        // Allocate a zeroed node to append to the parser's list; zeroing gives node.next a defined NULL value.
+        pm_magic_comment_t *magic_comment;
+        if ((magic_comment = (pm_magic_comment_t *) calloc(1, sizeof(pm_magic_comment_t))) != NULL) {
+            magic_comment->key_start = key_start;
+            magic_comment->value_start = value_start;
+            magic_comment->key_length = (uint32_t) key_length;
+            magic_comment->value_length = (uint32_t) (value_end - value_start);
+            pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
+        }
     }
 }
 
@@ -15262,6 +15272,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
         .next_start = NULL,
         .heredoc_end = NULL,
         .comment_list = PM_LIST_EMPTY,
+        .magic_comment_list = PM_LIST_EMPTY,
         .warning_list = PM_LIST_EMPTY,
         .error_list = PM_LIST_EMPTY,
         .current_scope = NULL,
@@ -15356,6 +15367,19 @@ pm_comment_list_free(pm_list_t *list) {
     }
 }
 
+// Free every node of the magic comment list. The list struct itself is not modified here.
+static inline void
+pm_magic_comment_list_free(pm_list_t *list) {
+    pm_list_node_t *node, *next;
+
+    for (node = list->head; node != NULL; node = next) {
+        next = node->next; // read the link before the node that holds it is freed
+
+        pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
+        free(magic_comment);
+    }
+}
+
 // Free any memory associated with the given parser.
 PRISM_EXPORTED_FUNCTION void
 pm_parser_free(pm_parser_t *parser) {
@@ -15363,6 +15387,7 @@ pm_parser_free(pm_parser_t *parser) {
     pm_diagnostic_list_free(&parser->error_list);
     pm_diagnostic_list_free(&parser->warning_list);
     pm_comment_list_free(&parser->comment_list);
+    pm_magic_comment_list_free(&parser->magic_comment_list);
     pm_constant_pool_free(&parser->constant_pool);
     pm_newline_list_free(&parser->newline_list);
 
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
index 047bd3d99d..ef39b7f1e5 100644
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -55,9 +55,10 @@ module Prism
 
       def load_metadata
         comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) }
+        magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) }
         errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
         warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
-        [comments, errors, warnings]
+        [comments, magic_comments, errors, warnings]
       end
 
       def load_tokens
@@ -76,14 +77,14 @@ module Prism
       def load_tokens_result
         tokens = load_tokens
         encoding = load_encoding
-        comments, errors, warnings = load_metadata
+        comments, magic_comments, errors, warnings = load_metadata
 
         if encoding != @encoding
           tokens.each { |token,| token.value.force_encoding(encoding) }
         end
 
         raise "Expected to consume all bytes while deserializing" unless @io.eof?
-        Prism::ParseResult.new(tokens, comments, errors, warnings, @source)
+        Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
       end
 
       def load_nodes
@@ -97,17 +98,17 @@ module Prism
         @encoding = load_encoding
         @input = input.force_encoding(@encoding).freeze
 
-        comments, errors, warnings = load_metadata
+        comments, magic_comments, errors, warnings = load_metadata
 
         @constant_pool_offset = io.read(4).unpack1("L")
         @constant_pool = Array.new(load_varint, nil)
 
-        [load_node, comments, errors, warnings]
+        [load_node, comments, magic_comments, errors, warnings]
       end
 
       def load_result
-        node, comments, errors, warnings = load_nodes
-        Prism::ParseResult.new(node, comments, errors, warnings, @source)
+        node, comments, magic_comments, errors, warnings = load_nodes
+        Prism::ParseResult.new(node, comments, magic_comments, errors, warnings, @source)
       end
 
       private
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index e9ebc31590..9ee179af7f 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -147,6 +147,27 @@ pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buf
 }
 
 static void
+pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
+    // serialize key location: offset from parser->start, then the unsigned length
+    pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start));
+    pm_buffer_append_u32(buffer, pm_sizet_to_u32(magic_comment->key_length));
+
+    // serialize value location: offset from parser->start, then the unsigned length
+    pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start));
+    pm_buffer_append_u32(buffer, pm_sizet_to_u32(magic_comment->value_length));
+}
+
+static void
+pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+    pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_list_size(list))); // the entry count is written first
+
+    pm_magic_comment_t *magic_comment;
+    for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
+        pm_serialize_magic_comment(parser, magic_comment, buffer); // entries follow in list order
+    }
+}
+
+static void
 pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
     // serialize message
     size_t message_length = strlen(diagnostic->message);
@@ -180,6 +201,7 @@ void
 pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
     pm_serialize_encoding(&parser->encoding, buffer);
     pm_serialize_comment_list(parser, &parser->comment_list, buffer);
+    pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
     pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
     pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
 
@@ -268,6 +290,7 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
 
     pm_serialize_encoding(&parser.encoding, buffer);
     pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
+    pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
     pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
     pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
author	Kevin Newton <kddnewton@gmail.com>	2023-10-13 12:16:11 -0400
committer	Jemma Issroff <jemmaissroff@gmail.com>	2023-10-16 15:40:19 -0700
commit	5523a23469987f92e38d52d4332bde09bdd8896c (patch)
tree	01c4fcef6dea113e55ab1eedb59742a5ebad2b36
parent	39dd3343d8672a70ebb0990c166d99a8b29ee19e (diff)