summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-10-13 12:16:11 -0400
committerJemma Issroff <jemmaissroff@gmail.com>2023-10-16 15:40:19 -0700
commit5523a23469987f92e38d52d4332bde09bdd8896c (patch)
tree01c4fcef6dea113e55ab1eedb59742a5ebad2b36
parent39dd3343d8672a70ebb0990c166d99a8b29ee19e (diff)
[ruby/prism] Attach magic comments to the parse result
https://github.com/ruby/prism/commit/c7ef25a79a
-rw-r--r--lib/prism/ffi.rb4
-rw-r--r--lib/prism/lex_compat.rb2
-rw-r--r--lib/prism/parse_result.rb35
-rw-r--r--prism/extension.c37
-rw-r--r--prism/parser.h11
-rw-r--r--prism/prism.c25
-rw-r--r--prism/templates/lib/prism/serialize.rb.erb15
-rw-r--r--prism/templates/src/serialize.c.erb23
8 files changed, 136 insertions, 16 deletions
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index 69b2b35bbb..cc7d94fb3f 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -234,11 +234,11 @@ module Prism
loader = Serialize::Loader.new(source, buffer.read)
tokens = loader.load_tokens
- node, comments, errors, warnings = loader.load_nodes
+ node, comments, magic_comments, errors, warnings = loader.load_nodes
tokens.each { |token,| token.value.force_encoding(loader.encoding) }
- ParseResult.new([node, tokens], comments, errors, warnings, source)
+ ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source)
end
end
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index d0679db669..a17d4eaadd 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -818,7 +818,7 @@ module Prism
# We sort by location to compare against Ripper's output
tokens.sort_by!(&:location)
- ParseResult.new(tokens, result.comments, result.errors, result.warnings, [])
+ ParseResult.new(tokens, result.comments, result.magic_comments, result.errors, result.warnings, [])
end
end
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index b5e6643e6d..fd2737b20c 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -137,7 +137,7 @@ module Prism
end
def self.null
- new(0, 0)
+ new(nil, 0, 0)
end
end
@@ -166,6 +166,32 @@ module Prism
end
end
+ # This represents a magic comment that was encountered during parsing.
+ class MagicComment
+ attr_reader :key_loc, :value_loc # the two locations handed to the constructor: one for the key, one for the value
+ # Stores both locations exactly as given; nothing is validated or copied here.
+ def initialize(key_loc, value_loc)
+ @key_loc = key_loc
+ @value_loc = value_loc
+ end
+ # Returns whatever key_loc.slice yields (presumably the key's source text; Location#slice is not shown here).
+ def key
+ key_loc.slice
+ end
+ # Returns whatever value_loc.slice yields, mirroring #key.
+ def value
+ value_loc.slice
+ end
+ # Pattern-matching hook: always returns both locations; the keys argument is ignored.
+ def deconstruct_keys(keys)
+ { key_loc: key_loc, value_loc: value_loc }
+ end
+ # Debug representation built from the inspected key and value rather than from the locations themselves.
+ def inspect
+ "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
+ end
+ end
+
# This represents an error that was encountered during parsing.
class ParseError
attr_reader :message, :location
@@ -206,18 +232,19 @@ module Prism
# the AST, any comments that were encountered, and any errors that were
# encountered.
class ParseResult
- attr_reader :value, :comments, :errors, :warnings, :source
+ attr_reader :value, :comments, :magic_comments, :errors, :warnings, :source
- def initialize(value, comments, errors, warnings, source)
+ def initialize(value, comments, magic_comments, errors, warnings, source)
@value = value
@comments = comments
+ @magic_comments = magic_comments
@errors = errors
@warnings = warnings
@source = source
end
def deconstruct_keys(keys)
- { value: value, comments: comments, errors: errors, warnings: warnings }
+ { value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings }
end
def success?
diff --git a/prism/extension.c b/prism/extension.c
index d6e335f4f1..77d9881714 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -10,6 +10,7 @@ VALUE rb_cPrismToken;
VALUE rb_cPrismLocation;
VALUE rb_cPrismComment;
+VALUE rb_cPrismMagicComment;
VALUE rb_cPrismParseError;
VALUE rb_cPrismParseWarning;
VALUE rb_cPrismParseResult;
@@ -153,6 +154,35 @@ parser_comments(pm_parser_t *parser, VALUE source) {
return comments;
}
+// Extract the magic comments out of the parser into an array. Returns a new Ruby array holding one Prism::MagicComment per node of parser->magic_comment_list, in list order.
+static VALUE
+parser_magic_comments(pm_parser_t *parser, VALUE source) {
+ VALUE magic_comments = rb_ary_new();
+ // Walk the list from its head; the loop ends when a node's next pointer is NULL.
+ for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
+ VALUE key_loc_argv[] = { // arguments for the key's Location: source, start offset, length
+ source,
+ LONG2FIX(magic_comment->key_start - parser->start), // offset measured from parser->start
+ LONG2FIX(magic_comment->key_length)
+ };
+ // The value's Location is built from the same three kinds of argument.
+ VALUE value_loc_argv[] = {
+ source,
+ LONG2FIX(magic_comment->value_start - parser->start),
+ LONG2FIX(magic_comment->value_length)
+ };
+ // Instantiate both Location objects; together they are the two arguments passed to MagicComment.
+ VALUE magic_comment_argv[] = {
+ rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
+ rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
+ };
+ // Create the MagicComment and append it to the result array.
+ rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
+ }
+ // If the list is empty the loop never runs and the array is returned empty.
+ return magic_comments;
+}
+
// Extract the errors out of the parser into an array.
static VALUE
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
@@ -297,6 +327,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
VALUE result_argv[] = {
value,
parser_comments(&parser, source),
+ parser_magic_comments(&parser, source),
parser_errors(&parser, parse_lex_data.encoding, source),
parser_warnings(&parser, parse_lex_data.encoding, source),
source
@@ -304,7 +335,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
- return rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
+ return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
}
// Return an array of tokens corresponding to the given string.
@@ -351,12 +382,13 @@ parse_input(pm_string_t *input, const char *filepath) {
VALUE result_argv[] = {
pm_ast_new(&parser, node, encoding),
parser_comments(&parser, source),
+ parser_magic_comments(&parser, source),
parser_errors(&parser, encoding, source),
parser_warnings(&parser, encoding, source),
source
};
- VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
+ VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@@ -547,6 +579,7 @@ Init_prism(void) {
rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
+ rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
diff --git a/prism/parser.h b/prism/parser.h
index 027c3a92b5..f77d8818aa 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -250,6 +250,16 @@ typedef struct pm_comment {
pm_comment_type_t type;
} pm_comment_t;
+// This is a node in the linked list of magic comments that we've found while
+// parsing. Code elsewhere in this change casts between pm_magic_comment_t * and pm_list_node_t *, which relies on node being the first member.
+typedef struct {
+ pm_list_node_t node; // list linkage; node.next is followed when walking the list
+ const uint8_t *key_start; // first byte of the key; turned into an offset by subtracting parser->start
+ const uint8_t *value_start; // first byte of the value; same offset convention as key_start
+ uint32_t key_length; // length of the key, narrowed to 32 bits when the node is filled in
+ uint32_t value_length; // value_end - value_start at the time the node is filled in
+} pm_magic_comment_t;
+
// When the encoding that is being used to parse the source is changed by prism,
// we provide the ability here to call out to a user-defined function.
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
@@ -353,6 +363,7 @@ struct pm_parser {
const uint8_t *heredoc_end;
pm_list_t comment_list; // the list of comments that have been found while parsing
+ pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
pm_list_t warning_list; // the list of warnings that have been found while parsing
pm_list_t error_list; // the list of errors that have been found while parsing
pm_scope_t *current_scope; // the current local scope
diff --git a/prism/prism.c b/prism/prism.c
index b7d4101e06..c0f726e796 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -5448,6 +5448,16 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// When we're done, we want to free the string in case we had to
// allocate memory for it.
pm_string_free(&key);
+
+ // Allocate a new magic comment node to append to the parser's list.
+ pm_magic_comment_t *magic_comment;
+ if ((magic_comment = (pm_magic_comment_t *) malloc(sizeof(pm_magic_comment_t))) != NULL) {
+ magic_comment->key_start = key_start;
+ magic_comment->value_start = value_start;
+ magic_comment->key_length = (uint32_t) key_length;
+ magic_comment->value_length = (uint32_t) (value_end - value_start);
+ pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
+ }
}
}
@@ -15262,6 +15272,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
.next_start = NULL,
.heredoc_end = NULL,
.comment_list = PM_LIST_EMPTY,
+ .magic_comment_list = PM_LIST_EMPTY,
.warning_list = PM_LIST_EMPTY,
.error_list = PM_LIST_EMPTY,
.current_scope = NULL,
@@ -15356,6 +15367,19 @@ pm_comment_list_free(pm_list_t *list) {
}
}
+// Free all of the memory associated with the magic comment list. Only the nodes are freed; list->head is not reset by this function.
+static inline void
+pm_magic_comment_list_free(pm_list_t *list) {
+ pm_list_node_t *node, *next;
+ // Save each node's successor before freeing it, because node->next cannot be read after free().
+ for (node = list->head; node != NULL; node = next) {
+ next = node->next;
+ // The cast recovers the full struct type; the node itself is the only thing released here.
+ pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
+ free(magic_comment);
+ }
+}
+
// Free any memory associated with the given parser.
PRISM_EXPORTED_FUNCTION void
pm_parser_free(pm_parser_t *parser) {
@@ -15363,6 +15387,7 @@ pm_parser_free(pm_parser_t *parser) {
pm_diagnostic_list_free(&parser->error_list);
pm_diagnostic_list_free(&parser->warning_list);
pm_comment_list_free(&parser->comment_list);
+ pm_magic_comment_list_free(&parser->magic_comment_list);
pm_constant_pool_free(&parser->constant_pool);
pm_newline_list_free(&parser->newline_list);
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
index 047bd3d99d..ef39b7f1e5 100644
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -55,9 +55,10 @@ module Prism
def load_metadata
comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) }
+ magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) }
errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
- [comments, errors, warnings]
+ [comments, magic_comments, errors, warnings]
end
def load_tokens
@@ -76,14 +77,14 @@ module Prism
def load_tokens_result
tokens = load_tokens
encoding = load_encoding
- comments, errors, warnings = load_metadata
+ comments, magic_comments, errors, warnings = load_metadata
if encoding != @encoding
tokens.each { |token,| token.value.force_encoding(encoding) }
end
raise "Expected to consume all bytes while deserializing" unless @io.eof?
- Prism::ParseResult.new(tokens, comments, errors, warnings, @source)
+ Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
end
def load_nodes
@@ -97,17 +98,17 @@ module Prism
@encoding = load_encoding
@input = input.force_encoding(@encoding).freeze
- comments, errors, warnings = load_metadata
+ comments, magic_comments, errors, warnings = load_metadata
@constant_pool_offset = io.read(4).unpack1("L")
@constant_pool = Array.new(load_varint, nil)
- [load_node, comments, errors, warnings]
+ [load_node, comments, magic_comments, errors, warnings]
end
def load_result
- node, comments, errors, warnings = load_nodes
- Prism::ParseResult.new(node, comments, errors, warnings, @source)
+ node, comments, magic_comments, errors, warnings = load_nodes
+ Prism::ParseResult.new(node, comments, magic_comments, errors, warnings, @source)
end
private
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index e9ebc31590..9ee179af7f 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -147,6 +147,27 @@ pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buf
}
static void
+pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
+ // Appends one magic comment to buffer as four u32 fields: key offset, key length, value offset, value length.
+ pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start)); // key offset, relative to parser->start
+ pm_buffer_append_u32(buffer, magic_comment->key_length); // stored as uint32_t, so no ptrdiff_t conversion is needed
+ // Offsets are pointer differences and still go through pm_ptrdifft_to_u32; lengths are appended as stored.
+ // The value location uses the same offset-then-length layout as the key.
+ pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start));
+ pm_buffer_append_u32(buffer, magic_comment->value_length);
+}
+// Serializes the whole list: a u32 element count followed by each magic comment in list order.
+static void
+pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+ pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_list_size(list))); // the count comes first, ahead of any element
+ // Each node is written by pm_serialize_magic_comment; iteration stops at a NULL next pointer.
+ pm_magic_comment_t *magic_comment;
+ for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
+ pm_serialize_magic_comment(parser, magic_comment, buffer);
+ }
+}
+
+static void
pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
// serialize message
size_t message_length = strlen(diagnostic->message);
@@ -180,6 +201,7 @@ void
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_serialize_encoding(&parser->encoding, buffer);
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
+ pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
@@ -268,6 +290,7 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
pm_serialize_encoding(&parser.encoding, buffer);
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
+ pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);