[ruby/prism] Properly support the start line option

https://github.com/ruby/prism/commit/33cc75a4b7
author: Kevin Newton <kddnewton@gmail.com> 2023-11-02 15:06:50 -0400
committer: Kevin Newton <kddnewton@gmail.com> 2023-11-03 10:13:50 -0400
commit: d7d3243364ba39bfb5dddb10d5d6c8733b4ceb37 (patch)
tree: 9213b34638d1cef0b4a4798cb36306f876642411
parent: 95d3f2eaec1456eb5a964c4622ccc723af020ef2 (diff)
9 files changed, 42 insertions, 61 deletions
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index cf94232df2..92651cf766 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -8,14 +8,18 @@ module Prism
     # The source code that this source object represents.
     attr_reader :source
 
+    # The line number where this source starts.
+    attr_reader :start_line
+
     # The list of newline byte offsets in the source code.
     attr_reader :offsets
 
     # Create a new source object with the given source code and newline byte
     # offsets. If no newline byte offsets are given, they will be computed from
     # the source code.
-    def initialize(source, offsets = compute_offsets(source))
+    def initialize(source, start_line = 1, offsets = compute_offsets(source))
       @source = source
+      @start_line = start_line
       @offsets = offsets
     end
 
@@ -28,6 +32,25 @@ module Prism
     # Binary search through the offsets to find the line number for the given
     # byte offset.
     def line(value)
+      start_line + find_line(value)
+    end
+
+    # Return the byte offset of the start of the line corresponding to the given
+    # byte offset.
+    def line_offset(value)
+      offsets[find_line(value)]
+    end
+
+    # Return the column number for the given byte offset.
+    def column(value)
+      value - offsets[find_line(value)]
+    end
+
+    private
+
+    # Binary search through the offsets to find the zero-based index of the line
+    # that contains the given byte offset.
+    def find_line(value)
       left = 0
       right = offsets.length - 1
 
@@ -45,19 +68,6 @@ module Prism
       left - 1
     end
 
-    # Return the byte offset of the start of the line corresponding to the given
-    # byte offset.
-    def line_offset(value)
-      offsets[line(value)]
-    end
-
-    # Return the column number for the given byte offset.
-    def column(value)
-      value - offsets[line(value)]
-    end
-
-    private
-
     # Find all of the newlines in the source code and return their byte offsets
     # from the start of the string an array.
     def compute_offsets(code)
@@ -118,7 +128,7 @@ module Prism
 
     # The line number where this location starts.
     def start_line
-      source.line(start_offset) + 1
+      source.line(start_offset)
     end
 
     # The content of the line where this location starts before this location.
@@ -129,7 +139,7 @@ module Prism
 
     # The line number where this location ends.
     def end_line
-      source.line(end_offset) + 1
+      source.line(end_offset)
     end
 
     # The column number in bytes where this location starts from the start of
diff --git a/prism/extension.c b/prism/extension.c
index 86221a7bc5..dfd8e76d5a 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -471,8 +471,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
     pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
 
     VALUE offsets = rb_ary_new();
-    VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
-    VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
+    VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
+    VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
 
     parse_lex_data_t parse_lex_data = {
         .source = source,
diff --git a/prism/parser.h b/prism/parser.h
index a30e61c9bf..edefe70f25 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -639,6 +639,12 @@ struct pm_parser {
      */
     pm_string_t current_string;
 
+    /**
+     * The line number at the start of the parse. This will be used to offset
+     * the line numbers of all of the locations.
+     */
+    uint32_t start_line;
+
     /** Whether or not we're at the beginning of a command. */
     bool command_start;
 
diff --git a/prism/prism.c b/prism/prism.c
index 0e135db5cb..be8dd38ccf 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -16360,6 +16360,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
         .newline_list = { 0 },
         .integer_base = 0,
         .current_string = PM_STRING_EMPTY,
+        .start_line = 1,
         .command_start = true,
         .recovering = false,
         .encoding_changed = false,
@@ -16400,7 +16401,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
 
         // line option
         if (options->line > 0) {
-            pm_newline_list_force(&parser->newline_list, options->line);
+            parser->start_line = options->line;
         }
 
         // encoding option
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
index 1ffbf6c9f8..7bc52c1120 100644
--- a/prism/templates/ext/prism/api_node.c.erb
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -46,8 +46,8 @@ pm_source_new(pm_parser_t *parser, rb_encoding *encoding) {
         rb_ary_push(offsets, INT2FIX(parser->newline_list.offsets[index]));
     }
 
-    VALUE source_argv[] = { source, offsets };
-    return rb_class_new_instance(2, source_argv, rb_cPrismSource);
+    VALUE source_argv[] = { source, ULONG2NUM(parser->start_line), offsets };
+    return rb_class_new_instance(3, source_argv, rb_cPrismSource);
 }
 
 typedef struct pm_node_stack_node {
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
index 978ebf3d0e..f27bb75b63 100644
--- a/prism/util/pm_newline_list.c
+++ b/prism/util/pm_newline_list.c
@@ -20,33 +20,6 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac
 }
 
 /**
- * Set up the newline list such that it believes it is starting on a specific
- * line in the source. Basically this entails pushing on pointers to the start
- * of the string until we hit the desired line.
- */
-bool
-pm_newline_list_force(pm_newline_list_t *list, size_t count) {
-    size_t next_capacity = list->capacity == 0 ? 1 : list->capacity;
-    while (count > next_capacity) {
-        next_capacity *= 2;
-    }
-
-    size_t *offsets = list->offsets;
-    list->offsets = (size_t *) calloc(next_capacity, sizeof(size_t));
-    if (list->offsets == NULL) return false;
-
-    if (offsets != NULL) {
-        memcpy(list->offsets, offsets, list->size * sizeof(size_t));
-        free(offsets);
-    }
-
-    memset(list->offsets + list->size, 0, count * sizeof(size_t));
-    list->size += count;
-
-    return true;
-}
-
-/**
  * Append a new offset to the newline list. Returns true if the reallocation of
  * the offsets succeeds (if one was necessary), otherwise returns false.
  */
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
index 93816b0656..a31051f4e0 100644
--- a/prism/util/pm_newline_list.h
+++ b/prism/util/pm_newline_list.h
@@ -62,18 +62,6 @@ typedef struct {
 bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
 
 /**
- * Set up the newline list such that it believes it is starting on a specific
- * line in the source. Basically this entails pushing on pointers to the start
- * of the string until we hit the desired line.
- *
- * @param list The list to set up.
- * @param count The number of lines to push onto the list.
- * @return True if no reallocation was needed or the reallocation of the offsets
- *     succeeds (if one was necessary), otherwise false.
- */
-bool pm_newline_list_force(pm_newline_list_t *list, size_t count);
-
-/**
  * Append a new offset to the newline list. Returns true if the reallocation of
  * the offsets succeeds (if one was necessary), otherwise returns false.
  *
diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb
index 3a2892b970..c20a99a398 100644
--- a/test/prism/newline_test.rb
+++ b/test/prism/newline_test.rb
@@ -84,7 +84,7 @@ module Prism
 
       while node = queue.shift
         queue.concat(node.compact_child_nodes)
-        newlines << (result.source.line(node.location.start_offset) + 1) if node&.newline?
+        newlines << result.source.line(node.location.start_offset) if node&.newline?
       end
 
       newlines.sort
diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb
index 844a7796a3..a61282cca1 100644
--- a/test/prism/ruby_api_test.rb
+++ b/test/prism/ruby_api_test.rb
@@ -24,6 +24,9 @@ module Prism
       assert_equal "", Prism.parse("__FILE__").value.statements.body[0].filepath
       assert_equal "foo.rb", Prism.parse("__FILE__", filepath: "foo.rb").value.statements.body[0].filepath
 
+      assert_equal 1, Prism.parse("foo").value.statements.body[0].location.start_line
+      assert_equal 10, Prism.parse("foo", line: 10).value.statements.body[0].location.start_line
+
       refute Prism.parse("\"foo\"").value.statements.body[0].frozen?
       assert Prism.parse("\"foo\"", frozen_string_literal: true).value.statements.body[0].frozen?
       refute Prism.parse("\"foo\"", frozen_string_literal: false).value.statements.body[0].frozen?
author	Kevin Newton <kddnewton@gmail.com>	2023-11-02 15:06:50 -0400
committer	Kevin Newton <kddnewton@gmail.com>	2023-11-03 10:13:50 -0400
commit	d7d3243364ba39bfb5dddb10d5d6c8733b4ceb37 (patch)
tree	9213b34638d1cef0b4a4798cb36306f876642411
parent	95d3f2eaec1456eb5a964c4622ccc723af020ef2 (diff)