1 files changed, 154 insertions, 13 deletions
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index 39e15f6027..798fde09e5 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -5,6 +5,14 @@ module Prism
   # conjunction with locations to allow them to resolve line numbers and source
   # ranges.
   class Source
+    # Create a new source object with the given source code. This method should
+    # be used instead of `new` and it will return either a `Source` or a
+    # specialized and more performant `ASCIISource` if no multibyte characters
+    # are present in the source code.
+    def self.for(source, start_line = 1, offsets = [])
+      source.ascii_only? ? ASCIISource.new(source, start_line, offsets): new(source, start_line, offsets)
+    end
+
     # The source code that this source object represents.
     attr_reader :source
 
@@ -27,6 +35,11 @@ module Prism
       source.encoding
     end
 
+    # Returns the lines of the source code as an array of strings.
+    def lines
+      source.lines
+    end
+
     # Perform a byteslice on the source code using the given byte offset and
     # byte length.
     def slice(byte_offset, length)
@@ -45,6 +58,12 @@ module Prism
       offsets[find_line(byte_offset)]
     end
 
+    # Returns the byte offset of the end of the line corresponding to the given
+    # byte offset.
+    def line_end(byte_offset)
+      offsets[find_line(byte_offset) + 1] || source.bytesize
+    end
+
     # Return the column number for the given byte offset.
     def column(byte_offset)
       byte_offset - line_start(byte_offset)
@@ -100,6 +119,39 @@ module Prism
     end
   end
 
+  # Specialized version of Prism::Source for source code that includes ASCII
+  # characters only. This class is used to apply performance optimizations that
+  # cannot be applied to sources that include multibyte characters. Sources that
+  # include multibyte characters are represented by the Prism::Source class.
+  class ASCIISource < Source
+    # Return the character offset for the given byte offset.
+    def character_offset(byte_offset)
+      byte_offset
+    end
+
+    # Return the column number in characters for the given byte offset.
+    def character_column(byte_offset)
+      byte_offset - line_start(byte_offset)
+    end
+
+    # Returns the offset from the start of the file for the given byte offset
+    # counting in code units for the given encoding.
+    #
+    # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
+    # concept of code units that differs from the number of characters in other
+    # encodings, it is not captured here.
+    def code_units_offset(byte_offset, encoding)
+      byte_offset
+    end
+
+    # Specialized version of `code_units_column` that does not depend on
+    # `code_units_offset`, which is a more expensive operation. This is
+    # essentialy the same as `Prism::Source#column`.
+    def code_units_column(byte_offset, encoding)
+      byte_offset - line_start(byte_offset)
+    end
+  end
+
   # This represents a location in the source.
   class Location
     # A Source object that is used to determine more information from the given
@@ -171,11 +223,25 @@ module Prism
       "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
     end
 
+    # Returns all of the lines of the source code associated with this location.
+    def source_lines
+      source.lines
+    end
+
     # The source code that this location represents.
     def slice
       source.slice(start_offset, length)
     end
 
+    # The source code that this location represents starting from the beginning
+    # of the line that this location starts on to the end of the line that this
+    # location ends on.
+    def slice_lines
+      line_start = source.line_start(start_offset)
+      line_end = source.line_end(end_offset)
+      source.slice(line_start, line_end - line_start)
+    end
+
     # The character offset from the beginning of the source where this location
     # starts.
     def start_character_offset
@@ -281,6 +347,18 @@ module Prism
 
       Location.new(source, start_offset, other.end_offset - start_offset)
     end
+
+    # Join this location with the first occurrence of the string in the source
+    # that occurs after this location on the same line, and return the new
+    # location. This will raise an error if the string does not exist.
+    def adjoin(string)
+      line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset)
+
+      line_suffix_index = line_suffix.byteindex(string)
+      raise "Could not find #{string}" if line_suffix_index.nil?
+
+      Location.new(source, start_offset, length + line_suffix_index + string.bytesize)
+    end
   end
 
   # This represents a comment that was encountered during parsing. It is the
@@ -438,14 +516,9 @@ module Prism
   end
 
   # This represents the result of a call to ::parse or ::parse_file. It contains
-  # the AST, any comments that were encounters, and any errors that were
-  # encountered.
-  class ParseResult
-    # The value that was generated by parsing. Normally this holds the AST, but
-    # it can sometimes how a list of tokens or other results passed back from
-    # the parser.
-    attr_reader :value
-
+  # the requested structure, any comments that were encounters, and any errors
+  # that were encountered.
+  class Result
     # The list of comments that were encountered during parsing.
     attr_reader :comments
 
@@ -466,9 +539,8 @@ module Prism
     # A Source instance that represents the source code that was parsed.
     attr_reader :source
 
-    # Create a new parse result object with the given values.
-    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
-      @value = value
+    # Create a new result object with the given values.
+    def initialize(comments, magic_comments, data_loc, errors, warnings, source)
       @comments = comments
       @magic_comments = magic_comments
       @data_loc = data_loc
@@ -477,9 +549,9 @@ module Prism
       @source = source
     end
 
-    # Implement the hash pattern matching interface for ParseResult.
+    # Implement the hash pattern matching interface for Result.
     def deconstruct_keys(keys)
-      { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
+      { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
     end
 
     # Returns the encoding of the source code that was parsed.
@@ -500,6 +572,75 @@ module Prism
     end
   end
 
+  # This is a result specific to the `parse` and `parse_file` methods.
+  class ParseResult < Result
+    autoload :Comments, "prism/parse_result/comments"
+    autoload :Newlines, "prism/parse_result/newlines"
+
+    private_constant :Comments
+    private_constant :Newlines
+
+    # The syntax tree that was parsed from the source code.
+    attr_reader :value
+
+    # Create a new parse result object with the given values.
+    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+      @value = value
+      super(comments, magic_comments, data_loc, errors, warnings, source)
+    end
+
+    # Implement the hash pattern matching interface for ParseResult.
+    def deconstruct_keys(keys)
+      super.merge!(value: value)
+    end
+
+    # Attach the list of comments to their respective locations in the tree.
+    def attach_comments!
+      Comments.new(self).attach! # steep:ignore
+    end
+
+    # Walk the tree and mark nodes that are on a new line, loosely emulating
+    # the behavior of CRuby's `:line` tracepoint event.
+    def mark_newlines!
+      value.accept(Newlines.new(source.offsets.size)) # steep:ignore
+    end
+  end
+
+  # This is a result specific to the `lex` and `lex_file` methods.
+  class LexResult < Result
+    # The list of tokens that were parsed from the source code.
+    attr_reader :value
+
+    # Create a new lex result object with the given values.
+    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+      @value = value
+      super(comments, magic_comments, data_loc, errors, warnings, source)
+    end
+
+    # Implement the hash pattern matching interface for LexResult.
+    def deconstruct_keys(keys)
+      super.merge!(value: value)
+    end
+  end
+
+  # This is a result specific to the `parse_lex` and `parse_lex_file` methods.
+  class ParseLexResult < Result
+    # A tuple of the syntax tree and the list of tokens that were parsed from
+    # the source code.
+    attr_reader :value
+
+    # Create a new parse lex result object with the given values.
+    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+      @value = value
+      super(comments, magic_comments, data_loc, errors, warnings, source)
+    end
+
+    # Implement the hash pattern matching interface for ParseLexResult.
+    def deconstruct_keys(keys)
+      super.merge!(value: value)
+    end
+  end
+
   # This represents a token from the Ruby source.
   class Token
     # The Source object that represents the source this token came from.