diff options
Diffstat (limited to 'lib/prism/parse_result.rb')
-rw-r--r-- | lib/prism/parse_result.rb | 167 |
1 files changed, 154 insertions, 13 deletions
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 39e15f6027..798fde09e5 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -5,6 +5,14 @@ module Prism # conjunction with locations to allow them to resolve line numbers and source # ranges. class Source + # Create a new source object with the given source code. This method should + # be used instead of `new` and it will return either a `Source` or a + # specialized and more performant `ASCIISource` if no multibyte characters + # are present in the source code. + def self.for(source, start_line = 1, offsets = []) + source.ascii_only? ? ASCIISource.new(source, start_line, offsets): new(source, start_line, offsets) + end + # The source code that this source object represents. attr_reader :source @@ -27,6 +35,11 @@ module Prism source.encoding end + # Returns the lines of the source code as an array of strings. + def lines + source.lines + end + # Perform a byteslice on the source code using the given byte offset and # byte length. def slice(byte_offset, length) @@ -45,6 +58,12 @@ module Prism offsets[find_line(byte_offset)] end + # Returns the byte offset of the end of the line corresponding to the given + # byte offset. + def line_end(byte_offset) + offsets[find_line(byte_offset) + 1] || source.bytesize + end + # Return the column number for the given byte offset. def column(byte_offset) byte_offset - line_start(byte_offset) @@ -100,6 +119,39 @@ module Prism end end + # Specialized version of Prism::Source for source code that includes ASCII + # characters only. This class is used to apply performance optimizations that + # cannot be applied to sources that include multibyte characters. Sources that + # include multibyte characters are represented by the Prism::Source class. + class ASCIISource < Source + # Return the character offset for the given byte offset. 
+ def character_offset(byte_offset) + byte_offset + end + + # Return the column number in characters for the given byte offset. + def character_column(byte_offset) + byte_offset - line_start(byte_offset) + end + + # Returns the offset from the start of the file for the given byte offset + # counting in code units for the given encoding. + # + # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the + # concept of code units that differs from the number of characters in other + # encodings, it is not captured here. + def code_units_offset(byte_offset, encoding) + byte_offset + end + + # Specialized version of `code_units_column` that does not depend on + # `code_units_offset`, which is a more expensive operation. This is + # essentially the same as `Prism::Source#column`. + def code_units_column(byte_offset, encoding) + byte_offset - line_start(byte_offset) + end + end + # This represents a location in the source. class Location # A Source object that is used to determine more information from the given @@ -171,11 +223,25 @@ module Prism "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>" end + # Returns all of the lines of the source code associated with this location. + def source_lines + source.lines + end + # The source code that this location represents. def slice source.slice(start_offset, length) end + # The source code that this location represents starting from the beginning + # of the line that this location starts on to the end of the line that this + # location ends on. + def slice_lines + line_start = source.line_start(start_offset) + line_end = source.line_end(end_offset) + source.slice(line_start, line_end - line_start) + end + # The character offset from the beginning of the source where this location # starts. 
def start_character_offset @@ -281,6 +347,18 @@ module Prism Location.new(source, start_offset, other.end_offset - start_offset) end + + # Join this location with the first occurrence of the string in the source + # that occurs after this location on the same line, and return the new + # location. This will raise an error if the string does not exist. + def adjoin(string) + line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset) + + line_suffix_index = line_suffix.byteindex(string) + raise "Could not find #{string}" if line_suffix_index.nil? + + Location.new(source, start_offset, length + line_suffix_index + string.bytesize) + end end # This represents a comment that was encountered during parsing. It is the @@ -438,14 +516,9 @@ module Prism end # This represents the result of a call to ::parse or ::parse_file. It contains - # the AST, any comments that were encounters, and any errors that were - # encountered. - class ParseResult - # The value that was generated by parsing. Normally this holds the AST, but - # it can sometimes how a list of tokens or other results passed back from - # the parser. - attr_reader :value - + # the requested structure, any comments that were encountered, and any errors + # that were encountered. + class Result # The list of comments that were encountered during parsing. attr_reader :comments @@ -466,9 +539,8 @@ module Prism # A Source instance that represents the source code that was parsed. attr_reader :source - # Create a new parse result object with the given values. - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) - @value = value + # Create a new result object with the given values. + def initialize(comments, magic_comments, data_loc, errors, warnings, source) @comments = comments @magic_comments = magic_comments @data_loc = data_loc @@ -477,9 +549,9 @@ module Prism @source = source end - # Implement the hash pattern matching interface for ParseResult. 
+ # Implement the hash pattern matching interface for Result. def deconstruct_keys(keys) - { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings } + { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings } end # Returns the encoding of the source code that was parsed. @@ -500,6 +572,75 @@ module Prism end end + # This is a result specific to the `parse` and `parse_file` methods. + class ParseResult < Result + autoload :Comments, "prism/parse_result/comments" + autoload :Newlines, "prism/parse_result/newlines" + + private_constant :Comments + private_constant :Newlines + + # The syntax tree that was parsed from the source code. + attr_reader :value + + # Create a new parse result object with the given values. + def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, source) + end + + # Implement the hash pattern matching interface for ParseResult. + def deconstruct_keys(keys) + super.merge!(value: value) + end + + # Attach the list of comments to their respective locations in the tree. + def attach_comments! + Comments.new(self).attach! # steep:ignore + end + + # Walk the tree and mark nodes that are on a new line, loosely emulating + # the behavior of CRuby's `:line` tracepoint event. + def mark_newlines! + value.accept(Newlines.new(source.offsets.size)) # steep:ignore + end + end + + # This is a result specific to the `lex` and `lex_file` methods. + class LexResult < Result + # The list of tokens that were parsed from the source code. + attr_reader :value + + # Create a new lex result object with the given values. 
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, source) + end + + # Implement the hash pattern matching interface for LexResult. + def deconstruct_keys(keys) + super.merge!(value: value) + end + end + + # This is a result specific to the `parse_lex` and `parse_lex_file` methods. + class ParseLexResult < Result + # A tuple of the syntax tree and the list of tokens that were parsed from + # the source code. + attr_reader :value + + # Create a new parse lex result object with the given values. + def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, source) + end + + # Implement the hash pattern matching interface for ParseLexResult. + def deconstruct_keys(keys) + super.merge!(value: value) + end + end + # This represents a token from the Ruby source. class Token # The Source object that represents the source this token came from. |