path: root/lib/prism/parse_result.rb
diff options
Diffstat (limited to 'lib/prism/parse_result.rb')
1 files changed, 698 insertions, 0 deletions
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
new file mode 100644
index 0000000000..798fde09e5
--- /dev/null
+++ b/lib/prism/parse_result.rb
@@ -0,0 +1,698 @@
+# frozen_string_literal: true
+module Prism
+ # This represents a source of Ruby code that has been parsed. It is used in
+ # conjunction with locations to allow them to resolve line numbers and source
+ # ranges.
+ class Source
+   # Create a new source object with the given source code. This method should
+   # be used instead of `new` and it will return either a `Source` or a
+   # specialized and more performant `ASCIISource` if no multibyte characters
+   # are present in the source code.
+   def self.for(source, start_line = 1, offsets = [])
+     if source.ascii_only?
+       ASCIISource.new(source, start_line, offsets)
+     else
+       new(source, start_line, offsets)
+     end
+   end
+   # The source code that this source object represents.
+   attr_reader :source
+   # The line number where this source starts.
+   attr_reader :start_line
+   # The list of newline byte offsets in the source code.
+   attr_reader :offsets
+   # Create a new source object with the given source code, the line number
+   # of its first line, and the list of line-start byte offsets.
+   def initialize(source, start_line = 1, offsets = [])
+     @source = source
+     @start_line = start_line # set after parsing is done
+     @offsets = offsets # set after parsing is done
+   end
+   # Returns the encoding of the source code, which is set by parameters to the
+   # parser or by the encoding magic comment.
+   def encoding
+     source.encoding
+   end
+   # Returns the lines of the source code as an array of strings.
+   def lines
+     source.lines
+   end
+   # Perform a byteslice on the source code using the given byte offset and
+   # byte length. Raises if the requested range lies outside the source
+   # (that is, when `String#byteslice` returns nil).
+   def slice(byte_offset, length)
+     source.byteslice(byte_offset, length) or raise
+   end
+   # Returns the line number for the given byte offset: the zero-based index
+   # found by binary search over the offsets, shifted by `start_line`.
+   def line(byte_offset)
+     start_line + find_line(byte_offset)
+   end
+   # Return the byte offset of the start of the line corresponding to the given
+   # byte offset.
+   def line_start(byte_offset)
+     offsets[find_line(byte_offset)]
+   end
+   # Returns the byte offset of the end of the line corresponding to the given
+   # byte offset. This is the start offset of the following line, or the byte
+   # size of the source when the offset is on the last line.
+   def line_end(byte_offset)
+     offsets[find_line(byte_offset) + 1] || source.bytesize
+   end
+   # Return the column number (in bytes) for the given byte offset.
+   def column(byte_offset)
+     byte_offset - line_start(byte_offset)
+   end
+   # Return the character offset for the given byte offset.
+   def character_offset(byte_offset)
+     (source.byteslice(0, byte_offset) or raise).length
+   end
+   # Return the column number in characters for the given byte offset.
+   def character_column(byte_offset)
+     character_offset(byte_offset) - character_offset(line_start(byte_offset))
+   end
+   # Returns the offset from the start of the file for the given byte offset
+   # counting in code units for the given encoding.
+   #
+   # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
+   # concept of code units that differs from the number of characters in other
+   # encodings, it is not captured here.
+   def code_units_offset(byte_offset, encoding)
+     byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
+     # UTF-16 code units are two bytes wide; every other encoding is counted
+     # in characters.
+     (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
+   end
+   # Returns the column number in code units for the given encoding for the
+   # given byte offset.
+   def code_units_column(byte_offset, encoding)
+     code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
+   end
+   private
+   # Binary search through the offsets to find the zero-based index of the
+   # line containing the given byte offset, i.e. the index of the last entry
+   # in `offsets` that is less than or equal to `byte_offset`. Returns -1 when
+   # no such entry exists.
+   def find_line(byte_offset)
+     left = 0
+     right = offsets.length - 1
+     while left <= right
+       mid = left + (right - left) / 2
+       return mid if (offset = offsets[mid]) == byte_offset
+       if offset < byte_offset
+         left = mid + 1
+       else
+         right = mid - 1
+       end
+     end
+     left - 1
+   end
+ end
+ # Specialized version of Prism::Source for source code that includes ASCII
+ # characters only. This class is used to apply performance optimizations that
+ # cannot be applied to sources that include multibyte characters. Sources that
+ # include multibyte characters are represented by the Prism::Source class.
+ class ASCIISource < Source
+   # The character offset of a byte offset is the byte offset itself.
+   def character_offset(byte_offset)
+     byte_offset
+   end
+   # The column in characters for the given byte offset, computed as the byte
+   # distance from the start of its line.
+   def character_column(byte_offset)
+     line_origin = line_start(byte_offset)
+     byte_offset - line_origin
+   end
+   # Returns the offset from the start of the file for the given byte offset
+   # counting in code units for the given encoding. The byte offset is
+   # returned unchanged and the encoding argument is not consulted.
+   #
+   # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
+   # concept of code units that differs from the number of characters in other
+   # encodings, it is not captured here.
+   def code_units_offset(byte_offset, encoding)
+     byte_offset
+   end
+   # Specialized version of `code_units_column` that does not depend on
+   # `code_units_offset`, which is a more expensive operation. This is
+   # essentially the same as `Prism::Source#column`.
+   def code_units_column(byte_offset, encoding)
+     line_origin = line_start(byte_offset)
+     byte_offset - line_origin
+   end
+ end
+ # This represents a location in the source.
+ class Location
+   # A Source object that is used to determine more information from the given
+   # offset and length.
+   attr_reader :source
+   protected :source
+   # The byte offset from the beginning of the source where this location
+   # starts.
+   attr_reader :start_offset
+   # The length of this location in bytes.
+   attr_reader :length
+   # Create a new location object with the given source, start byte offset, and
+   # byte length.
+   def initialize(source, start_offset, length)
+     @source = source
+     @start_offset = start_offset
+     @length = length
+     # These are used to store comments that are associated with this location.
+     # They are initialized to `nil` to save on memory when there are no
+     # comments to be attached and/or the comment-related APIs are not used.
+     @leading_comments = nil
+     @trailing_comments = nil
+   end
+   # These are the comments that are associated with this location that exist
+   # before the start of this location.
+   def leading_comments
+     @leading_comments ||= []
+   end
+   # Attach a comment to the leading comments of this location.
+   def leading_comment(comment)
+     leading_comments << comment
+   end
+   # These are the comments that are associated with this location that exist
+   # after the end of this location.
+   def trailing_comments
+     @trailing_comments ||= []
+   end
+   # Attach a comment to the trailing comments of this location.
+   def trailing_comment(comment)
+     trailing_comments << comment
+   end
+   # Returns all comments that are associated with this location (both leading
+   # and trailing comments). Reads the instance variables directly so that no
+   # arrays are lazily allocated; splatting `nil` contributes no elements.
+   def comments
+     [*@leading_comments, *@trailing_comments]
+   end
+   # Create a new location object with the given options. Any option that is
+   # omitted defaults to the corresponding value of this location.
+   def copy(source: self.source, start_offset: self.start_offset, length: self.length)
+     Location.new(source, start_offset, length)
+   end
+   # Returns a new location that is the result of chopping off the last byte.
+   # A location of length zero is copied unchanged.
+   def chop
+     copy(length: length == 0 ? length : length - 1)
+   end
+   # Returns a string representation of this location.
+   def inspect
+     "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
+   end
+   # Returns all of the lines of the source code associated with this location.
+   def source_lines
+     source.lines
+   end
+   # The source code that this location represents.
+   def slice
+     source.slice(start_offset, length)
+   end
+   # The source code that this location represents starting from the beginning
+   # of the line that this location starts on to the end of the line that this
+   # location ends on.
+   def slice_lines
+     line_start = source.line_start(start_offset)
+     line_end = source.line_end(end_offset)
+     source.slice(line_start, line_end - line_start)
+   end
+   # The character offset from the beginning of the source where this location
+   # starts.
+   def start_character_offset
+     source.character_offset(start_offset)
+   end
+   # The offset from the start of the file in code units of the given encoding.
+   def start_code_units_offset(encoding = Encoding::UTF_16LE)
+     source.code_units_offset(start_offset, encoding)
+   end
+   # The byte offset from the beginning of the source where this location ends.
+   def end_offset
+     start_offset + length
+   end
+   # The character offset from the beginning of the source where this location
+   # ends.
+   def end_character_offset
+     source.character_offset(end_offset)
+   end
+   # The offset from the start of the file in code units of the given encoding.
+   def end_code_units_offset(encoding = Encoding::UTF_16LE)
+     source.code_units_offset(end_offset, encoding)
+   end
+   # The line number where this location starts.
+   def start_line
+     source.line(start_offset)
+   end
+   # The content of the line where this location starts before this location.
+   def start_line_slice
+     offset = source.line_start(start_offset)
+     source.slice(offset, start_offset - offset)
+   end
+   # The line number where this location ends.
+   def end_line
+     source.line(end_offset)
+   end
+   # The column number in bytes where this location starts from the start of
+   # the line.
+   def start_column
+     source.column(start_offset)
+   end
+   # The column number in characters where this location starts from the start
+   # of the line.
+   def start_character_column
+     source.character_column(start_offset)
+   end
+   # The column number in code units of the given encoding where this location
+   # starts from the start of the line.
+   def start_code_units_column(encoding = Encoding::UTF_16LE)
+     source.code_units_column(start_offset, encoding)
+   end
+   # The column number in bytes where this location ends from the start of the
+   # line.
+   def end_column
+     source.column(end_offset)
+   end
+   # The column number in characters where this location ends from the start of
+   # the line.
+   def end_character_column
+     source.character_column(end_offset)
+   end
+   # The column number in code units of the given encoding where this location
+   # ends from the start of the line.
+   def end_code_units_column(encoding = Encoding::UTF_16LE)
+     source.code_units_column(end_offset, encoding)
+   end
+   # Implement the hash pattern matching interface for Location.
+   def deconstruct_keys(keys)
+     { start_offset: start_offset, end_offset: end_offset }
+   end
+   # Implement the pretty print interface for Location.
+   def pretty_print(q)
+     q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})")
+   end
+   # Returns true if the given other location is equal to this location, i.e.
+   # it is a Location with the same start and end offsets.
+   def ==(other)
+     Location === other &&
+       other.start_offset == start_offset &&
+       other.end_offset == end_offset
+   end
+   # Returns a new location that stretches from this location to the given
+   # other location. Raises an error if this location is not before the other
+   # location or if they don't share the same source.
+   def join(other)
+     raise "Incompatible sources" if source != other.source
+     raise "Incompatible locations" if start_offset > other.start_offset
+     Location.new(source, start_offset, other.end_offset - start_offset)
+   end
+   # Join this location with the first occurrence of the string in the source
+   # that occurs after this location on the same line, and return the new
+   # location. This will raise an error if the string does not exist.
+   def adjoin(string)
+     # Only the remainder of the current line, after this location, is searched.
+     line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset)
+     line_suffix_index = line_suffix.byteindex(string)
+     raise "Could not find #{string}" if line_suffix_index.nil?
+     Location.new(source, start_offset, length + line_suffix_index + string.bytesize)
+   end
+ end
+ # This represents a comment that was encountered during parsing. It is the
+ # base class for all comment types.
+ class Comment
+   # Where this comment sits in the source.
+   attr_reader :location
+   # Build a comment that is anchored at the given location.
+   def initialize(location)
+     @location = location
+   end
+   # The text of the comment, obtained by slicing its location out of the
+   # source code.
+   def slice
+     comment_location = location
+     comment_location.slice
+   end
+   # Hash pattern matching support: exposes the comment's location.
+   def deconstruct_keys(keys)
+     { location: location }
+   end
+ end
+ # InlineComment objects are the most common. They correspond to comments in
+ # the source file like this one that start with #.
+ class InlineComment < Comment
+   # True when something other than whitespace precedes the comment on its
+   # line (the comment trails other code); false when it stands by itself.
+   def trailing?
+     preceding_text = location.start_line_slice.strip
+     !preceding_text.empty?
+   end
+   # A string representation of this comment, for debugging.
+   def inspect
+     "#<Prism::InlineComment @location=#{location.inspect}>"
+   end
+ end
+ # EmbDocComment objects correspond to comments that are surrounded by =begin
+ # and =end.
+ class EmbDocComment < Comment
+   # A string representation of this comment, for debugging.
+   def inspect
+     "#<Prism::EmbDocComment @location=#{location.inspect}>"
+   end
+   # Always false: only inline comments can be trailing.
+   def trailing?
+     false
+   end
+ end
+ # This represents a magic comment that was encountered during parsing.
+ class MagicComment
+   # Location of the magic comment's key in the source.
+   attr_reader :key_loc
+   # Location of the magic comment's value in the source.
+   attr_reader :value_loc
+   # Build a magic comment from the locations of its key and its value.
+   def initialize(key_loc, value_loc)
+     @key_loc = key_loc
+     @value_loc = value_loc
+   end
+   # The key text, sliced out of the source code.
+   def key
+     key_location = key_loc
+     key_location.slice
+   end
+   # The value text, sliced out of the source code.
+   def value
+     value_location = value_loc
+     value_location.slice
+   end
+   # Hash pattern matching support: exposes both locations.
+   def deconstruct_keys(keys)
+     { key_loc: key_loc, value_loc: value_loc }
+   end
+   # A string representation of this magic comment, for debugging.
+   def inspect
+     "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
+   end
+ end
+ # This represents an error that was encountered during parsing.
+ class ParseError
+   # The type of error. This is an _internal_ symbol that is used for
+   # communicating with translation layers. It is not meant to be public API.
+   attr_reader :type
+   # The human-readable message for this error.
+   attr_reader :message
+   # The Location in the source that this error refers to.
+   attr_reader :location
+   # The level of this error.
+   attr_reader :level
+   # Build an error from its type, message, location, and level.
+   def initialize(type, message, location, level)
+     @type, @message = type, message
+     @location, @level = location, level
+   end
+   # Hash pattern matching support: exposes type, message, location, and
+   # level, in that order.
+   def deconstruct_keys(keys)
+     %i[type message location level].to_h { |field| [field, public_send(field)] }
+   end
+   # A string representation of this error, for debugging.
+   def inspect
+     "#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
+   end
+ end
+ # This represents a warning that was encountered during parsing.
+ class ParseWarning
+   # The type of warning. This is an _internal_ symbol that is used for
+   # communicating with translation layers. It is not meant to be public API.
+   attr_reader :type
+   # The human-readable message for this warning.
+   attr_reader :message
+   # The Location in the source that this warning refers to.
+   attr_reader :location
+   # The level of this warning.
+   attr_reader :level
+   # Build a warning from its type, message, location, and level.
+   def initialize(type, message, location, level)
+     @type, @message = type, message
+     @location, @level = location, level
+   end
+   # Hash pattern matching support: exposes type, message, location, and
+   # level, in that order.
+   def deconstruct_keys(keys)
+     %i[type message location level].to_h { |field| [field, public_send(field)] }
+   end
+   # A string representation of this warning, for debugging.
+   def inspect
+     "#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
+   end
+ end
+ # This represents the result of a call to ::parse or ::parse_file. It contains
+ # the requested structure, any comments that were encountered, and any errors
+ # that were encountered.
+ class Result
+   # The comments encountered during parsing.
+   attr_reader :comments
+   # The magic comments encountered during parsing.
+   attr_reader :magic_comments
+   # An optional location that represents the location of the __END__ marker
+   # and the rest of the content of the file. This content is loaded into the
+   # DATA constant when the file being parsed is the main file being executed.
+   attr_reader :data_loc
+   # The errors generated during parsing.
+   attr_reader :errors
+   # The warnings generated during parsing.
+   attr_reader :warnings
+   # The Source instance for the code that was parsed.
+   attr_reader :source
+   # Build a result from the given values.
+   def initialize(comments, magic_comments, data_loc, errors, warnings, source)
+     @comments, @magic_comments = comments, magic_comments
+     @data_loc = data_loc
+     @errors, @warnings = errors, warnings
+     @source = source
+   end
+   # Hash pattern matching support: exposes every field except the source.
+   def deconstruct_keys(keys)
+     %i[comments magic_comments data_loc errors warnings].to_h do |field|
+       [field, public_send(field)]
+     end
+   end
+   # The encoding of the source code that was parsed.
+   def encoding
+     parsed_source = source
+     parsed_source.encoding
+   end
+   # True when parsing produced no errors, false otherwise.
+   def success?
+     errors.empty?
+   end
+   # True when parsing produced at least one error, false otherwise.
+   def failure?
+     !success?
+   end
+ end
+ # This is a result specific to the `parse` and `parse_file` methods.
+ class ParseResult < Result
+   autoload :Comments, "prism/parse_result/comments"
+   autoload :Newlines, "prism/parse_result/newlines"
+   private_constant :Comments
+   private_constant :Newlines
+   # The syntax tree that was parsed from the source code.
+   attr_reader :value
+   # Create a new parse result object with the given values.
+   def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+     @value = value
+     super(comments, magic_comments, data_loc, errors, warnings, source)
+   end
+   # Implement the hash pattern matching interface for ParseResult. Adds the
+   # syntax tree under `:value` to the keys exposed by the superclass.
+   def deconstruct_keys(keys)
+     super.merge!(value: value)
+   end
+   # Attach the list of comments to their respective locations in the tree.
+   def attach_comments!
+     # NOTE(review): the receiver of `attach!` was lost in this extract; the
+     # `Comments.new(self)` call is a reconstruction — confirm against the
+     # upstream commit.
+     Comments.new(self).attach! # steep:ignore
+   end
+   # Walk the tree and mark nodes that are on a new line, loosely emulating
+   # the behavior of CRuby's `:line` tracepoint event.
+   def mark_newlines!
+     # NOTE(review): the visitor argument was lost in this extract; a Newlines
+     # visitor sized by the number of line offsets is a reconstruction —
+     # confirm against the upstream commit.
+     value.accept(Newlines.new(source.offsets.size)) # steep:ignore
+   end
+ end
+ # This is a result specific to the `lex` and `lex_file` methods.
+ class LexResult < Result
+   # The tokens that were lexed from the source code.
+   attr_reader :value
+   # Build a lex result from the token list plus the common result values.
+   def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+     super(comments, magic_comments, data_loc, errors, warnings, source)
+     @value = value
+   end
+   # Hash pattern matching support: the superclass keys plus `:value`.
+   def deconstruct_keys(keys)
+     fields = super
+     fields[:value] = value
+     fields
+   end
+ end
+ # This is a result specific to the `parse_lex` and `parse_lex_file` methods.
+ class ParseLexResult < Result
+   # A tuple of the syntax tree and the list of tokens that were parsed from
+   # the source code.
+   attr_reader :value
+   # Build a parse-lex result from the tuple plus the common result values.
+   def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+     super(comments, magic_comments, data_loc, errors, warnings, source)
+     @value = value
+   end
+   # Hash pattern matching support: the superclass keys plus `:value`.
+   def deconstruct_keys(keys)
+     fields = super
+     fields[:value] = value
+     fields
+   end
+ end
+ # This represents a token from the Ruby source.
+ class Token
+   # The Source object that represents the source this token came from.
+   attr_reader :source
+   private :source
+   # The type of token that this token is.
+   attr_reader :type
+   # A byteslice of the source that this token represents.
+   attr_reader :value
+   # Create a new token object with the given type, value, and location. The
+   # location may be a Location or a packed integer (see #location).
+   def initialize(source, type, value, location)
+     @source = source
+     @type = type
+     @value = value
+     @location = location
+   end
+   # Implement the hash pattern matching interface for Token.
+   def deconstruct_keys(keys)
+     { type: type, value: value, location: location }
+   end
+   # A Location object representing the location of this token in the source.
+   # When the stored value is not yet a Location it is treated as a packed
+   # integer: the bits above the low 32 are the start offset and the low 32
+   # bits are the length. The Location is built on first access and cached.
+   def location
+     location = @location
+     return location if location.is_a?(Location)
+     @location = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+   end
+   # Implement the pretty print interface for Token. Prints the type, the
+   # location, and the value in parentheses as a single group.
+   def pretty_print(q)
+     q.group do
+       q.text(type.to_s)
+       self.location.pretty_print(q)
+       q.text("(")
+       q.nest(2) do
+         q.breakable("")
+         q.pp(value)
+       end
+       q.breakable("")
+       q.text(")")
+     end
+   end
+   # Returns true if the given other token is equal to this token, i.e. it is
+   # a Token with the same type and value. The location is not compared.
+   def ==(other)
+     Token === other &&
+       other.type == type &&
+       other.value == value
+   end
+ end