diff options
Diffstat (limited to 'lib/prism/parse_result.rb')
-rw-r--r-- | lib/prism/parse_result.rb | 618 |
1 files changed, 618 insertions, 0 deletions
# frozen_string_literal: true

module Prism
  # This represents a source of Ruby code that has been parsed. It is used in
  # conjunction with locations to allow them to resolve line numbers and source
  # ranges.
  class Source
    # The source code that this source object represents.
    attr_reader :source

    # The line number where this source starts.
    attr_reader :start_line

    # The list of newline byte offsets in the source code. The entry at index
    # `n` is used as the byte offset at which the n-th (0-based) line starts.
    attr_reader :offsets

    # Create a new source object with the given source code.
    def initialize(source, start_line = 1, offsets = [])
      @source = source
      @start_line = start_line # set after parsing is done
      @offsets = offsets # set after parsing is done
    end

    # Returns the encoding of the source code, which is set by parameters to the
    # parser or by the encoding magic comment.
    def encoding
      source.encoding
    end

    # Perform a byteslice on the source code using the given byte offset and
    # byte length. Raises if the requested range is outside of the source.
    def slice(byte_offset, length)
      source.byteslice(byte_offset, length) or raise
    end

    # Binary search through the offsets to find the line number for the given
    # byte offset.
    def line(byte_offset)
      start_line + find_line(byte_offset)
    end

    # Return the byte offset of the start of the line corresponding to the given
    # byte offset.
    def line_start(byte_offset)
      offsets[find_line(byte_offset)]
    end

    # Returns the byte offset of the end of the line corresponding to the given
    # byte offset. For the last line this is the size of the source in bytes.
    def line_end(byte_offset)
      offsets[find_line(byte_offset) + 1] || source.bytesize
    end

    # Return the column number for the given byte offset.
    def column(byte_offset)
      byte_offset - line_start(byte_offset)
    end

    # Return the character offset for the given byte offset.
    def character_offset(byte_offset)
      (source.byteslice(0, byte_offset) or raise).length
    end

    # Return the column number in characters for the given byte offset.
    def character_column(byte_offset)
      character_offset(byte_offset) - character_offset(line_start(byte_offset))
    end

    # Returns the offset from the start of the file for the given byte offset
    # counting in code units for the given encoding.
    #
    # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
    # concept of code units that differs from the number of characters in other
    # encodings, it is not captured here.
    #
    # Bytes that are invalid in the source encoding (for example when the byte
    # offset falls in the middle of a multibyte character) or that cannot be
    # represented in the target encoding are replaced instead of raising, so
    # that an offset can always be computed.
    def code_units_offset(byte_offset, encoding)
      prefix = (source.byteslice(0, byte_offset) or raise)
      converted = prefix.encode(encoding, invalid: :replace, undef: :replace)

      if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
        # Every UTF-16 code unit is exactly two bytes wide.
        converted.bytesize / 2
      else
        converted.length
      end
    end

    # Returns the column number in code units for the given encoding for the
    # given byte offset.
    def code_units_column(byte_offset, encoding)
      code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
    end

    private

    # Binary search through the offsets to find the line number for the given
    # byte offset. Returns the 0-based index into +offsets+ of the last entry
    # that is less than or equal to the byte offset.
    def find_line(byte_offset)
      left = 0
      right = offsets.length - 1

      while left <= right
        mid = left + (right - left) / 2
        return mid if (offset = offsets[mid]) == byte_offset

        if offset < byte_offset
          left = mid + 1
        else
          right = mid - 1
        end
      end

      left - 1
    end
  end

  # This represents a location in the source.
  class Location
    # A Source object that is used to determine more information from the given
    # offset and length.
    attr_reader :source
    protected :source

    # The byte offset from the beginning of the source where this location
    # starts.
    attr_reader :start_offset

    # The length of this location in bytes.
    attr_reader :length

    # Create a new location object with the given source, start byte offset, and
    # byte length.
    def initialize(source, start_offset, length)
      @source = source
      @start_offset = start_offset
      @length = length

      # These are used to store comments that are associated with this location.
      # They are initialized to `nil` to save on memory when there are no
      # comments to be attached and/or the comment-related APIs are not used.
      @leading_comments = nil
      @trailing_comments = nil
    end

    # These are the comments that are associated with this location that exist
    # before the start of this location.
    def leading_comments
      @leading_comments ||= []
    end

    # Attach a comment to the leading comments of this location.
    def leading_comment(comment)
      leading_comments << comment
    end

    # These are the comments that are associated with this location that exist
    # after the end of this location.
    def trailing_comments
      @trailing_comments ||= []
    end

    # Attach a comment to the trailing comments of this location.
    def trailing_comment(comment)
      trailing_comments << comment
    end

    # Returns all comments that are associated with this location (both leading
    # and trailing comments).
    def comments
      [*@leading_comments, *@trailing_comments]
    end

    # Create a new location object with the given options.
    def copy(source: self.source, start_offset: self.start_offset, length: self.length)
      Location.new(source, start_offset, length)
    end

    # Returns a new location that is the result of chopping off the last byte.
    # An empty location is returned unchanged in length.
    def chop
      copy(length: length == 0 ? length : length - 1)
    end

    # Returns a string representation of this location.
    def inspect
      "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
    end

    # The source code that this location represents.
    def slice
      source.slice(start_offset, length)
    end

    # The source code that this location represents starting from the beginning
    # of the line that this location starts on to the end of the line that this
    # location ends on.
    def slice_lines
      line_start = source.line_start(start_offset)
      line_end = source.line_end(end_offset)
      source.slice(line_start, line_end - line_start)
    end

    # The character offset from the beginning of the source where this location
    # starts.
    def start_character_offset
      source.character_offset(start_offset)
    end

    # The offset from the start of the file in code units of the given encoding.
    def start_code_units_offset(encoding = Encoding::UTF_16LE)
      source.code_units_offset(start_offset, encoding)
    end

    # The byte offset from the beginning of the source where this location ends.
    def end_offset
      start_offset + length
    end

    # The character offset from the beginning of the source where this location
    # ends.
    def end_character_offset
      source.character_offset(end_offset)
    end

    # The offset from the start of the file in code units of the given encoding.
    def end_code_units_offset(encoding = Encoding::UTF_16LE)
      source.code_units_offset(end_offset, encoding)
    end

    # The line number where this location starts.
    def start_line
      source.line(start_offset)
    end

    # The content of the line where this location starts before this location.
    def start_line_slice
      offset = source.line_start(start_offset)
      source.slice(offset, start_offset - offset)
    end

    # The line number where this location ends.
    def end_line
      source.line(end_offset)
    end

    # The column number in bytes where this location starts from the start of
    # the line.
    def start_column
      source.column(start_offset)
    end

    # The column number in characters where this location starts from the start
    # of the line.
    def start_character_column
      source.character_column(start_offset)
    end

    # The column number in code units of the given encoding where this location
    # starts from the start of the line.
    def start_code_units_column(encoding = Encoding::UTF_16LE)
      source.code_units_column(start_offset, encoding)
    end

    # The column number in bytes where this location ends from the start of the
    # line.
    def end_column
      source.column(end_offset)
    end

    # The column number in characters where this location ends from the start of
    # the line.
    def end_character_column
      source.character_column(end_offset)
    end

    # The column number in code units of the given encoding where this location
    # ends from the start of the line.
    def end_code_units_column(encoding = Encoding::UTF_16LE)
      source.code_units_column(end_offset, encoding)
    end

    # Implement the hash pattern matching interface for Location.
    def deconstruct_keys(keys)
      { start_offset: start_offset, end_offset: end_offset }
    end

    # Implement the pretty print interface for Location.
    def pretty_print(q)
      q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})")
    end

    # Returns true if the given other location is equal to this location.
    # Only the start and end offsets are compared.
    def ==(other)
      Location === other &&
        other.start_offset == start_offset &&
        other.end_offset == end_offset
    end

    # Returns a new location that stretches from this location to the given
    # other location. Raises an error if this location is not before the other
    # location or if they don't share the same source.
    def join(other)
      raise "Incompatible sources" if source != other.source
      raise "Incompatible locations" if start_offset > other.start_offset

      Location.new(source, start_offset, other.end_offset - start_offset)
    end
  end

  # This represents a comment that was encountered during parsing. It is the
  # base class for all comment types.
  class Comment
    # The location of this comment in the source.
    attr_reader :location

    # Create a new comment object with the given location.
    def initialize(location)
      @location = location
    end

    # Implement the hash pattern matching interface for Comment.
    def deconstruct_keys(keys)
      { location: location }
    end

    # Returns the content of the comment by slicing it from the source code.
    def slice
      location.slice
    end
  end

  # InlineComment objects are the most common. They correspond to comments in
  # the source file like this one that start with #.
  class InlineComment < Comment
    # Returns true if this comment happens on the same line as other code and
    # false if the comment is by itself.
    def trailing?
      !location.start_line_slice.strip.empty?
    end

    # Returns a string representation of this comment.
    def inspect
      "#<Prism::InlineComment @location=#{location.inspect}>"
    end
  end

  # EmbDocComment objects correspond to comments that are surrounded by =begin
  # and =end.
  class EmbDocComment < Comment
    # This can only be true for inline comments.
    def trailing?
      false
    end

    # Returns a string representation of this comment.
    def inspect
      "#<Prism::EmbDocComment @location=#{location.inspect}>"
    end
  end

  # This represents a magic comment that was encountered during parsing.
  class MagicComment
    # A Location object representing the location of the key in the source.
    attr_reader :key_loc

    # A Location object representing the location of the value in the source.
    attr_reader :value_loc

    # Create a new magic comment object with the given key and value locations.
    def initialize(key_loc, value_loc)
      @key_loc = key_loc
      @value_loc = value_loc
    end

    # Returns the key of the magic comment by slicing it from the source code.
    def key
      key_loc.slice
    end

    # Returns the value of the magic comment by slicing it from the source code.
    def value
      value_loc.slice
    end

    # Implement the hash pattern matching interface for MagicComment.
    def deconstruct_keys(keys)
      { key_loc: key_loc, value_loc: value_loc }
    end

    # Returns a string representation of this magic comment.
    def inspect
      "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
    end
  end

  # This represents an error that was encountered during parsing.
  class ParseError
    # The type of error. This is an _internal_ symbol that is used for
    # communicating with translation layers. It is not meant to be public API.
    attr_reader :type

    # The message associated with this error.
    attr_reader :message

    # A Location object representing the location of this error in the source.
    attr_reader :location

    # The level of this error.
    attr_reader :level

    # Create a new error object with the given type, message, location, and
    # level.
    def initialize(type, message, location, level)
      @type = type
      @message = message
      @location = location
      @level = level
    end

    # Implement the hash pattern matching interface for ParseError.
    def deconstruct_keys(keys)
      { type: type, message: message, location: location, level: level }
    end

    # Returns a string representation of this error.
    def inspect
      "#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
    end
  end

  # This represents a warning that was encountered during parsing.
  class ParseWarning
    # The type of warning. This is an _internal_ symbol that is used for
    # communicating with translation layers. It is not meant to be public API.
    attr_reader :type

    # The message associated with this warning.
    attr_reader :message

    # A Location object representing the location of this warning in the source.
    attr_reader :location

    # The level of this warning.
    attr_reader :level

    # Create a new warning object with the given type, message, location, and
    # level.
    def initialize(type, message, location, level)
      @type = type
      @message = message
      @location = location
      @level = level
    end

    # Implement the hash pattern matching interface for ParseWarning.
    def deconstruct_keys(keys)
      { type: type, message: message, location: location, level: level }
    end

    # Returns a string representation of this warning.
    def inspect
      "#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
    end
  end

  # This represents the result of a call to ::parse or ::parse_file. It contains
  # the requested structure, any comments that were encountered, and any errors
  # that were encountered.
  class Result
    # The list of comments that were encountered during parsing.
    attr_reader :comments

    # The list of magic comments that were encountered during parsing.
    attr_reader :magic_comments

    # An optional location that represents the location of the __END__ marker
    # and the rest of the content of the file. This content is loaded into the
    # DATA constant when the file being parsed is the main file being executed.
    attr_reader :data_loc

    # The list of errors that were generated during parsing.
    attr_reader :errors

    # The list of warnings that were generated during parsing.
    attr_reader :warnings

    # A Source instance that represents the source code that was parsed.
    attr_reader :source

    # Create a new result object with the given values.
    def initialize(comments, magic_comments, data_loc, errors, warnings, source)
      @comments = comments
      @magic_comments = magic_comments
      @data_loc = data_loc
      @errors = errors
      @warnings = warnings
      @source = source
    end

    # Implement the hash pattern matching interface for Result.
    def deconstruct_keys(keys)
      { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
    end

    # Returns the encoding of the source code that was parsed.
    def encoding
      source.encoding
    end

    # Returns true if there were no errors during parsing and false if there
    # were.
    def success?
      errors.empty?
    end

    # Returns true if there were errors during parsing and false if there were
    # not.
    def failure?
      !success?
    end
  end

  # This is a result specific to the `parse` and `parse_file` methods.
  class ParseResult < Result
    # The syntax tree that was parsed from the source code.
    attr_reader :value

    # Create a new parse result object with the given values.
    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
      @value = value
      super(comments, magic_comments, data_loc, errors, warnings, source)
    end

    # Implement the hash pattern matching interface for ParseResult.
    def deconstruct_keys(keys)
      super.merge!(value: value)
    end
  end

  # This is a result specific to the `lex` and `lex_file` methods.
  class LexResult < Result
    # The list of tokens that were parsed from the source code.
    attr_reader :value

    # Create a new lex result object with the given values.
    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
      @value = value
      super(comments, magic_comments, data_loc, errors, warnings, source)
    end

    # Implement the hash pattern matching interface for LexResult.
    def deconstruct_keys(keys)
      super.merge!(value: value)
    end
  end

  # This is a result specific to the `parse_lex` and `parse_lex_file` methods.
  class ParseLexResult < Result
    # A tuple of the syntax tree and the list of tokens that were parsed from
    # the source code.
    attr_reader :value

    # Create a new parse lex result object with the given values.
    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
      @value = value
      super(comments, magic_comments, data_loc, errors, warnings, source)
    end

    # Implement the hash pattern matching interface for ParseLexResult.
    def deconstruct_keys(keys)
      super.merge!(value: value)
    end
  end

  # This represents a token from the Ruby source.
  class Token
    # The Source object that represents the source this token came from.
    attr_reader :source
    private :source

    # The type of token that this token is.
    attr_reader :type

    # A byteslice of the source that this token represents.
    attr_reader :value

    # Create a new token object with the given type, value, and location. The
    # location may be a Location object or an integer that packs the start
    # offset into the bits above the low 32 and the length into the low 32 bits.
    def initialize(source, type, value, location)
      @source = source
      @type = type
      @value = value
      @location = location
    end

    # Implement the hash pattern matching interface for Token.
    def deconstruct_keys(keys)
      { type: type, value: value, location: location }
    end

    # A Location object representing the location of this token in the source.
    # When the token was created with a packed integer, the Location object is
    # built on first access and cached.
    def location
      location = @location
      return location if location.is_a?(Location)
      @location = Location.new(source, location >> 32, location & 0xFFFFFFFF)
    end

    # Implement the pretty print interface for Token.
    def pretty_print(q)
      q.group do
        q.text(type.to_s)
        self.location.pretty_print(q)
        q.text("(")
        q.nest(2) do
          q.breakable("")
          q.pp(value)
        end
        q.breakable("")
        q.text(")")
      end
    end

    # Returns true if the given other token is equal to this token. Only the
    # type and value are compared.
    def ==(other)
      Token === other &&
        other.type == type &&
        other.value == value
    end
  end
end