summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-11-20 11:07:02 -0500
committer: git <svn-admin@ruby-lang.org> 2023-11-20 16:07:06 +0000
commit: f2ed7eaba0275099842b5b8407250e2d410f2f25 (patch)
tree: f4033a6b792245c785dae837043b632c0c888878
parent: adee7dab3edc3d58cc3d7245398b75ab1de8d077 (diff)
[ruby/prism] Add character APIs for locations
(https://github.com/ruby/prism/pull/1809) https://github.com/ruby/prism/commit/d493ccd093
-rw-r--r-- lib/prism/ffi.rb 2
-rw-r--r-- lib/prism/parse_result.rb 58
-rw-r--r-- prism/templates/lib/prism/serialize.rb.erb 14
-rw-r--r-- test/prism/ruby_api_test.rb 32
4 files changed, 83 insertions, 23 deletions
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index e1d3e0dca7..c910fd3aae 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -230,7 +230,7 @@ module Prism
loader = Serialize::Loader.new(source, buffer.read)
loader.load_header
- loader.load_force_encoding
+ loader.load_encoding
loader.load_start_line
loader.load_comments
end
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index 170a529bea..50c23bce65 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -25,40 +25,50 @@ module Prism
# Perform a byteslice on the source code using the given byte offset and
# byte length.
- def slice(offset, length)
- source.byteslice(offset, length)
+ def slice(byte_offset, length)
+ source.byteslice(byte_offset, length)
end
# Binary search through the offsets to find the line number for the given
# byte offset.
- def line(value)
- start_line + find_line(value)
+ def line(byte_offset)
+ start_line + find_line(byte_offset)
end
# Return the byte offset of the start of the line corresponding to the given
# byte offset.
- def line_offset(value)
- offsets[find_line(value)]
+ def line_start(byte_offset)
+ offsets[find_line(byte_offset)]
end
# Return the column number for the given byte offset.
- def column(value)
- value - offsets[find_line(value)]
+ def column(byte_offset)
+ byte_offset - line_start(byte_offset)
+ end
+
+ # Return the character offset for the given byte offset.
+ def character_offset(byte_offset)
+ source.byteslice(0, byte_offset).length
+ end
+
+ # Return the column number in characters for the given byte offset.
+ def character_column(byte_offset)
+ character_offset(byte_offset) - character_offset(line_start(byte_offset))
end
private
# Binary search through the offsets to find the line number for the given
# byte offset.
- def find_line(value)
+ def find_line(byte_offset)
left = 0
right = offsets.length - 1
while left <= right
mid = left + (right - left) / 2
- return mid if offsets[mid] == value
+ return mid if offsets[mid] == byte_offset
- if offsets[mid] < value
+ if offsets[mid] < byte_offset
left = mid + 1
else
right = mid - 1
@@ -121,11 +131,23 @@ module Prism
source.slice(start_offset, length)
end
+ # The character offset from the beginning of the source where this location
+ # starts.
+ def start_character_offset
+ source.character_offset(start_offset)
+ end
+
# The byte offset from the beginning of the source where this location ends.
def end_offset
start_offset + length
end
+ # The character offset from the beginning of the source where this location
+ # ends.
+ def end_character_offset
+ source.character_offset(end_offset)
+ end
+
# The line number where this location starts.
def start_line
source.line(start_offset)
@@ -133,7 +155,7 @@ module Prism
# The content of the line where this location starts before this location.
def start_line_slice
- offset = source.line_offset(start_offset)
+ offset = source.line_start(start_offset)
source.slice(offset, start_offset - offset)
end
@@ -148,12 +170,24 @@ module Prism
source.column(start_offset)
end
+ # The column number in characters where this location starts from the start
+ # of the line.
+ def start_character_column
+ source.character_column(start_offset)
+ end
+
# The column number in bytes where this location ends from the start of the
# line.
def end_column
source.column(end_offset)
end
+ # The column number in characters where this location ends from the start of
+ # the line.
+ def end_character_column
+ source.character_column(end_offset)
+ end
+
# Implement the hash pattern matching interface for Location.
def deconstruct_keys(keys)
{ start_offset: start_offset, end_offset: end_offset }
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
index 2837504543..e5a88ae99a 100644
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -73,12 +73,9 @@ module Prism
end
def load_encoding
- Encoding.find(io.read(load_varint))
- end
-
- def load_force_encoding
- @encoding = load_encoding
+ @encoding = Encoding.find(io.read(load_varint))
@input = input.force_encoding(@encoding).freeze
+ @encoding
end
def load_start_line
@@ -121,10 +118,7 @@ module Prism
encoding = load_encoding
load_start_line
comments, magic_comments, errors, warnings = load_metadata
-
- if encoding != @encoding
- tokens.each { |token,| token.value.force_encoding(encoding) }
- end
+ tokens.each { |token,| token.value.force_encoding(encoding) }
raise "Expected to consume all bytes while deserializing" unless @io.eof?
Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
@@ -132,7 +126,7 @@ module Prism
def load_nodes
load_header
- load_force_encoding
+ load_encoding
load_start_line
comments, magic_comments, errors, warnings = load_metadata
diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb
index a61282cca1..cd87a81395 100644
--- a/test/prism/ruby_api_test.rb
+++ b/test/prism/ruby_api_test.rb
@@ -71,6 +71,38 @@ module Prism
end
end
+ def test_location_character_offsets
+ program = Prism.parse("😀 + 😀\n😍 ||= 😍").value
+
+ # first 😀
+ location = program.statements.body.first.receiver.location
+ assert_equal 0, location.start_character_offset
+ assert_equal 1, location.end_character_offset
+ assert_equal 0, location.start_character_column
+ assert_equal 1, location.end_character_column
+
+ # second 😀
+ location = program.statements.body.first.arguments.arguments.first.location
+ assert_equal 4, location.start_character_offset
+ assert_equal 5, location.end_character_offset
+ assert_equal 4, location.start_character_column
+ assert_equal 5, location.end_character_column
+
+ # first 😍
+ location = program.statements.body.last.name_loc
+ assert_equal 6, location.start_character_offset
+ assert_equal 7, location.end_character_offset
+ assert_equal 0, location.start_character_column
+ assert_equal 1, location.end_character_column
+
+ # second 😍
+ location = program.statements.body.last.value.location
+ assert_equal 12, location.start_character_offset
+ assert_equal 13, location.end_character_offset
+ assert_equal 6, location.start_character_column
+ assert_equal 7, location.end_character_column
+ end
+
private
def parse_expression(source)