summaryrefslogtreecommitdiff
path: root/test/prism/ruby
diff options
context:
space:
mode:
author Kevin Newton <kddnewton@gmail.com> 2024-10-09 14:40:35 -0400
committer git <svn-admin@ruby-lang.org> 2024-10-10 18:02:27 +0000
commit 7a198af7cdb437c5245ac3ab70cb66cef2002d06 (patch)
tree 1c7270805ff508b297aca4d1e5f89c6464d03838 /test/prism/ruby
parent b77ff342ccb1c57a4b6c618e4ddf6bf1fec23a1d (diff)
[ruby/prism] Prism::CodeUnitsCache
Calculating code unit offsets for a source can be very expensive, especially when the source is large. This commit introduces a new class that wraps the source and desired encoding into a cache that reuses pre-computed offsets. It performs quite a bit better. There are still some problems with this approach, namely character boundaries and the fact that the cache is unbounded, but both of these may be addressed in subsequent commits. https://github.com/ruby/prism/commit/2e3e1a4d4d
Diffstat (limited to 'test/prism/ruby')
-rw-r--r-- test/prism/ruby/location_test.rb 46
1 files changed, 46 insertions, 0 deletions
diff --git a/test/prism/ruby/location_test.rb b/test/prism/ruby/location_test.rb
index 3d3e7dd562..33f844243c 100644
--- a/test/prism/ruby/location_test.rb
+++ b/test/prism/ruby/location_test.rb
@@ -140,6 +140,52 @@ module Prism
assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE)
end
+ def test_cached_code_units
+ result = Prism.parse("😀 + 😀\n😍 ||= 😍")
+
+ utf8_cache = result.code_units_cache(Encoding::UTF_8)
+ utf16_cache = result.code_units_cache(Encoding::UTF_16LE)
+ utf32_cache = result.code_units_cache(Encoding::UTF_32LE)
+
+ # first 😀
+ location = result.value.statements.body.first.receiver.location
+
+ assert_equal 0, location.cached_start_code_units_offset(utf8_cache)
+ assert_equal 0, location.cached_start_code_units_offset(utf16_cache)
+ assert_equal 0, location.cached_start_code_units_offset(utf32_cache)
+
+ assert_equal 1, location.cached_end_code_units_offset(utf8_cache)
+ assert_equal 2, location.cached_end_code_units_offset(utf16_cache)
+ assert_equal 1, location.cached_end_code_units_offset(utf32_cache)
+
+ assert_equal 0, location.cached_start_code_units_column(utf8_cache)
+ assert_equal 0, location.cached_start_code_units_column(utf16_cache)
+ assert_equal 0, location.cached_start_code_units_column(utf32_cache)
+
+ assert_equal 1, location.cached_end_code_units_column(utf8_cache)
+ assert_equal 2, location.cached_end_code_units_column(utf16_cache)
+ assert_equal 1, location.cached_end_code_units_column(utf32_cache)
+
+ # second 😀
+ location = result.value.statements.body.first.arguments.arguments.first.location
+
+ assert_equal 4, location.cached_start_code_units_offset(utf8_cache)
+ assert_equal 5, location.cached_start_code_units_offset(utf16_cache)
+ assert_equal 4, location.cached_start_code_units_offset(utf32_cache)
+
+ assert_equal 5, location.cached_end_code_units_offset(utf8_cache)
+ assert_equal 7, location.cached_end_code_units_offset(utf16_cache)
+ assert_equal 5, location.cached_end_code_units_offset(utf32_cache)
+
+ assert_equal 4, location.cached_start_code_units_column(utf8_cache)
+ assert_equal 5, location.cached_start_code_units_column(utf16_cache)
+ assert_equal 4, location.cached_start_code_units_column(utf32_cache)
+
+ assert_equal 5, location.cached_end_code_units_column(utf8_cache)
+ assert_equal 7, location.cached_end_code_units_column(utf16_cache)
+ assert_equal 5, location.cached_end_code_units_column(utf32_cache)
+ end
+
def test_code_units_binary_valid_utf8
program = Prism.parse(<<~RUBY).value
# -*- encoding: binary -*-