diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2024-10-09 14:40:35 -0400 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2024-10-10 18:02:27 +0000 |
| commit | 7a198af7cdb437c5245ac3ab70cb66cef2002d06 (patch) | |
| tree | 1c7270805ff508b297aca4d1e5f89c6464d03838 /test/prism/ruby | |
| parent | b77ff342ccb1c57a4b6c618e4ddf6bf1fec23a1d (diff) | |
[ruby/prism] Prism::CodeUnitsCache
Calculating code unit offsets for a source can be very expensive,
especially when the source is large. This commit introduces a new
class that wraps the source and desired encoding into a cache that
reuses pre-computed offsets. It performs quite a bit better.
There are still some problems with this approach, namely character
boundaries and the fact that the cache is unbounded, but both of
these may be addressed in subsequent commits.
https://github.com/ruby/prism/commit/2e3e1a4d4d
Diffstat (limited to 'test/prism/ruby')
| -rw-r--r-- | test/prism/ruby/location_test.rb | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/test/prism/ruby/location_test.rb b/test/prism/ruby/location_test.rb index 3d3e7dd562..33f844243c 100644 --- a/test/prism/ruby/location_test.rb +++ b/test/prism/ruby/location_test.rb @@ -140,6 +140,52 @@ module Prism assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE) end + def test_cached_code_units + result = Prism.parse("š + š\nš ||= š") + + utf8_cache = result.code_units_cache(Encoding::UTF_8) + utf16_cache = result.code_units_cache(Encoding::UTF_16LE) + utf32_cache = result.code_units_cache(Encoding::UTF_32LE) + + # first š + location = result.value.statements.body.first.receiver.location + + assert_equal 0, location.cached_start_code_units_offset(utf8_cache) + assert_equal 0, location.cached_start_code_units_offset(utf16_cache) + assert_equal 0, location.cached_start_code_units_offset(utf32_cache) + + assert_equal 1, location.cached_end_code_units_offset(utf8_cache) + assert_equal 2, location.cached_end_code_units_offset(utf16_cache) + assert_equal 1, location.cached_end_code_units_offset(utf32_cache) + + assert_equal 0, location.cached_start_code_units_column(utf8_cache) + assert_equal 0, location.cached_start_code_units_column(utf16_cache) + assert_equal 0, location.cached_start_code_units_column(utf32_cache) + + assert_equal 1, location.cached_end_code_units_column(utf8_cache) + assert_equal 2, location.cached_end_code_units_column(utf16_cache) + assert_equal 1, location.cached_end_code_units_column(utf32_cache) + + # second š + location = result.value.statements.body.first.arguments.arguments.first.location + + assert_equal 4, location.cached_start_code_units_offset(utf8_cache) + assert_equal 5, location.cached_start_code_units_offset(utf16_cache) + assert_equal 4, location.cached_start_code_units_offset(utf32_cache) + + assert_equal 5, location.cached_end_code_units_offset(utf8_cache) + assert_equal 7, location.cached_end_code_units_offset(utf16_cache) + assert_equal 5, location.cached_end_code_units_offset(utf32_cache) + + assert_equal 4, location.cached_start_code_units_column(utf8_cache) + assert_equal 5, location.cached_start_code_units_column(utf16_cache) + assert_equal 4, location.cached_start_code_units_column(utf32_cache) + + assert_equal 5, location.cached_end_code_units_column(utf8_cache) + assert_equal 7, location.cached_end_code_units_column(utf16_cache) + assert_equal 5, location.cached_end_code_units_column(utf32_cache) + end + def test_code_units_binary_valid_utf8 program = Prism.parse(<<~RUBY).value # -*- encoding: binary -*- |
