From 7a198af7cdb437c5245ac3ab70cb66cef2002d06 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 9 Oct 2024 14:40:35 -0400 Subject: [ruby/prism] Prism::CodeUnitsCache Calculating code unit offsets for a source can be very expensive, especially when the source is large. This commit introduces a new class that wraps the source and desired encoding into a cache that reuses pre-computed offsets. It performs quite a bit better. There are still some problems with this approach, namely character boundaries and the fact that the cache is unbounded, but both of these may be addressed in subsequent commits. https://github.com/ruby/prism/commit/2e3e1a4d4d --- test/prism/ruby/location_test.rb | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'test') diff --git a/test/prism/ruby/location_test.rb b/test/prism/ruby/location_test.rb index 3d3e7dd562..33f844243c 100644 --- a/test/prism/ruby/location_test.rb +++ b/test/prism/ruby/location_test.rb @@ -140,6 +140,52 @@ module Prism assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE) end + def test_cached_code_units + result = Prism.parse("šŸ˜€ + šŸ˜€\nšŸ˜ ||= šŸ˜") + + utf8_cache = result.code_units_cache(Encoding::UTF_8) + utf16_cache = result.code_units_cache(Encoding::UTF_16LE) + utf32_cache = result.code_units_cache(Encoding::UTF_32LE) + + # first šŸ˜€ + location = result.value.statements.body.first.receiver.location + + assert_equal 0, location.cached_start_code_units_offset(utf8_cache) + assert_equal 0, location.cached_start_code_units_offset(utf16_cache) + assert_equal 0, location.cached_start_code_units_offset(utf32_cache) + + assert_equal 1, location.cached_end_code_units_offset(utf8_cache) + assert_equal 2, location.cached_end_code_units_offset(utf16_cache) + assert_equal 1, location.cached_end_code_units_offset(utf32_cache) + + assert_equal 0, location.cached_start_code_units_column(utf8_cache) + assert_equal 0, location.cached_start_code_units_column(utf16_cache) + assert_equal 0, location.cached_start_code_units_column(utf32_cache) + + assert_equal 1, location.cached_end_code_units_column(utf8_cache) + assert_equal 2, location.cached_end_code_units_column(utf16_cache) + assert_equal 1, location.cached_end_code_units_column(utf32_cache) + + # second šŸ˜€ + location = result.value.statements.body.first.arguments.arguments.first.location + + assert_equal 4, location.cached_start_code_units_offset(utf8_cache) + assert_equal 5, location.cached_start_code_units_offset(utf16_cache) + assert_equal 4, location.cached_start_code_units_offset(utf32_cache) + + assert_equal 5, location.cached_end_code_units_offset(utf8_cache) + assert_equal 7, location.cached_end_code_units_offset(utf16_cache) + assert_equal 5, location.cached_end_code_units_offset(utf32_cache) + + assert_equal 4, location.cached_start_code_units_column(utf8_cache) + assert_equal 5, location.cached_start_code_units_column(utf16_cache) + assert_equal 4, location.cached_start_code_units_column(utf32_cache) + + assert_equal 5, location.cached_end_code_units_column(utf8_cache) + assert_equal 7, location.cached_end_code_units_column(utf16_cache) + assert_equal 5, location.cached_end_code_units_column(utf32_cache) + end + def test_code_units_binary_valid_utf8 program = Prism.parse(<<~RUBY).value # -*- encoding: binary -*- -- cgit v1.2.3