summary | refs | log | tree | commit | diff
path: root/ext/strscan
diff options
context:
space:
mode:
author: Benoit Daloze <eregontp@gmail.com> 2026-03-28 22:48:38 +0100
committer: git <svn-admin@ruby-lang.org> 2026-03-28 21:49:07 +0000
commit: cbcd75224ea0dc979cf4a30be65e950029b338f5 (patch)
tree: 69ed868fb06a860888391f87592679302fa4d60f /ext/strscan
parent: 64e103651259836837595771ed163ba71f0f1d43 (diff)
[ruby/strscan] Implement StringScanner for TruffleRuby in pure Ruby
(https://github.com/ruby/strscan/pull/195)

* Fixes https://github.com/ruby/strscan/issues/194.
* This is a fresh new implementation from scratch, contributed directly to ruby/strscan, under BSD-2-Clause. This was implemented using the strscan tests, specs from ruby/spec and the documentation (https://docs.ruby-lang.org/en/master/StringScanner.html).
* lib/strscan.rb now handles the loading for all implementations.
* Test on TruffleRuby 33 to ensure it keeps working on older TruffleRuby releases, which do not have the necessary Primitives used by this new implementation.

---------

https://github.com/ruby/strscan/commit/3908e0ddb0

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Diffstat (limited to 'ext/strscan')
-rw-r--r-- ext/strscan/lib/strscan.rb 20
-rw-r--r-- ext/strscan/lib/strscan/truffleruby.rb 421
-rw-r--r-- ext/strscan/strscan.c 2
-rw-r--r-- ext/strscan/strscan.gemspec 12
4 files changed, 447 insertions, 8 deletions
diff --git a/ext/strscan/lib/strscan.rb b/ext/strscan/lib/strscan.rb
new file mode 100644
index 0000000000..4e8910d141
--- /dev/null
+++ b/ext/strscan/lib/strscan.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+case RUBY_ENGINE
+when 'ruby'
+ require 'strscan.so'
+ require_relative 'strscan/strscan'
+when 'jruby'
+ require 'strscan.jar'
+ JRuby::Util.load_ext('org.jruby.ext.strscan.StringScannerLibrary')
+ require_relative 'strscan/strscan'
+when 'truffleruby'
+ if RUBY_ENGINE_VERSION.to_i >= 34
+ require 'strscan/truffleruby'
+ else
+ $LOAD_PATH.delete __dir__
+ require 'strscan'
+ end
+else
+ raise NotImplementedError, "Unknown Ruby: #{RUBY_ENGINE}"
+end
diff --git a/ext/strscan/lib/strscan/truffleruby.rb b/ext/strscan/lib/strscan/truffleruby.rb
new file mode 100644
index 0000000000..b09fed661c
--- /dev/null
+++ b/ext/strscan/lib/strscan/truffleruby.rb
@@ -0,0 +1,421 @@
+# frozen_string_literal: true
+# truffleruby_primitives: true
+
+class StringScanner
+ class Error < StandardError
+ end
+ unless ::Object.const_defined?(:ScanError)
+ ::Object::ScanError = Error
+ ::Object.deprecate_constant :ScanError
+ end
+
+ Version = '3.1.8'
+ Id = '$Id$'
+
+ def self.must_C_version = self
+
+ attr_reader :string, :pos
+ alias_method :pointer, :pos
+
+ def initialize(string, options = nil, fixed_anchor: false)
+ @string = Primitive.convert_with_to_str(string)
+ @fixed_anchor = Primitive.as_boolean(fixed_anchor)
+ @pos = 0
+ @last_match = nil
+ end
+
+ def inspect
+ return "#<#{Primitive.class(self)} (uninitialized)>" unless @string
+ return "#<#{Primitive.class(self)} fin>" if eos?
+
+ before =
+ if @pos == 0
+ ''
+ elsif @pos < 5
+ "#{@string.byteslice(0, @pos).inspect} "
+ else
+ "#{('...' + @string.byteslice(@pos - 5, 5)).inspect} "
+ end
+
+ after =
+ if @pos >= @string.bytesize - 5
+ " #{@string.byteslice(@pos..).inspect}"
+ else
+ " #{(@string.byteslice(@pos, 5) + '...').inspect}"
+ end
+
+ "#<#{Primitive.class(self)} #{@pos}/#{@string.bytesize} #{before}@#{after}>"
+ end
+
+ def pos=(new_pos)
+ if new_pos < 0
+ new_pos += @string.bytesize
+ end
+ raise RangeError, 'index out of range' if new_pos < 0
+ raise RangeError, 'index out of range' if new_pos > @string.bytesize
+ @pos = new_pos
+ end
+ alias_method :pointer=, :pos=
+
+ def charpos = Primitive.string_byte_index_to_character_index(@string, @pos)
+
+ def rest = @string.byteslice(@pos, @string.bytesize)
+
+ def rest_size = @string.bytesize - @pos
+
+ def concat(more_string)
+ @string.concat(Primitive.convert_with_to_str(more_string))
+ self
+ end
+ alias_method :<<, :concat
+
+ def string=(other_string)
+ @string = Primitive.convert_with_to_str(other_string)
+ @pos = 0
+ @last_match = nil
+ other_string
+ end
+
+ def reset
+ @pos = 0
+ @last_match = nil
+ self
+ end
+
+ def terminate
+ @pos = @string.bytesize
+ @last_match = nil
+ self
+ end
+
+ def unscan
+ if @last_match
+ @pos = Primitive.match_data_byte_begin(@last_match, 0)
+ @last_match = nil
+ self
+ else
+ raise Error, 'unscan failed: previous match record not exist'
+ end
+ end
+
+ # Predicates
+
+ def fixed_anchor? = @fixed_anchor
+
+ def beginning_of_line?
+ @pos == 0 or @string.byteslice(@pos-1, 1) == "\n"
+ end
+ alias_method :bol?, :beginning_of_line?
+
+ def eos?
+ @pos >= @string.bytesize
+ end
+
+ def rest?
+ !eos?
+ end
+
+ # MatchData-like methods
+
+ def matched? = !Primitive.nil?(@last_match)
+
+ def matched = @last_match&.to_s
+
+ def [](group)
+ raise TypeError, 'no implicit conversion of Range into Integer' if Primitive.is_a?(group, Range)
+
+ if @last_match
+ @last_match[group]
+ else
+ nil
+ end
+ end
+
+ def values_at(*groups) = @last_match&.values_at(*groups)
+
+ def captures = @last_match&.captures
+
+ def size = @last_match&.size
+
+ def pre_match = @last_match&.pre_match
+
+ def post_match = @last_match&.post_match
+
+ def named_captures = @last_match&.named_captures || {}
+
+ def matched_size
+ if @last_match
+ Primitive.match_data_byte_end(@last_match, 0) - Primitive.match_data_byte_begin(@last_match, 0)
+ end
+ end
+
+ # Scan-like methods
+
+ def peek(length)
+ raise ArgumentError, 'negative string size (or size too big)' if length < 0
+ @string.byteslice(@pos, length)
+ end
+
+ def peek_byte = @string.getbyte(@pos)
+
+ def get_byte
+ return nil if eos?
+
+ byte = @string.byteslice(@pos, 1)
+ to = @pos + 1
+ @last_match = Primitive.matchdata_create_single_group(byte, @string, @pos, to)
+ @pos = to
+ byte
+ end
+
+ def scan_byte
+ return nil if eos?
+
+ byte_value = @string.getbyte(@pos)
+ get_byte
+ byte_value
+ end
+
+ def getch = scan(/./m)
+
+ def scan_integer(base: 10)
+ case base
+ when 10
+ scan(/[+-]?\d+/)&.to_i
+ when 16
+ scan(/[+-]?(0x)?[0-9a-fA-F]+/)&.to_i(16)
+ else
+ raise ArgumentError, "Unsupported integer base: #{base.inspect}, expected 10 or 16"
+ end
+ end
+
+ def scan_full(pattern, advance_pointer, return_string)
+ if advance_pointer
+ if return_string
+ scan(pattern)
+ else
+ skip(pattern)
+ end
+ else
+ if return_string
+ check(pattern)
+ else
+ match?(pattern)
+ end
+ end
+ end
+ Primitive.always_split self, :scan_full
+
+ def search_full(pattern, advance_pointer, return_string)
+ if advance_pointer
+ if return_string
+ scan_until(pattern)
+ else
+ skip_until(pattern)
+ end
+ else
+ if return_string
+ check_until(pattern)
+ else
+ exist?(pattern)
+ end
+ end
+ end
+ Primitive.always_split self, :search_full
+
+ # Keep the following 8 methods in sync, they are small variations of one another
+
+ # Matches at start methods
+
+ # Matches at start, returns matched string, does not advance position
+ def check(pattern)
+ prev = @pos
+ if Primitive.is_a?(pattern, Regexp)
+ start = @fixed_anchor ? 0 : prev
+ @last_match = Primitive.regexp_match_at_start(pattern, @string, prev, start)
+ if @last_match
+ @last_match.to_s
+ end
+ else
+ pattern = Primitive.convert_with_to_str(pattern)
+ if rest.start_with?(pattern)
+ to = prev + pattern.bytesize
+ @last_match = Primitive.matchdata_create_single_group(pattern, @string, prev, to)
+ pattern
+ else
+ @last_match = nil
+ end
+ end
+ end
+ Primitive.always_split self, :check
+
+ # Matches at start, returns matched string, advances position
+ def scan(pattern)
+ prev = @pos
+ if Primitive.is_a?(pattern, Regexp)
+ start = @fixed_anchor ? 0 : prev
+ @last_match = Primitive.regexp_match_at_start(pattern, @string, prev, start)
+ if @last_match
+ @pos = Primitive.match_data_byte_end(@last_match, 0)
+ @last_match.to_s
+ end
+ else
+ pattern = Primitive.convert_with_to_str(pattern)
+ if rest.start_with?(pattern)
+ to = prev + pattern.bytesize
+ @last_match = Primitive.matchdata_create_single_group(pattern, @string, prev, to)
+ @pos = to
+ pattern
+ else
+ @last_match = nil
+ end
+ end
+ end
+ Primitive.always_split self, :scan
+
+ # Matches at start, returns matched bytesize, does not advance position
+ def match?(pattern)
+ prev = @pos
+ if Primitive.is_a?(pattern, Regexp)
+ start = @fixed_anchor ? 0 : prev
+ @last_match = Primitive.regexp_match_at_start(pattern, @string, prev, start)
+ if @last_match
+ to = Primitive.match_data_byte_end(@last_match, 0)
+ to - prev
+ end
+ else
+ pattern = Primitive.convert_with_to_str(pattern)
+ if rest.start_with?(pattern)
+ to = prev + pattern.bytesize
+ @last_match = Primitive.matchdata_create_single_group(pattern, @string, prev, to)
+ to - prev
+ else
+ @last_match = nil
+ end
+ end
+ end
+ Primitive.always_split self, :match?
+
+ # Matches at start, returns matched bytesize, advances position
+ def skip(pattern)
+ prev = @pos
+ if Primitive.is_a?(pattern, Regexp)
+ start = @fixed_anchor ? 0 : prev
+ @last_match = Primitive.regexp_match_at_start(pattern, @string, prev, start)
+ if @last_match
+ to = Primitive.match_data_byte_end(@last_match, 0)
+ @pos = to
+ to - prev
+ end
+ else
+ pattern = Primitive.convert_with_to_str(pattern)
+ if rest.start_with?(pattern)
+ to = prev + pattern.bytesize
+ @last_match = Primitive.matchdata_create_single_group(pattern, @string, prev, to)
+ @pos = to
+ to - prev
+ else
+ @last_match = nil
+ end
+ end
+ end
+ Primitive.always_split self, :skip
+
+ # Matches anywhere methods
+
+ # Matches anywhere, returns matched string, does not advance position
+ def check_until(pattern)
+ prev = @pos
+ if Primitive.is_a?(pattern, Regexp)
+ start = @fixed_anchor ? 0 : prev
+ @last_match = Primitive.regexp_search_with_start(pattern, @string, prev, start)
+ if @last_match
+ to = Primitive.match_data_byte_end(@last_match, 0)
+ @string.byteslice(prev, to - prev)
+ end
+ else
+ pattern = Primitive.convert_with_to_str(pattern)
+ if from = @string.byteindex(pattern, prev)
+ to = from + pattern.bytesize
+ @last_match = Primitive.matchdata_create_single_group(pattern, @string, from, to)
+ @string.byteslice(prev, to - prev)
+ else
+ @last_match = nil
+ end
+ end
+ end
+ Primitive.always_split self, :check_until
+
+ # Matches anywhere, returns matched string, advances position
+ def scan_until(pattern)
+ prev = @pos
+ if Primitive.is_a?(pattern, Regexp)
+ start = @fixed_anchor ? 0 : prev
+ @last_match = Primitive.regexp_search_with_start(pattern, @string, prev, start)
+ if @last_match
+ to = Primitive.match_data_byte_end(@last_match, 0)
+ @pos = to
+ @string.byteslice(prev, to - prev)
+ end
+ else
+ pattern = Primitive.convert_with_to_str(pattern)
+ if from = @string.byteindex(pattern, prev)
+ to = from + pattern.bytesize
+ @last_match = Primitive.matchdata_create_single_group(pattern, @string, from, to)
+ @pos = to
+ @string.byteslice(prev, to - prev)
+ else
+ @last_match = nil
+ end
+ end
+ end
+ Primitive.always_split self, :scan_until
+
+ # Matches anywhere, returns matched bytesize, does not advance position
+ def exist?(pattern)
+ prev = @pos
+ if Primitive.is_a?(pattern, Regexp)
+ start = @fixed_anchor ? 0 : prev
+ @last_match = Primitive.regexp_search_with_start(pattern, @string, prev, start)
+ if @last_match
+ to = Primitive.match_data_byte_end(@last_match, 0)
+ to - prev
+ end
+ else
+ pattern = Primitive.convert_with_to_str(pattern)
+ if from = @string.byteindex(pattern, prev)
+ to = from + pattern.bytesize
+ @last_match = Primitive.matchdata_create_single_group(pattern, @string, from, to)
+ to - prev
+ else
+ @last_match = nil
+ end
+ end
+ end
+ Primitive.always_split self, :exist?
+
+ # Matches anywhere, returns matched bytesize, advances position
+ def skip_until(pattern)
+ prev = @pos
+ if Primitive.is_a?(pattern, Regexp)
+ start = @fixed_anchor ? 0 : prev
+ @last_match = Primitive.regexp_search_with_start(pattern, @string, prev, start)
+ if @last_match
+ to = Primitive.match_data_byte_end(@last_match, 0)
+ @pos = to
+ to - prev
+ end
+ else
+ pattern = Primitive.convert_with_to_str(pattern)
+ if from = @string.byteindex(pattern, prev)
+ to = from + pattern.bytesize
+ @last_match = Primitive.matchdata_create_single_group(pattern, @string, from, to)
+ @pos = to
+ to - prev
+ else
+ @last_match = nil
+ end
+ end
+ end
+ Primitive.always_split self, :skip_until
+end
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index d00c66699a..93c0c04631 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -2300,6 +2300,4 @@ Init_strscan(void)
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
-
- rb_require("strscan/strscan");
}
diff --git a/ext/strscan/strscan.gemspec b/ext/strscan/strscan.gemspec
index 47180bb8d8..a51285fa7e 100644
--- a/ext/strscan/strscan.gemspec
+++ b/ext/strscan/strscan.gemspec
@@ -16,18 +16,18 @@ Gem::Specification.new do |s|
s.summary = "Provides lexical scanning operations on a String."
s.description = "Provides lexical scanning operations on a String."
- files = [
- "COPYING",
- "LICENSE.txt",
- "lib/strscan/strscan.rb"
+ files = %w[
+ COPYING
+ LICENSE.txt
+ lib/strscan.rb
+ lib/strscan/strscan.rb
+ lib/strscan/truffleruby.rb
]
s.require_paths = %w{lib}
if RUBY_ENGINE == "jruby"
files << "lib/strscan.jar"
- files << "ext/jruby/lib/strscan.rb"
- s.require_paths += %w{ext/jruby/lib}
s.platform = "java"
else
files << "ext/strscan/extconf.rb"