summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron Patterson <tenderlove@ruby-lang.org>2024-02-25 16:45:08 -0800
committerHiroshi SHIBATA <hsbt@ruby-lang.org>2024-02-26 15:54:54 +0900
commit164e464b042239cdbd14d3751a7f907754d580ce (patch)
tree6e4afbb42ab04688a0baa125ba3831bbc568c7ef
parent7176c186d0d794bf89eac0645205c3f3d2d40f2d (diff)
[ruby/strscan] Add a method for peeking and reading bytes as
integers (https://github.com/ruby/strscan/pull/89) This commit adds `scan_byte` and `peek_byte`. `scan_byte` will scan the current byte, return it as an integer, and advance the cursor. `peek_byte` will return the current byte as an integer without advancing the cursor. Currently `StringScanner#get_byte` returns a string, but I want to get the current byte without allocating a string. I think this will help with writing high performance lexers. --------- https://github.com/ruby/strscan/commit/873aba2e5d Co-authored-by: Sutou Kouhei <kou@clear-code.com>
-rw-r--r--ext/strscan/strscan.c55
-rw-r--r--test/strscan/test_stringscanner.rb23
2 files changed, 78 insertions, 0 deletions
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index bed1c87cdc..70a3ce5260 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -903,6 +903,57 @@ strscan_getch(VALUE self)
}
/*
+ * Scans one byte and returns it as an integer.
+ * This method is not multibyte character sensitive.
+ * See also: #getch.
+ *
+ * s = StringScanner.new('ab')
+ * s.scan_byte # => 97
+ * s.scan_byte # => 98
+ * s.scan_byte # => nil
+ *
+ * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
+ * s.scan_byte # => 0xA4
+ * s.scan_byte # => 0xA2
+ * s.scan_byte # => nil
+ */
+static VALUE
+strscan_scan_byte(VALUE self)
+{
+    struct strscanner *p;
+    VALUE result;
+
+    GET_SCANNER(self, p);
+    CLEAR_MATCH_STATUS(p);
+    if (EOS_P(p)) {
+        /* No byte is available: report nil and leave the position as is. */
+        return Qnil;
+    }
+
+    /*
+     * Capture the byte first, because p->curr is advanced below.  The
+     * unsigned char cast makes the result a non-negative integer.
+     */
+    result = INT2FIX((unsigned char)*CURPTR(p));
+    p->prev = p->curr;
+    p->curr += 1;
+    MATCHED(p);
+    adjust_registers_to_matched(p);
+    return result;
+}
+
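+/*
+ * Returns the byte at the current scan position as an integer without
+ * advancing the scan pointer, or +nil+ if the scanner is at the end of
+ * the string.
+ * This method is not multibyte character sensitive.
+ * See also: #scan_byte.
+ *
+ *   s = StringScanner.new('ab')
+ *   s.peek_byte # => 97
+ *   s.peek_byte # => 97
+ */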
+static VALUE
+strscan_peek_byte(VALUE self)
+{
+    struct strscanner *p;
+
+    GET_SCANNER(self, p);
+
+    /*
+     * Only reads the current byte: neither p->prev nor p->curr is modified
+     * here.  Yields nil when EOS_P reports that no byte is left.
+     */
+    return EOS_P(p) ? Qnil : INT2FIX((unsigned char)*CURPTR(p));
+}
+
+/*
* Scans one byte and returns it.
* This method is not multibyte character sensitive.
* See also: #getch.
@@ -1605,6 +1656,7 @@ strscan_named_captures(VALUE self)
*
* - #getch
* - #get_byte
+ * - #scan_byte
* - #scan
* - #scan_until
* - #skip
@@ -1617,6 +1669,7 @@ strscan_named_captures(VALUE self)
* - #exist?
* - #match?
* - #peek
+ * - #peek_byte
*
* === Finding Where we Are
*
@@ -1708,7 +1761,9 @@ Init_strscan(void)
rb_define_method(StringScanner, "getch", strscan_getch, 0);
rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
+ rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0);
rb_define_method(StringScanner, "peek", strscan_peek, 1);
+ rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
rb_define_method(StringScanner, "peep", strscan_peep, 1);
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb
index 2a127a773a..2884b8ef05 100644
--- a/test/strscan/test_stringscanner.rb
+++ b/test/strscan/test_stringscanner.rb
@@ -8,6 +8,29 @@ require 'strscan'
require 'test/unit'
module StringScannerTests
+ def test_peek_byte
+ s = create_string_scanner('ab')
+ assert_equal 97, s.peek_byte
+ assert_equal 97, s.scan_byte
+ assert_equal 98, s.peek_byte
+ assert_equal 98, s.scan_byte
+ assert_nil s.peek_byte
+ assert_nil s.scan_byte
+ end
+
+ def test_scan_byte
+ s = create_string_scanner('ab')
+ assert_equal 97, s.scan_byte
+ assert_equal 98, s.scan_byte
+ assert_nil s.scan_byte
+
+ str = "\244\242".dup.force_encoding("euc-jp")
+ s = StringScanner.new(str)
+ assert_equal str.getbyte(s.pos), s.scan_byte
+ assert_equal str.getbyte(s.pos), s.scan_byte
+ assert_nil s.scan_byte
+ end
+
def test_s_new
s = create_string_scanner('test string')
assert_instance_of StringScanner, s