diff options
| author | Shugo Maeda <shugo@ruby-lang.org> | 2024-06-12 11:35:53 +0900 |
|---|---|---|
| committer | Shugo Maeda <shugo.maeda@gmail.com> | 2024-07-16 14:48:06 +0900 |
| commit | e048a073a3cba04576b8f6a1673c283e4e20cd90 (patch) | |
| tree | f8eb52ab8d61192f2afb29c815db8deeddc7d94b | |
| parent | a887b41875d00637064294be7059335599937f17 (diff) | |
Add MatchData#bytebegin and MatchData#byteend
These methods return the byte-based offset of the beginning or end of the specified match.
[Feature #20576]
| -rw-r--r-- | doc/matchdata/bytebegin.rdoc | 30 | ||||
| -rw-r--r-- | doc/matchdata/byteend.rdoc | 30 | ||||
| -rw-r--r-- | re.c | 50 | ||||
| -rw-r--r-- | test/ruby/test_regexp.rb | 10 |
4 files changed, 120 insertions, 0 deletions
diff --git a/doc/matchdata/bytebegin.rdoc b/doc/matchdata/bytebegin.rdoc
new file mode 100644
index 0000000000..5b40a7ef73
--- /dev/null
+++ b/doc/matchdata/bytebegin.rdoc
@@ -0,0 +1,30 @@
+Returns the offset (in bytes) of the beginning of the specified match.
+
+When non-negative integer argument +n+ is given,
+returns the offset of the beginning of the <tt>n</tt>th match:
+
+  m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+  # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
+  m[0] # => "HX1138"
+  m.bytebegin(0) # => 1
+  m[3] # => "113"
+  m.bytebegin(3) # => 3
+
+  m = /(т)(е)(с)/.match('тест')
+  # => #<MatchData "тес" 1:"т" 2:"е" 3:"с">
+  m[0] # => "тес"
+  m.bytebegin(0) # => 0
+  m[3] # => "с"
+  m.bytebegin(3) # => 4
+
+When string or symbol argument +name+ is given,
+returns the offset of the beginning for the named match:
+
+  m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
+  # => #<MatchData "hog" foo:"h" bar:"g">
+  m[:foo] # => "h"
+  m.bytebegin('foo') # => 0
+  m[:bar] # => "g"
+  m.bytebegin(:bar) # => 2
+
+Related: MatchData#byteend, MatchData#byteoffset.
diff --git a/doc/matchdata/byteend.rdoc b/doc/matchdata/byteend.rdoc
new file mode 100644
index 0000000000..eb57664022
--- /dev/null
+++ b/doc/matchdata/byteend.rdoc
@@ -0,0 +1,30 @@
+Returns the offset (in bytes) of the end of the specified match.
+
+When non-negative integer argument +n+ is given,
+returns the offset of the end of the <tt>n</tt>th match:
+
+  m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+  # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
+  m[0] # => "HX1138"
+  m.byteend(0) # => 7
+  m[3] # => "113"
+  m.byteend(3) # => 6
+
+  m = /(т)(е)(с)/.match('тест')
+  # => #<MatchData "тес" 1:"т" 2:"е" 3:"с">
+  m[0] # => "тес"
+  m.byteend(0) # => 6
+  m[3] # => "с"
+  m.byteend(3) # => 6
+
+When string or symbol argument +name+ is given,
+returns the offset of the end for the named match:
+
+  m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
+  # => #<MatchData "hog" foo:"h" bar:"g">
+  m[:foo] # => "h"
+  m.byteend('foo') # => 1
+  m[:bar] # => "g"
+  m.byteend(:bar) # => 3
+
+Related: MatchData#bytebegin, MatchData#byteoffset.
diff --git a/re.c b/re.c
--- a/re.c
+++ b/re.c
@@ -1298,6 +1298,54 @@ match_byteoffset(VALUE match, VALUE n)
 
 /*
  *  call-seq:
+ *    bytebegin(n) -> integer
+ *    bytebegin(name) -> integer
+ *
+ *  :include: doc/matchdata/bytebegin.rdoc
+ *
+ */
+
+static VALUE
+match_bytebegin(VALUE match, VALUE n)
+{
+    int i = match_backref_number(match, n);
+    struct re_registers *regs = RMATCH_REGS(match);
+
+    match_check(match);
+    backref_number_check(regs, i);
+
+    if (BEG(i) < 0)
+        return Qnil;
+    return LONG2NUM(BEG(i));
+}
+
+
+/*
+ *  call-seq:
+ *    byteend(n) -> integer
+ *    byteend(name) -> integer
+ *
+ *  :include: doc/matchdata/byteend.rdoc
+ *
+ */
+
+static VALUE
+match_byteend(VALUE match, VALUE n)
+{
+    int i = match_backref_number(match, n);
+    struct re_registers *regs = RMATCH_REGS(match);
+
+    match_check(match);
+    backref_number_check(regs, i);
+
+    if (BEG(i) < 0)
+        return Qnil;
+    return LONG2NUM(END(i));
+}
+
+
+/*
+ *  call-seq:
  *    begin(n) -> integer
  *    begin(name) -> integer
  *
@@ -4842,6 +4890,8 @@ Init_Regexp(void)
     rb_define_method(rb_cMatch, "length", match_size, 0);
     rb_define_method(rb_cMatch, "offset", match_offset, 1);
     rb_define_method(rb_cMatch, "byteoffset", match_byteoffset, 1);
+    rb_define_method(rb_cMatch, "bytebegin", match_bytebegin, 1);
+    rb_define_method(rb_cMatch, "byteend", match_byteend, 1);
     rb_define_method(rb_cMatch, "begin", match_begin, 1);
     rb_define_method(rb_cMatch, "end", match_end, 1);
     rb_define_method(rb_cMatch, "match", match_nth, 1);
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index 828117f516..04e24b2ded 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -559,16 +559,26 @@ class TestRegexp < Test::Unit::TestCase
     assert_raise(IndexError) { m.byteoffset(2) }
     assert_raise(IndexError) { m.begin(2) }
     assert_raise(IndexError) { m.end(2) }
+    assert_raise(IndexError) { m.bytebegin(2) }
+    assert_raise(IndexError) { m.byteend(2) }
 
     m = /(?<x>q..)?/.match("foobarbaz")
     assert_equal([nil, nil], m.byteoffset("x"))
     assert_equal(nil, m.begin("x"))
     assert_equal(nil, m.end("x"))
+    assert_equal(nil, m.bytebegin("x"))
+    assert_equal(nil, m.byteend("x"))
 
     m = /\A\u3042(.)(.)?(.)\z/.match("\u3042\u3043\u3044")
     assert_equal([3, 6], m.byteoffset(1))
+    assert_equal(3, m.bytebegin(1))
+    assert_equal(6, m.byteend(1))
     assert_equal([nil, nil], m.byteoffset(2))
+    assert_equal(nil, m.bytebegin(2))
+    assert_equal(nil, m.byteend(2))
     assert_equal([6, 9], m.byteoffset(3))
+    assert_equal(6, m.bytebegin(3))
+    assert_equal(9, m.byteend(3))
   end
 
   def test_match_to_s
