summary refs log tree commit diff
path: root/test/ruby/test_regexp.rb
diff options
context:
space:
mode:
Diffstat (limited to 'test/ruby/test_regexp.rb')
-rw-r--r-- test/ruby/test_regexp.rb 135
1 files changed, 129 insertions, 6 deletions
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index 78269f8e9a..805c57b472 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -975,7 +975,7 @@ class TestRegexp < Test::Unit::TestCase
def test_dup
assert_equal(//, //.dup)
- assert_raise(TypeError) { //.dup.instance_eval { initialize_copy(nil) } }
+ assert_raise(FrozenError) { //.dup.instance_eval { initialize_copy(/a/) } }
end
def test_regsub
@@ -1011,6 +1011,18 @@ class TestRegexp < Test::Unit::TestCase
end;
end
+ def test_regsub_no_memory_leak_many_captures
+ assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~"end;"}", rss: true)
+ code = proc do
+ "aaaaaaaaaaa".gsub(/(a)(b)?(c)?(d)?(e)?(f)?(g)?(h)?/, "")
+ end
+
+ 1_000.times(&code)
+ begin;
+ 100_000.times(&code)
+ end;
+ end
+
def test_ignorecase
v = assert_deprecated_warning(/variable \$= is no longer effective/) { $= }
assert_equal(false, v)
@@ -1036,10 +1048,12 @@ class TestRegexp < Test::Unit::TestCase
[Encoding::UTF_8, Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
idx = key.encode(enc)
pat = /#{idx}/
- test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} }
- test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} }
- test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} }
- test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} }
+ EnvUtil.with_default_internal(enc) do
+ test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} }
+ test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} }
+ test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} }
+ test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} }
+ end
end
test.call {|m| assert_equal(/a/, m.regexp) }
test.call {|m| assert_equal("abc", m.string) }
@@ -1308,6 +1322,9 @@ class TestRegexp < Test::Unit::TestCase
assert_match(/\A[[:space:]]+\z/, "\r\n\v\f\r\s\u0085")
assert_match(/\A[[:ascii:]]+\z/, "\x00\x7F")
assert_no_match(/[[:ascii:]]/, "\x80\xFF")
+
+ assert_match(/[[:word:]]/, "\u{200C}")
+ assert_match(/[[:word:]]/, "\u{200D}")
end
def test_cclass_R
@@ -1664,6 +1681,65 @@ class TestRegexp < Test::Unit::TestCase
assert_equal("hoge fuga", h["body"])
end
+ def test_matchdata_large_capture_groups_stack
+ env = {"RUBY_THREAD_MACHINE_STACK_SIZE" => (256 * 1024).to_s}
+ assert_separately([env], <<~'RUBY')
+ n = 20000
+ require "rbconfig/sizeof"
+ stack = RubyVM::DEFAULT_PARAMS[:thread_machine_stack_size]
+ size = RbConfig::SIZEOF["long"]
+ required = (n + 1) * 4 * size
+ if !stack || stack == 0 || stack >= required
+ omit "thread machine stack size not reduced (#{stack}:#{required})"
+ end
+
+ inspect = Thread.new do
+ str = "\u{3042}" * n
+ m = Regexp.new("(.)" * n).match(str)
+ assert_not_nil(m)
+ assert_equal([n - 1, n], m.offset(n))
+ m.inspect
+ end.value
+
+ assert_include(inspect, "MatchData")
+ RUBY
+ end
+
+ def test_match_integer_at
+ m = /(\d{4})(\d{2})(\d{2})/.match("20260308")
+ assert_equal(20260308, m.integer_at(0))
+ assert_equal(2026, m.integer_at(1))
+ assert_equal(3, m.integer_at(2))
+ assert_equal(8, m.integer_at(3))
+ assert_equal(nil, m.integer_at(4))
+ assert_equal(8, m.integer_at(-1))
+ assert_equal(3, m.integer_at(-2))
+ assert_equal(2026, m.integer_at(-3))
+ assert_equal(nil, m.integer_at(-4))
+
+ re = /[a-z]+|(\d+)/
+ assert_equal(123, re.match("123").integer_at(1))
+ assert_equal(nil, re.match("abc").integer_at(1))
+ end
+
+ def test_match_integer_at_name
+ m = /(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/.match("20260308")
+ assert_equal(2026, m.integer_at("y"))
+ assert_equal(3, m.integer_at("m"))
+ assert_equal(8, m.integer_at("d"))
+ end
+
+ def test_match_integer_at_base
+ assert_equal(91, /\w+/.match("111").integer_at(0, 9))
+ assert_equal(10_0000, /\w+/.match("10_0000").integer_at(0))
+ assert_equal(0d1_0000, /\w+/.match("01_0000").integer_at(0))
+ assert_equal(0o1_0000, /\w+/.match("01_0000").integer_at(0, 0))
+ assert_equal(0b1_0000, /\w+/.match("0b1_0000").integer_at(0, 0))
+ assert_equal(0o1_0000, /\w+/.match("0o1_0000").integer_at(0, 0))
+ assert_equal(0d1_0000, /\w+/.match("0d1_0000").integer_at(0, 0))
+ assert_equal(0x1_0000, /\w+/.match("0x1_0000").integer_at(0, 0))
+ end
+
def test_regexp_popped
EnvUtil.suppress_warning do
assert_nothing_raised { eval("a = 1; /\#{ a }/; a") }
@@ -1738,6 +1814,33 @@ class TestRegexp < Test::Unit::TestCase
assert_raise(RegexpError, bug12418){ Regexp.new('(0?0|(?(5)||)|(?(5)||))?') }
end
+ def test_quick_search
+ assert_match_at('(?i) *TOOKY', 'Mozilla/5.0 (Linux; Android 4.0.3; TOOKY', [[34, 40]]) # Issue #120
+ end
+
+ def test_ss_in_look_behind
+ assert_match_at("(?i:ss)", "ss", [[0, 2]])
+ assert_match_at("(?i:ss)", "Ss", [[0, 2]])
+ assert_match_at("(?i:ss)", "SS", [[0, 2]])
+ assert_match_at("(?i:ss)", "\u017fS", [[0, 2]]) # LATIN SMALL LETTER LONG S
+ assert_match_at("(?i:ss)", "s\u017f", [[0, 2]])
+ assert_match_at("(?i:ss)", "\u00df", [[0, 1]]) # LATIN SMALL LETTER SHARP S
+ assert_match_at("(?i:ss)", "\u1e9e", [[0, 1]]) # LATIN CAPITAL LETTER SHARP S
+ assert_match_at("(?i:xssy)", "xssy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "xSsy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "xSSy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "x\u017fSy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "xs\u017fy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "x\u00dfy", [[0, 3]])
+ assert_match_at("(?i:xssy)", "x\u1e9ey", [[0, 3]])
+ assert_match_at("(?i:\u00df)", "ss", [[0, 2]])
+ assert_match_at("(?i:\u00df)", "SS", [[0, 2]])
+ assert_match_at("(?i:[\u00df])", "ss", [[0, 2]])
+ assert_match_at("(?i:[\u00df])", "SS", [[0, 2]])
+ assert_match_at("(?i)(?<!ss)\u2728", "qq\u2728", [[2, 3]]) # Issue #92
+ assert_match_at("(?i)(?<!xss)\u2728", "qq\u2728", [[2, 3]])
+ end
+
def test_options_in_look_behind
assert_nothing_raised {
assert_match_at("(?<=(?i)ab)cd", "ABcd", [[2,4]])
@@ -1875,6 +1978,12 @@ class TestRegexp < Test::Unit::TestCase
end;
end
+ def test_too_big_number_for_repeat_range
+ assert_raise_with_message(SyntaxError, /too big number for repeat range/) do
+ eval(%[/|{1000000}/])
+ end
+ end
+
# This assertion is for porting x2() tests in testpy.py of Onigmo.
def assert_match_at(re, str, positions, msg = nil)
re = Regexp.new(re) unless re.is_a?(Regexp)
@@ -1938,6 +2047,7 @@ class TestRegexp < Test::Unit::TestCase
Regexp.timeout = 1e300
assert_equal(((1<<64)-1) / 1000000000.0, Regexp.timeout)
+ assert_raise(ArgumentError) { Regexp.timeout = Float::NAN }
assert_raise(ArgumentError) { Regexp.timeout = 0 }
assert_raise(ArgumentError) { Regexp.timeout = -1 }
@@ -1979,7 +2089,7 @@ class TestRegexp < Test::Unit::TestCase
end
def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout)
- assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
+ assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}", timeout: 60)
global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect }
per_instance_timeout = #{ (per_instance_timeout ? EnvUtil.apply_timeout_scale(per_instance_timeout) : nil).inspect }
expected_timeout = #{ EnvUtil.apply_timeout_scale(expected_timeout).inspect }
@@ -2030,6 +2140,7 @@ class TestRegexp < Test::Unit::TestCase
assert_equal(((1<<64)-1) / 1000000000.0, Regexp.new("foo", timeout: 1e300).timeout)
+ assert_raise(ArgumentError) { Regexp.new("foo", timeout: Float::NAN) }
assert_raise(ArgumentError) { Regexp.new("foo", timeout: 0) }
assert_raise(ArgumentError) { Regexp.new("foo", timeout: -1) }
end;
@@ -2253,4 +2364,16 @@ class TestRegexp < Test::Unit::TestCase
assert_match(/[x#{e_acute_lower}]/i, "CAF#{e_acute_upper}", "should match e acute case insensitive")
end
end
+
+ def test_too_many_range_repeat
+ source = '(?:foobar){0,100}' * 100000
+ assert_raise(RegexpError) { Regexp.new(source) }
+ assert_raise(SyntaxError) { eval("/#{source}/") }
+ end
+
+ def test_too_many_null_check
+ source = '(?:(?:foo)?|(?:bar)?)*' * 100000
+ assert_raise(RegexpError) { Regexp.new(source) }
+ assert_raise(SyntaxError) { eval("/#{source}/") }
+ end
end