diff options
Diffstat (limited to 'test/ruby/test_regexp.rb')
| -rw-r--r-- | test/ruby/test_regexp.rb | 135 |
1 files changed, 129 insertions, 6 deletions
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 78269f8e9a..805c57b472 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -975,7 +975,7 @@ class TestRegexp < Test::Unit::TestCase def test_dup assert_equal(//, //.dup) - assert_raise(TypeError) { //.dup.instance_eval { initialize_copy(nil) } } + assert_raise(FrozenError) { //.dup.instance_eval { initialize_copy(/a/) } } end def test_regsub @@ -1011,6 +1011,18 @@ class TestRegexp < Test::Unit::TestCase end; end + def test_regsub_no_memory_leak_many_captures + assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~"end;"}", rss: true) + code = proc do + "aaaaaaaaaaa".gsub(/(a)(b)?(c)?(d)?(e)?(f)?(g)?(h)?/, "") + end + + 1_000.times(&code) + begin; + 100_000.times(&code) + end; + end + def test_ignorecase v = assert_deprecated_warning(/variable \$= is no longer effective/) { $= } assert_equal(false, v) @@ -1036,10 +1048,12 @@ class TestRegexp < Test::Unit::TestCase [Encoding::UTF_8, Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc| idx = key.encode(enc) pat = /#{idx}/ - test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} } + EnvUtil.with_default_internal(enc) do + test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} } + end end test.call {|m| assert_equal(/a/, m.regexp) } test.call {|m| assert_equal("abc", m.string) } @@ -1308,6 +1322,9 @@ class TestRegexp < Test::Unit::TestCase assert_match(/\A[[:space:]]+\z/, "\r\n\v\f\r\s\u0085") assert_match(/\A[[:ascii:]]+\z/, "\x00\x7F") assert_no_match(/[[:ascii:]]/, "\x80\xFF") + + assert_match(/[[:word:]]/, "\u{200C}") + assert_match(/[[:word:]]/, "\u{200D}") end def test_cclass_R @@ -1664,6 +1681,65 @@ class TestRegexp < Test::Unit::TestCase assert_equal("hoge fuga", h["body"]) end + def test_matchdata_large_capture_groups_stack + env = {"RUBY_THREAD_MACHINE_STACK_SIZE" => (256 * 1024).to_s} + assert_separately([env], <<~'RUBY') + n = 20000 + require "rbconfig/sizeof" + stack = RubyVM::DEFAULT_PARAMS[:thread_machine_stack_size] + size = RbConfig::SIZEOF["long"] + required = (n + 1) * 4 * size + if !stack || stack == 0 || stack >= required + omit "thread machine stack size not reduced (#{stack}:#{required})" + end + + inspect = Thread.new do + str = "\u{3042}" * n + m = Regexp.new("(.)" * n).match(str) + assert_not_nil(m) + assert_equal([n - 1, n], m.offset(n)) + m.inspect + end.value + + assert_include(inspect, "MatchData") + RUBY + end + + def test_match_integer_at + m = /(\d{4})(\d{2})(\d{2})/.match("20260308") + assert_equal(20260308, m.integer_at(0)) + assert_equal(2026, m.integer_at(1)) + assert_equal(3, m.integer_at(2)) + assert_equal(8, m.integer_at(3)) + assert_equal(nil, m.integer_at(4)) + assert_equal(8, m.integer_at(-1)) + assert_equal(3, m.integer_at(-2)) + assert_equal(2026, m.integer_at(-3)) + assert_equal(nil, m.integer_at(-4)) + + re = /[a-z]+|(\d+)/ + assert_equal(123, re.match("123").integer_at(1)) + assert_equal(nil, re.match("abc").integer_at(1)) + end + + def test_match_integer_at_name + m = /(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/.match("20260308") + assert_equal(2026, m.integer_at("y")) + assert_equal(3, m.integer_at("m")) + assert_equal(8, m.integer_at("d")) + end + + def test_match_integer_at_base + assert_equal(91, /\w+/.match("111").integer_at(0, 9)) + assert_equal(10_0000, /\w+/.match("10_0000").integer_at(0)) + assert_equal(0d1_0000, /\w+/.match("01_0000").integer_at(0)) + assert_equal(0o1_0000, /\w+/.match("01_0000").integer_at(0, 0)) + assert_equal(0b1_0000, /\w+/.match("0b1_0000").integer_at(0, 0)) + assert_equal(0o1_0000, /\w+/.match("0o1_0000").integer_at(0, 0)) + assert_equal(0d1_0000, /\w+/.match("0d1_0000").integer_at(0, 0)) + assert_equal(0x1_0000, /\w+/.match("0x1_0000").integer_at(0, 0)) + end + def test_regexp_popped EnvUtil.suppress_warning do assert_nothing_raised { eval("a = 1; /\#{ a }/; a") } @@ -1738,6 +1814,33 @@ class TestRegexp < Test::Unit::TestCase assert_raise(RegexpError, bug12418){ Regexp.new('(0?0|(?(5)||)|(?(5)||))?') } end + def test_quick_search + assert_match_at('(?i) *TOOKY', 'Mozilla/5.0 (Linux; Android 4.0.3; TOOKY', [[34, 40]]) # Issue #120 + end + + def test_ss_in_look_behind + assert_match_at("(?i:ss)", "ss", [[0, 2]]) + assert_match_at("(?i:ss)", "Ss", [[0, 2]]) + assert_match_at("(?i:ss)", "SS", [[0, 2]]) + assert_match_at("(?i:ss)", "\u017fS", [[0, 2]]) # LATIN SMALL LETTER LONG S + assert_match_at("(?i:ss)", "s\u017f", [[0, 2]]) + assert_match_at("(?i:ss)", "\u00df", [[0, 1]]) # LATIN SMALL LETTER SHARP S + assert_match_at("(?i:ss)", "\u1e9e", [[0, 1]]) # LATIN CAPITAL LETTER SHARP S + assert_match_at("(?i:xssy)", "xssy", [[0, 4]]) + assert_match_at("(?i:xssy)", "xSsy", [[0, 4]]) + assert_match_at("(?i:xssy)", "xSSy", [[0, 4]]) + assert_match_at("(?i:xssy)", "x\u017fSy", [[0, 4]]) + assert_match_at("(?i:xssy)", "xs\u017fy", [[0, 4]]) + assert_match_at("(?i:xssy)", "x\u00dfy", [[0, 3]]) + assert_match_at("(?i:xssy)", "x\u1e9ey", [[0, 3]]) + assert_match_at("(?i:\u00df)", "ss", [[0, 2]]) + assert_match_at("(?i:\u00df)", "SS", [[0, 2]]) + assert_match_at("(?i:[\u00df])", "ss", [[0, 2]]) + assert_match_at("(?i:[\u00df])", "SS", [[0, 2]]) + assert_match_at("(?i)(?<!ss)\u2728", "qq\u2728", [[2, 3]]) # Issue #92 + assert_match_at("(?i)(?<!xss)\u2728", "qq\u2728", [[2, 3]]) + end + def test_options_in_look_behind assert_nothing_raised { assert_match_at("(?<=(?i)ab)cd", "ABcd", [[2,4]]) @@ -1875,6 +1978,12 @@ class TestRegexp < Test::Unit::TestCase end; end + def test_too_big_number_for_repeat_range + assert_raise_with_message(SyntaxError, /too big number for repeat range/) do + eval(%[/|{1000000}/]) + end + end + # This assertion is for porting x2() tests in testpy.py of Onigmo. def assert_match_at(re, str, positions, msg = nil) re = Regexp.new(re) unless re.is_a?(Regexp) @@ -1938,6 +2047,7 @@ class TestRegexp < Test::Unit::TestCase Regexp.timeout = 1e300 assert_equal(((1<<64)-1) / 1000000000.0, Regexp.timeout) + assert_raise(ArgumentError) { Regexp.timeout = Float::NAN } assert_raise(ArgumentError) { Regexp.timeout = 0 } assert_raise(ArgumentError) { Regexp.timeout = -1 } @@ -1979,7 +2089,7 @@ class TestRegexp < Test::Unit::TestCase end def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout) - assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}", timeout: 60) global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect } per_instance_timeout = #{ (per_instance_timeout ? EnvUtil.apply_timeout_scale(per_instance_timeout) : nil).inspect } expected_timeout = #{ EnvUtil.apply_timeout_scale(expected_timeout).inspect } @@ -2030,6 +2140,7 @@ class TestRegexp < Test::Unit::TestCase assert_equal(((1<<64)-1) / 1000000000.0, Regexp.new("foo", timeout: 1e300).timeout) + assert_raise(ArgumentError) { Regexp.new("foo", timeout: Float::NAN) } assert_raise(ArgumentError) { Regexp.new("foo", timeout: 0) } assert_raise(ArgumentError) { Regexp.new("foo", timeout: -1) } end; @@ -2253,4 +2364,16 @@ class TestRegexp < Test::Unit::TestCase assert_match(/[x#{e_acute_lower}]/i, "CAF#{e_acute_upper}", "should match e acute case insensitive") end end + + def test_too_many_range_repeat + source = '(?:foobar){0,100}' * 100000 + assert_raise(RegexpError) { Regexp.new(source) } + assert_raise(SyntaxError) { eval("/#{source}/") } + end + + def test_too_many_null_check + source = '(?:(?:foo)?|(?:bar)?)*' * 100000 + assert_raise(RegexpError) { Regexp.new(source) } + assert_raise(SyntaxError) { eval("/#{source}/") } + end end |
