diff options
Diffstat (limited to 'test/ruby/test_regexp.rb')
| -rw-r--r-- | test/ruby/test_regexp.rb | 236 |
1 files changed, 230 insertions, 6 deletions
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index b1ae039009..805c57b472 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -975,7 +975,7 @@ class TestRegexp < Test::Unit::TestCase def test_dup assert_equal(//, //.dup) - assert_raise(TypeError) { //.dup.instance_eval { initialize_copy(nil) } } + assert_raise(FrozenError) { //.dup.instance_eval { initialize_copy(/a/) } } end def test_regsub @@ -1011,6 +1011,18 @@ class TestRegexp < Test::Unit::TestCase end; end + def test_regsub_no_memory_leak_many_captures + assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~"end;"}", rss: true) + code = proc do + "aaaaaaaaaaa".gsub(/(a)(b)?(c)?(d)?(e)?(f)?(g)?(h)?/, "") + end + + 1_000.times(&code) + begin; + 100_000.times(&code) + end; + end + def test_ignorecase v = assert_deprecated_warning(/variable \$= is no longer effective/) { $= } assert_equal(false, v) @@ -1036,10 +1048,12 @@ class TestRegexp < Test::Unit::TestCase [Encoding::UTF_8, Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc| idx = key.encode(enc) pat = /#{idx}/ - test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} } + EnvUtil.with_default_internal(enc) do + test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} } + end end test.call {|m| assert_equal(/a/, m.regexp) } test.call {|m| assert_equal("abc", m.string) } @@ -1308,6 +1322,9 @@ class TestRegexp < Test::Unit::TestCase assert_match(/\A[[:space:]]+\z/, "\r\n\v\f\r\s\u0085") assert_match(/\A[[:ascii:]]+\z/, "\x00\x7F") assert_no_match(/[[:ascii:]]/, "\x80\xFF") + + assert_match(/[[:word:]]/, "\u{200C}") + assert_match(/[[:word:]]/, "\u{200D}") end def test_cclass_R @@ -1524,6 +1541,107 @@ class TestRegexp < Test::Unit::TestCase "CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D") end + def test_unicode_age_16_0 + @matches = %w"16.0" + @unmatches = %w"15.1" + + # https://www.unicode.org/Public/16.0.0/ucd/DerivedAge.txt + assert_unicode_age("\u{0897}", + "ARABIC PEPET") + assert_unicode_age("\u{1B4E}".."\u{1B4F}", + "BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN") + assert_unicode_age("\u{1B7F}", + "BALINESE PANTI BAWAK") + assert_unicode_age("\u{1C89}".."\u{1C8A}", + "CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE") + assert_unicode_age("\u{2427}".."\u{2429}", + "SYMBOL FOR DELETE SQUARE CHECKER BOARD FORM..SYMBOL FOR DELETE MEDIUM SHADE FORM") + assert_unicode_age("\u{31E4}".."\u{31E5}", + "CJK STROKE HXG..CJK STROKE SZP") + assert_unicode_age("\u{A7CB}".."\u{A7CD}", + "LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE") + assert_unicode_age("\u{A7DA}".."\u{A7DC}", + "LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE") + assert_unicode_age("\u{105C0}".."\u{105F3}", + "TODHRI LETTER A..TODHRI LETTER OO") + assert_unicode_age("\u{10D40}".."\u{10D65}", + "GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA") + assert_unicode_age("\u{10D69}".."\u{10D85}", + "GARAY VOWEL SIGN E..GARAY SMALL LETTER OLD NA") + assert_unicode_age("\u{10D8E}".."\u{10D8F}", + "GARAY PLUS SIGN..GARAY MINUS SIGN") + assert_unicode_age("\u{10EC2}".."\u{10EC4}", + "ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW") + assert_unicode_age("\u{10EFC}", + "ARABIC COMBINING ALEF OVERLAY") + assert_unicode_age("\u{11380}".."\u{11389}", + "TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL") + assert_unicode_age("\u{1138B}", + "TULU-TIGALARI LETTER EE") + assert_unicode_age("\u{1138E}", + "TULU-TIGALARI LETTER AI") + assert_unicode_age("\u{11390}".."\u{113B5}", + "TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA") + assert_unicode_age("\u{113B7}".."\u{113C0}", + "TULU-TIGALARI SIGN AVAGRAHA..TULU-TIGALARI VOWEL SIGN VOCALIC LL") + assert_unicode_age("\u{113C2}", + "TULU-TIGALARI VOWEL SIGN EE") + assert_unicode_age("\u{113C5}", + "TULU-TIGALARI VOWEL SIGN AI") + assert_unicode_age("\u{113C7}".."\u{113CA}", + "TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA") + assert_unicode_age("\u{113CC}".."\u{113D5}", + "TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI DOUBLE DANDA") + assert_unicode_age("\u{113D7}".."\u{113D8}", + "TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA") + assert_unicode_age("\u{113E1}".."\u{113E2}", + "TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA") + assert_unicode_age("\u{116D0}".."\u{116E3}", + "MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE") + assert_unicode_age("\u{11BC0}".."\u{11BE1}", + "SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO") + assert_unicode_age("\u{11BF0}".."\u{11BF9}", + "SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE") + assert_unicode_age("\u{11F5A}", + "KAWI SIGN NUKTA") + assert_unicode_age("\u{13460}".."\u{143FA}", + "EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA") + assert_unicode_age("\u{16100}".."\u{16139}", + "GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE") + assert_unicode_age("\u{16D40}".."\u{16D79}", + "KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE") + assert_unicode_age("\u{18CFF}", + "KHITAN SMALL SCRIPT CHARACTER-18CFF") + assert_unicode_age("\u{1CC00}".."\u{1CCF9}", + "UP-POINTING GO-KART..OUTLINED DIGIT NINE") + assert_unicode_age("\u{1CD00}".."\u{1CEB3}", + "BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET") + assert_unicode_age("\u{1E5D0}".."\u{1E5FA}", + "OL ONAL LETTER O..OL ONAL DIGIT NINE") + assert_unicode_age("\u{1E5FF}", + "OL ONAL ABBREVIATION SIGN") + assert_unicode_age("\u{1F8B2}".."\u{1F8BB}", + "RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR") + assert_unicode_age("\u{1F8C0}".."\u{1F8C1}", + "LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW") + assert_unicode_age("\u{1FA89}", + "HARP") + assert_unicode_age("\u{1FA8F}", + "SHOVEL") + assert_unicode_age("\u{1FABE}", + "LEAFLESS TREE") + assert_unicode_age("\u{1FAC6}", + "FINGERPRINT") + assert_unicode_age("\u{1FADC}", + "ROOT VEGETABLE") + assert_unicode_age("\u{1FADF}", + "SPLATTER") + assert_unicode_age("\u{1FAE9}", + "FACE WITH BAGS UNDER EYES") + assert_unicode_age("\u{1FBCB}".."\u{1FBEF}", + "WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE") + end + UnicodeAgeRegexps = Hash.new do |h, age| h[age] = [/\A\p{age=#{age}}+\z/u, /\A\P{age=#{age}}+\z/u].freeze end @@ -1563,6 +1681,65 @@ class TestRegexp < Test::Unit::TestCase assert_equal("hoge fuga", h["body"]) end + def test_matchdata_large_capture_groups_stack + env = {"RUBY_THREAD_MACHINE_STACK_SIZE" => (256 * 1024).to_s} + assert_separately([env], <<~'RUBY') + n = 20000 + require "rbconfig/sizeof" + stack = RubyVM::DEFAULT_PARAMS[:thread_machine_stack_size] + size = RbConfig::SIZEOF["long"] + required = (n + 1) * 4 * size + if !stack || stack == 0 || stack >= required + omit "thread machine stack size not reduced (#{stack}:#{required})" + end + + inspect = Thread.new do + str = "\u{3042}" * n + m = Regexp.new("(.)" * n).match(str) + assert_not_nil(m) + assert_equal([n - 1, n], m.offset(n)) + m.inspect + end.value + + assert_include(inspect, "MatchData") + RUBY + end + + def test_match_integer_at + m = /(\d{4})(\d{2})(\d{2})/.match("20260308") + assert_equal(20260308, m.integer_at(0)) + assert_equal(2026, m.integer_at(1)) + assert_equal(3, m.integer_at(2)) + assert_equal(8, m.integer_at(3)) + assert_equal(nil, m.integer_at(4)) + assert_equal(8, m.integer_at(-1)) + assert_equal(3, m.integer_at(-2)) + assert_equal(2026, m.integer_at(-3)) + assert_equal(nil, m.integer_at(-4)) + + re = /[a-z]+|(\d+)/ + assert_equal(123, re.match("123").integer_at(1)) + assert_equal(nil, re.match("abc").integer_at(1)) + end + + def test_match_integer_at_name + m = /(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/.match("20260308") + assert_equal(2026, m.integer_at("y")) + assert_equal(3, m.integer_at("m")) + assert_equal(8, m.integer_at("d")) + end + + def test_match_integer_at_base + assert_equal(91, /\w+/.match("111").integer_at(0, 9)) + assert_equal(10_0000, /\w+/.match("10_0000").integer_at(0)) + assert_equal(0d1_0000, /\w+/.match("01_0000").integer_at(0)) + assert_equal(0o1_0000, /\w+/.match("01_0000").integer_at(0, 0)) + assert_equal(0b1_0000, /\w+/.match("0b1_0000").integer_at(0, 0)) + assert_equal(0o1_0000, /\w+/.match("0o1_0000").integer_at(0, 0)) + assert_equal(0d1_0000, /\w+/.match("0d1_0000").integer_at(0, 0)) + assert_equal(0x1_0000, /\w+/.match("0x1_0000").integer_at(0, 0)) + end + def test_regexp_popped EnvUtil.suppress_warning do assert_nothing_raised { eval("a = 1; /\#{ a }/; a") } @@ -1637,6 +1814,33 @@ class TestRegexp < Test::Unit::TestCase assert_raise(RegexpError, bug12418){ Regexp.new('(0?0|(?(5)||)|(?(5)||))?') } end + def test_quick_search + assert_match_at('(?i) *TOOKY', 'Mozilla/5.0 (Linux; Android 4.0.3; TOOKY', [[34, 40]]) # Issue #120 + end + + def test_ss_in_look_behind + assert_match_at("(?i:ss)", "ss", [[0, 2]]) + assert_match_at("(?i:ss)", "Ss", [[0, 2]]) + assert_match_at("(?i:ss)", "SS", [[0, 2]]) + assert_match_at("(?i:ss)", "\u017fS", [[0, 2]]) # LATIN SMALL LETTER LONG S + assert_match_at("(?i:ss)", "s\u017f", [[0, 2]]) + assert_match_at("(?i:ss)", "\u00df", [[0, 1]]) # LATIN SMALL LETTER SHARP S + assert_match_at("(?i:ss)", "\u1e9e", [[0, 1]]) # LATIN CAPITAL LETTER SHARP S + assert_match_at("(?i:xssy)", "xssy", [[0, 4]]) + assert_match_at("(?i:xssy)", "xSsy", [[0, 4]]) + assert_match_at("(?i:xssy)", "xSSy", [[0, 4]]) + assert_match_at("(?i:xssy)", "x\u017fSy", [[0, 4]]) + assert_match_at("(?i:xssy)", "xs\u017fy", [[0, 4]]) + assert_match_at("(?i:xssy)", "x\u00dfy", [[0, 3]]) + assert_match_at("(?i:xssy)", "x\u1e9ey", [[0, 3]]) + assert_match_at("(?i:\u00df)", "ss", [[0, 2]]) + assert_match_at("(?i:\u00df)", "SS", [[0, 2]]) + assert_match_at("(?i:[\u00df])", "ss", [[0, 2]]) + assert_match_at("(?i:[\u00df])", "SS", [[0, 2]]) + assert_match_at("(?i)(?<!ss)\u2728", "qq\u2728", [[2, 3]]) # Issue #92 + assert_match_at("(?i)(?<!xss)\u2728", "qq\u2728", [[2, 3]]) + end + def test_options_in_look_behind assert_nothing_raised { assert_match_at("(?<=(?i)ab)cd", "ABcd", [[2,4]]) @@ -1774,6 +1978,12 @@ class TestRegexp < Test::Unit::TestCase end; end + def test_too_big_number_for_repeat_range + assert_raise_with_message(SyntaxError, /too big number for repeat range/) do + eval(%[/|{1000000}/]) + end + end + # This assertion is for porting x2() tests in testpy.py of Onigmo. def assert_match_at(re, str, positions, msg = nil) re = Regexp.new(re) unless re.is_a?(Regexp) @@ -1837,6 +2047,7 @@ class TestRegexp < Test::Unit::TestCase Regexp.timeout = 1e300 assert_equal(((1<<64)-1) / 1000000000.0, Regexp.timeout) + assert_raise(ArgumentError) { Regexp.timeout = Float::NAN } assert_raise(ArgumentError) { Regexp.timeout = 0 } assert_raise(ArgumentError) { Regexp.timeout = -1 } @@ -1878,7 +2089,7 @@ class TestRegexp < Test::Unit::TestCase end def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout) - assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}", timeout: 60) global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect } per_instance_timeout = #{ (per_instance_timeout ? EnvUtil.apply_timeout_scale(per_instance_timeout) : nil).inspect } expected_timeout = #{ EnvUtil.apply_timeout_scale(expected_timeout).inspect } @@ -1929,6 +2140,7 @@ class TestRegexp < Test::Unit::TestCase assert_equal(((1<<64)-1) / 1000000000.0, Regexp.new("foo", timeout: 1e300).timeout) + assert_raise(ArgumentError) { Regexp.new("foo", timeout: Float::NAN) } assert_raise(ArgumentError) { Regexp.new("foo", timeout: 0) } assert_raise(ArgumentError) { Regexp.new("foo", timeout: -1) } end; @@ -2152,4 +2364,16 @@ class TestRegexp < Test::Unit::TestCase assert_match(/[x#{e_acute_lower}]/i, "CAF#{e_acute_upper}", "should match e acute case insensitive") end end + + def test_too_many_range_repeat + source = '(?:foobar){0,100}' * 100000 + assert_raise(RegexpError) { Regexp.new(source) } + assert_raise(SyntaxError) { eval("/#{source}/") } + end + + def test_too_many_null_check + source = '(?:(?:foo)?|(?:bar)?)*' * 100000 + assert_raise(RegexpError) { Regexp.new(source) } + assert_raise(SyntaxError) { eval("/#{source}/") } + end end |
