diff options
Diffstat (limited to 'test/ruby/enc')
-rw-r--r-- | test/ruby/enc/test_case_comprehensive.rb | 2 | ||||
-rw-r--r-- | test/ruby/enc/test_case_mapping.rb | 10 | ||||
-rw-r--r-- | test/ruby/enc/test_case_options.rb | 12 | ||||
-rw-r--r-- | test/ruby/enc/test_cesu8.rb | 4 | ||||
-rw-r--r-- | test/ruby/enc/test_emoji_breaks.rb | 206 | ||||
-rw-r--r-- | test/ruby/enc/test_grapheme_breaks.rb | 115 | ||||
-rw-r--r-- | test/ruby/enc/test_regex_casefold.rb | 2 |
7 files changed, 178 insertions, 173 deletions
diff --git a/test/ruby/enc/test_case_comprehensive.rb b/test/ruby/enc/test_case_comprehensive.rb index bc57d57ee4..de18ac865c 100644 --- a/test/ruby/enc/test_case_comprehensive.rb +++ b/test/ruby/enc/test_case_comprehensive.rb @@ -37,7 +37,7 @@ TestComprehensiveCaseMapping.data_files_available? and class TestComprehensiveC end def self.read_data_file(filename) - IO.foreach(expand_filename(filename), encoding: Encoding::ASCII_8BIT) do |line| + File.foreach(expand_filename(filename), encoding: Encoding::ASCII_8BIT) do |line| if $. == 1 if filename == 'UnicodeData' elsif line.start_with?("# #{filename}-#{UNICODE_VERSION}.txt") diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb index 31acdc4331..a7d1ed0d16 100644 --- a/test/ruby/enc/test_case_mapping.rb +++ b/test/ruby/enc/test_case_mapping.rb @@ -47,7 +47,7 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase # different properties; careful: roundtrip isn't always guaranteed def check_swapcase_properties(expected, start, *flags) assert_equal expected, start.swapcase(*flags) - temp = start + temp = +start assert_equal expected, temp.swapcase!(*flags) assert_equal start, start.swapcase(*flags).swapcase(*flags) assert_equal expected, expected.swapcase(*flags).swapcase(*flags) @@ -61,10 +61,10 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase end def test_invalid - assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".force_encoding('UTF-8').upcase } - assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".force_encoding('UTF-8').downcase } - assert_raise(ArgumentError, "Should not be possible to capitalize invalid string.") { "\xEB".force_encoding('UTF-8').capitalize } - assert_raise(ArgumentError, "Should not be possible to swapcase invalid string.") { "\xEB".force_encoding('UTF-8').swapcase } + assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').upcase } + assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').downcase } + assert_raise(ArgumentError, "Should not be possible to capitalize invalid string.") { "\xEB".dup.force_encoding('UTF-8').capitalize } + assert_raise(ArgumentError, "Should not be possible to swapcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').swapcase } end def test_general diff --git a/test/ruby/enc/test_case_options.rb b/test/ruby/enc/test_case_options.rb index e9bf50fcfc..e9c81d804e 100644 --- a/test/ruby/enc/test_case_options.rb +++ b/test/ruby/enc/test_case_options.rb @@ -19,7 +19,7 @@ class TestCaseOptions < Test::Unit::TestCase def assert_raise_both_types(*options) assert_raise_functional_operations 'a', *options - assert_raise_bang_operations 'a', *options + assert_raise_bang_operations(+'a', *options) assert_raise_functional_operations :a, *options end @@ -51,7 +51,7 @@ class TestCaseOptions < Test::Unit::TestCase def assert_okay_both_types(*options) assert_okay_functional_operations 'a', *options - assert_okay_bang_operations 'a', *options + assert_okay_bang_operations(+'a', *options) assert_okay_functional_operations :a, *options end @@ -69,10 +69,10 @@ class TestCaseOptions < Test::Unit::TestCase assert_raise(ArgumentError) { 'a'.upcase :fold } assert_raise(ArgumentError) { 'a'.capitalize :fold } assert_raise(ArgumentError) { 'a'.swapcase :fold } - assert_nothing_raised { 'a'.downcase! :fold } - assert_raise(ArgumentError) { 'a'.upcase! :fold } - assert_raise(ArgumentError) { 'a'.capitalize! :fold } - assert_raise(ArgumentError) { 'a'.swapcase! :fold } + assert_nothing_raised { 'a'.dup.downcase! :fold } + assert_raise(ArgumentError) { 'a'.dup.upcase! :fold } + assert_raise(ArgumentError) { 'a'.dup.capitalize! :fold } + assert_raise(ArgumentError) { 'a'.dup.swapcase! :fold } assert_nothing_raised { :a.downcase :fold } assert_raise(ArgumentError) { :a.upcase :fold } assert_raise(ArgumentError) { :a.capitalize :fold } diff --git a/test/ruby/enc/test_cesu8.rb b/test/ruby/enc/test_cesu8.rb index d9debe76cd..68a08389ea 100644 --- a/test/ruby/enc/test_cesu8.rb +++ b/test/ruby/enc/test_cesu8.rb @@ -106,4 +106,8 @@ EOT assert_equal chr, ord.chr("cesu-8") end end + + def test_cesu8_left_adjust_char_head + assert_equal("", "\u{10000}".encode("cesu-8").chop) + end end diff --git a/test/ruby/enc/test_emoji_breaks.rb b/test/ruby/enc/test_emoji_breaks.rb index 0aad14e75b..bb5114680e 100644 --- a/test/ruby/enc/test_emoji_breaks.rb +++ b/test/ruby/enc/test_emoji_breaks.rb @@ -4,50 +4,47 @@ require "test/unit" class TestEmojiBreaks < Test::Unit::TestCase -end - -class TestEmojiBreaks::BreakTest - attr_reader :string, :comment, :filename, :line_number, :type, :shortname - - def initialize(filename, line_number, data, comment='') - @filename = filename - @line_number = line_number - @comment = comment.gsub(/\s+/, ' ').strip - if filename=='emoji-test' or filename=='emoji-variation-sequences' - codes, @type = data.split(/\s*;\s*/) - @shortname = '' - else - codes, @type, @shortname = data.split(/\s*;\s*/) + class BreakTest + attr_reader :string, :comment, :filename, :line_number, :type, :shortname + + def initialize(filename, line_number, data, comment='') + @filename = filename + @line_number = line_number + @comment = comment.gsub(/\s+/, ' ').strip + if filename=='emoji-test' or filename=='emoji-variation-sequences' + codes, @type = data.split(/\s*;\s*/) + @shortname = '' + else + codes, @type, @shortname = data.split(/\s*;\s*/) + end + @type = @type.gsub(/\s+/, ' ').strip + @shortname = @shortname.gsub(/\s+/, ' ').strip + @string = codes.split(/\s+/) + .map do |ch| + c = ch.to_i(16) + # eliminate cases with surrogates + # raise ArgumentError if 0xD800 <= c and c <= 0xDFFF + c.chr('UTF-8') + end.join end - @type = @type.gsub(/\s+/, ' ').strip - @shortname = @shortname.gsub(/\s+/, ' ').strip - @string = codes.split(/\s+/) - .map do |ch| - c = ch.to_i(16) - # eliminate cases with surrogates - # raise ArgumentError if 0xD800 <= c and c <= 0xDFFF - c.chr('UTF-8') - end.join end -end -class TestEmojiBreaks::BreakFile - attr_reader :basename, :fullname, :version - FILES = [] + class BreakFile + attr_reader :basename, :fullname, :version + FILES = [] - def initialize(basename, path, version) - @basename = basename - @fullname = "#{path}/#{basename}.txt" # File.expand_path(path + version, __dir__) - @version = version - FILES << self - end + def initialize(basename, path, version) + @basename = basename + @fullname = "#{path}/#{basename}.txt" # File.expand_path(path + version, __dir__) + @version = version + FILES << self + end - def self.files - FILES + def self.files + FILES + end end -end -class TestEmojiBreaks < Test::Unit::TestCase UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION'] UNICODE_DATA_PATH = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}/ucd/emoji", __dir__) EMOJI_VERSION = RbConfig::CONFIG['UNICODE_EMOJI_VERSION'] @@ -56,7 +53,7 @@ class TestEmojiBreaks < Test::Unit::TestCase EMOJI_DATA_FILES = %w[emoji-sequences emoji-test emoji-zwj-sequences].map do |basename| BreakFile.new(basename, EMOJI_DATA_PATH, EMOJI_VERSION) end - UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, UNICODE_VERSION[0..-3]) # [0..-3] deals with a versioning mismatch problem in Unicode + UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, UNICODE_VERSION) EMOJI_DATA_FILES << UNICODE_DATA_FILE def self.data_files_available? @@ -71,80 +68,87 @@ class TestEmojiBreaks < Test::Unit::TestCase omit "Emoji data files not available in #{EMOJI_DATA_PATH}." end end -end -TestEmojiBreaks.data_files_available? and class TestEmojiBreaks - def read_data - tests = [] - EMOJI_DATA_FILES.each do |file| - version_mismatch = true - file_tests = [] - IO.foreach(file.fullname, encoding: Encoding::UTF_8) do |line| - line.chomp! - raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt" if $.==1 and not line=="# #{file.basename}.txt" - version_mismatch = false if line =~ /^# Version: #{file.version}/ - next if line.match?(/\A(#|\z)/) - if line =~ /^(\h{4,6})\.\.(\h{4,6}) *(;.+)/ # deal with Unicode ranges in emoji-sequences.txt (Bug #18028) - range_start = $1.to_i(16) - range_end = $2.to_i(16) - rest = $3 - (range_start..range_end).each do |code_point| - file_tests << BreakTest.new(file.basename, $., *(code_point.to_s(16)+rest).split('#', 2)) + if data_files_available? + def read_data + tests = [] + EMOJI_DATA_FILES.each do |file| + version_mismatch = true + file_tests = [] + File.foreach(file.fullname, encoding: Encoding::UTF_8) do |line| + line.chomp! + if $.==1 + if line=="# #{file.basename}-#{file.version}.txt" + version_mismatch = false + elsif line!="# #{file.basename}.txt" + raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt" + end + end + version_mismatch = false if line =~ /^# Version: #{file.version}/ # 13.0 and older + version_mismatch = false if line =~ /^# Used with Emoji Version #{EMOJI_VERSION}/ # 14.0 and newer + next if line.match?(/\A(#|\z)/) + if line =~ /^(\h{4,6})\.\.(\h{4,6}) *(;.+)/ # deal with Unicode ranges in emoji-sequences.txt (Bug #18028) + range_start = $1.to_i(16) + range_end = $2.to_i(16) + rest = $3 + (range_start..range_end).each do |code_point| + file_tests << BreakTest.new(file.basename, $., *(code_point.to_s(16)+rest).split('#', 2)) + end + else + file_tests << BreakTest.new(file.basename, $., *line.split('#', 2)) end - else - file_tests << BreakTest.new(file.basename, $., *line.split('#', 2)) end + raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}" if version_mismatch + tests += file_tests end - raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}" if version_mismatch - tests += file_tests + tests end - tests - end - - def all_tests - @@tests ||= read_data - rescue Errno::ENOENT - @@tests ||= [] - end - def test_single_emoji - all_tests.each do |test| - expected = [test.string] - actual = test.string.each_grapheme_cluster.to_a - assert_equal expected, actual, - "file: #{test.filename}, line #{test.line_number}, " + - "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}" + def all_tests + @@tests ||= read_data + rescue Errno::ENOENT + @@tests ||= [] end - end - def test_embedded_emoji - all_tests.each do |test| - expected = ["\t", test.string, "\t"] - actual = "\t#{test.string}\t".each_grapheme_cluster.to_a - assert_equal expected, actual, - "file: #{test.filename}, line #{test.line_number}, " + - "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}" + def test_single_emoji + all_tests.each do |test| + expected = [test.string] + actual = test.string.each_grapheme_cluster.to_a + assert_equal expected, actual, + "file: #{test.filename}, line #{test.line_number}, " + + "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}" + end end - end - # test some pseodorandom combinations of emoji - def test_mixed_emoji - srand 0 - length = all_tests.length - step = 503 # use a prime number - all_tests.each do |test1| - start = rand step - start.step(by: step, to: length-1) do |t2| - test2 = all_tests[t2] - # exclude skin tones, because they glue to previous grapheme clusters - next if (0x1F3FB..0x1F3FF).include? test2.string.ord - expected = [test1.string, test2.string] - actual = (test1.string+test2.string).each_grapheme_cluster.to_a + def test_embedded_emoji + all_tests.each do |test| + expected = ["\t", test.string, "\t"] + actual = "\t#{test.string}\t".each_grapheme_cluster.to_a assert_equal expected, actual, - "file1: #{test1.filename}, line1 #{test1.line_number}, " + - "file2: #{test2.filename}, line2 #{test2.line_number},\n" + - "type1: #{test1.type}, shortname1: #{test1.shortname}, comment1: #{test1.comment},\n" + - "type2: #{test2.type}, shortname2: #{test2.shortname}, comment2: #{test2.comment}" + "file: #{test.filename}, line #{test.line_number}, " + + "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}" + end + end + + # test some pseodorandom combinations of emoji + def test_mixed_emoji + srand 0 + length = all_tests.length + step = 503 # use a prime number + all_tests.each do |test1| + start = rand step + start.step(by: step, to: length-1) do |t2| + test2 = all_tests[t2] + # exclude skin tones, because they glue to previous grapheme clusters + next if (0x1F3FB..0x1F3FF).include? test2.string.ord + expected = [test1.string, test2.string] + actual = (test1.string+test2.string).each_grapheme_cluster.to_a + assert_equal expected, actual, + "file1: #{test1.filename}, line1 #{test1.line_number}, " + + "file2: #{test2.filename}, line2 #{test2.line_number},\n" + + "type1: #{test1.type}, shortname1: #{test1.shortname}, comment1: #{test1.comment},\n" + + "type2: #{test2.type}, shortname2: #{test2.shortname}, comment2: #{test2.comment}" + end end end end diff --git a/test/ruby/enc/test_grapheme_breaks.rb b/test/ruby/enc/test_grapheme_breaks.rb index 2a31e078cf..7e6d722d40 100644 --- a/test/ruby/enc/test_grapheme_breaks.rb +++ b/test/ruby/enc/test_grapheme_breaks.rb @@ -4,31 +4,28 @@ require "test/unit" class TestGraphemeBreaksFromFile < Test::Unit::TestCase -end - -class TestGraphemeBreaksFromFile::BreakTest - attr_reader :clusters, :string, :comment, :line_number + class BreakTest + attr_reader :clusters, :string, :comment, :line_number - def initialize(line_number, data, comment) - @line_number = line_number - @comment = comment - @clusters = data.sub(/\A\s*÷\s*/, '') - .sub(/\s*÷\s*\z/, '') - .split(/\s*÷\s*/) - .map do |cl| - cl.split(/\s*×\s*/) - .map do |ch| - c = ch.to_i(16) - # eliminate cases with surrogates - raise ArgumentError if 0xD800 <= c and c <= 0xDFFF - c.chr('UTF-8') - end.join - end - @string = @clusters.join + def initialize(line_number, data, comment) + @line_number = line_number + @comment = comment + @clusters = data.sub(/\A\s*÷\s*/, '') + .sub(/\s*÷\s*\z/, '') + .split(/\s*÷\s*/) + .map do |cl| + cl.split(/\s*×\s*/) + .map do |ch| + c = ch.to_i(16) + # eliminate cases with surrogates + raise ArgumentError if 0xD800 <= c and c <= 0xDFFF + c.chr('UTF-8') + end.join + end + @string = @clusters.join + end end -end -class TestGraphemeBreaksFromFile < Test::Unit::TestCase UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION'] path = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}", __dir__) UNICODE_DATA_PATH = File.directory?("#{path}/ucd/auxiliary") ? "#{path}/ucd/auxiliary" : path @@ -43,53 +40,53 @@ class TestGraphemeBreaksFromFile < Test::Unit::TestCase omit "Unicode data file GraphemeBreakTest not available in #{UNICODE_DATA_PATH}." end end -end -TestGraphemeBreaksFromFile.file_available? and class TestGraphemeBreaksFromFile - def read_data - tests = [] - IO.foreach(GRAPHEME_BREAK_TEST_FILE, encoding: Encoding::UTF_8) do |line| - if $. == 1 and not line.start_with?("# GraphemeBreakTest-#{UNICODE_VERSION}.txt") - raise "File Version Mismatch" + if file_available? + def read_data + tests = [] + File.foreach(GRAPHEME_BREAK_TEST_FILE, encoding: Encoding::UTF_8) do |line| + if $. == 1 and not line.start_with?("# GraphemeBreakTest-#{UNICODE_VERSION}.txt") + raise "File Version Mismatch" + end + next if /\A#/.match? line + tests << BreakTest.new($., *line.chomp.split('#')) rescue 'whatever' end - next if /\A#/.match? line - tests << BreakTest.new($., *line.chomp.split('#')) rescue 'whatever' + tests end - tests - end - def all_tests - @@tests ||= read_data - rescue Errno::ENOENT - @@tests ||= [] - end + def all_tests + @@tests ||= read_data + rescue Errno::ENOENT + @@tests ||= [] + end - def test_each_grapheme_cluster - all_tests.each do |test| - expected = test.clusters - actual = test.string.each_grapheme_cluster.to_a - assert_equal expected, actual, - "line #{test.line_number}, expected '#{expected}', " + - "but got '#{actual}', comment: #{test.comment}" + def test_each_grapheme_cluster + all_tests.each do |test| + expected = test.clusters + actual = test.string.each_grapheme_cluster.to_a + assert_equal expected, actual, + "line #{test.line_number}, expected '#{expected}', " + + "but got '#{actual}', comment: #{test.comment}" + end end - end - def test_backslash_X - all_tests.each do |test| - clusters = test.clusters.dup - string = test.string.dup - removals = 0 - while string.sub!(/\A\X/, '') - removals += 1 - clusters.shift - expected = clusters.join + def test_backslash_X + all_tests.each do |test| + clusters = test.clusters.dup + string = test.string.dup + removals = 0 + while string.sub!(/\A\X/, '') + removals += 1 + clusters.shift + expected = clusters.join + assert_equal expected, string, + "line #{test.line_number}, removals: #{removals}, expected '#{expected}', " + + "but got '#{string}', comment: #{test.comment}" + end assert_equal expected, string, - "line #{test.line_number}, removals: #{removals}, expected '#{expected}', " + + "line #{test.line_number}, after last removal, expected '#{expected}', " + "but got '#{string}', comment: #{test.comment}" end - assert_equal expected, string, - "line #{test.line_number}, after last removal, expected '#{expected}', " + - "but got '#{string}', comment: #{test.comment}" end end end diff --git a/test/ruby/enc/test_regex_casefold.rb b/test/ruby/enc/test_regex_casefold.rb index eaabbc58a2..b5d5c6e337 100644 --- a/test/ruby/enc/test_regex_casefold.rb +++ b/test/ruby/enc/test_regex_casefold.rb @@ -19,7 +19,7 @@ class TestCaseFold < Test::Unit::TestCase end def read_tests - IO.readlines("#{UNICODE_DATA_PATH}/CaseFolding.txt", encoding: Encoding::ASCII_8BIT) + File.readlines("#{UNICODE_DATA_PATH}/CaseFolding.txt", encoding: Encoding::ASCII_8BIT) .collect.with_index { |linedata, linenumber| [linenumber.to_i+1, linedata.chomp] } .reject { |number, data| data =~ /^(#|$)/ } .collect do |linenumber, linedata| |