summaryrefslogtreecommitdiff
path: root/test/ruby/enc
diff options
context:
space:
mode:
Diffstat (limited to 'test/ruby/enc')
-rw-r--r--test/ruby/enc/test_case_comprehensive.rb4
-rw-r--r--test/ruby/enc/test_case_mapping.rb10
-rw-r--r--test/ruby/enc/test_case_options.rb12
-rw-r--r--test/ruby/enc/test_cesu8.rb4
-rw-r--r--test/ruby/enc/test_emoji_breaks.rb208
-rw-r--r--test/ruby/enc/test_grapheme_breaks.rb117
-rw-r--r--test/ruby/enc/test_regex_casefold.rb4
7 files changed, 182 insertions, 177 deletions
diff --git a/test/ruby/enc/test_case_comprehensive.rb b/test/ruby/enc/test_case_comprehensive.rb
index bde47017a2..de18ac865c 100644
--- a/test/ruby/enc/test_case_comprehensive.rb
+++ b/test/ruby/enc/test_case_comprehensive.rb
@@ -24,7 +24,7 @@ class TestComprehensiveCaseMapping < Test::Unit::TestCase
def test_data_files_available
unless TestComprehensiveCaseMapping.data_files_available?
- skip "Unicode data files not available in #{UNICODE_DATA_PATH}."
+ omit "Unicode data files not available in #{UNICODE_DATA_PATH}."
end
end
end
@@ -37,7 +37,7 @@ TestComprehensiveCaseMapping.data_files_available? and class TestComprehensiveC
end
def self.read_data_file(filename)
- IO.foreach(expand_filename(filename), encoding: Encoding::ASCII_8BIT) do |line|
+ File.foreach(expand_filename(filename), encoding: Encoding::ASCII_8BIT) do |line|
if $. == 1
if filename == 'UnicodeData'
elsif line.start_with?("# #{filename}-#{UNICODE_VERSION}.txt")
diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb
index 31acdc4331..a7d1ed0d16 100644
--- a/test/ruby/enc/test_case_mapping.rb
+++ b/test/ruby/enc/test_case_mapping.rb
@@ -47,7 +47,7 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
# different properties; careful: roundtrip isn't always guaranteed
def check_swapcase_properties(expected, start, *flags)
assert_equal expected, start.swapcase(*flags)
- temp = start
+ temp = +start
assert_equal expected, temp.swapcase!(*flags)
assert_equal start, start.swapcase(*flags).swapcase(*flags)
assert_equal expected, expected.swapcase(*flags).swapcase(*flags)
@@ -61,10 +61,10 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
end
def test_invalid
- assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".force_encoding('UTF-8').upcase }
- assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".force_encoding('UTF-8').downcase }
- assert_raise(ArgumentError, "Should not be possible to capitalize invalid string.") { "\xEB".force_encoding('UTF-8').capitalize }
- assert_raise(ArgumentError, "Should not be possible to swapcase invalid string.") { "\xEB".force_encoding('UTF-8').swapcase }
+ assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').upcase }
+ assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').downcase }
+ assert_raise(ArgumentError, "Should not be possible to capitalize invalid string.") { "\xEB".dup.force_encoding('UTF-8').capitalize }
+ assert_raise(ArgumentError, "Should not be possible to swapcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').swapcase }
end
def test_general
diff --git a/test/ruby/enc/test_case_options.rb b/test/ruby/enc/test_case_options.rb
index e9bf50fcfc..e9c81d804e 100644
--- a/test/ruby/enc/test_case_options.rb
+++ b/test/ruby/enc/test_case_options.rb
@@ -19,7 +19,7 @@ class TestCaseOptions < Test::Unit::TestCase
def assert_raise_both_types(*options)
assert_raise_functional_operations 'a', *options
- assert_raise_bang_operations 'a', *options
+ assert_raise_bang_operations(+'a', *options)
assert_raise_functional_operations :a, *options
end
@@ -51,7 +51,7 @@ class TestCaseOptions < Test::Unit::TestCase
def assert_okay_both_types(*options)
assert_okay_functional_operations 'a', *options
- assert_okay_bang_operations 'a', *options
+ assert_okay_bang_operations(+'a', *options)
assert_okay_functional_operations :a, *options
end
@@ -69,10 +69,10 @@ class TestCaseOptions < Test::Unit::TestCase
assert_raise(ArgumentError) { 'a'.upcase :fold }
assert_raise(ArgumentError) { 'a'.capitalize :fold }
assert_raise(ArgumentError) { 'a'.swapcase :fold }
- assert_nothing_raised { 'a'.downcase! :fold }
- assert_raise(ArgumentError) { 'a'.upcase! :fold }
- assert_raise(ArgumentError) { 'a'.capitalize! :fold }
- assert_raise(ArgumentError) { 'a'.swapcase! :fold }
+ assert_nothing_raised { 'a'.dup.downcase! :fold }
+ assert_raise(ArgumentError) { 'a'.dup.upcase! :fold }
+ assert_raise(ArgumentError) { 'a'.dup.capitalize! :fold }
+ assert_raise(ArgumentError) { 'a'.dup.swapcase! :fold }
assert_nothing_raised { :a.downcase :fold }
assert_raise(ArgumentError) { :a.upcase :fold }
assert_raise(ArgumentError) { :a.capitalize :fold }
diff --git a/test/ruby/enc/test_cesu8.rb b/test/ruby/enc/test_cesu8.rb
index d9debe76cd..68a08389ea 100644
--- a/test/ruby/enc/test_cesu8.rb
+++ b/test/ruby/enc/test_cesu8.rb
@@ -106,4 +106,8 @@ EOT
assert_equal chr, ord.chr("cesu-8")
end
end
+
+ def test_cesu8_left_adjust_char_head
+ assert_equal("", "\u{10000}".encode("cesu-8").chop)
+ end
end
diff --git a/test/ruby/enc/test_emoji_breaks.rb b/test/ruby/enc/test_emoji_breaks.rb
index cdde4da9bf..bb5114680e 100644
--- a/test/ruby/enc/test_emoji_breaks.rb
+++ b/test/ruby/enc/test_emoji_breaks.rb
@@ -4,50 +4,47 @@
require "test/unit"
class TestEmojiBreaks < Test::Unit::TestCase
-end
-
-class TestEmojiBreaks::BreakTest
- attr_reader :string, :comment, :filename, :line_number, :type, :shortname
-
- def initialize(filename, line_number, data, comment='')
- @filename = filename
- @line_number = line_number
- @comment = comment.gsub(/\s+/, ' ').strip
- if filename=='emoji-test' or filename=='emoji-variation-sequences'
- codes, @type = data.split(/\s*;\s*/)
- @shortname = ''
- else
- codes, @type, @shortname = data.split(/\s*;\s*/)
+ class BreakTest
+ attr_reader :string, :comment, :filename, :line_number, :type, :shortname
+
+ def initialize(filename, line_number, data, comment='')
+ @filename = filename
+ @line_number = line_number
+ @comment = comment.gsub(/\s+/, ' ').strip
+ if filename=='emoji-test' or filename=='emoji-variation-sequences'
+ codes, @type = data.split(/\s*;\s*/)
+ @shortname = ''
+ else
+ codes, @type, @shortname = data.split(/\s*;\s*/)
+ end
+ @type = @type.gsub(/\s+/, ' ').strip
+ @shortname = @shortname.gsub(/\s+/, ' ').strip
+ @string = codes.split(/\s+/)
+ .map do |ch|
+ c = ch.to_i(16)
+ # eliminate cases with surrogates
+ # raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
+ c.chr('UTF-8')
+ end.join
end
- @type = @type.gsub(/\s+/, ' ').strip
- @shortname = @shortname.gsub(/\s+/, ' ').strip
- @string = codes.split(/\s+/)
- .map do |ch|
- c = ch.to_i(16)
- # eliminate cases with surrogates
- # raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
- c.chr('UTF-8')
- end.join
end
-end
-class TestEmojiBreaks::BreakFile
- attr_reader :basename, :fullname, :version
- FILES = []
+ class BreakFile
+ attr_reader :basename, :fullname, :version
+ FILES = []
- def initialize(basename, path, version)
- @basename = basename
- @fullname = "#{path}/#{basename}.txt" # File.expand_path(path + version, __dir__)
- @version = version
- FILES << self
- end
+ def initialize(basename, path, version)
+ @basename = basename
+ @fullname = "#{path}/#{basename}.txt" # File.expand_path(path + version, __dir__)
+ @version = version
+ FILES << self
+ end
- def self.files
- FILES
+ def self.files
+ FILES
+ end
end
-end
-class TestEmojiBreaks < Test::Unit::TestCase
UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION']
UNICODE_DATA_PATH = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}/ucd/emoji", __dir__)
EMOJI_VERSION = RbConfig::CONFIG['UNICODE_EMOJI_VERSION']
@@ -56,7 +53,7 @@ class TestEmojiBreaks < Test::Unit::TestCase
EMOJI_DATA_FILES = %w[emoji-sequences emoji-test emoji-zwj-sequences].map do |basename|
BreakFile.new(basename, EMOJI_DATA_PATH, EMOJI_VERSION)
end
- UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, UNICODE_VERSION[0..-3]) # [0..-3] deals with a versioning mismatch problem in Unicode
+ UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, UNICODE_VERSION)
EMOJI_DATA_FILES << UNICODE_DATA_FILE
def self.data_files_available?
@@ -68,83 +65,90 @@ class TestEmojiBreaks < Test::Unit::TestCase
def test_data_files_available
assert_equal 4, EMOJI_DATA_FILES.size # debugging test
unless TestEmojiBreaks.data_files_available?
- skip "Emoji data files not available in #{EMOJI_DATA_PATH}."
+ omit "Emoji data files not available in #{EMOJI_DATA_PATH}."
end
end
-end
-TestEmojiBreaks.data_files_available? and class TestEmojiBreaks
- def read_data
- tests = []
- EMOJI_DATA_FILES.each do |file|
- version_mismatch = true
- file_tests = []
- IO.foreach(file.fullname, encoding: Encoding::UTF_8) do |line|
- line.chomp!
- raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt" if $.==1 and not line=="# #{file.basename}.txt"
- version_mismatch = false if line =~ /^# Version: #{file.version}/
- next if line.match?(/\A(#|\z)/)
- if line =~ /^(\h{4,6})\.\.(\h{4,6}) *(;.+)/ # deal with Unicode ranges in emoji-sequences.txt (Bug #18028)
- range_start = $1.to_i(16)
- range_end = $2.to_i(16)
- rest = $3
- (range_start..range_end).each do |code_point|
- file_tests << BreakTest.new(file.basename, $., *(code_point.to_s(16)+rest).split('#', 2))
+ if data_files_available?
+ def read_data
+ tests = []
+ EMOJI_DATA_FILES.each do |file|
+ version_mismatch = true
+ file_tests = []
+ File.foreach(file.fullname, encoding: Encoding::UTF_8) do |line|
+ line.chomp!
+ if $.==1
+ if line=="# #{file.basename}-#{file.version}.txt"
+ version_mismatch = false
+ elsif line!="# #{file.basename}.txt"
+ raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt"
+ end
+ end
+ version_mismatch = false if line =~ /^# Version: #{file.version}/ # 13.0 and older
+ version_mismatch = false if line =~ /^# Used with Emoji Version #{EMOJI_VERSION}/ # 14.0 and newer
+ next if line.match?(/\A(#|\z)/)
+ if line =~ /^(\h{4,6})\.\.(\h{4,6}) *(;.+)/ # deal with Unicode ranges in emoji-sequences.txt (Bug #18028)
+ range_start = $1.to_i(16)
+ range_end = $2.to_i(16)
+ rest = $3
+ (range_start..range_end).each do |code_point|
+ file_tests << BreakTest.new(file.basename, $., *(code_point.to_s(16)+rest).split('#', 2))
+ end
+ else
+ file_tests << BreakTest.new(file.basename, $., *line.split('#', 2))
end
- else
- file_tests << BreakTest.new(file.basename, $., *line.split('#', 2))
end
+ raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}" if version_mismatch
+ tests += file_tests
end
- raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}" if version_mismatch
- tests += file_tests
+ tests
end
- tests
- end
-
- def all_tests
- @@tests ||= read_data
- rescue Errno::ENOENT
- @@tests ||= []
- end
- def test_single_emoji
- all_tests.each do |test|
- expected = [test.string]
- actual = test.string.each_grapheme_cluster.to_a
- assert_equal expected, actual,
- "file: #{test.filename}, line #{test.line_number}, " +
- "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+ def all_tests
+ @@tests ||= read_data
+ rescue Errno::ENOENT
+ @@tests ||= []
end
- end
- def test_embedded_emoji
- all_tests.each do |test|
- expected = ["\t", test.string, "\t"]
- actual = "\t#{test.string}\t".each_grapheme_cluster.to_a
- assert_equal expected, actual,
- "file: #{test.filename}, line #{test.line_number}, " +
- "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+ def test_single_emoji
+ all_tests.each do |test|
+ expected = [test.string]
+ actual = test.string.each_grapheme_cluster.to_a
+ assert_equal expected, actual,
+ "file: #{test.filename}, line #{test.line_number}, " +
+ "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+ end
end
- end
- # test some pseodorandom combinations of emoji
- def test_mixed_emoji
- srand 0
- length = all_tests.length
- step = 503 # use a prime number
- all_tests.each do |test1|
- start = rand step
- start.step(by: step, to: length-1) do |t2|
- test2 = all_tests[t2]
- # exclude skin tones, because they glue to previous grapheme clusters
- next if (0x1F3FB..0x1F3FF).include? test2.string.ord
- expected = [test1.string, test2.string]
- actual = (test1.string+test2.string).each_grapheme_cluster.to_a
+ def test_embedded_emoji
+ all_tests.each do |test|
+ expected = ["\t", test.string, "\t"]
+ actual = "\t#{test.string}\t".each_grapheme_cluster.to_a
assert_equal expected, actual,
- "file1: #{test1.filename}, line1 #{test1.line_number}, " +
- "file2: #{test2.filename}, line2 #{test2.line_number},\n" +
- "type1: #{test1.type}, shortname1: #{test1.shortname}, comment1: #{test1.comment},\n" +
- "type2: #{test2.type}, shortname2: #{test2.shortname}, comment2: #{test2.comment}"
+ "file: #{test.filename}, line #{test.line_number}, " +
+ "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+ end
+ end
+
+ # test some pseodorandom combinations of emoji
+ def test_mixed_emoji
+ srand 0
+ length = all_tests.length
+ step = 503 # use a prime number
+ all_tests.each do |test1|
+ start = rand step
+ start.step(by: step, to: length-1) do |t2|
+ test2 = all_tests[t2]
+ # exclude skin tones, because they glue to previous grapheme clusters
+ next if (0x1F3FB..0x1F3FF).include? test2.string.ord
+ expected = [test1.string, test2.string]
+ actual = (test1.string+test2.string).each_grapheme_cluster.to_a
+ assert_equal expected, actual,
+ "file1: #{test1.filename}, line1 #{test1.line_number}, " +
+ "file2: #{test2.filename}, line2 #{test2.line_number},\n" +
+ "type1: #{test1.type}, shortname1: #{test1.shortname}, comment1: #{test1.comment},\n" +
+ "type2: #{test2.type}, shortname2: #{test2.shortname}, comment2: #{test2.comment}"
+ end
end
end
end
diff --git a/test/ruby/enc/test_grapheme_breaks.rb b/test/ruby/enc/test_grapheme_breaks.rb
index 2d210946a9..7e6d722d40 100644
--- a/test/ruby/enc/test_grapheme_breaks.rb
+++ b/test/ruby/enc/test_grapheme_breaks.rb
@@ -4,31 +4,28 @@
require "test/unit"
class TestGraphemeBreaksFromFile < Test::Unit::TestCase
-end
-
-class TestGraphemeBreaksFromFile::BreakTest
- attr_reader :clusters, :string, :comment, :line_number
+ class BreakTest
+ attr_reader :clusters, :string, :comment, :line_number
- def initialize(line_number, data, comment)
- @line_number = line_number
- @comment = comment
- @clusters = data.sub(/\A\s*÷\s*/, '')
- .sub(/\s*÷\s*\z/, '')
- .split(/\s*÷\s*/)
- .map do |cl|
- cl.split(/\s*×\s*/)
- .map do |ch|
- c = ch.to_i(16)
- # eliminate cases with surrogates
- raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
- c.chr('UTF-8')
- end.join
- end
- @string = @clusters.join
+ def initialize(line_number, data, comment)
+ @line_number = line_number
+ @comment = comment
+ @clusters = data.sub(/\A\s*÷\s*/, '')
+ .sub(/\s*÷\s*\z/, '')
+ .split(/\s*÷\s*/)
+ .map do |cl|
+ cl.split(/\s*×\s*/)
+ .map do |ch|
+ c = ch.to_i(16)
+ # eliminate cases with surrogates
+ raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
+ c.chr('UTF-8')
+ end.join
+ end
+ @string = @clusters.join
+ end
end
-end
-class TestGraphemeBreaksFromFile < Test::Unit::TestCase
UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION']
path = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}", __dir__)
UNICODE_DATA_PATH = File.directory?("#{path}/ucd/auxiliary") ? "#{path}/ucd/auxiliary" : path
@@ -40,56 +37,56 @@ class TestGraphemeBreaksFromFile < Test::Unit::TestCase
def test_data_files_available
unless TestGraphemeBreaksFromFile.file_available?
- skip "Unicode data file GraphemeBreakTest not available in #{UNICODE_DATA_PATH}."
+ omit "Unicode data file GraphemeBreakTest not available in #{UNICODE_DATA_PATH}."
end
end
-end
-TestGraphemeBreaksFromFile.file_available? and class TestGraphemeBreaksFromFile
- def read_data
- tests = []
- IO.foreach(GRAPHEME_BREAK_TEST_FILE, encoding: Encoding::UTF_8) do |line|
- if $. == 1 and not line.start_with?("# GraphemeBreakTest-#{UNICODE_VERSION}.txt")
- raise "File Version Mismatch"
+ if file_available?
+ def read_data
+ tests = []
+ File.foreach(GRAPHEME_BREAK_TEST_FILE, encoding: Encoding::UTF_8) do |line|
+ if $. == 1 and not line.start_with?("# GraphemeBreakTest-#{UNICODE_VERSION}.txt")
+ raise "File Version Mismatch"
+ end
+ next if /\A#/.match? line
+ tests << BreakTest.new($., *line.chomp.split('#')) rescue 'whatever'
end
- next if /\A#/.match? line
- tests << BreakTest.new($., *line.chomp.split('#')) rescue 'whatever'
+ tests
end
- tests
- end
- def all_tests
- @@tests ||= read_data
- rescue Errno::ENOENT
- @@tests ||= []
- end
+ def all_tests
+ @@tests ||= read_data
+ rescue Errno::ENOENT
+ @@tests ||= []
+ end
- def test_each_grapheme_cluster
- all_tests.each do |test|
- expected = test.clusters
- actual = test.string.each_grapheme_cluster.to_a
- assert_equal expected, actual,
- "line #{test.line_number}, expected '#{expected}', " +
- "but got '#{actual}', comment: #{test.comment}"
+ def test_each_grapheme_cluster
+ all_tests.each do |test|
+ expected = test.clusters
+ actual = test.string.each_grapheme_cluster.to_a
+ assert_equal expected, actual,
+ "line #{test.line_number}, expected '#{expected}', " +
+ "but got '#{actual}', comment: #{test.comment}"
+ end
end
- end
- def test_backslash_X
- all_tests.each do |test|
- clusters = test.clusters.dup
- string = test.string.dup
- removals = 0
- while string.sub!(/\A\X/, '')
- removals += 1
- clusters.shift
- expected = clusters.join
+ def test_backslash_X
+ all_tests.each do |test|
+ clusters = test.clusters.dup
+ string = test.string.dup
+ removals = 0
+ while string.sub!(/\A\X/, '')
+ removals += 1
+ clusters.shift
+ expected = clusters.join
+ assert_equal expected, string,
+ "line #{test.line_number}, removals: #{removals}, expected '#{expected}', " +
+ "but got '#{string}', comment: #{test.comment}"
+ end
assert_equal expected, string,
- "line #{test.line_number}, removals: #{removals}, expected '#{expected}', " +
+ "line #{test.line_number}, after last removal, expected '#{expected}', " +
"but got '#{string}', comment: #{test.comment}"
end
- assert_equal expected, string,
- "line #{test.line_number}, after last removal, expected '#{expected}', " +
- "but got '#{string}', comment: #{test.comment}"
end
end
end
diff --git a/test/ruby/enc/test_regex_casefold.rb b/test/ruby/enc/test_regex_casefold.rb
index ec5dc7f220..b5d5c6e337 100644
--- a/test/ruby/enc/test_regex_casefold.rb
+++ b/test/ruby/enc/test_regex_casefold.rb
@@ -19,7 +19,7 @@ class TestCaseFold < Test::Unit::TestCase
end
def read_tests
- IO.readlines("#{UNICODE_DATA_PATH}/CaseFolding.txt", encoding: Encoding::ASCII_8BIT)
+ File.readlines("#{UNICODE_DATA_PATH}/CaseFolding.txt", encoding: Encoding::ASCII_8BIT)
.collect.with_index { |linedata, linenumber| [linenumber.to_i+1, linedata.chomp] }
.reject { |number, data| data =~ /^(#|$)/ }
.collect do |linenumber, linedata|
@@ -39,7 +39,7 @@ class TestCaseFold < Test::Unit::TestCase
@@tests ||= read_tests
rescue Errno::ENOENT => e
@@tests ||= []
- skip e.message
+ omit e.message
end
def self.generate_test_casefold(encoding)