summaryrefslogtreecommitdiff
path: root/test/ruby/enc/test_emoji_breaks.rb
diff options
context:
space:
mode:
Diffstat (limited to 'test/ruby/enc/test_emoji_breaks.rb')
-rw-r--r--test/ruby/enc/test_emoji_breaks.rb208
1 files changed, 106 insertions, 102 deletions
diff --git a/test/ruby/enc/test_emoji_breaks.rb b/test/ruby/enc/test_emoji_breaks.rb
index cdde4da9bf..bb5114680e 100644
--- a/test/ruby/enc/test_emoji_breaks.rb
+++ b/test/ruby/enc/test_emoji_breaks.rb
@@ -4,50 +4,47 @@
require "test/unit"
class TestEmojiBreaks < Test::Unit::TestCase
-end
-
-class TestEmojiBreaks::BreakTest
- attr_reader :string, :comment, :filename, :line_number, :type, :shortname
-
- def initialize(filename, line_number, data, comment='')
- @filename = filename
- @line_number = line_number
- @comment = comment.gsub(/\s+/, ' ').strip
- if filename=='emoji-test' or filename=='emoji-variation-sequences'
- codes, @type = data.split(/\s*;\s*/)
- @shortname = ''
- else
- codes, @type, @shortname = data.split(/\s*;\s*/)
+ class BreakTest
+ attr_reader :string, :comment, :filename, :line_number, :type, :shortname
+
+ def initialize(filename, line_number, data, comment='')
+ @filename = filename
+ @line_number = line_number
+ @comment = comment.gsub(/\s+/, ' ').strip
+ if filename=='emoji-test' or filename=='emoji-variation-sequences'
+ codes, @type = data.split(/\s*;\s*/)
+ @shortname = ''
+ else
+ codes, @type, @shortname = data.split(/\s*;\s*/)
+ end
+ @type = @type.gsub(/\s+/, ' ').strip
+ @shortname = @shortname.gsub(/\s+/, ' ').strip
+ @string = codes.split(/\s+/)
+ .map do |ch|
+ c = ch.to_i(16)
+ # eliminate cases with surrogates
+ # raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
+ c.chr('UTF-8')
+ end.join
end
- @type = @type.gsub(/\s+/, ' ').strip
- @shortname = @shortname.gsub(/\s+/, ' ').strip
- @string = codes.split(/\s+/)
- .map do |ch|
- c = ch.to_i(16)
- # eliminate cases with surrogates
- # raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
- c.chr('UTF-8')
- end.join
end
-end
-class TestEmojiBreaks::BreakFile
- attr_reader :basename, :fullname, :version
- FILES = []
+ class BreakFile
+ attr_reader :basename, :fullname, :version
+ FILES = []
- def initialize(basename, path, version)
- @basename = basename
- @fullname = "#{path}/#{basename}.txt" # File.expand_path(path + version, __dir__)
- @version = version
- FILES << self
- end
+ def initialize(basename, path, version)
+ @basename = basename
+ @fullname = "#{path}/#{basename}.txt" # File.expand_path(path + version, __dir__)
+ @version = version
+ FILES << self
+ end
- def self.files
- FILES
+ def self.files
+ FILES
+ end
end
-end
-class TestEmojiBreaks < Test::Unit::TestCase
UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION']
UNICODE_DATA_PATH = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}/ucd/emoji", __dir__)
EMOJI_VERSION = RbConfig::CONFIG['UNICODE_EMOJI_VERSION']
@@ -56,7 +53,7 @@ class TestEmojiBreaks < Test::Unit::TestCase
EMOJI_DATA_FILES = %w[emoji-sequences emoji-test emoji-zwj-sequences].map do |basename|
BreakFile.new(basename, EMOJI_DATA_PATH, EMOJI_VERSION)
end
- UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, UNICODE_VERSION[0..-3]) # [0..-3] deals with a versioning mismatch problem in Unicode
+ UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, UNICODE_VERSION)
EMOJI_DATA_FILES << UNICODE_DATA_FILE
def self.data_files_available?
@@ -68,83 +65,90 @@ class TestEmojiBreaks < Test::Unit::TestCase
def test_data_files_available
assert_equal 4, EMOJI_DATA_FILES.size # debugging test
unless TestEmojiBreaks.data_files_available?
- skip "Emoji data files not available in #{EMOJI_DATA_PATH}."
+ omit "Emoji data files not available in #{EMOJI_DATA_PATH}."
end
end
-end
-TestEmojiBreaks.data_files_available? and class TestEmojiBreaks
- def read_data
- tests = []
- EMOJI_DATA_FILES.each do |file|
- version_mismatch = true
- file_tests = []
- IO.foreach(file.fullname, encoding: Encoding::UTF_8) do |line|
- line.chomp!
- raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt" if $.==1 and not line=="# #{file.basename}.txt"
- version_mismatch = false if line =~ /^# Version: #{file.version}/
- next if line.match?(/\A(#|\z)/)
- if line =~ /^(\h{4,6})\.\.(\h{4,6}) *(;.+)/ # deal with Unicode ranges in emoji-sequences.txt (Bug #18028)
- range_start = $1.to_i(16)
- range_end = $2.to_i(16)
- rest = $3
- (range_start..range_end).each do |code_point|
- file_tests << BreakTest.new(file.basename, $., *(code_point.to_s(16)+rest).split('#', 2))
+ if data_files_available?
+ def read_data
+ tests = []
+ EMOJI_DATA_FILES.each do |file|
+ version_mismatch = true
+ file_tests = []
+ File.foreach(file.fullname, encoding: Encoding::UTF_8) do |line|
+ line.chomp!
+ if $.==1
+ if line=="# #{file.basename}-#{file.version}.txt"
+ version_mismatch = false
+ elsif line!="# #{file.basename}.txt"
+ raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt"
+ end
+ end
+ version_mismatch = false if line =~ /^# Version: #{file.version}/ # 13.0 and older
+ version_mismatch = false if line =~ /^# Used with Emoji Version #{EMOJI_VERSION}/ # 14.0 and newer
+ next if line.match?(/\A(#|\z)/)
+ if line =~ /^(\h{4,6})\.\.(\h{4,6}) *(;.+)/ # deal with Unicode ranges in emoji-sequences.txt (Bug #18028)
+ range_start = $1.to_i(16)
+ range_end = $2.to_i(16)
+ rest = $3
+ (range_start..range_end).each do |code_point|
+ file_tests << BreakTest.new(file.basename, $., *(code_point.to_s(16)+rest).split('#', 2))
+ end
+ else
+ file_tests << BreakTest.new(file.basename, $., *line.split('#', 2))
end
- else
- file_tests << BreakTest.new(file.basename, $., *line.split('#', 2))
end
+ raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}" if version_mismatch
+ tests += file_tests
end
- raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}" if version_mismatch
- tests += file_tests
+ tests
end
- tests
- end
-
- def all_tests
- @@tests ||= read_data
- rescue Errno::ENOENT
- @@tests ||= []
- end
- def test_single_emoji
- all_tests.each do |test|
- expected = [test.string]
- actual = test.string.each_grapheme_cluster.to_a
- assert_equal expected, actual,
- "file: #{test.filename}, line #{test.line_number}, " +
- "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+ def all_tests
+ @@tests ||= read_data
+ rescue Errno::ENOENT
+ @@tests ||= []
end
- end
- def test_embedded_emoji
- all_tests.each do |test|
- expected = ["\t", test.string, "\t"]
- actual = "\t#{test.string}\t".each_grapheme_cluster.to_a
- assert_equal expected, actual,
- "file: #{test.filename}, line #{test.line_number}, " +
- "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+ def test_single_emoji
+ all_tests.each do |test|
+ expected = [test.string]
+ actual = test.string.each_grapheme_cluster.to_a
+ assert_equal expected, actual,
+ "file: #{test.filename}, line #{test.line_number}, " +
+ "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+ end
end
- end
- # test some pseodorandom combinations of emoji
- def test_mixed_emoji
- srand 0
- length = all_tests.length
- step = 503 # use a prime number
- all_tests.each do |test1|
- start = rand step
- start.step(by: step, to: length-1) do |t2|
- test2 = all_tests[t2]
- # exclude skin tones, because they glue to previous grapheme clusters
- next if (0x1F3FB..0x1F3FF).include? test2.string.ord
- expected = [test1.string, test2.string]
- actual = (test1.string+test2.string).each_grapheme_cluster.to_a
+ def test_embedded_emoji
+ all_tests.each do |test|
+ expected = ["\t", test.string, "\t"]
+ actual = "\t#{test.string}\t".each_grapheme_cluster.to_a
assert_equal expected, actual,
- "file1: #{test1.filename}, line1 #{test1.line_number}, " +
- "file2: #{test2.filename}, line2 #{test2.line_number},\n" +
- "type1: #{test1.type}, shortname1: #{test1.shortname}, comment1: #{test1.comment},\n" +
- "type2: #{test2.type}, shortname2: #{test2.shortname}, comment2: #{test2.comment}"
+ "file: #{test.filename}, line #{test.line_number}, " +
+ "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+ end
+ end
+
+ # test some pseodorandom combinations of emoji
+ def test_mixed_emoji
+ srand 0
+ length = all_tests.length
+ step = 503 # use a prime number
+ all_tests.each do |test1|
+ start = rand step
+ start.step(by: step, to: length-1) do |t2|
+ test2 = all_tests[t2]
+ # exclude skin tones, because they glue to previous grapheme clusters
+ next if (0x1F3FB..0x1F3FF).include? test2.string.ord
+ expected = [test1.string, test2.string]
+ actual = (test1.string+test2.string).each_grapheme_cluster.to_a
+ assert_equal expected, actual,
+ "file1: #{test1.filename}, line1 #{test1.line_number}, " +
+ "file2: #{test2.filename}, line2 #{test2.line_number},\n" +
+ "type1: #{test1.type}, shortname1: #{test1.shortname}, comment1: #{test1.comment},\n" +
+ "type2: #{test2.type}, shortname2: #{test2.shortname}, comment2: #{test2.comment}"
+ end
end
end
end