22 files changed, 2665 insertions, 0 deletions
diff --git a/test/ruby/enc/test_big5.rb b/test/ruby/enc/test_big5.rb
new file mode 100644
index 0000000000..5dcf93e8e3
--- /dev/null
+++ b/test/ruby/enc/test_big5.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: false
+require "test/unit"
+
+class TestBig5 < Test::Unit::TestCase
+  def s(bytes) # tags the given byte string as Big5 in place and returns it
+    bytes.force_encoding("big5")
+  end
+
+  def test_mbc_enc_len # one two-byte sequence counts as a single character
+    assert_equal 1, s("\xa1\xa1").size
+  end
+
+  def test_mbc_to_code # bytes 0xa1 0xa1 give the code 0xa1a1
+    assert_equal 0xa1a1, s("\xa1\xa1").ord
+  end
+
+  def test_code_to_mbc # inverse direction of test_mbc_to_code
+    assert_equal s("\xa1\xa1"), 0xa1a1.chr("big5")
+  end
+
+  def test_mbc_case_fold # a backreference must still match with the ignore-case option
+    pattern = Regexp.new(s("(\xa1\xa1)\\1"), "i")
+    assert_match pattern, s("\xa1\xa1\xa1\xa1")
+  end
+
+  def test_left_adjust_char_head # chop drops the whole last character (two bytes)
+    assert_equal s("\xa1\xa1"), s("\xa1\xa1\xa1\xa1").chop
+  end
+end
diff --git a/test/ruby/enc/test_case_comprehensive.rb b/test/ruby/enc/test_case_comprehensive.rb
new file mode 100644
index 0000000000..b812b88b83
--- /dev/null
+++ b/test/ruby/enc/test_case_comprehensive.rb
@@ -0,0 +1,306 @@
+# frozen_string_literal: true
+# Copyright © 2016 Martin J. Dürst (duerst@it.aoyama.ac.jp)
+
+require "test/unit"
+
+class TestComprehensiveCaseMapping < Test::Unit::TestCase
+  UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION']
+  data_root = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}", __dir__)
+  UNICODE_DATA_PATH = File.directory?("#{data_root}/ucd") ? "#{data_root}/ucd" : data_root
+
+  # Turns space-separated hexadecimal code points (e.g. "0041 0301") into a UTF-8 string.
+  def self.hex2utf8(s)
+    s.split(' ').map { |hex| hex.to_i(16) }.pack('U*')
+  end
+
+  # Absolute path of the data file "<basename>.txt" below UNICODE_DATA_PATH.
+  def self.expand_filename(basename)
+    File.expand_path("#{UNICODE_DATA_PATH}/#{basename}.txt", __dir__)
+  end
+
+  # True only when UnicodeData, CaseFolding and SpecialCasing all exist on disk.
+  def self.data_files_available?
+    %w[UnicodeData CaseFolding SpecialCasing].all? { |name| File.exist?(expand_filename(name)) }
+  end
+
+  # Marks itself as omitted when the Unicode data files cannot be found.
+  def test_data_files_available
+    omit "Unicode data files not available in #{UNICODE_DATA_PATH}." unless TestComprehensiveCaseMapping.data_files_available?
+  end
+end
+
+TestComprehensiveCaseMapping.data_files_available? and  class TestComprehensiveCaseMapping
+  CaseTest = Struct.new(:method_name, :attributes, :first_data, :follow_data) do
+    def initialize(method_name, attributes, first_data, follow_data=first_data)
+      super # bare super forwards all four arguments, including a defaulted follow_data
+    end
+  end
+
+  # Yields [character, fields] for each data line of the Unicode data file +filename+.
+  # Lines starting with '#' or '@', blank lines and lines containing "Surrogate" are skipped.
+  def self.read_data_file(filename)
+    version_header = "# #{filename}-#{UNICODE_VERSION}.txt"
+    File.foreach(expand_filename(filename), encoding: Encoding::ASCII_8BIT) do |line|
+      # The first line must name the expected version; the check is skipped for UnicodeData.
+      if $. == 1 && filename != 'UnicodeData' && !line.start_with?(version_header)
+        raise "File Version Mismatch"
+      end
+      next if /\A(?:[\#@]|\s*\z)|Surrogate/.match?(line)
+      fields = line.chomp.split('#')[0].split(/;\s*/, 15)
+      code = fields[0].to_i(16).chr(Encoding::UTF_8)
+      yield code, fields
+    end
+  end
+
+  def self.read_data # builds the expected-result tables from the data files; returns the list of CaseTest entries
+    @@codepoints = [] # every character yielded while reading UnicodeData, in file order
+
+    downcase  = Hash.new { |h, c| c } # general tables: a character without an entry maps to itself
+    upcase    = Hash.new { |h, c| c }
+    titlecase = Hash.new { |h, c| c }
+    casefold  = Hash.new { |h, c| c }
+    swapcase  = Hash.new { |h, c| c }
+    turkic_upcase    = Hash.new { |h, c| upcase[c] } # Turkic tables fall back to the general tables
+    turkic_downcase  = Hash.new { |h, c| downcase[c] }
+    turkic_titlecase = Hash.new { |h, c| titlecase[c] }
+    turkic_swapcase  = Hash.new { |h, c| swapcase[c] }
+    ascii_upcase     = Hash.new { |h, c| /\A[a-zA-Z]\z/.match?(c) ? upcase[c] : c } # ASCII tables: only a-z/A-Z are mapped
+    ascii_downcase   = Hash.new { |h, c| /\A[a-zA-Z]\z/.match?(c) ? downcase[c] : c }
+    ascii_titlecase  = Hash.new { |h, c| /\A[a-zA-Z]\z/.match?(c) ? titlecase[c] : c }
+    ascii_swapcase   = Hash.new { |h, c| /\A[a-z]\z/.match?(c) ? upcase[c] : (/\A[A-Z]\z/.match?(c) ? downcase[c] : c) }
+
+    read_data_file('UnicodeData') do |code, data| # fields 12, 13, 14 feed upcase, downcase, titlecase
+      @@codepoints << code
+      upcase[code] = hex2utf8 data[12] unless data[12].empty?
+      downcase[code] = hex2utf8 data[13] unless data[13].empty?
+      if code>="\u1C90" and code<="\u1CBF" # exception for Georgian: use lowercase for titlecase
+        titlecase[code] = hex2utf8(data[13]) unless data[13].empty?
+      else
+        titlecase[code] = hex2utf8 data[14] unless data[14].empty?
+      end
+    end
+    read_data_file('CaseFolding') do |code, data|
+      casefold[code] = hex2utf8(data[2]) if data[1] =~ /^[CF]$/ # only entries whose second field is C or F
+    end
+
+    read_data_file('SpecialCasing') do |code, data| # fields 1, 2, 3 feed downcase, titlecase, upcase
+      case data[4] # fifth field: empty, or starting with "tr"; any other value is ignored
+      when ''
+        upcase[code] = hex2utf8 data[3]
+        downcase[code] = hex2utf8 data[1]
+        titlecase[code] = hex2utf8 data[2]
+      when /\Atr\s*/
+        if data[4]!='tr After_I' # the "tr After_I" entries are left out of the Turkic tables
+          turkic_upcase[code] = hex2utf8 data[3]
+          turkic_downcase[code] = hex2utf8 data[1]
+          turkic_titlecase[code] = hex2utf8 data[2]
+        end
+      end
+    end
+
+    @@codepoints.each do |c| # derive the swapcase tables from the upcase/downcase tables built above
+      if upcase[c] != c
+        if downcase[c] != c # c changes under both upcase and downcase
+          swapcase[c] = turkic_swapcase[c] =
+            case c
+            when "\u01C5" then "\u0064\u017D"
+            when "\u01C8" then "\u006C\u004A"
+            when "\u01CB" then "\u006E\u004A"
+            when "\u01F2" then "\u0064\u005A"
+            else # Greek
+              downcase[upcase[c][0]] + "\u0399"
+            end
+        else
+          swapcase[c] = upcase[c]
+          turkic_swapcase[c] = turkic_upcase[c]
+        end
+      else
+        if downcase[c] != c
+          swapcase[c] = downcase[c]
+          turkic_swapcase[c] = turkic_downcase[c]
+        end
+      end
+    end
+
+    [ # arguments: method, options, table for the first character, optional table for the following characters
+      CaseTest.new(:downcase,   [], downcase),
+      CaseTest.new(:upcase,     [], upcase),
+      CaseTest.new(:capitalize, [], titlecase, downcase),
+      CaseTest.new(:swapcase,   [], swapcase),
+      CaseTest.new(:downcase,   [:fold],       casefold),
+      CaseTest.new(:upcase,     [:turkic],     turkic_upcase),
+      CaseTest.new(:downcase,   [:turkic],     turkic_downcase),
+      CaseTest.new(:capitalize, [:turkic],     turkic_titlecase, turkic_downcase),
+      CaseTest.new(:swapcase,   [:turkic],     turkic_swapcase),
+      CaseTest.new(:upcase,     [:ascii],      ascii_upcase),
+      CaseTest.new(:downcase,   [:ascii],      ascii_downcase),
+      CaseTest.new(:capitalize, [:ascii],      ascii_titlecase, ascii_downcase),
+      CaseTest.new(:swapcase,   [:ascii],      ascii_swapcase),
+    ]
+  end
+
+  def self.all_tests # memoized result of read_data
+    @@tests ||= read_data
+  rescue Errno::ENOENT # a data file could not be opened
+    @@tests ||= [] # fall back to an empty list, so the generators define no tests
+  end
+
+  def self.generate_unicode_case_mapping_tests(encoding) # defines one test method per CaseTest entry
+    all_tests.each do |test|
+      suffix = test.attributes.empty? ? '' : "_#{test.attributes.join('-')}"
+      define_method "test_#{encoding}_#{test.method_name}#{suffix}" do
+        @@codepoints.each do |code|
+          # five copies of the character: first_data covers the first, follow_data the other four
+          source = code.encode(encoding) * 5
+          target = "#{test.first_data[code]}#{test.follow_data[code]*4}".encode(encoding)
+          result = source.__send__(test.method_name, *test.attributes)
+          assert_equal target, result,
+            proc{"from #{code*5} (#{source.dump}) expected #{target.dump} but was #{result.dump}"}
+        end
+      end
+    end
+  end
+
+  # Defines one test per CaseTest entry for +encoding+, covering only the characters of byte values 0..255.
+  def self.generate_single_byte_case_mapping_tests(encoding)
+    # precalculate codepoints to speed up testing for small encodings
+    codepoints = []
+    (0..255).each do |cp|
+      begin
+        codepoints << cp.chr(encoding).encode('UTF-8')
+      rescue Encoding::UndefinedConversionError, RangeError # no usable character for this byte value: leave it out
+      end
+    end
+    all_tests.each do |test|
+      attributes = test.attributes.map(&:to_s).join '-'
+      attributes.prepend '_' unless attributes.empty?
+      define_method "test_#{encoding}_#{test.method_name}#{attributes}" do
+        codepoints.each do |code|
+          begin
+            source = code.encode(encoding) * 5
+            begin
+              target = "#{test.first_data[code]}#{test.follow_data[code]*4}".encode(encoding)
+            rescue Encoding::UndefinedConversionError # the expected result is not representable in +encoding+
+              if test.first_data[code]=="i\u0307" || test.follow_data[code]=="i\u0307" # explicit dot above
+                first_data = test.first_data[code]=="i\u0307" ? 'i' : test.first_data[code]
+                follow_data = test.follow_data[code]=="i\u0307" ? 'i' : test.follow_data[code]
+                target = "#{first_data}#{follow_data*4}".encode(encoding)
+              elsif /i|I/.match?(code) # special case for Turkic
+                raise # re-raised into the outer rescue below, which skips this character
+              else
+                target = source # otherwise the string is expected to stay unchanged
+              end
+            end
+            result = source.__send__(test.method_name, *test.attributes)
+            assert_equal target, result,
+              proc{"from #{code*5} (#{source.dump}) expected #{target.dump} but was #{result.dump}"}
+          rescue Encoding::UndefinedConversionError
+          end
+        end
+      end
+    end
+  end
+
+  # test for encodings that don't yet (or will never) deal with non-ASCII characters
+  def self.generate_ascii_only_case_mapping_tests(encoding) # defines upcase/downcase/capitalize/swapcase tests for +encoding+
+    all_tests # called for its side effect: read_data fills @@codepoints, which is used just below
+    # preselect codepoints to speed up testing for small encodings
+    codepoints = @@codepoints.select do |code|
+      begin
+        code.encode(encoding)
+        true
+      rescue Encoding::UndefinedConversionError
+        false
+      end
+    end
+    define_method "test_#{encoding}_upcase" do # expectation: only a-z change
+      codepoints.each do |code|
+        begin
+          source = code.encode(encoding) * 5
+          target = source.tr 'a-z', 'A-Z'
+          result = source.upcase
+          assert_equal target, result,
+            "from #{code*5} (#{source.dump}) expected #{target.dump} but was #{result.dump}"
+        rescue Encoding::UndefinedConversionError
+        end
+      end
+    end
+    define_method "test_#{encoding}_downcase" do # expectation: only A-Z change
+      codepoints.each do |code|
+        begin
+          source = code.encode(encoding) * 5
+          target = source.tr 'A-Z', 'a-z'
+          result = source.downcase
+          assert_equal target, result,
+            "from #{code*5} (#{source.dump}) expected #{target.dump} but was #{result.dump}"
+        rescue Encoding::UndefinedConversionError
+        end
+      end
+    end
+    define_method "test_#{encoding}_capitalize" do # expectation: first character upcased, the rest downcased, ASCII letters only
+      codepoints.each do |code|
+        begin
+          source = code.encode(encoding) * 5
+          target = source[0].tr('a-z', 'A-Z') + source[1..-1].tr('A-Z', 'a-z')
+          result = source.capitalize
+          assert_equal target, result,
+            "from #{code*5} (#{source.dump}) expected #{target.dump} but was #{result.dump}"
+        rescue Encoding::UndefinedConversionError
+        end
+      end
+    end
+    define_method "test_#{encoding}_swapcase" do # expectation: a-z and A-Z trade places, nothing else changes
+      codepoints.each do |code|
+        begin
+          source = code.encode(encoding) * 5
+          target = source.tr('a-zA-Z', 'A-Za-z')
+          result = source.swapcase
+          assert_equal target, result,
+            "from #{code*5} (#{source.dump}) expected #{target.dump} but was #{result.dump}"
+        rescue Encoding::UndefinedConversionError
+        end
+      end
+    end
+  end
+
+  generate_single_byte_case_mapping_tests 'US-ASCII' # single-byte generator: expectations come from the data tables
+  generate_single_byte_case_mapping_tests 'ASCII-8BIT'
+  generate_single_byte_case_mapping_tests 'ISO-8859-1'
+  generate_single_byte_case_mapping_tests 'ISO-8859-2'
+  generate_single_byte_case_mapping_tests 'ISO-8859-3'
+  generate_single_byte_case_mapping_tests 'ISO-8859-4'
+  generate_single_byte_case_mapping_tests 'ISO-8859-5'
+  generate_single_byte_case_mapping_tests 'ISO-8859-6'
+  generate_single_byte_case_mapping_tests 'ISO-8859-7'
+  generate_single_byte_case_mapping_tests 'ISO-8859-8'
+  generate_single_byte_case_mapping_tests 'ISO-8859-9'
+  generate_single_byte_case_mapping_tests 'ISO-8859-10'
+  generate_single_byte_case_mapping_tests 'ISO-8859-11'
+  generate_single_byte_case_mapping_tests 'ISO-8859-13'
+  generate_single_byte_case_mapping_tests 'ISO-8859-14'
+  generate_single_byte_case_mapping_tests 'ISO-8859-15'
+  generate_single_byte_case_mapping_tests 'ISO-8859-16'
+  generate_ascii_only_case_mapping_tests 'KOI8-R' # ASCII-only generator: only a-z/A-Z are expected to change
+  generate_ascii_only_case_mapping_tests 'KOI8-U'
+  generate_ascii_only_case_mapping_tests 'Big5'
+  generate_ascii_only_case_mapping_tests 'EUC-JP'
+  generate_ascii_only_case_mapping_tests 'EUC-KR'
+  generate_ascii_only_case_mapping_tests 'GB18030'
+  generate_ascii_only_case_mapping_tests 'GB2312'
+  generate_ascii_only_case_mapping_tests 'GBK'
+  generate_ascii_only_case_mapping_tests 'Shift_JIS'
+  generate_ascii_only_case_mapping_tests 'Windows-31J'
+  generate_single_byte_case_mapping_tests 'Windows-1250'
+  generate_single_byte_case_mapping_tests 'Windows-1251'
+  generate_single_byte_case_mapping_tests 'Windows-1252'
+  generate_single_byte_case_mapping_tests 'Windows-1253'
+  generate_single_byte_case_mapping_tests 'Windows-1254'
+  generate_single_byte_case_mapping_tests 'Windows-1255'
+  generate_ascii_only_case_mapping_tests 'Windows-1256' # NOTE(review): the only Windows-125x page on the ASCII-only generator
+  generate_single_byte_case_mapping_tests 'Windows-1257'
+  generate_unicode_case_mapping_tests 'UTF-8' # Unicode generator: every character read from UnicodeData is checked
+  generate_unicode_case_mapping_tests 'UTF-16BE'
+  generate_unicode_case_mapping_tests 'UTF-16LE'
+  generate_unicode_case_mapping_tests 'UTF-32BE'
+  generate_unicode_case_mapping_tests 'UTF-32LE'
+end
+end
diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb
new file mode 100644
index 0000000000..a7d1ed0d16
--- /dev/null
+++ b/test/ruby/enc/test_case_mapping.rb
@@ -0,0 +1,231 @@
+# Copyright © 2016 Kimihito Matsui (松井 仁人) and Martin J. Dürst (duerst@it.aoyama.ac.jp)
+
+require "test/unit"
+
+# preliminary tests, using  as a guard
+# to test new implementation strategy
+class TestCaseMappingPreliminary < Test::Unit::TestCase
+  # checks, including idempotence and non-modification; not always guaranteed
+  def check_upcase_properties(expected, start, *flags) # start.upcase gives expected, and expected itself is stable
+    assert_equal expected, start.upcase(*flags)
+    temp = start.dup # work on a copy so that the bang method cannot touch the caller's string
+    assert_equal expected, temp.upcase!(*flags) unless expected==temp
+    assert_nil   temp.upcase!(*flags) if expected==temp # also runs after the line above has changed temp
+    assert_equal expected, expected.upcase(*flags)
+    temp = expected.dup
+    assert_nil   temp.upcase!(*flags)
+  end
+
+  def check_downcase_properties(expected, start, *flags) # start.downcase gives expected, and expected itself is stable
+    assert_equal expected, start.downcase(*flags)
+    temp = start.dup # work on a copy so that the bang method cannot touch the caller's string
+    assert_equal expected, temp.downcase!(*flags) unless expected==temp
+    assert_nil   temp.downcase!(*flags) if expected==temp # also runs after the line above has changed temp
+    assert_equal expected, expected.downcase(*flags)
+    temp = expected.dup
+    assert_nil   temp.downcase!(*flags)
+  end
+
+  def check_capitalize_properties(expected, start, *flags) # start.capitalize gives expected, and expected itself is stable
+    assert_equal expected, start.capitalize(*flags)
+    temp = start.dup # work on a copy so that the bang method cannot touch the caller's string
+    assert_equal expected, temp.capitalize!(*flags) unless expected==temp
+    assert_nil   temp.capitalize!(*flags) if expected==temp # also runs after the line above has changed temp
+    assert_equal expected, expected.capitalize(*flags)
+    temp = expected.dup
+    assert_nil   temp.capitalize!(*flags)
+  end
+
+  # Runs check_capitalize_properties on every suffix of +upper+ that has at least two
+  # characters, expecting its first character followed by the matching tail of +lower+.
+  def check_capitalize_suffixes(lower, upper)
+    (0...upper.length - 1).each do |i|
+      check_capitalize_properties upper[i] + lower[i + 1..-1], upper[i..-1]
+    end
+  end
+
+  # different properties; careful: roundtrip isn't always guaranteed
+  def check_swapcase_properties(expected, start, *flags) # start.swapcase gives expected; swapping twice restores the input
+    assert_equal expected, start.swapcase(*flags)
+    temp = start.dup # +start would return start itself when it is not frozen, and swapcase! would then alter start
+    assert_equal expected, temp.swapcase!(*flags)
+    assert_equal start, start.swapcase(*flags).swapcase(*flags)
+    assert_equal expected, expected.swapcase(*flags).swapcase(*flags)
+  end
+
+  def test_ascii # ASCII-only text with the default options
+    check_downcase_properties   'yukihiro matsumoto (matz)', 'Yukihiro MATSUMOTO (MATZ)'
+    check_upcase_properties     'YUKIHIRO MATSUMOTO (MATZ)', 'yukihiro matsumoto (matz)'
+    check_capitalize_properties 'Yukihiro matsumoto (matz)', 'yukihiro MATSUMOTO (MATZ)'
+    check_swapcase_properties   'yUKIHIRO matsumoto (MAtz)', 'Yukihiro MATSUMOTO (maTZ)'
+  end
+
+  def test_invalid # a lone 0xEB byte is an incomplete multi-byte sequence, i.e. invalid UTF-8
+    %i[upcase downcase capitalize swapcase].each do |operation|
+      message = "Should not be possible to #{operation} invalid string."
+      assert_raise(ArgumentError, message) { "\xEB".dup.force_encoding('UTF-8').__send__(operation) }
+    end
+  end
+
+  def test_general # Latin letters with diacritics, default options
+    check_downcase_properties   'résumé dürst ĭñŧėřŋãţĳňőńæłĩżàťïōņ', 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤÏŌŅ'
+    check_upcase_properties     'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤÏŌŅ', 'résumé dürst ĭñŧėřŋãţĳňőńæłĩżàťïōņ'
+    check_capitalize_suffixes   'résumé dürst ĭñŧėřŋãţĳňőńæłĩżàťïōņ', 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤÏŌŅ'
+    check_swapcase_properties   'résumé DÜRST ĭñŧėřŊÃŢĲŇŐŃæłĩżàťïōņ', 'RÉSUMÉ dürst ĬÑŦĖŘŋãţĳňőńÆŁĨŻÀŤÏŌŅ'
+  end
+
+  def test_one_way_upcase # characters whose uppercase form does not map back to them
+    check_upcase_properties     'ΜΜΜΜΜ', 'µµµµµ' # MICRO SIGN -> Greek Mu
+    check_downcase_properties   'µµµµµ', 'µµµµµ' # MICRO SIGN stays unchanged under downcase
+    check_capitalize_properties 'Μµµµµ', 'µµµµµ' # MICRO SIGN -> Greek Mu
+    check_capitalize_properties 'Μµµµµ', 'µµµµµ', :turkic # MICRO SIGN -> Greek Mu
+    check_capitalize_properties 'H̱ẖẖẖẖ', 'ẖẖẖẖẖ'
+    check_capitalize_properties 'Βϐϐϐϐ', 'ϐϐϐϐϐ'
+    check_capitalize_properties 'Θϑϑϑϑ', 'ϑϑϑϑϑ'
+    check_capitalize_properties 'Φϕ', 'ϕϕ'
+    check_capitalize_properties 'Πϖ', 'ϖϖ'
+    check_capitalize_properties 'Κϰ', 'ϰϰ'
+    check_capitalize_properties 'Ρϱϱ', 'ϱϱϱ'
+    check_capitalize_properties 'Εϵ', 'ϵϵ'
+    check_capitalize_properties 'Ιͅͅͅͅ', 'ͅͅͅͅͅ'
+    check_capitalize_properties 'Sſſſſ', 'ſſſſſ'
+  end
+
+  def test_various # assorted single cases: MICRO SIGN and sharp s
+    check_upcase_properties     'Μ', 'µ' # MICRO SIGN -> Greek Mu
+    check_downcase_properties   'µµµµµ', 'µµµµµ' # MICRO SIGN
+    check_capitalize_properties 'Ss', 'ß'
+    check_upcase_properties     'SS', 'ß'
+  end
+
+  def test_cherokee # downcase gives the U+AB70.. letters, while :fold goes the opposite way
+    check_downcase_properties   "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79", 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ'
+    check_upcase_properties     'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79"
+    check_capitalize_suffixes   "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79", 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ'
+    assert_equal                'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ'.downcase(:fold)
+    assert_equal                'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79".downcase(:fold)
+  end
+
+  def test_titlecase # digraph letters that exist in lower-, upper- and titlecase forms
+    check_downcase_properties   'ǳ ǆ ǉ ǌ', 'ǲ ǅ ǈ ǋ'
+    check_downcase_properties   'ǳ ǆ ǉ ǌ', 'Ǳ Ǆ Ǉ Ǌ'
+    check_upcase_properties     'Ǳ Ǆ Ǉ Ǌ', 'ǲ ǅ ǈ ǋ'
+    check_upcase_properties     'Ǳ Ǆ Ǉ Ǌ', 'ǳ ǆ ǉ ǌ'
+    check_capitalize_properties 'ǲ', 'Ǳ'
+    check_capitalize_properties 'ǅ', 'Ǆ'
+    check_capitalize_properties 'ǈ', 'Ǉ'
+    check_capitalize_properties 'ǋ', 'Ǌ'
+    check_capitalize_properties 'ǲ', 'ǳ'
+    check_capitalize_properties 'ǅ', 'ǆ'
+    check_capitalize_properties 'ǈ', 'ǉ'
+    check_capitalize_properties 'ǋ', 'ǌ'
+  end
+
+  def test_swapcase # swapcase of a single character that yields a two-character result
+    assert_equal                'dZ', 'ǲ'.swapcase
+    assert_equal                'dŽ', 'ǅ'.swapcase
+    assert_equal                'lJ', 'ǈ'.swapcase
+    assert_equal                'nJ', 'ǋ'.swapcase
+    assert_equal                'ἀΙ', 'ᾈ'.swapcase
+    assert_equal                'ἣΙ', 'ᾛ'.swapcase
+    assert_equal                'ὧΙ', 'ᾯ'.swapcase
+    assert_equal                'αΙ', 'ᾼ'.swapcase
+    assert_equal                'ηΙ', 'ῌ'.swapcase
+    assert_equal                'ωΙ', 'ῼ'.swapcase
+  end
+
+  def test_ascii_option # with :ascii only a-z/A-Z change; non-ASCII letters are left as they are
+    check_downcase_properties   'yukihiro matsumoto (matz)', 'Yukihiro MATSUMOTO (MATZ)', :ascii
+    check_upcase_properties     'YUKIHIRO MATSUMOTO (MATZ)', 'yukihiro matsumoto (matz)', :ascii
+    check_capitalize_properties 'Yukihiro matsumoto (matz)', 'yukihiro MATSUMOTO (MATZ)', :ascii
+    check_swapcase_properties   'yUKIHIRO matsumoto (MAtz)', 'Yukihiro MATSUMOTO (maTZ)', :ascii
+    check_downcase_properties   'yukİhİro matsumoto (matz)', 'YUKİHİRO MATSUMOTO (MATZ)', :ascii
+    check_downcase_properties   'rÉsumÉ dÜrst ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤĬŌŅ', 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤĬŌŅ', :ascii
+    check_swapcase_properties   'rÉsumÉ dÜrst ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤĬŌŅ', 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤĬŌŅ', :ascii
+  end
+
+  def test_fold_option # downcase with :fold, including results longer than the input
+    check_downcase_properties   'ss', 'ß', :fold
+    check_downcase_properties   'fifl', 'ﬁﬂ', :fold
+    check_downcase_properties   'σ', 'ς', :fold
+    check_downcase_properties   'μ', 'µ', :fold # MICRO SIGN -> Greek mu
+  end
+
+  def test_turcic # NOTE(review): name presumably means "turkic"; covers the :turkic option and dotted capital I
+    check_downcase_properties   'yukihiro matsumoto (matz)', 'Yukihiro MATSUMOTO (MATZ)', :turkic
+    check_upcase_properties     'YUKİHİRO MATSUMOTO (MATZ)', 'Yukihiro Matsumoto (matz)', :turkic
+    check_downcase_properties   "yuki\u0307hi\u0307ro matsumoto (matz)", 'YUKİHİRO MATSUMOTO (MATZ)' # no option: i followed by U+0307
+  end
+
+  def test_greek
+    check_downcase_properties   'αβγδεζηθικλμνξοπρστυφχψω', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ'
+    check_upcase_properties     'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ', 'αβγδεζηθικλμνξοπρστυφχψω'
+  end
+
+  # This test checks against problems when changing the order of mapping results
+  # in some of the entries of the unfolding table (related to
+  # https://bugs.ruby-lang.org/issues/12990).
+  def test_reorder_unfold # every pair must match case-insensitively in both directions
+    # GREEK SMALL LETTER IOTA
+    assert_equal 0, "\u03B9" =~ /\u0345/i
+    assert_equal 0, "\u0345" =~ /\u03B9/i
+    assert_equal 0, "\u03B9" =~ /\u0399/i
+    assert_equal 0, "\u0399" =~ /\u03B9/i
+    assert_equal 0, "\u03B9" =~ /\u1fbe/i
+    assert_equal 0, "\u1fbe" =~ /\u03B9/i
+
+    # GREEK SMALL LETTER MU
+    assert_equal 0, "\u03BC" =~ /\u00B5/i
+    assert_equal 0, "\u00B5" =~ /\u03BC/i
+    assert_equal 0, "\u03BC" =~ /\u039C/i
+    assert_equal 0, "\u039C" =~ /\u03BC/i
+
+    # CYRILLIC SMALL LETTER MONOGRAPH UK
+    assert_equal 0, "\uA64B" =~ /\u1c88/i
+    assert_equal 0, "\u1c88" =~ /\uA64B/i
+    assert_equal 0, "\uA64B" =~ /\ua64A/i
+    assert_equal 0, "\ua64A" =~ /\uA64B/i
+  end
+
+  def test_georgian_canary # fails as soon as U+1CBB or U+1CBC becomes an assigned character
+    message = "Reexamine implementation of Georgian in String#capitalize"
+    assert_equal false, "\u1CBB".match?(/\p{assigned}/), message
+    assert_equal false, "\u1CBC".match?(/\p{assigned}/), message
+  end
+
+  def test_georgian_unassigned # capitalize must return U+1CBB and U+1CBC unchanged
+    message = "Unassigned codepoints should not be converted"
+    assert_equal "\u1CBB", "\u1CBB".capitalize, message
+    assert_equal "\u1CBC", "\u1CBC".capitalize, message
+  end
+
+  def test_georgian_capitalize
+    expected = "\u10D0\u10D1\u10D2"
+    # each position takes its U+1C9x or its U+10Dx letter; all eight mixes give the U+10Dx string
+    first  = ["\u1C90", "\u10D0"]
+    second = ["\u1C91", "\u10D1"]
+    third  = ["\u1C92", "\u10D2"]
+    first.product(second, third) do |letters|
+      assert_equal expected, letters.join.capitalize
+    end
+  end
+
+  def test_shift_jis_downcase_ascii # bytes A-Z that follow the byte 0x89 must not be downcased
+    pairs = ("A".."Z").map {|letter| "\x89#{letter}"}.join("").force_encoding("Shift_JIS")
+    assert_equal pairs, pairs.downcase(:ascii)
+  end
+
+  def test_shift_jis_upcase_ascii # bytes a-z that follow the byte 0x89 must not be upcased
+    pairs = ("a".."z").map {|letter| "\x89#{letter}"}.join("").force_encoding("Shift_JIS")
+    assert_equal pairs, pairs.upcase(:ascii)
+  end
+
+  def no_longer_a_test_buffer_allocations # presumably not collected by the test runner: the name has no test_ prefix
+    assert_equal 'TURKISH*ı'*10, ('I'*10).downcase(:turkic)
+    assert_equal 'TURKISH*ı'*100, ('I'*100).downcase(:turkic)
+    assert_equal 'TURKISH*ı'*1_000, ('I'*1_000).downcase(:turkic)
+    assert_equal 'TURKISH*ı'*10_000, ('I'*10_000).downcase(:turkic)
+    assert_equal 'TURKISH*ı'*100_000, ('I'*100_000).downcase(:turkic)
+    assert_equal 'TURKISH*ı'*1_000_000, ('I'*1_000_000).downcase(:turkic)
+  end
+end
diff --git a/test/ruby/enc/test_case_options.rb b/test/ruby/enc/test_case_options.rb
new file mode 100644
index 0000000000..e9c81d804e
--- /dev/null
+++ b/test/ruby/enc/test_case_options.rb
@@ -0,0 +1,81 @@
+# Copyright © 2016 Kimihito Matsui (松井 仁人) and Martin J. Dürst (duerst@it.aoyama.ac.jp)
+
+require "test/unit"
+
+class TestCaseOptions < Test::Unit::TestCase
+  # All four non-destructive case methods must reject +options+ with ArgumentError.
+  def assert_raise_functional_operations(arg, *options)
+    %i[upcase downcase capitalize swapcase].each do |operation|
+      assert_raise(ArgumentError) { arg.__send__(operation, *options) }
+    end
+  end
+
+  # All four destructive case methods must reject +options+ with ArgumentError.
+  def assert_raise_bang_operations(arg, *options)
+    %i[upcase! downcase! capitalize! swapcase!].each do |operation|
+      assert_raise(ArgumentError) { arg.__send__(operation, *options) }
+    end
+  end
+
+  def assert_raise_both_types(*options) # String, mutable String (bang methods) and Symbol receivers
+    assert_raise_functional_operations('a', *options)
+    assert_raise_bang_operations(+'a', *options)
+    assert_raise_functional_operations(:a, *options)
+  end
+
+  def test_option_errors
+    # option lists that every case method is expected to reject with ArgumentError
+    rejected = [
+      %i[invalid], %i[lithuanian turkic fold], %i[fold fold],
+      %i[ascii fold], %i[fold ascii],
+      %i[ascii turkic], %i[turkic ascii],
+      %i[ascii lithuanian], %i[lithuanian ascii],
+    ]
+    rejected.each { |options| assert_raise_both_types(*options) }
+    # (the order above is the order in which the combinations are checked)
+  end
+
+  # All four non-destructive case methods must accept +options+ without raising.
+  def assert_okay_functional_operations(arg, *options)
+    %i[upcase downcase capitalize swapcase].each do |operation|
+      assert_nothing_raised { arg.__send__(operation, *options) }
+    end
+  end
+
+  # All four destructive case methods, applied in turn to +arg+, must accept +options+.
+  def assert_okay_bang_operations(arg, *options)
+    %i[upcase! downcase! capitalize! swapcase!].each do |operation|
+      assert_nothing_raised { arg.__send__(operation, *options) }
+    end
+  end
+
+  def assert_okay_both_types(*options) # String, mutable String (bang methods) and Symbol receivers
+    assert_okay_functional_operations('a', *options)
+    assert_okay_bang_operations(+'a', *options)
+    assert_okay_functional_operations(:a, *options)
+  end
+
+  def test_options_okay
+    # no option at all, each single option, and :turkic with :lithuanian in either order
+    accepted = [
+      [], %i[ascii], %i[turkic], %i[lithuanian],
+      %i[turkic lithuanian], %i[lithuanian turkic],
+    ]
+    accepted.each { |options| assert_okay_both_types(*options) }
+  end
+
+  def test_operation_specific   # :fold option only allowed on downcase
+    rejecting = %i[upcase capitalize swapcase]
+    # String, non-destructive methods
+    assert_nothing_raised { 'a'.downcase :fold }
+    rejecting.each { |op| assert_raise(ArgumentError) { 'a'.__send__(op, :fold) } }
+
+    # String, destructive methods (each call works on its own copy)
+    assert_nothing_raised { 'a'.dup.downcase! :fold }
+    rejecting.each { |op| assert_raise(ArgumentError) { 'a'.dup.__send__(:"#{op}!", :fold) } }
+
+    # Symbol methods
+    assert_nothing_raised { :a.downcase :fold }
+    rejecting.each { |op| assert_raise(ArgumentError) { :a.__send__(op, :fold) } }
+  end
+end
diff --git a/test/ruby/enc/test_cesu8.rb b/test/ruby/enc/test_cesu8.rb
new file mode 100644
index 0000000000..68a08389ea
--- /dev/null
+++ b/test/ruby/enc/test_cesu8.rb
@@ -0,0 +1,113 @@
+# frozen_string_literal: false
+require 'test/unit'
+
+class TestCESU8 < Test::Unit::TestCase
+
+  def encdump(obj) # renders a String (via dump) or a Regexp as text for assertion messages
+    case obj
+    when String
+      obj.dump
+    when Regexp
+      "Regexp.new(#{encdump(obj.source)}, #{obj.options})"
+    else
+      raise ArgumentError, "unexpected: #{obj.inspect}" # was `Argument`, an undefined constant (NameError)
+    end
+  end
+
+  # Invokes +meth+ on +recv+ with +args+ and returns the result.
+  #
+  # The call is wrapped in assert_nothing_raised; the assertion message is a
+  # rendering of the call in which String operands are shown through encdump
+  # and everything else through inspect.
+  def enccall(recv, meth, *args)
+    render = lambda do |operand|
+      if String === operand
+        encdump(operand)
+      else
+        operand.inspect
+      end
+    end
+    desc = ''
+    desc << render.call(recv)
+    desc << '.' << meth.to_s
+    unless args.empty?
+      rendered_args = args.map(&render)
+      desc << '(' << rendered_args.join(',') << ')'
+    end
+    result = nil
+    assert_nothing_raised(desc) {
+      result = recv.send(meth, *args)
+    }
+    result
+  end
+
+  def assert_str_equal(expected, actual, message=nil) # assert_equal whose message shows both strings via encdump
+    full_message = build_message(message, <<EOT)
+#{encdump expected} expected but not equal to
+#{encdump actual}.
+EOT
+    assert_equal(expected, actual, full_message)
+  end
+
+  # tests start
+
+  def test_cesu8_valid_encoding
+    # byte sequences that must be reported as valid CESU-8
+    valid = [
+      "\x00",
+      "\x7f",
+      "\u0080",
+      "\u07ff",
+      "\u0800",
+      "\ud7ff",
+      "\xed\xa0\x80\xed\xb0\x80",
+      "\xed\xaf\xbf\xed\xbf\xbf",
+      "\ue000",
+      "\uffff",
+    ]
+    # byte sequences that must be reported as invalid CESU-8
+    invalid = [
+      "\x80",
+      "\xc0\x80",
+      "\xc0",
+      "\xe0\x80\x80",
+      "\xed\xa0\x80",
+      "\xed\xb0\x80\xed\xb0\x80",
+      "\xe0",
+      "\xff",
+    ]
+    all_assertions do |a|
+      valid.each {|s|
+        s.force_encoding("cesu-8")
+        a.for(s) { assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?") }
+      }
+      invalid.each {|s|
+        s.force_encoding("cesu-8")
+        a.for(s) { assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?") }
+      }
+    end
+  end
+
+  def test_cesu8_ord # String#ord and Integer#chr must be inverses for each [bytes, code point] pair
+    [
+      ["\x00", 0],
+      ["\x7f", 0x7f],
+      ["\u0080", 0x80],
+      ["\u07ff", 0x7ff],
+      ["\u0800", 0x800],
+      ["\ud7ff", 0xd7ff],
+      ["\xed\xa0\x80\xed\xb0\x80", 0x10000], # six bytes standing for one code point above 0xffff
+      ["\xed\xaf\xbf\xed\xbf\xbf", 0x10ffff],
+      ["\xee\x80\x80", 0xe000],
+      ["\xef\xbf\xbf", 0xffff],
+    ].each do |chr, ord|
+      chr.force_encoding("cesu-8")
+      assert_equal ord, chr.ord
+      assert_equal chr, ord.chr("cesu-8")
+    end
+  end
+
+  def test_cesu8_left_adjust_char_head # chop must remove the whole encoded U+10000, leaving nothing behind
+    assert_equal("", "\u{10000}".encode("cesu-8").chop)
+  end
+end
diff --git a/test/ruby/enc/test_cp949.rb b/test/ruby/enc/test_cp949.rb
new file mode 100644
index 0000000000..0684162d5b
--- /dev/null
+++ b/test/ruby/enc/test_cp949.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: false
+require "test/unit"
+
+class TestCP949 < Test::Unit::TestCase
+  def s(bytes) # tags the given byte string as CP949 in place and returns it
+    bytes.force_encoding("cp949")
+  end
+
+  def test_mbc_enc_len # one two-byte sequence counts as a single character
+    assert_equal 1, s("\xa1\xa1").size
+  end
+
+  def test_mbc_to_code # bytes 0xa1 0xa1 give the code 0xa1a1
+    assert_equal 0xa1a1, s("\xa1\xa1").ord
+  end
+
+  def test_code_to_mbc # inverse direction of test_mbc_to_code
+    assert_equal s("\xa1\xa1"), 0xa1a1.chr("cp949")
+  end
+
+  def test_mbc_case_fold # a backreference must still match with the ignore-case option
+    pattern = Regexp.new(s("(\xa1\xa1)\\1"), "i")
+    assert_match pattern, s("\xa1\xa1\xa1\xa1")
+  end
+
+  def test_left_adjust_char_head # chop drops the whole last character (two bytes)
+    assert_equal s("\xa1\xa1"), s("\xa1\xa1\xa1\xa1").chop
+  end
+end
diff --git a/test/ruby/enc/test_emoji.rb b/test/ruby/enc/test_emoji.rb
new file mode 100644
index 0000000000..330ff70cb9
--- /dev/null
+++ b/test/ruby/enc/test_emoji.rb
@@ -0,0 +1,443 @@
+# frozen_string_literal: false
+require 'test/unit'
+
+module Emoji
+
+  class TestRenameSJIS < Test::Unit::TestCase
+    def test_shift_jis
+      assert_raise(ArgumentError) { "".force_encoding("Shift_JIS-DoCoMo") }
+      assert_raise(ArgumentError) { "".force_encoding("Shift_JIS-KDDI") }
+      assert_raise(ArgumentError) { "".force_encoding("Shift_JIS-SoftBank") }
+    end
+  end
+
+  class TestUTF8_BLACK_SUN_WITH_RAYS < Test::Unit::TestCase
+    include Emoji
+
+    def setup
+      @codes = {
+        "UTF8-DoCoMo"     => utf8_docomo("\u{E63E}"),
+        "UTF8-KDDI"       => utf8_kddi("\u{E488}"),
+        "UTF8-SoftBank"   => utf8_softbank("\u{E04A}"),
+        "UTF-8"           => "\u{2600}",
+      }
+    end
+
+    def test_convert
+      @codes.each do |from_enc, from_str|
+        @codes.each do |to_enc, to_str|
+          next if from_enc == to_enc
+          assert_equal to_str, from_str.encode(to_enc), "convert from #{from_enc} to #{to_enc}"
+        end
+      end
+    end
+  end
+
+  class TestDoCoMo < Test::Unit::TestCase
+    include Emoji
+
+    def setup
+      setup_instance_variable(self)
+    end
+
+    def test_encoding_name
+      %w(UTF8-DoCoMo
+         SJIS-DoCoMo).each do |n|
+        assert_include Encoding.name_list, n, "encoding not found: #{n}"
+      end
+    end
+
+    def test_comparison
+      assert_not_equal Encoding::UTF_8, Encoding::UTF8_DoCoMo
+      assert_not_equal Encoding::Windows_31J, Encoding::SJIS_DoCoMo
+    end
+
+    def test_from_utf8
+      assert_nothing_raised { assert_equal utf8_docomo(@aiueo_utf8), to_utf8_docomo(@aiueo_utf8) }
+      assert_nothing_raised { assert_equal sjis_docomo(@aiueo_sjis), to_sjis_docomo(@aiueo_utf8) }
+    end
+
+    def test_from_sjis
+      assert_nothing_raised { assert_equal utf8_docomo(@aiueo_utf8), to_utf8_docomo(@aiueo_sjis) }
+      assert_nothing_raised { assert_equal sjis_docomo(@aiueo_sjis), to_sjis_docomo(@aiueo_sjis) }
+    end
+
+    def test_to_utf8
+      assert_nothing_raised { assert_equal @utf8, to_utf8(@utf8_docomo) }
+      assert_nothing_raised { assert_equal @utf8, to_utf8(@sjis_docomo) }
+    end
+
+    def test_to_sjis
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis(@utf8_docomo) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis(@sjis_docomo) }
+    end
+
+    def test_to_eucjp
+      assert_raise(Encoding::UndefinedConversionError) { to_eucjp(@utf8_docomo) }
+      assert_raise(Encoding::UndefinedConversionError) { to_eucjp(@sjis_docomo) }
+    end
+
+    def test_docomo
+      assert_nothing_raised { assert_equal @utf8_docomo, to_utf8_docomo(@sjis_docomo) }
+      assert_nothing_raised { assert_equal @sjis_docomo, to_sjis_docomo(@utf8_docomo) }
+    end
+
+    def test_to_kddi
+      assert_nothing_raised { assert_equal @utf8_kddi, to_utf8_kddi(@utf8_docomo) }
+      assert_nothing_raised { assert_equal @sjis_kddi, to_sjis_kddi(@utf8_docomo) }
+      assert_nothing_raised { assert_equal @iso2022jp_kddi, to_iso2022jp_kddi(@utf8_docomo) }
+
+      assert_nothing_raised { assert_equal @utf8_kddi, to_utf8_kddi(@sjis_docomo) }
+      assert_nothing_raised { assert_equal @sjis_kddi, to_sjis_kddi(@sjis_docomo) }
+      assert_nothing_raised { assert_equal @iso2022jp_kddi, to_iso2022jp_kddi(@sjis_docomo) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_kddi(@utf8_docomo_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_kddi(@utf8_docomo_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_iso2022jp_kddi(@utf8_docomo_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_kddi(@sjis_docomo_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_kddi(@sjis_docomo_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_iso2022jp_kddi(@sjis_docomo_only) }
+    end
+
+    def test_to_softbank
+      assert_nothing_raised { assert_equal @utf8_softbank, to_utf8_softbank(@utf8_docomo) }
+      assert_nothing_raised { assert_equal @sjis_softbank, to_sjis_softbank(@utf8_docomo) }
+
+      assert_nothing_raised { assert_equal @utf8_softbank, to_utf8_softbank(@sjis_docomo) }
+      assert_nothing_raised { assert_equal @sjis_softbank, to_sjis_softbank(@sjis_docomo) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_softbank(@utf8_docomo_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_softbank(@utf8_docomo_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_softbank(@sjis_docomo_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_softbank(@sjis_docomo_only) }
+    end
+  end
+
+  class TestKDDI < Test::Unit::TestCase
+    include Emoji
+
+    def setup
+      setup_instance_variable(self)
+    end
+
+    def test_encoding_name
+      %w(UTF8-KDDI
+         SJIS-KDDI
+         ISO-2022-JP-KDDI
+         stateless-ISO-2022-JP-KDDI).each do |n|
+        assert_include Encoding.name_list, n, "encoding not found: #{n}"
+      end
+    end
+
+    def test_comparison
+      assert_not_equal Encoding::UTF_8, Encoding::UTF8_KDDI
+      assert_not_equal Encoding::Windows_31J, Encoding::SJIS_KDDI
+      assert_not_equal Encoding::ISO_2022_JP, Encoding::ISO_2022_JP_KDDI
+      assert_not_equal Encoding::Stateless_ISO_2022_JP, Encoding::Stateless_ISO_2022_JP_KDDI
+    end
+
+    def test_from_utf8
+      assert_nothing_raised { assert_equal utf8_kddi(@aiueo_utf8), to_utf8_kddi(@aiueo_utf8) }
+      assert_nothing_raised { assert_equal sjis_kddi(@aiueo_sjis), to_sjis_kddi(@aiueo_utf8) }
+      assert_nothing_raised { assert_equal iso2022jp_kddi(@aiueo_iso2022jp), to_iso2022jp_kddi(@aiueo_utf8) }
+    end
+
+    def test_from_sjis
+      assert_nothing_raised { assert_equal utf8_kddi(@aiueo_utf8), to_utf8_kddi(@aiueo_sjis) }
+      assert_nothing_raised { assert_equal sjis_kddi(@aiueo_sjis), to_sjis_kddi(@aiueo_sjis) }
+      assert_nothing_raised { assert_equal iso2022jp_kddi(@aiueo_iso2022jp), to_iso2022jp_kddi(@aiueo_sjis) }
+    end
+
+    def test_from_iso2022jp
+      assert_nothing_raised { assert_equal utf8_kddi(@aiueo_utf8), to_utf8_kddi(@aiueo_iso2022jp) }
+      assert_nothing_raised { assert_equal sjis_kddi(@aiueo_sjis), to_sjis_kddi(@aiueo_iso2022jp) }
+      assert_nothing_raised { assert_equal iso2022jp_kddi(@aiueo_iso2022jp), to_iso2022jp_kddi(@aiueo_iso2022jp) }
+    end
+
+    def test_to_utf8
+      assert_nothing_raised { assert_equal @utf8, to_utf8(@utf8_kddi) }
+      assert_nothing_raised { assert_equal @utf8, to_utf8(@utf8_undoc_kddi) }
+      assert_nothing_raised { assert_equal @utf8, to_utf8(@sjis_kddi) }
+      assert_nothing_raised { assert_equal @utf8, to_utf8(@iso2022jp_kddi) }
+    end
+
+    def test_to_sjis
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis(@utf8_kddi) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis(@utf8_undoc_kddi) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis(@sjis_kddi) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis(@iso2022jp_kddi) }
+    end
+
+    def test_to_eucjp
+      assert_raise(Encoding::UndefinedConversionError) { to_eucjp(@utf8_kddi) }
+      assert_raise(Encoding::UndefinedConversionError) { to_eucjp(@utf8_undoc_kddi) }
+      assert_raise(Encoding::UndefinedConversionError) { to_eucjp(@sjis_kddi) }
+      assert_raise(Encoding::UndefinedConversionError) { to_eucjp(@iso2022jp_kddi) }
+    end
+
+    def test_kddi
+      assert_nothing_raised { assert_equal @utf8_kddi, to_utf8_kddi(@sjis_kddi) }
+      assert_nothing_raised { assert_equal @utf8_kddi, to_utf8_kddi(@iso2022jp_kddi) }
+      assert_nothing_raised { assert_equal @sjis_kddi, to_sjis_kddi(@sjis_kddi) }
+      assert_nothing_raised { assert_equal @sjis_kddi, to_sjis_kddi(@utf8_undoc_kddi) }
+      assert_nothing_raised { assert_equal @sjis_kddi, to_sjis_kddi(@iso2022jp_kddi) }
+      assert_nothing_raised { assert_equal @iso2022jp_kddi, to_iso2022jp_kddi(@sjis_kddi) }
+      assert_nothing_raised { assert_equal @iso2022jp_kddi, to_iso2022jp_kddi(@utf8_undoc_kddi) }
+      assert_nothing_raised { assert_equal @iso2022jp_kddi, to_iso2022jp_kddi(@iso2022jp_kddi) }
+    end
+
+    def test_to_docomo
+      assert_nothing_raised { assert_equal @utf8_docomo, to_utf8_docomo(@utf8_kddi) }
+      assert_nothing_raised { assert_equal @sjis_docomo, to_sjis_docomo(@utf8_kddi) }
+
+      assert_nothing_raised { assert_equal @utf8_docomo, to_utf8_docomo(@utf8_undoc_kddi) }
+      assert_nothing_raised { assert_equal @sjis_docomo, to_sjis_docomo(@utf8_undoc_kddi) }
+
+      assert_nothing_raised { assert_equal @utf8_docomo, to_utf8_docomo(@sjis_kddi) }
+      assert_nothing_raised { assert_equal @sjis_docomo, to_sjis_docomo(@sjis_kddi) }
+
+      assert_nothing_raised { assert_equal @utf8_docomo, to_utf8_docomo(@iso2022jp_kddi) }
+      assert_nothing_raised { assert_equal @sjis_docomo, to_sjis_docomo(@iso2022jp_kddi) }
+      # Each *_kddi_only fixture below must fail to convert; only the raised error is checked.
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_docomo(@utf8_kddi_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_docomo(@utf8_kddi_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_docomo(@utf8_undoc_kddi_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_docomo(@utf8_undoc_kddi_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_docomo(@sjis_kddi_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_docomo(@sjis_kddi_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_docomo(@iso2022jp_kddi_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_docomo(@iso2022jp_kddi_only) }
+    end
+
+    def test_to_softbank
+      assert_nothing_raised { assert_equal @utf8_softbank, to_utf8_softbank(@utf8_kddi) }
+      assert_nothing_raised { assert_equal @sjis_softbank, to_sjis_softbank(@utf8_kddi) }
+
+      assert_nothing_raised { assert_equal @utf8_softbank, to_utf8_softbank(@utf8_undoc_kddi) }
+      assert_nothing_raised { assert_equal @sjis_softbank, to_sjis_softbank(@utf8_undoc_kddi) }
+
+      assert_nothing_raised { assert_equal @utf8_softbank, to_utf8_softbank(@sjis_kddi) }
+      assert_nothing_raised { assert_equal @sjis_softbank, to_sjis_softbank(@sjis_kddi) }
+
+      assert_nothing_raised { assert_equal @utf8_softbank, to_utf8_softbank(@iso2022jp_kddi) }
+      assert_nothing_raised { assert_equal @sjis_softbank, to_sjis_softbank(@iso2022jp_kddi) }
+      # Each *_kddi_only fixture below must fail to convert; only the raised error is checked.
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_softbank(@utf8_kddi_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_softbank(@utf8_kddi_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_softbank(@utf8_undoc_kddi_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_softbank(@utf8_undoc_kddi_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_softbank(@sjis_kddi_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_softbank(@sjis_kddi_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_softbank(@iso2022jp_kddi_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_softbank(@iso2022jp_kddi_only) }
+    end
+  end
+
+  class TestSoftBank < Test::Unit::TestCase
+    include Emoji
+
+    def setup
+      setup_instance_variable(self)
+    end
+
+    def test_encoding_name
+      %w(UTF8-SoftBank
+         SJIS-SoftBank).each do |n|
+        assert_include Encoding.name_list, n, "encoding not found: #{n}"
+      end
+    end
+
+    def test_comparison
+      assert_not_equal Encoding::UTF_8, Encoding::UTF8_SoftBank
+      assert_not_equal Encoding::Windows_31J, Encoding::SJIS_SoftBank
+    end
+
+    def test_from_utf8
+      assert_nothing_raised { assert_equal utf8_softbank(@aiueo_utf8), to_utf8_softbank(@aiueo_utf8) }
+      assert_nothing_raised { assert_equal sjis_softbank(@aiueo_sjis), to_sjis_softbank(@aiueo_utf8) }
+    end
+
+    def test_from_sjis
+      assert_nothing_raised { assert_equal utf8_softbank(@aiueo_utf8), to_utf8_softbank(@aiueo_sjis) }
+      assert_nothing_raised { assert_equal sjis_softbank(@aiueo_sjis), to_sjis_softbank(@aiueo_sjis) }
+    end
+
+    def test_to_utf8
+      assert_nothing_raised { assert_equal @utf8, to_utf8(@utf8_softbank) }
+      assert_nothing_raised { assert_equal @utf8, to_utf8(@sjis_softbank) }
+    end
+
+    def test_to_sjis
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis(@utf8_softbank) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis(@sjis_softbank) }
+    end
+
+    def test_to_eucjp
+      assert_raise(Encoding::UndefinedConversionError) { to_eucjp(@utf8_softbank) }
+      assert_raise(Encoding::UndefinedConversionError) { to_eucjp(@sjis_softbank) }
+    end
+
+    def test_softbank
+      assert_nothing_raised { assert_equal @utf8_softbank, to_utf8_softbank(@sjis_softbank) }
+      assert_nothing_raised { assert_equal @sjis_softbank, to_sjis_softbank(@utf8_softbank) }
+    end
+
+    def test_to_docomo
+      assert_nothing_raised { assert_equal @utf8_docomo, to_utf8_docomo(@utf8_softbank) }
+      assert_nothing_raised { assert_equal @sjis_docomo, to_sjis_docomo(@utf8_softbank) }
+
+      assert_nothing_raised { assert_equal @utf8_docomo, to_utf8_docomo(@sjis_softbank) }
+      assert_nothing_raised { assert_equal @sjis_docomo, to_sjis_docomo(@sjis_softbank) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_docomo(@utf8_softbank_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_docomo(@utf8_softbank_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_docomo(@sjis_softbank_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_docomo(@sjis_softbank_only) }
+    end
+
+    def test_to_kddi
+      assert_nothing_raised { assert_equal @utf8_kddi, to_utf8_kddi(@utf8_softbank) }
+      assert_nothing_raised { assert_equal @sjis_kddi, to_sjis_kddi(@utf8_softbank) }
+      assert_nothing_raised { assert_equal @iso2022jp_kddi, to_iso2022jp_kddi(@utf8_softbank) }
+
+      assert_nothing_raised { assert_equal @utf8_kddi, to_utf8_kddi(@sjis_softbank) }
+      assert_nothing_raised { assert_equal @sjis_kddi, to_sjis_kddi(@sjis_softbank) }
+      assert_nothing_raised { assert_equal @iso2022jp_kddi, to_iso2022jp_kddi(@sjis_softbank) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_kddi(@utf8_softbank_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_kddi(@utf8_softbank_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_iso2022jp_kddi(@utf8_softbank_only) }
+
+      assert_raise(Encoding::UndefinedConversionError) { to_utf8_kddi(@sjis_softbank_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_sjis_kddi(@sjis_softbank_only) }
+      assert_raise(Encoding::UndefinedConversionError) { to_iso2022jp_kddi(@sjis_softbank_only) }
+    end
+  end
+
+  private
+
+  def setup_instance_variable(obj)
+    obj.instance_eval do
+      @aiueo_utf8 = "\u{3042}\u{3044}\u{3046}\u{3048}\u{304A}"
+      @aiueo_sjis = to_sjis(@aiueo_utf8)
+      @aiueo_iso2022jp = to_iso2022jp(@aiueo_utf8)
+
+      @utf8 = "\u{2600}"
+
+      @utf8_docomo = utf8_docomo("\u{E63E}")
+      @sjis_docomo = sjis_docomo("\xF8\x9F")
+      @utf8_docomo_only = utf8_docomo("\u{E6B1}")
+      @sjis_docomo_only = sjis_docomo("\xF9\x55")
+
+      @utf8_kddi = utf8_kddi("\u{E488}")
+      @utf8_undoc_kddi = utf8_kddi("\u{EF60}")
+      @sjis_kddi = sjis_kddi("\xF6\x60")
+      @iso2022jp_kddi = iso2022jp_kddi("\x1B$B\x75\x41\x1B(B")
+      @stateless_iso2022jp_kddi = stateless_iso2022jp_kddi("\x92\xF5\xC1")
+      @utf8_kddi_only = utf8_kddi("\u{E5B3}")
+      @utf8_undoc_kddi_only = utf8_kddi("\u{F0D0}")
+      @sjis_kddi_only = sjis_kddi("\xF7\xD0")
+      @iso2022jp_kddi_only = iso2022jp_kddi("\x1B$B\x78\x52\x1B(B")
+      @stateless_iso2022jp_kddi_only = stateless_iso2022jp_kddi("\x92\xF8\xD2")
+
+      @utf8_softbank = utf8_softbank("\u{E04A}")
+      @sjis_softbank = sjis_softbank("\xF9\x8B")
+      @utf8_softbank_only = utf8_softbank("\u{E524}")
+      @sjis_softbank_only = sjis_softbank("\xFB\xC4")
+    end
+  end
+
+  def utf8(str)
+    str.force_encoding("UTF-8")
+  end
+
+  def to_utf8(str)
+    str.encode("UTF-8")
+  end
+
+  def to_sjis(str)
+    str.encode("Windows-31J")
+  end
+
+  def to_eucjp(str)
+    str.encode("eucJP-ms")
+  end
+
+  def to_iso2022jp(str)
+    str.encode("ISO-2022-JP")
+  end
+
+  def utf8_docomo(str)
+    str.force_encoding("UTF8-DoCoMo")
+  end
+
+  def to_utf8_docomo(str)
+    str.encode("UTF8-DoCoMo")
+  end
+
+  def utf8_kddi(str)
+    str.force_encoding("UTF8-KDDI")
+  end
+
+  def to_utf8_kddi(str)
+    str.encode("UTF8-KDDI")
+  end
+
+  def utf8_softbank(str)
+    str.force_encoding("UTF8-SoftBank")
+  end
+
+  def to_utf8_softbank(str)
+    str.encode("UTF8-SoftBank")
+  end
+
+  def sjis_docomo(str)
+    str.force_encoding("SJIS-DoCoMo")
+  end
+
+  def to_sjis_docomo(str)
+    str.encode("SJIS-DoCoMo")
+  end
+
+  def sjis_kddi(str)
+    str.force_encoding("SJIS-KDDI")
+  end
+
+  def to_sjis_kddi(str)
+    str.encode("SJIS-KDDI")
+  end
+
+  def sjis_softbank(str)
+    str.force_encoding("SJIS-SoftBank")
+  end
+
+  def to_sjis_softbank(str)
+    str.encode("SJIS-SoftBank")
+  end
+
+  def iso2022jp_kddi(str)
+    str.force_encoding("ISO-2022-JP-KDDI")
+  end
+
+  def to_iso2022jp_kddi(str)
+    str.encode("ISO-2022-JP-KDDI")
+  end
+
+  def stateless_iso2022jp_kddi(str)
+    str.force_encoding("stateless-ISO-2022-JP-KDDI")
+  end
+
+  def to_stateless_iso2022jp_kddi(str)
+    str.encode("stateless-ISO-2022-JP-KDDI")
+  end
+
+end
diff --git a/test/ruby/enc/test_emoji_breaks.rb b/test/ruby/enc/test_emoji_breaks.rb
new file mode 100644
index 0000000000..0873e681c3
--- /dev/null
+++ b/test/ruby/enc/test_emoji_breaks.rb
@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+# Copyright © 2018 Martin J. Dürst (duerst@it.aoyama.ac.jp)
+
+require "test/unit"
+
+class TestEmojiBreaks < Test::Unit::TestCase
+  class BreakTest
+    attr_reader :string, :comment, :filename, :line_number, :type, :shortname
+
+    def initialize(filename, line_number, data, comment='')
+      @filename = filename
+      @line_number = line_number
+      @comment = comment.gsub(/\s+/, ' ').strip
+      if filename=='emoji-test' or filename=='emoji-variation-sequences'
+        codes, @type = data.split(/\s*;\s*/)
+        @shortname = ''
+      else
+        codes, @type, @shortname = data.split(/\s*;\s*/)
+      end
+      @type = @type.gsub(/\s+/, ' ').strip
+      @shortname = @shortname.gsub(/\s+/, ' ').strip
+      @string = codes.split(/\s+/)
+                     .map do |ch|
+                            c = ch.to_i(16)
+                            # surrogate check is disabled here (the raise below is commented out)
+                            # raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
+                            c.chr('UTF-8')
+                          end.join
+    end
+  end
+
+  class BreakFile
+    attr_reader :basename, :fullname, :version
+    FILES = []
+
+    def initialize(basename, path, version)
+      @basename = basename
+      @fullname = "#{path}/#{basename}.txt" # File.expand_path(path + version, __dir__)
+      @version  = version
+      FILES << self
+    end
+
+    def self.files
+      FILES
+    end
+  end
+
+  UNICODE_VERSION   = RbConfig::CONFIG['UNICODE_VERSION']
+  UNICODE_DATA_PATH = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}/ucd/emoji", __dir__)
+  EMOJI_VERSION     = RbConfig::CONFIG['UNICODE_EMOJI_VERSION']
+  EMOJI_DATA_PATH   = File.expand_path("../../../enc/unicode/data/emoji/#{EMOJI_VERSION}", __dir__)
+
+  EMOJI_DATA_FILES  = %w[emoji-sequences emoji-test emoji-zwj-sequences].map do |basename|
+    BreakFile.new(basename, EMOJI_DATA_PATH, EMOJI_VERSION)
+  end
+  UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, EMOJI_VERSION)
+  EMOJI_DATA_FILES << UNICODE_DATA_FILE
+
+  def self.data_files_available?
+    EMOJI_DATA_FILES.all? do |f|
+      File.exist?(f.fullname)
+    end
+  end
+
+  def test_data_files_available
+    assert_equal 4, EMOJI_DATA_FILES.size # debugging test
+    unless TestEmojiBreaks.data_files_available?
+      omit "Emoji data files not available in #{EMOJI_DATA_PATH}."
+    end
+  end
+
+  if data_files_available?
+    def read_data
+      tests = []
+      EMOJI_DATA_FILES.each do |file|
+        version_mismatch = true
+        file_tests = []
+        File.foreach(file.fullname, encoding: Encoding::UTF_8) do |line|
+          line.chomp!
+          if $.==1
+            if line=="# #{file.basename}-#{file.version}.txt"
+              version_mismatch = false
+            elsif line!="# #{file.basename}.txt"
+              raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt"
+            end
+          end
+          version_mismatch = false  if line =~ /^# Version: #{file.version}/                 # 13.0 and older
+          version_mismatch = false  if line =~ /^# Used with Emoji Version #{EMOJI_VERSION}/ # 14.0 and newer
+          next  if line.match?(/\A(#|\z)/)
+          if line =~ /^(\h{4,6})\.\.(\h{4,6}) *(;.+)/  # deal with Unicode ranges in emoji-sequences.txt (Bug #18028)
+            range_start = $1.to_i(16)
+            range_end   = $2.to_i(16)
+            rest        = $3
+            (range_start..range_end).each do |code_point|
+              file_tests << BreakTest.new(file.basename, $., *(code_point.to_s(16)+rest).split('#', 2))
+            end
+          else
+            file_tests << BreakTest.new(file.basename, $., *line.split('#', 2))
+          end
+        end
+        raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}"  if version_mismatch
+        tests += file_tests
+      end
+      tests
+    end
+
+    def all_tests
+      @@tests ||= read_data
+    rescue Errno::ENOENT
+      @@tests ||= []
+    end
+
+    def test_single_emoji
+      all_tests.each do |test|
+        expected = [test.string]
+        actual = test.string.each_grapheme_cluster.to_a
+        assert_equal expected, actual,
+          "file: #{test.filename}, line #{test.line_number}, " +
+          "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+      end
+    end
+
+    def test_embedded_emoji
+      all_tests.each do |test|
+        expected = ["\t", test.string, "\t"]
+        actual = "\t#{test.string}\t".each_grapheme_cluster.to_a
+        assert_equal expected, actual,
+          "file: #{test.filename}, line #{test.line_number}, " +
+          "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
+      end
+    end
+
+    # test some pseudorandom combinations of emoji
+    def test_mixed_emoji
+      srand 0
+      length = all_tests.length
+      step =  503 # use a prime number
+      all_tests.each do |test1|
+        start = rand step
+        start.step(by: step, to: length-1) do |t2|
+          test2 = all_tests[t2]
+          # exclude skin tones, because they glue to previous grapheme clusters
+          next  if (0x1F3FB..0x1F3FF).include? test2.string.ord
+          expected = [test1.string, test2.string]
+          actual = (test1.string+test2.string).each_grapheme_cluster.to_a
+          assert_equal expected, actual,
+            "file1: #{test1.filename}, line1 #{test1.line_number}, " +
+            "file2: #{test2.filename}, line2 #{test2.line_number},\n" +
+            "type1: #{test1.type}, shortname1: #{test1.shortname}, comment1: #{test1.comment},\n" +
+            "type2: #{test2.type}, shortname2: #{test2.shortname}, comment2: #{test2.comment}"
+        end
+      end
+    end
+  end
+end
diff --git a/test/ruby/enc/test_euc_jp.rb b/test/ruby/enc/test_euc_jp.rb
new file mode 100644
index 0000000000..4aec69e4db
--- /dev/null
+++ b/test/ruby/enc/test_euc_jp.rb
@@ -0,0 +1,25 @@
+# vim: set fileencoding=euc-jp
+# frozen_string_literal: false
+
+require "test/unit"
+
+class TestEUC_JP < Test::Unit::TestCase
+  def test_mbc_case_fold
+    assert_match(/(��)(a)\1\2/i, "��a��A")
+    assert_match(/(��)(a)\1\2/i, "��a��A")
+  end
+
+  def test_property
+    assert_match(/��{0}\p{Hiragana}{4}/, "�Ҥ餬��")
+    assert_no_match(/��{0}\p{Hiragana}{4}/, "��������")
+    assert_no_match(/��{0}\p{Hiragana}{4}/, "��������")
+    assert_no_match(/��{0}\p{Katakana}{4}/, "�Ҥ餬��")
+    assert_match(/��{0}\p{Katakana}{4}/, "��������")
+    assert_no_match(/��{0}\p{Katakana}{4}/, "��������")
+    assert_raise(RegexpError) { Regexp.new('��{0}\p{foobarbaz}') }
+  end
+
+  def test_charboundary
+    assert_nil(/\xA2\xA2/ =~ "\xA1\xA2\xA2\xA3")
+  end
+end
diff --git a/test/ruby/enc/test_euc_kr.rb b/test/ruby/enc/test_euc_kr.rb
new file mode 100644
index 0000000000..c9de2cc4e1
--- /dev/null
+++ b/test/ruby/enc/test_euc_kr.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: false
+require "test/unit"
+
+class TestEucKr < Test::Unit::TestCase
+  def s(s)
+    s.force_encoding("euc-kr")
+  end
+
+  def test_mbc_enc_len
+    assert_equal(1, s("\xa1\xa1").size)
+  end
+
+  def test_mbc_to_code
+    assert_equal(0xa1a1, s("\xa1\xa1").ord)
+  end
+
+  def test_code_to_mbc
+    assert_equal(s("\xa1\xa1"), 0xa1a1.chr("euc-kr"))
+  end
+
+  def test_mbc_case_fold
+    r = Regexp.new(s("(\xa1\xa1)\\1"), "i")
+    assert_match(r, s("\xa1\xa1\xa1\xa1"))
+  end
+
+  def test_left_adjust_char_head
+    assert_equal(s("\xa1\xa1"), s("\xa1\xa1\xa1\xa1").chop)
+  end
+
+  def test_euro_sign
+    assert_equal("\u{20ac}", s("\xa2\xe6").encode("utf-8"))
+  end
+
+  def test_registered_mark
+    assert_equal("\u{00ae}", s("\xa2\xe7").encode("utf-8"))
+  end
+end
diff --git a/test/ruby/enc/test_euc_tw.rb b/test/ruby/enc/test_euc_tw.rb
new file mode 100644
index 0000000000..649b1b81c6
--- /dev/null
+++ b/test/ruby/enc/test_euc_tw.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: false
+require "test/unit"
+
+class TestEucTw < Test::Unit::TestCase
+  def s(s)
+    s.force_encoding("euc-tw")
+  end
+
+  def test_mbc_enc_len
+    assert_equal(1, s("\xa1\xa1").size)
+  end
+
+  def test_mbc_to_code
+    assert_equal(0xa1a1, s("\xa1\xa1").ord)
+  end
+
+  def test_code_to_mbc
+    assert_equal(s("\xa1\xa1"), 0xa1a1.chr("euc-tw"))
+  end
+
+  def test_mbc_case_fold
+    r = Regexp.new(s("(\xa1\xa1)\\1"), "i")
+    assert_match(r, s("\xa1\xa1\xa1\xa1"))
+  end
+
+  def test_left_adjust_char_head
+    assert_equal(s("\xa1\xa1"), s("\xa1\xa1\xa1\xa1").chop)
+  end
+end
diff --git a/test/ruby/enc/test_gb18030.rb b/test/ruby/enc/test_gb18030.rb
new file mode 100644
index 0000000000..76ac785951
--- /dev/null
+++ b/test/ruby/enc/test_gb18030.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: false
+require "test/unit"
+
+class TestGB18030 < Test::Unit::TestCase
+  def s(s)
+    s.force_encoding("gb18030")
+  end
+
+  def test_mbc_enc_len
+    assert_equal(1, s("\x81\x40").size)
+    assert_equal(1, s("\x81\x30\x81\x30").size)
+  end
+
+  def test_mbc_to_code
+    assert_equal(0x8140, s("\x81\x40").ord)
+  end
+
+  def test_code_to_mbc
+    assert_equal(s("\x81\x40"), 0x8140.chr("gb18030"))
+  end
+
+  def test_mbc_case_fold
+    r = Regexp.new(s("(\x81\x40)\\1"), "i")
+    assert_match(r, s("\x81\x40\x81\x40"))
+  end
+
+  def scheck(c, i)
+    assert_equal(s(c.reverse.take(c.size - i).join), s(c.reverse.join).chop)
+  end
+
+  def fcheck(c)
+    c = s(c.reverse.join)
+    assert_raise(ArgumentError, c) { c.chop }
+  end
+
+  def test_left_adjust_char_head
+    # C1: 00-2f, 3a-3f, 7f, ff
+    # C2: 40-7e, 80
+    # C4: 30-39
+    # CM: 81-fe
+    c1 = "\x2f"
+    c2 = "\x40"
+    c4 = "\x30"
+    cm = "\x81"
+
+    # S_START-c1
+    # S_START-c2-S_one_C2-0
+    # S_START-c2-S_one_C2-c1
+    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-c1
+    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-cm-S_even_CM_one_CX-c1
+    # S_START-c2-S_one_C2-cm-S_odd_CM_one_CX-cm-S_even_CM_one_CX-cm-S_odd_CM_one_CX(rec)
+    # S_START-c4-S_one_C4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-c4-S_one_C4_odd_CMC4(rec)
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-cm-S_even_CM_odd_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-c4-S_one_C4_even_CMC4-cm-S_odd_CMC4-cm-S_odd_CM_odd_CMC4-cm-S_even_CM_odd_CMC4-cm-S_odd_CM_odd_CMC4(rec)
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-cm-S_even_CM_even_CMC4-c1
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-c4-S_one_C4_odd_CMC4-cm-S_even_CMC4-cm-S_odd_CM_even_CMC4-cm-S_even_CM_even_CMC4-cm-S_odd_CM_even_CMC4(rec)
+    # S_START-c4-S_one_C4-cm-S_one_CMC4-cm-S_even_CM_one_CX(rec)
+    # S_START-cm-S_one_CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-c4-S_odd_C4CM(rec)
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-cm-S_odd_CM_even_C4CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-c4-S_even_C4CM-cm-S_one_CM_even_C4CM-cm-S_even_CM_even_C4CM-cm-S_odd_CM_even_C4CM-cm-S_even_CM_even_C4CM(rec)
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-cm-S_odd_CM_odd_C4CM-c1
+    # S_START-cm-S_one_CM-c4-S_odd_C4CM-cm-S_one_CM_odd_C4CM-cm-S_even_CM_odd_C4CM-cm-S_odd_CM_odd_C4CM-cm-S_even_CM_odd_C4CM(rec)
+    # S_START-cm-S_one_CM-cm-S_odd_CM_one_CX(rec)
+
+    scheck([c1], 1)
+    scheck([c2], 1)
+    scheck([c2, c1], 1)
+    scheck([c2, cm, c1], 2)
+    scheck([c2, cm, cm, c1], 1)
+    scheck([c2, cm, cm, cm], 2)
+    scheck([c4], 1)
+    scheck([c4, c1], 1)
+    scheck([c4, cm], 2)
+    scheck([c4, cm, c1], 2)
+    scheck([c4, cm, c4, c1], 2)
+    scheck([c4, cm, c4, cm], 4)
+    scheck([c4, cm, c4, cm, c1], 4)
+    scheck([c4, cm, c4, cm, c4], 4)
+    scheck([c4, cm, c4, cm, c4, c1], 4)
+    scheck([c4, cm, c4, cm, c4, cm], 2)
+    scheck([c4, cm, c4, cm, c4, cm, c1], 2)
+    scheck([c4, cm, c4, cm, c4, cm, c4], 2)
+    scheck([c4, cm, c4, cm, c4, cm, cm, c1], 4)
+    scheck([c4, cm, c4, cm, c4, cm, cm, cm], 2)
+    scheck([c4, cm, c4, cm, c4, cm, cm, cm, c1], 2)
+    scheck([c4, cm, c4, cm, c4, cm, cm, cm, cm], 4)
+    scheck([c4, cm, c4, cm, cm, c1], 2)
+    scheck([c4, cm, c4, cm, cm, cm], 4)
+    scheck([c4, cm, c4, cm, cm, cm, c1], 4)
+    scheck([c4, cm, c4, cm, cm, cm, cm], 2)
+    scheck([c4, cm, cm], 1)
+    scheck([cm], 1)
+    scheck([cm, c1], 1)
+    scheck([cm, c4, c1], 1)
+    scheck([cm, c4, cm], 3)
+    scheck([cm, c4, cm, c1], 3)
+    scheck([cm, c4, cm, c4], 3)
+    scheck([cm, c4, cm, c4, c1], 3)
+    scheck([cm, c4, cm, c4, cm], 1)
+    scheck([cm, c4, cm, c4, cm, c1], 1)
+    scheck([cm, c4, cm, c4, cm, c4], 1)
+    scheck([cm, c4, cm, c4, cm, cm, c1], 3)
+    scheck([cm, c4, cm, c4, cm, cm, cm], 1)
+    scheck([cm, c4, cm, c4, cm, cm, cm, c1], 1)
+    scheck([cm, c4, cm, c4, cm, cm, cm, cm], 3)
+    scheck([cm, c4, cm, cm, c1], 1)
+    scheck([cm, c4, cm, cm, cm], 3)
+    scheck([cm, c4, cm, cm, cm, c1], 3)
+    scheck([cm, c4, cm, cm, cm, cm], 1)
+    scheck([cm, cm], 2)
+  end
+end
diff --git a/test/ruby/enc/test_gbk.rb b/test/ruby/enc/test_gbk.rb
new file mode 100644
index 0000000000..2e541b5821
--- /dev/null
+++ b/test/ruby/enc/test_gbk.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: false
+require "test/unit"
+
+class TestGBK < Test::Unit::TestCase # test names look like the encoding callbacks they target (presumably Onigmo's)
+  def s(s) # relabels the literal's bytes as GBK in place; nothing is transcoded
+    s.force_encoding("gbk")
+  end
+
+  def test_mbc_enc_len # the byte pair 0x81 0x40 must count as one character
+    assert_equal(1, s("\x81\x40").size)
+  end
+
+  def test_mbc_to_code # the character's code is expected to be 0x8140
+    assert_equal(0x8140, s("\x81\x40").ord)
+  end
+
+  def test_code_to_mbc # inverse direction: the integer code back to the two-byte string
+    assert_equal(s("\x81\x40"), 0x8140.chr("gbk"))
+  end
+
+  def test_mbc_case_fold # case-insensitive back-reference over a two-byte character
+    r = Regexp.new(s("(\x81\x40)\\1"), "i")
+    assert_match(r, s("\x81\x40\x81\x40"))
+  end
+
+  def test_left_adjust_char_head # chop must remove one whole character, not one byte
+    assert_equal(s("\x81\x40"), s("\x81\x40\x81\x40").chop)
+  end
+end
diff --git a/test/ruby/enc/test_grapheme_breaks.rb b/test/ruby/enc/test_grapheme_breaks.rb
new file mode 100644
index 0000000000..7e6d722d40
--- /dev/null
+++ b/test/ruby/enc/test_grapheme_breaks.rb
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+# Copyright © 2018 Martin J. Dürst (duerst@it.aoyama.ac.jp)
+
+require "test/unit"
+
+class TestGraphemeBreaksFromFile < Test::Unit::TestCase
+  class BreakTest
+    attr_reader :clusters, :string, :comment, :line_number
+
+    def initialize(line_number, data, comment)
+      @line_number = line_number
+      @comment = comment
+      @clusters = data.sub(/\A\s*÷\s*/, '')
+                      .sub(/\s*÷\s*\z/, '')
+                      .split(/\s*÷\s*/)
+                      .map do |cl|
+                        cl.split(/\s*×\s*/)
+                          .map do |ch|
+                            c = ch.to_i(16)
+                             # eliminate cases with surrogates
+                            raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
+                            c.chr('UTF-8')
+                          end.join
+                      end
+      @string = @clusters.join
+    end
+  end
+
+  UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION']
+  path = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}", __dir__)
+  UNICODE_DATA_PATH = File.directory?("#{path}/ucd/auxiliary") ? "#{path}/ucd/auxiliary" : path
+  GRAPHEME_BREAK_TEST_FILE = File.expand_path("#{UNICODE_DATA_PATH}/GraphemeBreakTest.txt", __dir__)
+
+  def self.file_available?
+    File.exist? GRAPHEME_BREAK_TEST_FILE
+  end
+
+  def test_data_files_available
+    unless TestGraphemeBreaksFromFile.file_available?
+      omit "Unicode data file GraphemeBreakTest not available in #{UNICODE_DATA_PATH}."
+    end
+  end
+
+  if file_available?
+    def read_data # parses GRAPHEME_BREAK_TEST_FILE into an array of BreakTest records
+      tests = []
+      File.foreach(GRAPHEME_BREAK_TEST_FILE, encoding: Encoding::UTF_8) do |line|
+        if $. == 1 and not line.start_with?("# GraphemeBreakTest-#{UNICODE_VERSION}.txt") # $. is the number of the line just read
+          raise "File Version Mismatch"
+        end
+        next if /\A#/.match? line # whole-line comment
+        tests << BreakTest.new($., *line.chomp.split('#')) rescue 'whatever' # any StandardError drops the record; BreakTest raises ArgumentError on surrogates
+      end
+      tests
+    end
+
+    def all_tests # memoizes the parsed records in a class variable so the file is read once
+      @@tests ||= read_data
+    rescue Errno::ENOENT # NOTE(review): presumably covers the file disappearing after file_available? was checked
+      @@tests ||= [] # fall back to an empty list so the callers' each loops do nothing
+    end
+
+    def test_each_grapheme_cluster
+      all_tests.each do |test|
+        expected = test.clusters
+        actual = test.string.each_grapheme_cluster.to_a
+        assert_equal expected, actual,
+          "line #{test.line_number}, expected '#{expected}', " +
+          "but got '#{actual}', comment: #{test.comment}"
+      end
+    end
+
+    def test_backslash_X
+      all_tests.each do |test|
+        clusters = test.clusters.dup
+        string = test.string.dup
+        removals = 0
+        while string.sub!(/\A\X/, '')
+          removals += 1
+          clusters.shift
+          expected = clusters.join
+          assert_equal expected, string,
+            "line #{test.line_number}, removals: #{removals}, expected '#{expected}', " +
+            "but got '#{string}', comment: #{test.comment}"
+        end
+        assert_equal expected, string,
+          "line #{test.line_number}, after last removal, expected '#{expected}', " +
+          "but got '#{string}', comment: #{test.comment}"
+      end
+    end
+  end
+end
diff --git a/test/ruby/enc/test_iso_8859.rb b/test/ruby/enc/test_iso_8859.rb
new file mode 100644
index 0000000000..ed663be243
--- /dev/null
+++ b/test/ruby/enc/test_iso_8859.rb
@@ -0,0 +1,166 @@
+# frozen_string_literal: false
+require 'test/unit'
+
+class TestISO8859 < Test::Unit::TestCase
+  ASSERTS = %q(
+    assert_match(/^(\xdf)\1$/i, "\xdf\xdf")
+    assert_match(/^(\xdf)\1$/i, "ssss")
+    # assert_match(/^(\xdf)\1$/i, "\xdfss") # this must be bug...
+    assert_match(/^[\xdfz]+$/i, "sszzsszz")
+    assert_match(/^SS$/i, "\xdf")
+    assert_match(/^Ss$/i, "\xdf")
+    ((0xc0..0xde).to_a - [0xd7]).each do |c|
+      c1 = c.chr("ENCODING")
+      c2 = (c + 0x20).chr("ENCODING")
+      assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+      assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+      assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+      assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+    end
+    assert_match(/^\xff$/i, "\xff")
+  )
+
+  def test_iso_8859_1
+    eval("# encoding: iso8859-1\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-1"))
+  end
+
+  def test_iso_8859_2
+    eval("# encoding: iso8859-2\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-2"))
+  end
+
+  def test_iso_8859_3
+    # todo: decide on behavior, test, and fix implementation re. İ and ı (0xA9/0xB9)
+    # treating them as case equivalents is definitely an error
+    eval(%q(# encoding: iso8859-3
+      assert_match(/^(\xdf)\1$/i, "\xdf\xdf")
+      assert_match(/^(\xdf)\1$/i, "ssss")
+      assert_match(/^[\xdfz]+$/i, "sszzsszz")
+      assert_match(/^SS$/i, "\xdf")
+      assert_match(/^Ss$/i, "\xdf")
+      [0xa1, 0xa6, *(0xaa..0xac), 0xaf].each do |c|
+        c1 = c.chr("iso8859-3")
+        c2 = (c + 0x10).chr("iso8859-3")
+        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+      end
+      ([*(0xc0..0xde)] - [0xc3, 0xd0, 0xd7]).each do |c|
+        c1 = c.chr("iso8859-3")
+        c2 = (c + 0x20).chr("iso8859-3")
+        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+      end
+    ))
+  end
+
+  def test_iso_8859_4
+    eval("# encoding: iso8859-4\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-4"))
+  end
+
+  def test_iso_8859_5
+    eval(%q(# encoding: iso8859-5
+      (0xb0..0xcf).each do |c|
+        c1 = c.chr("iso8859-5")
+        c2 = (c + 0x20).chr("iso8859-5")
+        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+      end
+      ((0xa1..0xaf).to_a - [0xad]).each do |c|
+        c1 = c.chr("iso8859-5")
+        c2 = (c + 0x50).chr("iso8859-5")
+        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+      end
+    ))
+  end
+
+  def test_iso_8859_6
+    eval(%q(# encoding: iso8859-6
+      [0xa4, 0xac, 0xbb, 0xbf, *(0xc1..0xda), *(0xe0..0xf2)].each do |c|
+        c1 = c.chr("iso8859-6")
+        assert_match(/^(#{ c1 })\1$/i, c1 * 2)
+      end
+    ))
+  end
+
+  def test_iso_8859_7
+    eval(%q(# encoding: iso8859-7
+      ((0xa0..0xfe).to_a - [0xae, 0xd2]).each do |c|
+        c1 = c.chr("iso8859-7")
+        assert_match(/^(#{ c1 })\1$/i, c1 * 2)
+      end
+      ((0xc1..0xd9).to_a - [0xd2]).each do |c|
+        c1 = c.chr("iso8859-7")
+        c2 = (c + 0x20).chr("iso8859-7")
+        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+      end
+    ))
+  end
+
+  def test_iso_8859_8
+    eval(%q(# encoding: iso8859-8
+      [0xa0, *(0xa2..0xbe), *(0xdf..0xfa), 0xfc, 0xfd].each do |c|
+        c1 = c.chr("iso8859-8")
+        assert_match(/^(#{ c1 })\1$/i, c1 * 2)
+      end
+    ))
+  end
+
+  def test_iso_8859_9
+    eval(%q(# encoding: iso8859-9
+      assert_match(/^(\xdf)\1$/i, "\xdf\xdf")
+      assert_match(/^(\xdf)\1$/i, "ssss")
+      assert_match(/^[\xdfz]+$/i, "sszzsszz")
+      assert_match(/^SS$/i, "\xdf")
+      assert_match(/^Ss$/i, "\xdf")
+      ([*(0xc0..0xde)] - [0xd7, 0xdd]).each do |c|
+        c1 = c.chr("iso8859-9")
+        c2 = (c + 0x20).chr("iso8859-9")
+        assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+        assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+        assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+        assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+      end
+    ))
+  end
+
+  def test_iso_8859_10
+    eval("# encoding: iso8859-10\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-10"))
+  end
+
+  def test_iso_8859_11
+    eval(%q(# encoding: iso8859-11
+      [*(0xa0..0xda), *(0xdf..0xfb)].each do |c|
+        c1 = c.chr("iso8859-11")
+        assert_match(/^(#{ c1 })\1$/i, c1 * 2)
+      end
+    ))
+  end
+
+  def test_iso_8859_13
+    eval("# encoding: iso8859-13\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-13"))
+  end
+
+  def test_iso_8859_14
+    eval("# encoding: iso8859-14\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-14"))
+  end
+
+  def test_iso_8859_15
+    eval("# encoding: iso8859-15\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-15"))
+  end
+
+  def test_iso_8859_16
+    eval("# encoding: iso8859-16\n" + ASSERTS.gsub(/ENCODING/m, "iso8859-16"))
+  end
+end
+
diff --git a/test/ruby/enc/test_koi8.rb b/test/ruby/enc/test_koi8.rb
new file mode 100644
index 0000000000..4a4d233e8d
--- /dev/null
+++ b/test/ruby/enc/test_koi8.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: false
+require "test/unit"
+
+class TestKOI8 < Test::Unit::TestCase
+  ASSERTS = %q(
+    (0xc0..0xdf).each do |c|
+      c1 = c.chr("ENCODING")
+      c2 = (c + 0x20).chr("ENCODING")
+      assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+      assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+      assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+      assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+    end
+  )
+
+  def test_koi8_r
+    eval("# encoding: koi8-r\n" + ASSERTS.gsub("ENCODING", "koi8-r"))
+  end
+
+  def test_koi8_u
+    eval("# encoding: koi8-u\n" + ASSERTS.gsub("ENCODING", "koi8-u"))
+  end
+end
diff --git a/test/ruby/enc/test_regex_casefold.rb b/test/ruby/enc/test_regex_casefold.rb
new file mode 100644
index 0000000000..b5d5c6e337
--- /dev/null
+++ b/test/ruby/enc/test_regex_casefold.rb
@@ -0,0 +1,120 @@
+# Copyright Kimihito Matsui (松井 仁人) and Martin J. Dürst (duerst@it.aoyama.ac.jp)
+
+require "test/unit"
+
+class TestCaseFold < Test::Unit::TestCase
+
+  UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION']
+  path = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}", __dir__)
+  UNICODE_DATA_PATH = File.directory?("#{path}/ucd") ? "#{path}/ucd" : path
+  CaseTest = Struct.new :source, :target, :kind, :line
+
+  def check_downcase_properties(expected, start, *flags)
+    assert_equal expected, start.downcase(*flags)
+    temp = start.dup
+    assert_equal expected, temp.downcase!(*flags)
+    assert_equal expected, expected.downcase(*flags)
+    temp = expected
+    assert_nil   temp.downcase!(*flags)
+  end
+
+  def read_tests
+    File.readlines("#{UNICODE_DATA_PATH}/CaseFolding.txt", encoding: Encoding::ASCII_8BIT)
+    .collect.with_index { |linedata, linenumber| [linenumber.to_i+1, linedata.chomp] }
+    .reject { |number, data| data =~ /^(#|$)/ }
+    .collect do |linenumber, linedata|
+      data, _ = linedata.split(/#\s*/)
+      code, kind, result, _ = data.split(/;\s*/)
+      CaseTest.new code.to_i(16).chr('UTF-8'),
+                   result.split(/ /).collect { |hex| hex.to_i(16) }.pack('U*'),
+                   kind, linenumber
+    end.select { |test| test.kind=='C' }
+  end
+
+  def to_codepoints(string)
+    string.codepoints.collect { |cp| cp.to_s(16).upcase.rjust(4, '0') }
+  end
+
+  def setup # loads the CaseFolding.txt records once; the class variable is reused afterwards
+    @@tests ||= read_tests
+  rescue Errno::ENOENT => e # CaseFolding.txt is missing
+    @@tests ||= [] # from now on ||= short-circuits, so later setups neither re-read nor omit
+    omit e.message
+  end
+
+  # Defines three test methods for +encoding+; each replays every entry of
+  # @@tests and skips characters the encoding cannot represent.
+  def self.generate_test_casefold(encoding)
+    define_method "test_mbc_case_fold_#{encoding}" do
+      @@tests.each do |test|
+        begin
+          source = test.source.encode encoding
+          target = test.target.encode encoding
+          assert_equal 5, "12345#{target}67890" =~ /#{source}/i,
+              "12345#{to_codepoints(target)}67890 and /#{to_codepoints(source)}/ do not match case-insensitive " +
+              "(CaseFolding.txt line #{test[:line]})"
+        rescue Encoding::UndefinedConversionError # not representable in this encoding: skip the entry
+        end
+      end
+    end
+
+    define_method "test_get_case_fold_codes_by_str_#{encoding}" do
+      @@tests.each do |test|
+        begin
+          source = test.source.encode encoding
+          target = test.target.encode encoding
+          assert_equal 5, "12345#{source}67890" =~ /#{target}/i,
+              "12345#{to_codepoints(source)}67890 and /#{to_codepoints(target)}/ do not match case-insensitive " +
+              "(CaseFolding.txt line #{test[:line]}), " +
+              "error may also be triggered by mbc_case_fold"
+        rescue Encoding::UndefinedConversionError # not representable in this encoding: skip the entry
+        end
+      end
+    end
+
+    define_method "test_apply_all_case_fold_#{encoding}" do
+      @@tests.each do |test|
+        begin
+          _source = test.source.encode encoding # encoded only so that unmappable sources are skipped
+          target = test.target.encode encoding
+          reg = '\p{Upper}'
+          regexpi = Regexp.compile reg.encode(encoding), Regexp::IGNORECASE
+          assert_equal 5, "12345#{target}67890" =~ regexpi,
+              "12345#{to_codepoints(target)}67890 and /#{reg}/i do not match " +
+              "(CaseFolding.txt line #{test[:line]})"
+        rescue Encoding::UndefinedConversionError
+          # not representable in this encoding: nothing to check for this entry
+        end
+      end
+    end
+  end
+
+  def test_downcase_fold
+    @@tests.each do |test|
+      check_downcase_properties test.target, test.source, :fold
+    end
+  end
+
+  # start with good encodings only
+  generate_test_casefold 'US-ASCII'
+  generate_test_casefold 'ISO-8859-1'
+  generate_test_casefold 'ISO-8859-2'
+  generate_test_casefold 'ISO-8859-3'
+  generate_test_casefold 'ISO-8859-4'
+  generate_test_casefold 'ISO-8859-5'
+  generate_test_casefold 'ISO-8859-6'
+  # generate_test_casefold 'ISO-8859-7'
+  generate_test_casefold 'ISO-8859-8'
+  generate_test_casefold 'ISO-8859-9'
+  generate_test_casefold 'ISO-8859-10'
+  generate_test_casefold 'ISO-8859-11'
+  generate_test_casefold 'ISO-8859-13'
+  generate_test_casefold 'ISO-8859-14'
+  generate_test_casefold 'ISO-8859-15'
+  generate_test_casefold 'ISO-8859-16'
+  generate_test_casefold 'Windows-1250'
+  # generate_test_casefold 'Windows-1251'
+  generate_test_casefold 'Windows-1252'
+  generate_test_casefold 'koi8-r'
+  generate_test_casefold 'koi8-u'
+end
diff --git a/test/ruby/enc/test_shift_jis.rb b/test/ruby/enc/test_shift_jis.rb
new file mode 100644
index 0000000000..059992d167
--- /dev/null
+++ b/test/ruby/enc/test_shift_jis.rb
@@ -0,0 +1,28 @@
+# vim: set fileencoding=shift_jis
+# frozen_string_literal: false
+
+require "test/unit"
+
+class TestShiftJIS < Test::Unit::TestCase
+  def test_mbc_case_fold
+    assert_match(/(��)(a)\1\2/i, "��a��A")
+    assert_match(/(��)(a)\1\2/i, "��a�`A")
+  end
+
+  def test_property
+    assert_match(/��{0}\p{Hiragana}{4}/, "�Ђ炪��")
+    assert_no_match(/��{0}\p{Hiragana}{4}/, "�J�^�J�i")
+    assert_no_match(/��{0}\p{Hiragana}{4}/, "��������")
+    assert_no_match(/��{0}\p{Katakana}{4}/, "�Ђ炪��")
+    assert_match(/��{0}\p{Katakana}{4}/, "�J�^�J�i")
+    assert_no_match(/��{0}\p{Katakana}{4}/, "��������")
+    assert_raise(RegexpError) { Regexp.new('��{0}\p{foobarbaz}') }
+  end
+
+  def test_code_to_mbclen
+    s = "����������"
+    s << 0x82a9
+    assert_equal("������������", s)
+    assert_raise(RangeError) { s << 0x82 }
+  end
+end
diff --git a/test/ruby/enc/test_utf16.rb b/test/ruby/enc/test_utf16.rb
new file mode 100644
index 0000000000..e08f2ea14e
--- /dev/null
+++ b/test/ruby/enc/test_utf16.rb
@@ -0,0 +1,397 @@
+# frozen_string_literal: false
+require 'test/unit'
+
+class TestUTF16 < Test::Unit::TestCase
+  def encdump(obj)
+    case obj
+    when String
+      d = obj.dump
+      if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d
+        d
+      else
+        "#{d}.force_encoding(#{obj.encoding.name.dump})"
+      end
+    when Regexp
+      "Regexp.new(#{encdump(obj.source)}, #{obj.options})"
+    else
+      raise Argument, "unexpected: #{obj.inspect}"
+    end
+  end
+
+  def enccall(recv, meth, *args)
+    desc = ''
+    if String === recv
+      desc << encdump(recv)
+    else
+      desc << recv.inspect
+    end
+    desc << '.' << meth.to_s
+    if !args.empty?
+      desc << '('
+      args.each_with_index {|a, i|
+        desc << ',' if 0 < i
+        if String === a
+          desc << encdump(a)
+        else
+          desc << a.inspect
+        end
+      }
+      desc << ')'
+    end
+    result = nil
+    assert_nothing_raised(desc) {
+      result = recv.send(meth, *args)
+    }
+    result
+  end
+
+  def assert_str_equal(expected, actual, message=nil)
+    full_message = build_message(message, <<EOT)
+#{encdump expected} expected but not equal to
+#{encdump actual}.
+EOT
+    assert_equal(expected, actual, full_message)
+  end
+
+  # tests start
+
+  def test_utf16be_valid_encoding
+    all_assertions do |a|
+      [
+        "\x00\x00",
+        "\xd7\xff",
+        "\xd8\x00\xdc\x00",
+        "\xdb\xff\xdf\xff",
+        "\xe0\x00",
+        "\xff\xff",
+      ].each {|s|
+        s.force_encoding("utf-16be")
+        a.for(s) {
+          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+      [
+        "\x00",
+        "\xd7",
+        "\xd8\x00",
+        "\xd8\x00\xd8\x00",
+        "\xdc\x00",
+        "\xdc\x00\xd8\x00",
+        "\xdc\x00\xdc\x00",
+        "\xe0",
+        "\xff",
+      ].each {|s|
+        s.force_encoding("utf-16be")
+        a.for(s) {
+          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+    end
+  end
+
+  def test_utf16le_valid_encoding
+    all_assertions do |a|
+      [
+        "\x00\x00",
+        "\xff\xd7",
+        "\x00\xd8\x00\xdc",
+        "\xff\xdb\xff\xdf",
+        "\x00\xe0",
+        "\xff\xff",
+      ].each {|s|
+        s.force_encoding("utf-16le")
+        a.for(s) {
+          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+      [
+        "\x00",
+        "\xd7",
+        "\x00\xd8",
+        "\x00\xd8\x00\xd8",
+        "\x00\xdc",
+        "\x00\xdc\x00\xd8",
+        "\x00\xdc\x00\xdc",
+        "\xe0",
+        "\xff",
+      ].each {|s|
+        s.force_encoding("utf-16le")
+        a.for(s) {
+          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+    end
+  end
+
+  def test_strftime
+    s = "aa".force_encoding("utf-16be")
+    assert_raise(ArgumentError, "Time.now.strftime(#{encdump s})") { Time.now.strftime(s) }
+  end
+
+  def test_intern
+    s = "aaaa".force_encoding("utf-16be")
+    assert_equal(s.encoding, s.intern.to_s.encoding, "#{encdump s}.intern.to_s.encoding")
+  end
+
+  def test_sym_eq
+    s = "aa".force_encoding("utf-16le")
+    assert_not_equal(:aa, s.intern, "#{encdump s}.intern != :aa")
+  end
+
+  def test_compatible
+    s1 = "aa".force_encoding("utf-16be")
+    s2 = "z".force_encoding("us-ascii")
+    assert_nil(Encoding.compatible?(s1, s2), "Encoding.compatible?(#{encdump s1}, #{encdump s2})")
+  end
+
+  def test_casecmp
+    s1 = "aa".force_encoding("utf-16be")
+    s2 = "AA"
+    assert_not_equal(0, s1.casecmp(s2), "#{encdump s1}.casecmp(#{encdump s2})")
+  end
+
+  def test_end_with
+    s1 = "ab".force_encoding("utf-16be")
+    s2 = "b".force_encoding("utf-16be")
+    assert_equal(false, s1.end_with?(s2), "#{encdump s1}.end_with?(#{encdump s2})")
+  end
+
+  def test_hex
+    assert_raise(Encoding::CompatibilityError) {
+      "ff".encode("utf-16le").hex
+    }
+    assert_raise(Encoding::CompatibilityError) {
+      "ff".encode("utf-16be").hex
+    }
+  end
+
+  def test_oct
+    assert_raise(Encoding::CompatibilityError) {
+      "77".encode("utf-16le").oct
+    }
+    assert_raise(Encoding::CompatibilityError) {
+      "77".encode("utf-16be").oct
+    }
+  end
+
+  def test_count
+    s1 = "aa".force_encoding("utf-16be")
+    s2 = "aa"
+    assert_raise(Encoding::CompatibilityError, "#{encdump s1}.count(#{encdump s2})") {
+      s1.count(s2)
+    }
+  end
+
+  def test_plus
+    s1 = "a".force_encoding("us-ascii")
+    s2 = "aa".force_encoding("utf-16be")
+    assert_raise(Encoding::CompatibilityError, "#{encdump s1} + #{encdump s2}") {
+      s1 + s2
+    }
+  end
+
+  def test_encoding_find
+    assert_raise(ArgumentError) {
+      Encoding.find("utf-8".force_encoding("utf-16be"))
+    }
+  end
+
+  def test_interpolation
+    s = "aa".force_encoding("utf-16be")
+    assert_raise(Encoding::CompatibilityError, "\"a\#{#{encdump s}}\"") {
+      "a#{s}"
+    }
+  end
+
+  def test_slice!
+    enccall("aa".force_encoding("UTF-16BE"), :slice!, -1)
+  end
+
+  def test_plus_empty1
+    s1 = ""
+    s2 = "aa".force_encoding("utf-16be")
+    assert_nothing_raised("#{encdump s1} << #{encdump s2}") {
+      s1 + s2
+    }
+  end
+
+  def test_plus_empty2
+    s1 = "aa"
+    s2 = "".force_encoding("utf-16be")
+    assert_nothing_raised("#{encdump s1} << #{encdump s2}") {
+      s1 + s2
+    }
+  end
+
+  def test_plus_nonempty
+    s1 = "aa"
+    s2 = "bb".force_encoding("utf-16be")
+    assert_raise(Encoding::CompatibilityError, "#{encdump s1} << #{encdump s2}") {
+      s1 + s2
+    }
+  end
+
+  def test_concat_empty1
+    s1 = ""
+    s2 = "aa".force_encoding("utf-16be")
+    assert_nothing_raised("#{encdump s1} << #{encdump s2}") {
+      s1 << s2
+    }
+  end
+
+  def test_concat_empty2
+    s1 = "aa"
+    s2 = "".force_encoding("utf-16be")
+    assert_nothing_raised("#{encdump s1} << #{encdump s2}") {
+      s1 << s2
+    }
+  end
+
+  def test_concat_nonempty
+    s1 = "aa"
+    s2 = "bb".force_encoding("utf-16be")
+    assert_raise(Encoding::CompatibilityError, "#{encdump s1} << #{encdump s2}") {
+      s1 << s2
+    }
+  end
+
+  def test_chomp
+    s = "\1\n".force_encoding("utf-16be")
+    assert_equal(s, s.chomp, "#{encdump s}.chomp")
+    s = "\0\n".force_encoding("utf-16be")
+    assert_equal("", s.chomp, "#{encdump s}.chomp")
+    s = "\0\r\0\n".force_encoding("utf-16be")
+    assert_equal("", s.chomp, "#{encdump s}.chomp")
+  end
+
+  def test_succ
+    s = "\xff\xff".force_encoding("utf-16be")
+    assert_predicate(s.succ, :valid_encoding?, "#{encdump s}.succ.valid_encoding?")
+
+    s = "\xdb\xff\xdf\xff".force_encoding("utf-16be")
+    assert_predicate(s.succ, :valid_encoding?, "#{encdump s}.succ.valid_encoding?")
+  end
+
+  def test_regexp_union
+    enccall(Regexp, :union, "aa".force_encoding("utf-16be"), "bb".force_encoding("utf-16be"))
+  end
+
+  def test_empty_regexp
+    s = "".force_encoding("utf-16be")
+    assert_equal(Encoding.find("utf-16be"), Regexp.new(s).encoding,
+                "Regexp.new(#{encdump s}).encoding")
+  end
+
+  def test_regexp_match
+    assert_raise(Encoding::CompatibilityError) { Regexp.new("aa".force_encoding("utf-16be")) =~ "aa" }
+  end
+
+  def test_gsub
+    s = "abcd".force_encoding("utf-16be")
+    assert_nothing_raised {
+      s.gsub(Regexp.new(".".encode("utf-16be")), "xy")
+    }
+    s = "ab\0\ncd".force_encoding("utf-16be")
+    assert_raise(Encoding::CompatibilityError) {
+      s.gsub(Regexp.new(".".encode("utf-16be")), "xy")
+    }
+  end
+
+  def test_split_awk
+    s = " ab cd ".encode("utf-16be")
+    r = s.split(" ".encode("utf-16be"))
+    assert_equal(2, r.length)
+    assert_str_equal("ab".encode("utf-16be"), r[0])
+    assert_str_equal("cd".encode("utf-16be"), r[1])
+  end
+
+  def test_count2
+    e = "abc".count("^b")
+    assert_equal(e, "abc".encode("utf-16be").count("^b".encode("utf-16be")))
+    assert_equal(e, "abc".encode("utf-16le").count("^b".encode("utf-16le")))
+  end
+
+  def test_header
+    assert_raise(ArgumentError) { eval("# encoding:utf-16le\nfoo") }
+    assert_raise(ArgumentError) { eval("# encoding:utf-16be\nfoo") }
+  end
+
+
+  def test_is_mbc_newline
+    sl = "f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n\0".force_encoding("utf-16le")
+    sb = "\0f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n".force_encoding("utf-16be")
+    al = sl.lines.to_a
+    ab = sb.lines.to_a
+    assert_equal("f\0o\0o\0\n\0".force_encoding("utf-16le"), al.shift)
+    assert_equal("b\0a\0r\0\n\0".force_encoding("utf-16le"), al.shift)
+    assert_equal("b\0a\0z\0\n\0".force_encoding("utf-16le"), al.shift)
+    assert_equal("\0f\0o\0o\0\n".force_encoding("utf-16be"), ab.shift)
+    assert_equal("\0b\0a\0r\0\n".force_encoding("utf-16be"), ab.shift)
+    assert_equal("\0b\0a\0z\0\n".force_encoding("utf-16be"), ab.shift)
+
+    sl = "f\0o\0o\0\n\0".force_encoding("utf-16le")
+    sb = "\0f\0o\0o\0\n".force_encoding("utf-16be")
+    sl2 = "f\0o\0o\0".force_encoding("utf-16le")
+    sb2 = "\0f\0o\0o".force_encoding("utf-16be")
+    assert_equal(sl2, sl.chomp)
+    assert_equal(sl2, sl.chomp.chomp)
+    assert_equal(sb2, sb.chomp)
+    assert_equal(sb2, sb.chomp.chomp)
+
+    sl = "f\0o\0o\0\n".force_encoding("utf-16le")
+    sb = "\0f\0o\0o\n".force_encoding("utf-16be")
+    assert_equal(sl, sl.chomp)
+    assert_equal(sb, sb.chomp)
+  end
+
+  def test_code_to_mbc
+    assert_equal("a\0".force_encoding("utf-16le"), "a".ord.chr("utf-16le"))
+    assert_equal("\0a".force_encoding("utf-16be"), "a".ord.chr("utf-16be"))
+  end
+
+  def utf8_to_utf16(s, e)
+    s.chars.map {|c| c.ord.chr(e) }.join
+  end
+
+  def test_mbc_case_fold
+    rl = Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", "utf-16le"), "i")
+    rb = Regexp.new(utf8_to_utf16("^(\u3042)(a)\\1\\2$", "utf-16be"), "i")
+    assert_equal(Encoding.find("utf-16le"), rl.encoding)
+    assert_equal(Encoding.find("utf-16be"), rb.encoding)
+    assert_match(rl, utf8_to_utf16("\u3042a\u3042a", "utf-16le"))
+    assert_match(rb, utf8_to_utf16("\u3042a\u3042a", "utf-16be"))
+  end
+
+  def test_surrogate_pair
+    sl = "\x42\xd8\xb7\xdf".force_encoding("utf-16le")
+    sb = "\xd8\x42\xdf\xb7".force_encoding("utf-16be")
+
+    assert_equal(1, sl.size)
+    assert_equal(1, sb.size)
+    assert_equal(0x20bb7, sl.ord)
+    assert_equal(0x20bb7, sb.ord)
+    assert_equal(sl, 0x20bb7.chr("utf-16le"))
+    assert_equal(sb, 0x20bb7.chr("utf-16be"))
+    assert_equal("", sl.chop)
+    assert_equal("", sb.chop)
+  end
+
+  def test_regexp_escape
+    s = "\0*".force_encoding("UTF-16BE")
+    r = Regexp.new(Regexp.escape(s))
+    assert_match(r, s, "#{encdump(r)} =~ #{encdump(s)}")
+  end
+
+  def test_casecmp2
+    assert_equal(0, "\0A".force_encoding("UTF-16BE").casecmp("\0a".force_encoding("UTF-16BE")))
+    assert_not_equal(0, "\0A".force_encoding("UTF-16LE").casecmp("\0a".force_encoding("UTF-16LE")))
+    assert_not_equal(0, "A\0".force_encoding("UTF-16BE").casecmp("a\0".force_encoding("UTF-16BE")))
+    assert_equal(0, "A\0".force_encoding("UTF-16LE").casecmp("a\0".force_encoding("UTF-16LE")))
+
+    ary = ["01".force_encoding("UTF-16LE"),
+           "10".force_encoding("UTF-16LE")]
+    e = ary.sort {|x,y| x <=> y }
+    a = ary.sort {|x,y| x.casecmp(y) }
+    assert_equal(e, a)
+  end
+end
diff --git a/test/ruby/enc/test_utf32.rb b/test/ruby/enc/test_utf32.rb
new file mode 100644
index 0000000000..76379abca0
--- /dev/null
+++ b/test/ruby/enc/test_utf32.rb
@@ -0,0 +1,162 @@
+# frozen_string_literal: false
+require 'test/unit'
+
+class TestUTF32 < Test::Unit::TestCase
+  def encdump(str)
+    d = str.dump
+    if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d
+      d
+    else
+      "#{d}.force_encoding(#{str.encoding.name.dump})"
+    end
+  end
+
+  def assert_str_equal(expected, actual, message=nil)
+    full_message = build_message(message, <<EOT)
+#{encdump expected} expected but not equal to
+#{encdump actual}.
+EOT
+    assert_equal(expected, actual, full_message)
+  end
+
+  def test_substr
+    assert_str_equal(
+      "abcdefgh".force_encoding("utf-32le"),
+      "abcdefgh".force_encoding("utf-32le")[0,3])
+    assert_str_equal(
+      "abcdefgh".force_encoding("utf-32be"),
+      "abcdefgh".force_encoding("utf-32be")[0,3])
+  end
+
+  def test_mbc_len
+    al = "abcdefghijkl".force_encoding("utf-32le").each_char.to_a
+    ab = "abcdefghijkl".force_encoding("utf-32be").each_char.to_a
+    assert_equal("abcd".force_encoding("utf-32le"), al.shift)
+    assert_equal("efgh".force_encoding("utf-32le"), al.shift)
+    assert_equal("ijkl".force_encoding("utf-32le"), al.shift)
+    assert_equal("abcd".force_encoding("utf-32be"), ab.shift)
+    assert_equal("efgh".force_encoding("utf-32be"), ab.shift)
+    assert_equal("ijkl".force_encoding("utf-32be"), ab.shift)
+  end
+
+  def ascii_to_utf16le(s) # NOTE: despite the name, builds UTF-32LE: each byte x becomes [x, 0, 0, 0]
+    s.unpack("C*").map {|x| [x,0,0,0] }.flatten.pack("C*").force_encoding("utf-32le")
+  end
+
+  def ascii_to_utf16be(s) # NOTE: despite the name, builds UTF-32BE: each byte x becomes [0, 0, 0, x]
+    s.unpack("C*").map {|x| [0,0,0,x] }.flatten.pack("C*").force_encoding("utf-32be")
+  end
+
+  def test_mbc_newline
+    al = ascii_to_utf16le("foo\nbar\nbaz\n").lines.to_a
+    ab = ascii_to_utf16be("foo\nbar\nbaz\n").lines.to_a
+
+    assert_equal(ascii_to_utf16le("foo\n"), al.shift)
+    assert_equal(ascii_to_utf16le("bar\n"), al.shift)
+    assert_equal(ascii_to_utf16le("baz\n"), al.shift)
+    assert_equal(ascii_to_utf16be("foo\n"), ab.shift)
+    assert_equal(ascii_to_utf16be("bar\n"), ab.shift)
+    assert_equal(ascii_to_utf16be("baz\n"), ab.shift)
+
+    sl = "a\0".force_encoding("utf-32le")
+    sb = "a\0".force_encoding("utf-32be")
+    assert_equal(sl, sl.chomp)
+    assert_equal(sb, sb.chomp)
+  end
+
+  def test_mbc_to_code
+    sl = "a\0\0\0".force_encoding("utf-32le")
+    sb = "\0\0\0a".force_encoding("utf-32be")
+    assert_equal("a".ord, sl.ord)
+    assert_equal("a".ord, sb.ord)
+  end
+
+  def utf8_to_utf32(s, e)
+    s.chars.map {|c| c.ord.chr(e) }.join
+  end
+
+  def test_mbc_case_fold
+    rl = Regexp.new(utf8_to_utf32("^(\u3042)(a)\\1\\2$", "utf-32le"), "i")
+    rb = Regexp.new(utf8_to_utf32("^(\u3042)(a)\\1\\2$", "utf-32be"), "i")
+    assert_equal(Encoding.find("utf-32le"), rl.encoding)
+    assert_equal(Encoding.find("utf-32be"), rb.encoding)
+    assert_match(rl, utf8_to_utf32("\u3042a\u3042a", "utf-32le"))
+    assert_match(rb, utf8_to_utf32("\u3042a\u3042a", "utf-32be"))
+  end
+
+  def test_code_to_mbc
+    sl = "a\0\0\0".force_encoding("utf-32le")
+    sb = "\0\0\0a".force_encoding("utf-32be")
+    assert_equal(sl, "a".ord.chr("utf-32le"))
+    assert_equal(sb, "a".ord.chr("utf-32be"))
+  end
+
+  def test_utf32be_valid_encoding
+    all_assertions do |a|
+      [
+        "\x00\x00\x00\x00",
+        "\x00\x00\x00a",
+        "\x00\x00\x30\x40",
+        "\x00\x00\xd7\xff",
+        "\x00\x00\xe0\x00",
+        "\x00\x00\xff\xff",
+        "\x00\x10\xff\xff",
+      ].each {|s|
+        s.force_encoding("utf-32be")
+        a.for(s) {
+          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+      [
+        "a",
+        "\x00a",
+        "\x00\x00a",
+        "\x00\x00\xd8\x00",
+        "\x00\x00\xdb\xff",
+        "\x00\x00\xdc\x00",
+        "\x00\x00\xdf\xff",
+        "\x00\x11\x00\x00",
+      ].each {|s|
+        s.force_encoding("utf-32be")
+        a.for(s) {
+          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+    end
+  end
+
+  def test_utf32le_valid_encoding
+    all_assertions do |a|
+      [
+        "\x00\x00\x00\x00",
+        "a\x00\x00\x00",
+        "\x40\x30\x00\x00",
+        "\xff\xd7\x00\x00",
+        "\x00\xe0\x00\x00",
+        "\xff\xff\x00\x00",
+        "\xff\xff\x10\x00",
+      ].each {|s|
+        s.force_encoding("utf-32le")
+        a.for(s) {
+          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+      [
+        "a",
+        "a\x00",
+        "a\x00\x00",
+        "\x00\xd8\x00\x00",
+        "\xff\xdb\x00\x00",
+        "\x00\xdc\x00\x00",
+        "\xff\xdf\x00\x00",
+        "\x00\x00\x11\x00",
+      ].each {|s|
+        s.force_encoding("utf-32le")
+        a.for(s) {
+          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+    end
+  end
+end
+
diff --git a/test/ruby/enc/test_windows_1251.rb b/test/ruby/enc/test_windows_1251.rb
new file mode 100644
index 0000000000..002dbaa3cc
--- /dev/null
+++ b/test/ruby/enc/test_windows_1251.rb
@@ -0,0 +1,17 @@
+# encoding:windows-1251
+# frozen_string_literal: false
+
+require "test/unit"
+
+class TestWindows1251 < Test::Unit::TestCase
+  def test_windows_1251
+    (0xc0..0xdf).each do |c|
+      c1 = c.chr("windows-1251")
+      c2 = (c + 0x20).chr("windows-1251")
+      assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+      assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+      assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+      assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+    end
+  end
+end
diff --git a/test/ruby/enc/test_windows_1252.rb b/test/ruby/enc/test_windows_1252.rb
new file mode 100644
index 0000000000..f264cba759
--- /dev/null
+++ b/test/ruby/enc/test_windows_1252.rb
@@ -0,0 +1,26 @@
+# encoding:windows-1252
+# frozen_string_literal: false
+
+require "test/unit"
+
+class TestWindows1252 < Test::Unit::TestCase
+  def test_stset
+    assert_match(/^(\xdf)\1$/i, "\xdf\xdf")
+    assert_match(/^(\xdf)\1$/i, "ssss")
+    # assert_match(/^(\xdf)\1$/i, "\xdfss") # this must be bug...
+    assert_match(/^[\xdfz]+$/i, "sszzsszz")
+    assert_match(/^SS$/i, "\xdf")
+    assert_match(/^Ss$/i, "\xdf")
+  end
+
+  def test_windows_1252
+    [0x8a, 0x8c, 0x8e, *0xc0..0xd6, *0xd8..0xde, 0x9f].zip([0x9a, 0x9c, 0x9e, *0xe0..0xf6, *0xf8..0xfe, 0xff]).each do |c1, c2|
+      c1 = c1.chr("windows-1252")
+      c2 = c2.chr("windows-1252")
+      assert_match(/^(#{ c1 })\1$/i, c2 + c1)
+      assert_match(/^(#{ c2 })\1$/i, c1 + c2)
+      assert_match(/^[#{ c1 }]+$/i, c2 + c1)
+      assert_match(/^[#{ c2 }]+$/i, c1 + c2)
+    end
+  end
+end