merge revision(s) 5b1bf8dd2d08ae7371ecf025967376bb794ed651: [Backport #16099]

Little-endian UTF is already determined by the first 2 bytes

* io.c (io_strip_bom): if the first 2 bytes are 0xFF 0xFE, the encoding must be a little-endian UTF, either UTF-16LE or UTF-32LE. [Bug #16099]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_5@67772 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-08-26 16:44:03 +0000
committer: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-08-26 16:44:03 +0000
commit: f8efd7f9e852adda9fddb5682360bde263352e02 (patch)
tree: f40db02b1bec803fd8e4ae0c44394fadacfc65d3
parent: ef626cd09c448df9193c8d5f40143317d0185fd9 (diff)
4 files changed, 9 insertions, 12 deletions
diff --git a/io.c b/io.c
index 59bbb4d5b2..178ec14b58 100644
--- a/io.c
+++ b/io.c
@@ -6025,12 +6025,9 @@ io_strip_bom(VALUE io)
 		    return ENCINDEX_UTF_32LE;
 		}
 		rb_io_ungetbyte(io, b4);
-		rb_io_ungetbyte(io, b3);
-	    }
-	    else {
-		rb_io_ungetbyte(io, b3);
-		return ENCINDEX_UTF_16LE;
 	    }
+	    rb_io_ungetbyte(io, b3);
+	    return ENCINDEX_UTF_16LE;
 	}
 	rb_io_ungetbyte(io, b2);
 	break;
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index 9a795ee02f..10bfbd9ae0 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -87,7 +87,7 @@ class TestFile < Test::Unit::TestCase
   end
 
   def test_bom_32le
-    assert_bom(["\xFF\xFE\0", "\0"], __method__)
+    assert_bom(["\xFF", "\xFE\0\0"], __method__)
   end
 
   def test_truncate_wbuf
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 8ba34845fa..9ff5307fc3 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2082,14 +2082,14 @@ EOT
 
   def test_strip_bom
     with_tmpdir {
-      text = "\uFEFFa"
-      stripped = "a"
+      text = "\uFEFF\u0100a"
+      stripped = "\u0100a"
       %w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.each do |name|
         path = '%s-bom.txt' % name
         content = text.encode(name)
         generate_file(path, content)
         result = File.read(path, mode: 'rb:BOM|UTF-8')
-        assert_equal(content[1].force_encoding("ascii-8bit"),
+        assert_equal(content[1..-1].force_encoding("ascii-8bit"),
                      result.force_encoding("ascii-8bit"))
         result = File.read(path, mode: 'rb:BOM|UTF-8:UTF-8')
         assert_equal(Encoding::UTF_8, result.encoding)
@@ -2099,10 +2099,10 @@ EOT
       bug3407 = '[ruby-core:30641]'
       path = 'UTF-8-bom.txt'
       result = File.read(path, encoding: 'BOM|UTF-8')
-      assert_equal("a", result.force_encoding("ascii-8bit"), bug3407)
+      assert_equal(stripped.b, result.force_encoding("ascii-8bit"), bug3407)
 
       bug8323 = '[ruby-core:54563] [Bug #8323]'
-      expected = "a\xff".force_encoding("utf-8")
+      expected = (stripped.b + "\xff").force_encoding("utf-8")
       open(path, 'ab') {|f| f.write("\xff")}
       result = File.read(path, encoding: 'BOM|UTF-8')
       assert_not_predicate(result, :valid_encoding?, bug8323)
diff --git a/version.h b/version.h
index beade843b4..ca386c09b4 100644
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
 #define RUBY_VERSION "2.5.6"
 #define RUBY_RELEASE_DATE "2019-08-27"
-#define RUBY_PATCHLEVEL 190
+#define RUBY_PATCHLEVEL 191
 
 #define RUBY_RELEASE_YEAR 2019
 #define RUBY_RELEASE_MONTH 8
author	usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2019-08-26 16:44:03 +0000
committer	usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2019-08-26 16:44:03 +0000
commit	f8efd7f9e852adda9fddb5682360bde263352e02 (patch)
tree	f40db02b1bec803fd8e4ae0c44394fadacfc65d3
parent	ef626cd09c448df9193c8d5f40143317d0185fd9 (diff)