summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2019-08-18 06:53:02 +0000
committernagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2019-08-18 06:53:02 +0000
commit1ea42115afaeba2aad4bbe8567be4b938c499d31 (patch)
treef9e67d15afb00b2aff84c6aee21d2c277dba49b1
parente1d3b318fb3ac090aa72619ec649d875d05ede08 (diff)
merge revision(s) 5b1bf8dd2d08ae7371ecf025967376bb794ed651: [Backport #16099]
UTF LE is fixed at least the first 2 bytes * io.c (io_strip_bom): if the first 2 bytes are 0xFF0xFE, it should be a little-endian UTF, 16 or 32. [Bug #16099] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@67746 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--io.c7
-rw-r--r--test/ruby/test_file.rb2
-rw-r--r--test/ruby/test_io_m17n.rb10
-rw-r--r--version.h2
4 files changed, 9 insertions, 12 deletions
diff --git a/io.c b/io.c
index e581145564..94e425a277 100644
--- a/io.c
+++ b/io.c
@@ -6129,12 +6129,9 @@ io_strip_bom(VALUE io)
return ENCINDEX_UTF_32LE;
}
rb_io_ungetbyte(io, b4);
- rb_io_ungetbyte(io, b3);
- }
- else {
- rb_io_ungetbyte(io, b3);
- return ENCINDEX_UTF_16LE;
}
+ rb_io_ungetbyte(io, b3);
+ return ENCINDEX_UTF_16LE;
}
rb_io_ungetbyte(io, b2);
break;
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index 36c154d36c..3deab76e93 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -87,7 +87,7 @@ class TestFile < Test::Unit::TestCase
end
def test_bom_32le
- assert_bom(["\xFF\xFE\0", "\0"], __method__)
+ assert_bom(["\xFF", "\xFE\0\0"], __method__)
end
def test_truncate_wbuf
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 8dfa5d5500..416a24ba6b 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2082,14 +2082,14 @@ EOT
def test_strip_bom
with_tmpdir {
- text = "\uFEFFa"
- stripped = "a"
+ text = "\uFEFF\u0100a"
+ stripped = "\u0100a"
%w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.each do |name|
path = '%s-bom.txt' % name
content = text.encode(name)
generate_file(path, content)
result = File.read(path, mode: 'rb:BOM|UTF-8')
- assert_equal(content[1].force_encoding("ascii-8bit"),
+ assert_equal(content[1..-1].force_encoding("ascii-8bit"),
result.force_encoding("ascii-8bit"))
result = File.read(path, mode: 'rb:BOM|UTF-8:UTF-8')
assert_equal(Encoding::UTF_8, result.encoding)
@@ -2099,10 +2099,10 @@ EOT
bug3407 = '[ruby-core:30641]'
path = 'UTF-8-bom.txt'
result = File.read(path, encoding: 'BOM|UTF-8')
- assert_equal("a", result.force_encoding("ascii-8bit"), bug3407)
+ assert_equal(stripped.b, result.force_encoding("ascii-8bit"), bug3407)
bug8323 = '[ruby-core:54563] [Bug #8323]'
- expected = "a\xff".force_encoding("utf-8")
+ expected = (stripped.b + "\xff").force_encoding("utf-8")
open(path, 'ab') {|f| f.write("\xff")}
result = File.read(path, encoding: 'BOM|UTF-8')
assert_not_predicate(result, :valid_encoding?, bug8323)
diff --git a/version.h b/version.h
index 14a158f826..5ea21f14f4 100644
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
#define RUBY_VERSION "2.6.3"
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 96
+#define RUBY_PATCHLEVEL 97
#define RUBY_RELEASE_YEAR 2019
#define RUBY_RELEASE_MONTH 8