summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2012-05-25 01:39:30 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2012-05-25 01:39:30 +0000
commit cb1a793510a8ae7acfb92d46cecd37d100903f9c (patch)
tree bd0bd6cc21625daa291168f14ac31ed9bf952cdb
parent 046883d2ca8c1b2074b261947ea708e6695f667e (diff)
merge revision(s) 35766:
* io.c (io_strip_bom): check EOF. [Bug #6487][ruby-core:45203]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_3@35782 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 4
-rw-r--r-- io.c 68
-rw-r--r-- test/ruby/test_file.rb 51
-rw-r--r-- version.h 2
4 files changed, 89 insertions, 36 deletions
diff --git a/ChangeLog b/ChangeLog
index 7734335dac..bde494c680 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Fri May 25 10:38:06 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * io.c (io_strip_bom): check EOF. [Bug #6487][ruby-core:45203]
+
Fri May 25 10:36:38 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
* parse.y (f_arglist): should reset lexical states after empty
diff --git a/io.c b/io.c
index 916281dd49..1aa58d5be7 100644
--- a/io.c
+++ b/io.c
@@ -4900,65 +4900,63 @@ static void io_encoding_set(rb_io_t *, VALUE, VALUE, VALUE);
static int
io_strip_bom(VALUE io)
{
- int b1, b2, b3, b4;
- switch (b1 = FIX2INT(rb_io_getbyte(io))) {
- case 0xEF:
- b2 = FIX2INT(rb_io_getbyte(io));
- if (b2 == 0xBB) {
- b3 = FIX2INT(rb_io_getbyte(io));
- if (b3 == 0xBF) {
+ VALUE b1, b2, b3, b4;
+
+ if (NIL_P(b1 = rb_io_getbyte(io))) return 0;
+ switch (b1) {
+ case INT2FIX(0xEF):
+ if (NIL_P(b2 = rb_io_getbyte(io))) break;
+ if (b2 == INT2FIX(0xBB) && !NIL_P(b3 = rb_io_getbyte(io))) {
+ if (b3 == INT2FIX(0xBF)) {
return rb_utf8_encindex();
}
- rb_io_ungetbyte(io, INT2FIX(b3));
+ rb_io_ungetbyte(io, b3);
}
- rb_io_ungetbyte(io, INT2FIX(b2));
+ rb_io_ungetbyte(io, b2);
break;
- case 0xFE:
- b2 = FIX2INT(rb_io_getbyte(io));
- if (b2 == 0xFF) {
+ case INT2FIX(0xFE):
+ if (NIL_P(b2 = rb_io_getbyte(io))) break;
+ if (b2 == INT2FIX(0xFF)) {
return rb_enc_find_index("UTF-16BE");
}
- rb_io_ungetbyte(io, INT2FIX(b2));
+ rb_io_ungetbyte(io, b2);
break;
- case 0xFF:
- b2 = FIX2INT(rb_io_getbyte(io));
- if (b2 == 0xFE) {
- b3 = FIX2INT(rb_io_getbyte(io));
- if (b3 == 0) {
- b4 = FIX2INT(rb_io_getbyte(io));
- if (b4 == 0) {
+ case INT2FIX(0xFF):
+ if (NIL_P(b2 = rb_io_getbyte(io))) break;
+ if (b2 == INT2FIX(0xFE)) {
+ b3 = rb_io_getbyte(io);
+ if (b3 == INT2FIX(0) && !NIL_P(b4 = rb_io_getbyte(io))) {
+ if (b4 == INT2FIX(0)) {
return rb_enc_find_index("UTF-32LE");
}
- rb_io_ungetbyte(io, INT2FIX(b4));
+ rb_io_ungetbyte(io, b4);
+ rb_io_ungetbyte(io, b3);
}
else {
- rb_io_ungetbyte(io, INT2FIX(b3));
+ rb_io_ungetbyte(io, b3);
return rb_enc_find_index("UTF-16LE");
}
- rb_io_ungetbyte(io, INT2FIX(b3));
}
- rb_io_ungetbyte(io, INT2FIX(b2));
+ rb_io_ungetbyte(io, b2);
break;
- case 0:
- b2 = FIX2INT(rb_io_getbyte(io));
- if (b2 == 0) {
- b3 = FIX2INT(rb_io_getbyte(io));
- if (b3 == 0xFE) {
- b4 = FIX2INT(rb_io_getbyte(io));
- if (b4 == 0xFF) {
+ case INT2FIX(0):
+ if (NIL_P(b2 = rb_io_getbyte(io))) break;
+ if (b2 == INT2FIX(0) && !NIL_P(b3 = rb_io_getbyte(io))) {
+ if (b3 == INT2FIX(0xFE) && !NIL_P(b4 = rb_io_getbyte(io))) {
+ if (b4 == INT2FIX(0xFF)) {
return rb_enc_find_index("UTF-32BE");
}
- rb_io_ungetbyte(io, INT2FIX(b4));
+ rb_io_ungetbyte(io, b4);
}
- rb_io_ungetbyte(io, INT2FIX(b3));
+ rb_io_ungetbyte(io, b3);
}
- rb_io_ungetbyte(io, INT2FIX(b2));
+ rb_io_ungetbyte(io, b2);
break;
}
- rb_io_ungetbyte(io, INT2FIX(b1));
+ rb_io_ungetbyte(io, b1);
return 0;
}
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index cab0f26113..26f68df958 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -37,6 +37,57 @@ class TestFile < Test::Unit::TestCase
include TestEOF::Seek
+ def test_empty_file_bom
+ bug6487 = '[ruby-core:45203]'
+ f = Tempfile.new(__method__.to_s)
+ f.close
+ assert File.exist? f.path
+ assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:utf-8')}
+ assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:bom|utf-8')}
+ f.close(true)
+ end
+
+ def assert_bom(bytes, name)
+ bug6487 = '[ruby-core:45203]'
+
+ f = Tempfile.new(name.to_s)
+ f.sync = true
+ expected = ""
+ result = nil
+ bytes[0...-1].each do |x|
+ f.write x
+ f.write ' '
+ f.pos -= 1
+ expected << x
+ assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+ assert_equal("#{expected} ".force_encoding("utf-8"), result)
+ end
+ f.write bytes[-1]
+ assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+ assert_equal '', result, "valid bom"
+ f.close(true)
+ end
+
+ def test_bom_8
+ assert_bom(["\xEF", "\xBB", "\xBF"], __method__)
+ end
+
+ def test_bom_16be
+ assert_bom(["\xFE", "\xFF"], __method__)
+ end
+
+ def test_bom_16le
+ assert_bom(["\xFF", "\xFE"], __method__)
+ end
+
+ def test_bom_32be
+ assert_bom(["\0", "\0", "\xFE", "\xFF"], __method__)
+ end
+
+ def test_bom_32le
+ assert_bom(["\xFF\xFE\0", "\0"], __method__)
+ end
+
def test_truncate_wbuf
f = Tempfile.new("test-truncate")
f.print "abc"
diff --git a/version.h b/version.h
index 747a11b4ed..ba4f689a80 100644
--- a/version.h
+++ b/version.h
@@ -1,5 +1,5 @@
#define RUBY_VERSION "1.9.3"
-#define RUBY_PATCHLEVEL 227
+#define RUBY_PATCHLEVEL 228
#define RUBY_RELEASE_DATE "2012-05-25"
#define RUBY_RELEASE_YEAR 2012