summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nobuyoshi Nakada <nobu@ruby-lang.org> 2019-06-08 21:35:33 +0900
committer Nobuyoshi Nakada <nobu@ruby-lang.org> 2019-06-13 18:13:05 +0900
commit e717d6faa8463c70407e6aaf116c6b6181f30be6 (patch)
tree 8a8596542d9f1f6cde179a6151c51f8ff08c399a
parent bdc8b3789ad388ff1d573369de6b085483b17098 (diff)
IO#set_encoding_by_bom
* io.c (rb_io_set_encoding_by_bom): IO#set_encoding_by_bom to set the encoding by BOM if exists. [Bug #15210]
-rw-r--r-- NEWS 7
-rw-r--r-- io.c 44
-rw-r--r-- test/ruby/test_io_m17n.rb 8
3 files changed, 56 insertions, 3 deletions
diff --git a/NEWS b/NEWS
index c069f9e0eb..4ad38c9f12 100644
--- a/NEWS
+++ b/NEWS
@@ -86,6 +86,13 @@ GC::
Details on the algorithm and caveats can be found here:
https://bugs.ruby-lang.org/issues/15626
+IO::
+
+ New method::
+
+ * Added IO#set_encoding_by_bom to check the BOM and set the external
+ encoding. [Bug #15210]
+
Integer::
Modified method::
diff --git a/io.c b/io.c
index 893cabc248..0db981e39c 100644
--- a/io.c
+++ b/io.c
@@ -6170,20 +6170,23 @@ io_strip_bom(VALUE io)
return 0;
}
-static void
+static rb_encoding * /* new return type: the encoding chosen from the BOM, or NULL */
io_set_encoding_by_bom(VALUE io)
{
int idx = io_strip_bom(io);
rb_io_t *fptr;
+ rb_encoding *extenc = NULL; /* stays NULL unless io_strip_bom() yields a non-zero index */
GetOpenFile(io, fptr);
if (idx) {
- io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)),
- rb_io_internal_encoding(io), Qnil);
+ extenc = rb_enc_from_index(idx); /* keep the rb_encoding so it can be returned to the caller */
+ io_encoding_set(fptr, rb_enc_from_encoding(extenc),
+ rb_io_internal_encoding(io), Qnil);
}
else {
fptr->encs.enc2 = NULL;
}
+ return extenc; /* NULL means the idx == 0 branch ran and io_encoding_set() was not called */
}
static VALUE
@@ -8308,6 +8311,40 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io)
/*
* call-seq:
+ * ios.set_encoding_by_bom -> encoding or nil
+ *
+ * Checks if +ios+ starts with a BOM, and then consumes it and sets
+ * the external encoding. Returns the result encoding if found, or
+ * nil. If +ios+ is not binmode or its encoding has been set
+ * already, an exception will be raised.
+ *
+ * File.write("bom.txt", "\u{FEFF}abc")
+ * ios = File.open("bom.txt", "rb")
+ * ios.set_encoding_by_bom #=> #<Encoding:UTF-8>
+ *
+ * File.write("nobom.txt", "abc")
+ * ios = File.open("nobom.txt", "rb")
+ * ios.set_encoding_by_bom #=> nil
+ */
+
+static VALUE
+rb_io_set_encoding_by_bom(VALUE io) /* C body of IO#set_encoding_by_bom; registered in Init_IO with 0 arguments */
+{
+ rb_io_t *fptr;
+
+ GetOpenFile(io, fptr);
+ if (!(fptr->mode & FMODE_BINMODE)) { /* refuse unless the FMODE_BINMODE flag is set on this IO */
+ rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode");
+ }
+ if (fptr->encs.enc2) { /* NOTE(review): only encs.enc2 is tested here, while the doc comment says an already-set encoding raises -- confirm whether encs.enc should be checked too */
+ rb_raise(rb_eArgError, "encoding conversion is set");
+ }
+ if (!io_set_encoding_by_bom(io)) return Qnil; /* helper returned NULL (no encoding was set from a BOM): answer nil */
+ return rb_enc_from_encoding(fptr->encs.enc); /* hand back fptr->encs.enc converted to a VALUE */
+}
+
+/*
+ * call-seq:
* File.new(filename, mode="r" [, opt]) -> file
* File.new(filename [, mode [, perm]] [, opt]) -> file
*
@@ -13319,6 +13356,7 @@ Init_IO(void)
rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0);
rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0);
rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1);
+ rb_define_method(rb_cIO, "set_encoding_by_bom", rb_io_set_encoding_by_bom, 0);
rb_define_method(rb_cIO, "autoclose?", rb_io_autoclose_p, 0);
rb_define_method(rb_cIO, "autoclose=", rb_io_set_autoclose, 1);
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 6fb8d8a0cf..630f2eec08 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2097,6 +2097,10 @@ EOT
assert_equal(Encoding::UTF_8, result.encoding, message)
assert_equal(stripped, result, message)
end
+
+ File.open(path, "rb") {|f|
+ assert_equal(Encoding.find(name), f.set_encoding_by_bom)
+ }
}
end
end
@@ -2139,6 +2143,10 @@ EOT
assert_equal(stripped, result, bug8323)
result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
assert_equal(stripped, result, bug8323)
+
+ File.open(path, "rb") {|f|
+ assert_nil(f.set_encoding_by_bom)
+ }
}
end