summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2013-05-01 14:52:52 +0000
committernagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2013-05-01 14:52:52 +0000
commit4d3df433e9c638e1824832edf8a6e87ffd2741ea (patch)
tree7b8182e6b061068e811357ef513d026bf1141aff
parent506ee0c16033dde58d53a34876c762750598dad8 (diff)
merge revision(s) 40462: [Backport #8323]
* io.c (rb_io_ext_int_to_encs, parse_mode_enc): bom-prefixed name is not a real encoding name, just a fallback. so the proper conversion should take place even if the internal encoding is equal to the bom-prefixed name, unless actual encoding is equal to the internal encoding. [ruby-core:54563] [Bug #8323] * io.c (io_set_encoding_by_bom): reset external encoding if no BOM found. [ruby-core:54569] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_0_0@40541 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog11
-rw-r--r--io.c40
-rw-r--r--test/ruby/test_io_m17n.rb24
-rw-r--r--version.h8
4 files changed, 61 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 12c0037575..dbcd45d035 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Wed May 1 23:35:18 2013 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * io.c (rb_io_ext_int_to_encs, parse_mode_enc): bom-prefixed name is
+ not a real encoding name, just a fallback. so the proper conversion
+ should take place even if the internal encoding is equal to the
+ bom-prefixed name, unless actual encoding is equal to the internal
+ encoding. [ruby-core:54563] [Bug #8323]
+
+ * io.c (io_set_encoding_by_bom): reset external encoding if no BOM
+ found. [ruby-core:54569]
+
Sat Apr 27 02:12:14 2013 KOSAKI Motohiro <kosaki.motohiro@gmail.com>
* io.c (rb_fd_fix_cloexec): use rb_update_max_fd().
diff --git a/io.c b/io.c
index 752de2cad5..ff30245baf 100644
--- a/io.c
+++ b/io.c
@@ -4835,7 +4835,7 @@ rb_io_oflags_modestr(int oflags)
* Qnil => no encoding specified (internal only)
*/
static void
-rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2)
+rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
{
int default_ext = 0;
@@ -4846,7 +4846,8 @@ rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc,
if (intern == NULL && ext != rb_ascii8bit_encoding())
/* If external is ASCII-8BIT, no default transcoding */
intern = rb_default_internal_encoding();
- if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
+ if (intern == NULL || intern == (rb_encoding *)Qnil ||
+ (!(fmode & FMODE_SETENC_BY_BOM) && (intern == ext))) {
/* No internal encoding => use external + no transcoding */
*enc = (default_ext && intern != ext) ? NULL : ext;
*enc2 = NULL;
@@ -4869,6 +4870,7 @@ parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p, int
const char *p;
char encname[ENCODING_MAXNAMELEN+1];
int idx, idx2;
+ int fmode = fmode_p ? *fmode_p : 0;
rb_encoding *ext_enc, *int_enc;
/* parse estr as "enc" or "enc2:enc" or "enc:-" */
@@ -4880,7 +4882,7 @@ parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p, int
idx = -1;
else {
if (io_encname_bom_p(estr, len)) {
- if (fmode_p) *fmode_p |= FMODE_SETENC_BY_BOM;
+ fmode |= FMODE_SETENC_BY_BOM;
estr += 4;
len -= 4;
}
@@ -4893,7 +4895,7 @@ parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p, int
else {
long len = strlen(estr);
if (io_encname_bom_p(estr, len)) {
- if (fmode_p) *fmode_p |= FMODE_SETENC_BY_BOM;
+ fmode |= FMODE_SETENC_BY_BOM;
estr += 4;
len -= 4;
memcpy(encname, estr, len);
@@ -4902,6 +4904,7 @@ parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p, int
}
idx = rb_enc_find_index(estr);
}
+ if (fmode_p) *fmode_p = fmode;
if (idx >= 0)
ext_enc = rb_enc_from_index(idx);
@@ -4921,7 +4924,7 @@ parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p, int
idx2 = rb_enc_find_index(p);
if (idx2 < 0)
unsupported_encoding(p);
- else if (idx2 == idx) {
+ else if (!(fmode & FMODE_SETENC_BY_BOM) && (idx2 == idx)) {
int_enc = (rb_encoding *)Qnil;
}
else
@@ -4929,7 +4932,7 @@ parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p, int
}
}
- rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p);
+ rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p, fmode);
}
int
@@ -4990,12 +4993,12 @@ rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2
parse_mode_enc(StringValueCStr(tmp), enc_p, enc2_p, fmode_p);
}
else {
- rb_io_ext_int_to_encs(rb_to_encoding(encoding), NULL, enc_p, enc2_p);
+ rb_io_ext_int_to_encs(rb_to_encoding(encoding), NULL, enc_p, enc2_p, 0);
}
}
else if (extenc != Qundef || intenc != Qundef) {
extracted = 1;
- rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p);
+ rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p, 0);
}
return extracted;
}
@@ -5066,7 +5069,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
vmode = *vmode_p;
/* Set to defaults */
- rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
+ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
vmode_handle:
if (NIL_P(vmode)) {
@@ -5094,7 +5097,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
rb_encoding *e;
e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL;
- rb_io_ext_int_to_encs(e, NULL, &enc, &enc2);
+ rb_io_ext_int_to_encs(e, NULL, &enc, &enc2, fmode);
}
}
@@ -5118,7 +5121,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
oflags |= O_BINARY;
#endif
if (!has_enc)
- rb_io_ext_int_to_encs(rb_ascii8bit_encoding(), NULL, &enc, &enc2);
+ rb_io_ext_int_to_encs(rb_ascii8bit_encoding(), NULL, &enc, &enc2, fmode);
}
#if DEFAULT_TEXTMODE
else if (NIL_P(vmode)) {
@@ -5341,13 +5344,16 @@ static void
io_set_encoding_by_bom(VALUE io)
{
int idx = io_strip_bom(io);
+ rb_io_t *fptr;
+ GetOpenFile(io, fptr);
if (idx) {
- rb_io_t *fptr;
- GetOpenFile(io, fptr);
io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)),
rb_io_internal_encoding(io), Qnil);
}
+ else {
+ fptr->encs.enc2 = NULL;
+ }
}
static VALUE
@@ -5357,7 +5363,7 @@ rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig
convconfig_t cc;
if (!convconfig) {
/* Set to default encodings */
- rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2);
+ rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2, fmode);
cc.ecflags = 0;
cc.ecopts = Qnil;
convconfig = &cc;
@@ -5391,7 +5397,7 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr)
/* Set to default encodings */
e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL;
- rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2);
+ rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode);
convconfig.ecflags = 0;
convconfig.ecopts = Qnil;
}
@@ -9046,7 +9052,7 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
else {
if (NIL_P(v1)) {
/* Set to default encodings */
- rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
+ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecopts = Qnil;
}
@@ -9058,7 +9064,7 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
}
else {
- rb_io_ext_int_to_encs(find_encoding(v1), NULL, &enc, &enc2);
+ rb_io_ext_int_to_encs(find_encoding(v1), NULL, &enc, &enc2, 0);
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecopts = Qnil;
}
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index becafaa201..465194b6ca 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -1996,6 +1996,7 @@ EOT
def test_strip_bom
with_tmpdir {
text = "\uFEFFa"
+ stripped = "a"
%w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.each do |name|
path = '%s-bom.txt' % name
content = text.encode(name)
@@ -2003,11 +2004,32 @@ EOT
result = File.read(path, mode: 'rb:BOM|UTF-8')
assert_equal(content[1].force_encoding("ascii-8bit"),
result.force_encoding("ascii-8bit"))
+ result = File.read(path, mode: 'rb:BOM|UTF-8:UTF-8')
+ assert_equal(Encoding::UTF_8, result.encoding)
+ assert_equal(stripped, result)
end
bug3407 = '[ruby-core:30641]'
- result = File.read('UTF-8-bom.txt', encoding: 'BOM|UTF-8')
+ path = 'UTF-8-bom.txt'
+ result = File.read(path, encoding: 'BOM|UTF-8')
assert_equal("a", result.force_encoding("ascii-8bit"), bug3407)
+
+ bug8323 = '[ruby-core:54563] [Bug #8323]'
+ expected = "a\xff".force_encoding("utf-8")
+ open(path, 'ab') {|f| f.write("\xff")}
+ result = File.read(path, encoding: 'BOM|UTF-8')
+ assert_not_predicate(result, :valid_encoding?, bug8323)
+ assert_equal(expected, result, bug8323)
+ result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
+ assert_not_predicate(result, :valid_encoding?, bug8323)
+ assert_equal(expected, result, bug8323)
+
+ path = 'ascii.txt'
+ generate_file(path, stripped)
+ result = File.read(path, encoding: 'BOM|UTF-8')
+ assert_equal(stripped, result, bug8323)
+ result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
+ assert_equal(stripped, result, bug8323)
}
end
diff --git a/version.h b/version.h
index ee8767f2de..41ff8ff3ea 100644
--- a/version.h
+++ b/version.h
@@ -1,10 +1,10 @@
#define RUBY_VERSION "2.0.0"
-#define RUBY_RELEASE_DATE "2013-04-27"
-#define RUBY_PATCHLEVEL 175
+#define RUBY_RELEASE_DATE "2013-05-01"
+#define RUBY_PATCHLEVEL 176
#define RUBY_RELEASE_YEAR 2013
-#define RUBY_RELEASE_MONTH 4
-#define RUBY_RELEASE_DAY 27
+#define RUBY_RELEASE_MONTH 5
+#define RUBY_RELEASE_DAY 1
#include "ruby/version.h"