summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2019-03-05 00:32:15 +0000
committernobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2019-03-05 00:32:15 +0000
commit593505ac6f802d2b5bff469425b7c76b65cc9b10 (patch)
tree92bae2daccfc2c2dd2ec9330240148b8f430ea48
parent4045cb82227e7b97342eb92b17e35f534128e90d (diff)
string.c: respect the actual encoding
* string.c (rb_enc_str_coderange): respect the actual encoding if a BOM is present, and scan for the actual code range. [ruby-core:91662] [Bug #15635] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67167 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--string.c5
-rw-r--r--test/ruby/test_m17n.rb7
2 files changed, 10 insertions, 2 deletions
diff --git a/string.c b/string.c
index 88686db76d..449fb558db 100644
--- a/string.c
+++ b/string.c
@@ -655,12 +655,13 @@ rb_enc_str_coderange(VALUE str)
if (cr == ENC_CODERANGE_UNKNOWN) {
int encidx = ENCODING_GET(str);
rb_encoding *enc = rb_enc_from_index(encidx);
- if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc)) {
+ if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc) &&
+ rb_enc_mbminlen(enc = get_actual_encoding(encidx, str)) == 1) {
cr = ENC_CODERANGE_BROKEN;
}
else {
cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str),
- get_actual_encoding(encidx, str));
+ enc);
}
ENC_CODERANGE_SET(str, cr);
}
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index c1184c551f..75daf61376 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -269,6 +269,13 @@ class TestM17N < Test::Unit::TestCase
assert_empty(encs, bug10598)
end
+ def test_utf_without_bom_valid
+ encs = [Encoding::UTF_16, Encoding::UTF_32].find_all {|enc|
+ !(+"abcd").encode!(enc).force_encoding(enc).valid_encoding?
+ }
+ assert_empty(encs)
+ end
+
def test_object_utf16_32_inspect
EnvUtil.suppress_warning do
begin