summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-09-14 04:31:25 +0000
committer: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-09-14 04:31:25 +0000
commit: 04667398c938a408e7602fc6992f594290baff6f (patch)
tree: f747512a1a03cb4b4cc063691d17fe3704317aa3
parent: fdeb8dc28dc7b9f2546e21ee36d909f16fd55aac (diff)
merge revision(s) 59763: [Backport #13874]
string.c: fix false coderange

* string.c (rb_enc_str_scrub): enc can differ from the actual encoding of the string, the cached coderange is useless then. [ruby-core:82674] [Bug #13874]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_3@59883 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 7
-rw-r--r-- string.c 18
-rw-r--r-- test/ruby/test_transcode.rb 27
-rw-r--r-- version.h 2
4 files changed, 44 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index c8588c9..84b68eb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Thu Sep 14 13:26:31 2017 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ string.c: fix false coderange
+
+ * string.c (rb_enc_str_scrub): enc can differ from the actual encoding
+ of the string, the cached coderange is useless then. [Bug #13874]
+
Thu Sep 14 13:24:51 2017 Nobuyoshi Nakada <nobu@ruby-lang.org>
parse.y: primary should not be 0
diff --git a/string.c b/string.c
index 590256b..d54ff0a 100644
--- a/string.c
+++ b/string.c
@@ -8723,6 +8723,8 @@ str_compat_and_valid(VALUE str, rb_encoding *enc)
return str;
}
+static VALUE enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr);
+
/**
* @param str the string to be scrubbed
* @param repl the replacement character
@@ -8731,13 +8733,25 @@ str_compat_and_valid(VALUE str, rb_encoding *enc)
VALUE
rb_str_scrub(VALUE str, VALUE repl)
{
- return rb_enc_str_scrub(STR_ENC_GET(str), str, repl);
+ rb_encoding *enc = STR_ENC_GET(str);
+ return enc_str_scrub(enc, str, repl, ENC_CODERANGE(str));
}
VALUE
rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
{
- int cr = ENC_CODERANGE(str);
+ int cr = ENC_CODERANGE_UNKNOWN;
+ if (enc == STR_ENC_GET(str)) {
+ /* cached coderange makes sense only when enc equals the
+ * actual encoding of str */
+ cr = ENC_CODERANGE(str);
+ }
+ return enc_str_scrub(enc, str, repl, cr);
+}
+
+static VALUE
+enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
+{
int encidx;
VALUE buf = Qnil;
const char *rep;
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 2f97d09..567fcf9 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -2094,17 +2094,19 @@ class TestTranscode < Test::Unit::TestCase
def test_valid_dummy_encoding
bug9314 = '[ruby-core:59354] [Bug #9314]'
- assert_separately(%W[- -- #{bug9314}], <<-'end;')
- bug = ARGV.shift
- result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_16)}
- assert_equal("\xFE\xFF\x00t\x00e\x00s\x00t", result.b, bug)
- result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_32)}
- assert_equal("\x00\x00\xFE\xFF\x00\x00\x00t\x00\x00\x00e\x00\x00\x00s\x00\x00\x00t", result.b, bug)
+ assert_separately(%W[- -- #{bug9314}], "#{<<~"begin;"}\n#{<<~'end;'}")
+ begin;
+ bug = ARGV.shift
+ result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_16)}
+ assert_equal("\xFE\xFF\x00t\x00e\x00s\x00t", result.b, bug)
+ result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_32)}
+ assert_equal("\x00\x00\xFE\xFF\x00\x00\x00t\x00\x00\x00e\x00\x00\x00s\x00\x00\x00t", result.b, bug)
end;
end
def test_loading_race
- assert_separately([], <<-'end;') #do
+ assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+ begin;
bug11277 = '[ruby-dev:49106] [Bug #11277]'
num = 2
th = (0...num).map do |i|
@@ -2121,6 +2123,17 @@ class TestTranscode < Test::Unit::TestCase
end;
end
+ def test_scrub_encode_with_coderange
+ bug = '[ruby-core:82674] [Bug #13874]'
+ s = "\xe5".b
+ u = Encoding::UTF_8
+ assert_equal("?", s.encode(u, u, invalid: :replace, replace: "?"),
+ "should replace invalid byte")
+ assert_predicate(s, :valid_encoding?, "any char is valid in binary")
+ assert_equal("?", s.encode(u, u, invalid: :replace, replace: "?"),
+ "#{bug} coderange should not have side effects")
+ end
+
def test_universal_newline
bug11324 = '[ruby-core:69841] [Bug #11324]'
usascii = Encoding::US_ASCII
diff --git a/version.h b/version.h
index 648814b..c8ac13c 100644
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
#define RUBY_VERSION "2.3.5"
#define RUBY_RELEASE_DATE "2017-09-14"
-#define RUBY_PATCHLEVEL 373
+#define RUBY_PATCHLEVEL 374
#define RUBY_RELEASE_YEAR 2017
#define RUBY_RELEASE_MONTH 9