diff options
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | NEWS | 9 | ||||
-rw-r--r-- | object.c | 5 | ||||
-rw-r--r-- | string.c | 64 | ||||
-rw-r--r-- | test/ruby/test_m17n.rb | 11 |
5 files changed, 93 insertions, 5 deletions
@@ -1,3 +1,12 @@ +Fri Dec 11 03:44:43 2015 NARUSE, Yui <naruse@ruby-lang.org> + + * object.c (rb_inspect): dump inspected result with rb_str_escape() + instead of raising Encoding::CompatibilityError. [Feature #11801] + + * string.c (rb_str_escape): added to dump given string like + rb_str_inspect without quotes and always dump in US-ASCII + like rb_str_dump. + Thu Dec 10 14:59:59 2015 Koichi Sasada <ko1@atdot.net> * test/ruby/test_gc.rb (test_expand_heap): relax condition (1->2). @@ -152,6 +152,10 @@ with all sufficient information, see the ChangeLog file. * Array#flatten and Array#flatten! no longer try to call #to_ary method on elements beyond the given level. [Bug #10748] + * Array#inspect doesn't raise error even if its content returns + a string which is not compatible with Encoding.default_external + as inspected result. [Feature #11801] + * Enumerable * Enumerable#chunk and Enumerable#slice_before no longer takes the initial_state argument. [Feature #10958] @@ -161,6 +165,11 @@ with all sufficient information, see the ChangeLog file. * On Windows File::Stat#ino always returned 0, but now returns BY_HANDLE_FILE_INFORMATION.nFileIndexHigh/Low. [Feature #11216] +* Hash + * Hash#inspect doesn't raise error even if its content returns + a string which is not compatible with Encoding.default_external + as inspected result. [Feature #11801] + * IO * IO#close doesn't raise when the IO object is closed. [Feature #10718] * IO#each_codepoint raises an exception at incomplete character @@ -465,6 +465,7 @@ rb_any_to_s(VALUE obj) return str; } +VALUE rb_str_escape(VALUE str); /* * If the default external encoding is ASCII compatible, the encoding of * the inspected result must be compatible with it. @@ -478,11 +479,11 @@ rb_inspect(VALUE obj) rb_encoding *ext = rb_default_external_encoding(); if (!rb_enc_asciicompat(ext)) { if (!rb_enc_str_asciionly_p(str)) - rb_raise(rb_eEncCompatError, "inspected result must be ASCII only if default external encoding is ASCII incompatible"); + return rb_str_escape(str); return str; } if (rb_enc_get(str) != ext && !rb_enc_str_asciionly_p(str)) - rb_raise(rb_eEncCompatError, "inspected result must be ASCII only or use the default external encoding"); + return rb_str_escape(str); return str; } @@ -5265,6 +5265,70 @@ rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p) return l; } +VALUE +rb_str_escape(VALUE str) +{ + int encidx = ENCODING_GET(str); + rb_encoding *enc = rb_enc_from_index(encidx); + const char *p = RSTRING_PTR(str); + const char *pend = RSTRING_END(str); + const char *prev = p; + char buf[CHAR_ESC_LEN + 1]; + VALUE result = rb_str_buf_new(0); + int unicode_p = rb_enc_unicode_p(enc); + int asciicompat = rb_enc_asciicompat(enc); + + while (p < pend) { + unsigned int c, cc; + int n = rb_enc_precise_mbclen(p, pend, enc); + if (!MBCLEN_CHARFOUND_P(n)) { + if (p > prev) str_buf_cat(result, prev, p - prev); + n = rb_enc_mbminlen(enc); + if (pend < p + n) + n = (int)(pend - p); + while (n--) { + snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377); + str_buf_cat(result, buf, strlen(buf)); + prev = ++p; + } + continue; + } + n = MBCLEN_CHARFOUND_LEN(n); + c = rb_enc_mbc_to_codepoint(p, pend, enc); + p += n; + switch (c) { + case '\n': cc = 'n'; break; + case '\r': cc = 'r'; break; + case '\t': cc = 't'; break; + case '\f': cc = 'f'; break; + case '\013': cc = 'v'; break; + case '\010': cc = 'b'; break; + case '\007': cc = 'a'; break; + case 033: cc = 'e'; break; + default: cc = 0; break; + } + if (cc) { + if (p - n > prev) str_buf_cat(result, prev, p - n - prev); + buf[0] = '\\'; + buf[1] = (char)cc; + str_buf_cat(result, buf, 2); + prev = p; + } + else if (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c)) { + } + else { + if (p - n > prev) str_buf_cat(result, prev, p - n - prev); + rb_str_buf_cat_escaped_char(result, c, unicode_p); + prev = p; + } + } + if (p > prev) str_buf_cat(result, prev, p - prev); + ENCODING_CODERANGE_SET(result, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + + OBJ_INFECT_RAW(result, str); + return result; +} + /* * call-seq: * str.inspect -> string diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index ca25f8502d..2e9e65b52f 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -278,7 +278,7 @@ class TestM17N < Test::Unit::TestCase o = Object.new [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].each do |e| o.instance_eval "undef inspect;def inspect;'abc'.encode('#{e}');end" - assert_raise(Encoding::CompatibilityError) { [o].inspect } + assert_equal '[abc]', [o].inspect end ensure Encoding.default_internal = orig_int @@ -302,13 +302,18 @@ class TestM17N < Test::Unit::TestCase def o.inspect "abc".encode(Encoding.default_external) end - assert_raise(Encoding::CompatibilityError) { [o].inspect } + assert_equal '[abc]', [o].inspect Encoding.default_external = Encoding::US_ASCII def o.inspect "\u3042" end - assert_raise(Encoding::CompatibilityError) { [o].inspect } + assert_equal '[\u3042]', [o].inspect + + def o.inspect + "\x82\xa0".force_encoding(Encoding::Windows_31J) + end + assert_equal '[\x{82A0}]', [o].inspect ensure Encoding.default_internal = orig_int Encoding.default_external = orig_ext |