summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-02-21 08:34:35 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-02-21 08:34:35 +0000
commit 862b86f2e4cc561861053c1485dc378c23ad794f (patch)
tree c1afb9de7f50bf72ac120991951db91add8db5c0 /string.c
parent 4423b56753be681b4ddd4541c48044ce289d122e (diff)
merge revision(s) 44604,44605,44606: [Backport #9415]

test_m17n.rb: split tests for inspect

* test/ruby/test_m17n.rb (test_utf_16_32_inspect): split tests for each encodings.

* string.c (get_actual_encoding): get actual encoding according to the BOM if exists.

* string.c (rb_str_inspect): use according encoding, instead of pseudo encodings, UTF-{16,32}. [ruby-core:59757] [Bug #8940]

* string.c (get_encoding): respect BOM on pseudo encodings. [ruby-dev:47895] [Bug #9415]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_1@45074 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 69
1 files changed, 45 insertions, 24 deletions
diff --git a/string.c b/string.c
index cc307866db..983c2a1166 100644
--- a/string.c
+++ b/string.c
@@ -121,7 +121,45 @@ VALUE rb_cSymbol;
#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
#define STR_HEAP_SIZE(str) (RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
-#define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
+#define STR_ENC_GET(str) get_encoding(str)
+
+rb_encoding *rb_enc_get_from_index(int index);
+
+static rb_encoding *
+get_actual_encoding(const int encidx, VALUE str)
+{
+ const unsigned char *q;
+
+ switch (encidx) {
+ case ENCINDEX_UTF_16:
+ if (RSTRING_LEN(str) < 2) break;
+ q = (const unsigned char *)RSTRING_PTR(str);
+ if (q[0] == 0xFE && q[1] == 0xFF) {
+ return rb_enc_get_from_index(ENCINDEX_UTF_16BE);
+ }
+ if (q[0] == 0xFF && q[1] == 0xFE) {
+ return rb_enc_get_from_index(ENCINDEX_UTF_16LE);
+ }
+ return rb_ascii8bit_encoding();
+ case ENCINDEX_UTF_32:
+ if (RSTRING_LEN(str) < 4) break;
+ q = (const unsigned char *)RSTRING_PTR(str);
+ if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
+ return rb_enc_get_from_index(ENCINDEX_UTF_32BE);
+ }
+ if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
+ return rb_enc_get_from_index(ENCINDEX_UTF_32LE);
+ }
+ return rb_ascii8bit_encoding();
+ }
+ return rb_enc_from_index(encidx);
+}
+
+static rb_encoding *
+get_encoding(VALUE str)
+{
+ return get_actual_encoding(ENCODING_GET(str), str);
+}
static int fstring_cmp(VALUE a, VALUE b);
@@ -4750,8 +4788,8 @@ rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
VALUE
rb_str_inspect(VALUE str)
{
- rb_encoding *enc = STR_ENC_GET(str);
- int encidx = rb_enc_to_index(enc);
+ int encidx = ENCODING_GET(str);
+ rb_encoding *enc = rb_enc_from_index(encidx), *actenc;
const char *p, *pend, *prev;
char buf[CHAR_ESC_LEN + 1];
VALUE result = rb_str_buf_new(0);
@@ -4766,27 +4804,10 @@ rb_str_inspect(VALUE str)
p = RSTRING_PTR(str); pend = RSTRING_END(str);
prev = p;
- if (encidx == ENCINDEX_UTF_16 && p + 2 <= pend) {
- const unsigned char *q = (const unsigned char *)p;
- if (q[0] == 0xFE && q[1] == 0xFF)
- enc = rb_enc_from_index(ENCINDEX_UTF_16BE);
- else if (q[0] == 0xFF && q[1] == 0xFE)
- enc = rb_enc_from_index(ENCINDEX_UTF_16LE);
- else {
- enc = rb_ascii8bit_encoding();
- unicode_p = 0;
- }
- }
- else if (encidx == ENCINDEX_UTF_32 && p + 4 <= pend) {
- const unsigned char *q = (const unsigned char *)p;
- if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF)
- enc = rb_enc_from_index(ENCINDEX_UTF_32BE);
- else if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF)
- enc = rb_enc_from_index(ENCINDEX_UTF_32LE);
- else {
- enc = rb_ascii8bit_encoding();
- unicode_p = 0;
- }
+ actenc = get_actual_encoding(encidx, str);
+ if (actenc != enc) {
+ enc = actenc;
+ if (unicode_p) unicode_p = rb_enc_unicode_p(enc);
}
while (p < pend) {
unsigned int c, cc;