diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2014-02-21 08:34:35 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2014-02-21 08:34:35 +0000 |
commit | 862b86f2e4cc561861053c1485dc378c23ad794f (patch) | |
tree | c1afb9de7f50bf72ac120991951db91add8db5c0 /string.c | |
parent | 4423b56753be681b4ddd4541c48044ce289d122e (diff) |
merge revision(s) 44604,44605,44606: [Backport #9415]
test_m17n.rb: split tests for inspect
* test/ruby/test_m17n.rb (test_utf_16_32_inspect): split tests for
each encoding.
* string.c (get_actual_encoding): get actual encoding according to
the BOM if it exists.
* string.c (rb_str_inspect): use the actual encoding, instead of the
pseudo encodings, UTF-{16,32}. [ruby-core:59757] [Bug #8940]
* string.c (get_encoding): respect BOM on pseudo encodings.
[ruby-dev:47895] [Bug #9415]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_1@45074 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 69 |
1 files changed, 45 insertions, 24 deletions
@@ -121,7 +121,45 @@ VALUE rb_cSymbol;
 #define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
 #define STR_HEAP_SIZE(str) (RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
 
-#define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
+#define STR_ENC_GET(str) get_encoding(str)
+
+rb_encoding *rb_enc_get_from_index(int index);
+
+static rb_encoding *
+get_actual_encoding(const int encidx, VALUE str)
+{
+    const unsigned char *q;
+
+    switch (encidx) {
+      case ENCINDEX_UTF_16:
+        if (RSTRING_LEN(str) < 2) break;
+        q = (const unsigned char *)RSTRING_PTR(str);
+        if (q[0] == 0xFE && q[1] == 0xFF) {
+            return rb_enc_get_from_index(ENCINDEX_UTF_16BE);
+        }
+        if (q[0] == 0xFF && q[1] == 0xFE) {
+            return rb_enc_get_from_index(ENCINDEX_UTF_16LE);
+        }
+        return rb_ascii8bit_encoding();
+      case ENCINDEX_UTF_32:
+        if (RSTRING_LEN(str) < 4) break;
+        q = (const unsigned char *)RSTRING_PTR(str);
+        if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
+            return rb_enc_get_from_index(ENCINDEX_UTF_32BE);
+        }
+        if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
+            return rb_enc_get_from_index(ENCINDEX_UTF_32LE);
+        }
+        return rb_ascii8bit_encoding();
+    }
+    return rb_enc_from_index(encidx);
+}
+
+static rb_encoding *
+get_encoding(VALUE str)
+{
+    return get_actual_encoding(ENCODING_GET(str), str);
+}
 
 static int fstring_cmp(VALUE a, VALUE b);
 
@@ -4750,8 +4788,8 @@ rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
 VALUE
 rb_str_inspect(VALUE str)
 {
-    rb_encoding *enc = STR_ENC_GET(str);
-    int encidx = rb_enc_to_index(enc);
+    int encidx = ENCODING_GET(str);
+    rb_encoding *enc = rb_enc_from_index(encidx), *actenc;
     const char *p, *pend, *prev;
     char buf[CHAR_ESC_LEN + 1];
     VALUE result = rb_str_buf_new(0);
@@ -4766,27 +4804,10 @@ rb_str_inspect(VALUE str)
 
     p = RSTRING_PTR(str); pend = RSTRING_END(str);
     prev = p;
-    if (encidx == ENCINDEX_UTF_16 && p + 2 <= pend) {
-        const unsigned char *q = (const unsigned char *)p;
-        if (q[0] == 0xFE && q[1] == 0xFF)
-            enc = rb_enc_from_index(ENCINDEX_UTF_16BE);
-        else if (q[0] == 0xFF && q[1] == 0xFE)
-            enc = rb_enc_from_index(ENCINDEX_UTF_16LE);
-        else {
-            enc = rb_ascii8bit_encoding();
-            unicode_p = 0;
-        }
-    }
-    else if (encidx == ENCINDEX_UTF_32 && p + 4 <= pend) {
-        const unsigned char *q = (const unsigned char *)p;
-        if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF)
-            enc = rb_enc_from_index(ENCINDEX_UTF_32BE);
-        else if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF)
-            enc = rb_enc_from_index(ENCINDEX_UTF_32LE);
-        else {
-            enc = rb_ascii8bit_encoding();
-            unicode_p = 0;
-        }
+    actenc = get_actual_encoding(encidx, str);
+    if (actenc != enc) {
+        enc = actenc;
+        if (unicode_p) unicode_p = rb_enc_unicode_p(enc);
     }
     while (p < pend) {
         unsigned int c, cc;