summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-07-22 08:13:38 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-07-22 08:13:38 +0000
commit: c6692d9410ab5b20ed3fa84db72d51d9a37a6179 (patch)
tree: 95e02bdf839098fed2ea6765f4f24be09ff36915 /string.c
parent: 21269d37a110f78d84e875af8efa2f511e101717 (diff)
* string.c (String#dump): Change escaping of non-ASCII characters in
  UTF-8 to use upper-case four-digit hexadecimal escapes without braces
  where possible [Feature #12419].

* test/ruby/test_string.rb (test_dump): Add tests for above.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55728 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 15
1 files changed, 11 insertions, 4 deletions
diff --git a/string.c b/string.c
index ec26a589f1..5b5f69a60a 100644
--- a/string.c
+++ b/string.c
@@ -5656,12 +5656,16 @@ rb_str_dump(VALUE str)
len++;
}
else {
- if (u8 && c > 0x7F) { /* \u{NN} */
+ if (u8 && c > 0x7F) { /* \u notation */
int n = rb_enc_precise_mbclen(p-1, pend, enc);
if (MBCLEN_CHARFOUND_P(n)) {
unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
- while (cc >>= 4) len++;
- len += 5;
+ if (cc <= 0xFFFF)
+ len += 6; /* \uXXXX */
+ else if (cc <= 0xFFFFF)
+ len += 9; /* \u{XXXXX} */
+ else
+ len += 10; /* \u{XXXXXX} */
p += MBCLEN_CHARFOUND_LEN(n)-1;
break;
}
@@ -5734,7 +5738,10 @@ rb_str_dump(VALUE str)
if (MBCLEN_CHARFOUND_P(n)) {
int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
p += n;
- snprintf(q, qend-q, "u{%x}", cc);
+ if (cc <= 0xFFFF)
+ snprintf(q, qend-q, "u%04X", cc); /* \uXXXX */
+ else
+ snprintf(q, qend-q, "u{%X}", cc); /* \u{XXXXX} or \u{XXXXXX} */
q += strlen(q);
continue;
}