summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-23 06:53:55 +0000
committer matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-23 06:53:55 +0000
commit 5a8db29b59f5f0bd6272bbfd615d356e475c52f1 (patch)
tree 0bb742aeb8800bf4204528ef1640b950e3cfc6f0 /string.c
parent 472156c236c0b086bde6b26703f2f02d1be5f730 (diff)
* string.c (rb_str_dump): use \u{ff} escape for UTF-8 encoding string.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19897 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 42
1 files changed, 33 insertions, 9 deletions
diff --git a/string.c b/string.c
index fae5978325..a960970a07 100644
--- a/string.c
+++ b/string.c
@@ -4090,11 +4090,12 @@ rb_str_inspect(VALUE str)
VALUE
rb_str_dump(VALUE str)
{
- rb_encoding *enc0 = rb_enc_get(str);
+ rb_encoding *enc = rb_enc_get(str);
long len;
const char *p, *pend;
char *q, *qend;
VALUE result;
+ int u8 = (enc == rb_utf8_encoding());
len = 2; /* "" */
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
@@ -4117,14 +4118,25 @@ rb_str_dump(VALUE str)
len++;
}
else {
- len += 4; /* \xNN */
+ if (u8) { /* \u{NN} */
+ char buf[32];
+ int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
+ if (MBCLEN_CHARFOUND_P(n)) {
+ int cc = rb_enc_codepoint(p-1, pend, enc);
+ sprintf(buf, "%x", cc);
+ len += strlen(buf)+4;
+ p += n;
+ break;
+ }
+ }
+ len += 4; /* \xNN */
}
break;
}
}
- if (!rb_enc_asciicompat(enc0)) {
+ if (!rb_enc_asciicompat(enc)) {
len += 19; /* ".force_encoding('')" */
- len += strlen(enc0->name);
+ len += strlen(enc->name);
}
result = rb_str_new5(str, 0, len);
@@ -4180,19 +4192,31 @@ rb_str_dump(VALUE str)
}
else {
*q++ = '\\';
+ if (u8) {
+ int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
+ if (MBCLEN_CHARFOUND_P(n)) {
+ int cc = rb_enc_codepoint(p-1, pend, enc);
+ p += n;
+ sprintf(q, "u{%x}", cc);
+ q += strlen(q);
+ continue;
+ }
+ }
sprintf(q, "x%02X", c);
q += 3;
}
}
*q++ = '"';
- if (!rb_enc_asciicompat(enc0)) {
- sprintf(q, ".force_encoding(\"%s\")", enc0->name);
- enc0 = rb_ascii8bit_encoding();
+ *q = '\0';
+ if (!rb_enc_asciicompat(enc)) {
+ sprintf(q, ".force_encoding(\"%s\")", enc->name);
+ enc = rb_ascii8bit_encoding();
}
-
+// STR_SET_LEN(result, strlen(RSTRING_PTR(result)));
OBJ_INFECT(result, str);
/* result from dump is ASCII */
- rb_enc_associate(result, enc0);
+ rb_enc_associate(result, enc);
+ ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
return result;
}