summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--string.c15
-rw-r--r--test/ruby/test_string.rb12
3 files changed, 31 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index eeb792cf42..792c5865df 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Fri Jul 22 17:13:37 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * string.c (String#dump): Change escaping of non-ASCII characters in
+ UTF-8 strings to upper-case hexadecimal: \uXXXX (four digits, no braces)
+ up to U+FFFF, \u{XXXXX} or \u{XXXXXX} above that [Feature #12419].
+
+ * test/ruby/test_string.rb (test_dump): Add tests for above.
+
Fri Jul 22 10:35:35 2016 Kouhei Sutou <kou@cozmixng.org>
* lib/rexml/attribute.rb (REXML::Attribute#to_string): Fix wrong
diff --git a/string.c b/string.c
index ec26a589f1..5b5f69a60a 100644
--- a/string.c
+++ b/string.c
@@ -5656,12 +5656,16 @@ rb_str_dump(VALUE str)
len++;
}
else {
- if (u8 && c > 0x7F) { /* \u{NN} */
+ if (u8 && c > 0x7F) { /* \u notation */
int n = rb_enc_precise_mbclen(p-1, pend, enc);
if (MBCLEN_CHARFOUND_P(n)) {
unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
- while (cc >>= 4) len++;
- len += 5;
+ if (cc <= 0xFFFF)
+ len += 6; /* \uXXXX */
+ else if (cc <= 0xFFFFF)
+ len += 9; /* \u{XXXXX} */
+ else
+ len += 10; /* \u{XXXXXX} */
p += MBCLEN_CHARFOUND_LEN(n)-1;
break;
}
@@ -5734,7 +5738,10 @@ rb_str_dump(VALUE str)
if (MBCLEN_CHARFOUND_P(n)) {
int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
p += n;
- snprintf(q, qend-q, "u{%x}", cc);
+ if (cc <= 0xFFFF)
+ snprintf(q, qend-q, "u%04X", cc); /* \uXXXX */
+ else
+ snprintf(q, qend-q, "u{%X}", cc); /* \u{XXXXX} or \u{XXXXXX} */
q += strlen(q);
continue;
}
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 4df5f4f841..790fa21536 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -614,6 +614,18 @@ CODE
def test_dump
a= S("Test") << 1 << 2 << 3 << 9 << 13 << 10
assert_equal(S('"Test\\x01\\x02\\x03\\t\\r\\n"'), a.dump)
+ b= S("\u{7F}")
+ assert_equal(S('"\\x7F"'), b.dump)
+ b= S("\u{AB}")
+ assert_equal(S('"\\u00AB"'), b.dump)
+ b= S("\u{ABC}")
+ assert_equal(S('"\\u0ABC"'), b.dump)
+ b= S("\uABCD")
+ assert_equal(S('"\\uABCD"'), b.dump)
+ b= S("\u{ABCDE}")
+ assert_equal(S('"\\u{ABCDE}"'), b.dump)
+ b= S("\u{10ABCD}")
+ assert_equal(S('"\\u{10ABCD}"'), b.dump)
end
def test_dup