summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- transcode.c 18
2 files changed, 21 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index aadef36457..81d3916d85 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Wed Feb 4 21:59:31 2009 Tanaka Akira <akr@fsij.org>
+
+ * transcode.c (make_econv_exception): show U+XXXX form for undefined
+ conversion error from UTF-8.
+
Wed Feb 4 21:57:37 2009 Tanaka Akira <akr@fsij.org>
* string.c (rb_str_dump): use MBCLEN_CHARFOUND_P properly.
diff --git a/transcode.c b/transcode.c
index 68fed6a1a6..0320590516 100644
--- a/transcode.c
+++ b/transcode.c
@@ -2009,9 +2009,23 @@ make_econv_exception(rb_econv_t *ec)
if (ec->last_error.result == econv_undefined_conversion) {
VALUE bytes = rb_str_new((const char *)ec->last_error.error_bytes_start,
ec->last_error.error_bytes_len);
- VALUE dumped;
+ VALUE dumped = Qnil;
int idx;
- dumped = rb_str_dump(bytes);
+ if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) {
+ rb_encoding *utf8 = rb_utf8_encoding();
+ const char *start, *end;
+ int n;
+ start = (const char *)ec->last_error.error_bytes_start;
+ end = start + ec->last_error.error_bytes_len;
+ n = rb_enc_precise_mbclen(start, end, utf8);
+ if (MBCLEN_CHARFOUND_P(n) &&
+ MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) {
+ unsigned int cc = rb_enc_codepoint(start, end, utf8);
+ dumped = rb_sprintf("U+%04X", cc);
+ }
+ }
+ if (dumped == Qnil)
+ dumped = rb_str_dump(bytes);
mesg = rb_sprintf("%s from %s to %s",
StringValueCStr(dumped),
ec->last_error.source_encoding,