From 9962aad7b0184e385b40c26c5a109bff7abbe43c Mon Sep 17 00:00:00 2001
From: naruse
Date: Sat, 10 Aug 2013 20:44:10 +0000
Subject: * file.c (rb_str_normalize_ospath): HFS Plus (Mac OS Extended) uses a
 variant of Normal Form D in which U+2000 through U+2FFF, U+F900 through
 U+FAFF, and U+2F800 through U+2FAFF are not decomposed (this avoids problems
 with round trip conversions from old Mac text encodings).
 http://developer.apple.com/library/mac/qa/qa1173/_index.html Therefore fix
 r42457 to exclude the range.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@42498 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 file.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

(limited to 'file.c')

diff --git a/file.c b/file.c
index 3923ddeec7..347b7e6d0e 100644
--- a/file.c
+++ b/file.c
@@ -245,7 +245,7 @@ rb_str_encode_ospath(VALUE path)
 
 #ifdef __APPLE__
 VALUE
-rb_str_normalize_ospath(const char *ptr, long len)
+rb_str_normalize_ospath0(const char *ptr, long len)
 {
     VALUE str;
     CFIndex buflen = 0;
@@ -267,6 +267,58 @@ rb_str_normalize_ospath(const char *ptr, long len)
     CFRelease(s);
     return str;
 }
+
+/*
+ * Build a UTF-8 string from the len bytes at ptr.  Characters in
+ * U+2000..U+2FFF, U+F900..U+FAFF and U+2F800..U+2FAFF are copied through
+ * unchanged; every other run of characters is passed through
+ * rb_str_normalize_ospath0().  An invalid byte is replaced with U+FFFD.
+ */
+VALUE
+rb_str_normalize_ospath(const char *ptr, long len)
+{
+    const char *p = ptr;
+    const char *e = ptr + len;
+    const char *p1 = p; /* start of the run not yet appended to str */
+    VALUE str = rb_str_buf_new(len);
+    rb_encoding *enc = rb_utf8_encoding();
+    rb_enc_associate(str, enc);
+
+    while (p < e) {
+        int l, c;
+        int r = rb_enc_precise_mbclen(p, e, enc);
+        if (!MBCLEN_CHARFOUND_P(r)) {
+            /* invalid byte shall not happen but */
+            if (p - p1 > 0) {
+                rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+            }
+            rb_str_cat2(str, "\xEF\xBF\xBD");
+            p += 1;
+            /* restart after the bad byte; r holds no valid length here */
+            p1 = p;
+            continue;
+        }
+        l = MBCLEN_CHARFOUND_LEN(r);
+        c = rb_enc_mbc_to_codepoint(p, e, enc);
+        if ((0x2000 <= c && c <= 0x2FFF) || (0xF900 <= c && c <= 0xFAFF) ||
+            (0x2F800 <= c && c <= 0x2FAFF)) {
+            if (p - p1 > 0) {
+                rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+            }
+            rb_str_cat(str, p, l);
+            p += l;
+            p1 = p;
+        }
+        else {
+            p += l;
+        }
+    }
+    if (p - p1 > 0) {
+        rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+    }
+
+    return str;
+}
 #endif
 
 static long
-- 
cgit v1.2.3