summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--dir.c4
-rw-r--r--ext/-test-/string/depend1
-rw-r--r--ext/-test-/string/extconf.rb1
-rw-r--r--ext/-test-/string/normalize.c18
-rw-r--r--file.c43
-rw-r--r--internal.h5
-rw-r--r--test/-ext-/string/test_normalize.rb105
8 files changed, 183 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 3e74589bb19..131ae62709a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sun Aug 11 04:48:14 2013 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * file.c (rb_str_normalize_ospath):
+ HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which
+ U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
+ U+2FAFF are not decomposed (this avoids problems with round trip
+ conversions from old Mac text encodings).
+ http://developer.apple.com/library/mac/qa/qa1173/_index.html
+ Therefore fix r42457 to exclude those ranges from normalization.
+
Sun Aug 11 03:26:07 2013 Tanaka Akira <akr@fsij.org>
* bignum.c (bitsize): Fix a conditional expression.
diff --git a/dir.c b/dir.c
index 7e9c659f9ee..cf9aeafee4e 100644
--- a/dir.c
+++ b/dir.c
@@ -84,8 +84,6 @@ char *strchr(char*,char);
#include <sys/param.h>
#include <sys/mount.h>
-VALUE rb_str_normalize_ospath(const char *ptr, long len);
-
static inline int
is_hfs(DIR *dirp)
{
@@ -1420,7 +1418,7 @@ glob_helper(
name = dp->d_name;
namlen = NAMLEN(dp);
# if HAVE_HFS
- if (hfs_p && has_nonascii(name, namlen)) {
+ if (0&&hfs_p && has_nonascii(name, namlen)) {
if (!NIL_P(utf8str = rb_str_normalize_ospath(name, namlen))) {
RSTRING_GETMEM(utf8str, name, namlen);
}
diff --git a/ext/-test-/string/depend b/ext/-test-/string/depend
index aad1fab4270..86617ff2898 100644
--- a/ext/-test-/string/depend
+++ b/ext/-test-/string/depend
@@ -2,3 +2,4 @@ $(OBJS): $(HDRS) $(ruby_headers) \
$(hdrdir)/ruby/encoding.h \
$(hdrdir)/ruby/oniguruma.h
qsort.o: $(hdrdir)/ruby/util.h
+normalize.o: $(top_srcdir)/internal.h
diff --git a/ext/-test-/string/extconf.rb b/ext/-test-/string/extconf.rb
index 42c10b994b7..10d33cbab9c 100644
--- a/ext/-test-/string/extconf.rb
+++ b/ext/-test-/string/extconf.rb
@@ -1,3 +1,4 @@
+$INCFLAGS << " -I$(topdir) -I$(top_srcdir)"
$srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")]
inits = $srcs.map {|s| File.basename(s, ".*")}
inits.delete("init")
diff --git a/ext/-test-/string/normalize.c b/ext/-test-/string/normalize.c
new file mode 100644
index 00000000000..22bb6d7887a
--- /dev/null
+++ b/ext/-test-/string/normalize.c
@@ -0,0 +1,18 @@
+#include "ruby.h"
+#include "internal.h"
+
+#ifdef __APPLE__
+/*
+ * Test-only binding: hands the receiver's bytes to rb_str_normalize_ospath()
+ * and returns whatever string that function produces.
+ */
+static VALUE
+normalize_ospath(VALUE str)
+{
+    const char *path = RSTRING_PTR(str);
+    long pathlen = RSTRING_LEN(str);
+
+    return rb_str_normalize_ospath(path, pathlen);
+}
+#else
+#define normalize_ospath rb_f_notimplement
+#endif
+
+/*
+ * Extension init hook: defines the instance method "normalize_ospath"
+ * (taking no arguments) on klass.  When __APPLE__ is not defined,
+ * normalize_ospath is a macro for rb_f_notimplement, so that is what gets
+ * registered instead of the real implementation.
+ */
+void
+Init_normalize(VALUE klass)
+{
+ rb_define_method(klass, "normalize_ospath", normalize_ospath, 0);
+}
diff --git a/file.c b/file.c
index 3923ddeec7c..347b7e6d0e0 100644
--- a/file.c
+++ b/file.c
@@ -245,7 +245,7 @@ rb_str_encode_ospath(VALUE path)
#ifdef __APPLE__
VALUE
-rb_str_normalize_ospath(const char *ptr, long len)
+rb_str_normalize_ospath0(const char *ptr, long len)
{
VALUE str;
CFIndex buflen = 0;
@@ -267,6 +267,47 @@ rb_str_normalize_ospath(const char *ptr, long len)
CFRelease(s);
return str;
}
+
+/*
+ * Normalizes the +len+ bytes at +ptr+ (expected to be UTF-8) for use as an
+ * OS path and returns the result as a new UTF-8 associated string.
+ *
+ * The input is split into runs.  Characters in U+2000..U+2FFF,
+ * U+F900..U+FAFF and U+2F800..U+2FAFF are copied through untouched, since
+ * HFS Plus does not decompose them; every other run is passed through
+ * rb_str_normalize_ospath0().  A byte that does not start a valid character
+ * is replaced by U+FFFD.
+ */
+VALUE
+rb_str_normalize_ospath(const char *ptr, long len)
+{
+    const char *p = ptr;
+    const char *e = ptr + len;
+    const char *p1 = p;	/* start of the run not yet flushed to str */
+    VALUE str = rb_str_buf_new(len);
+    rb_encoding *enc = rb_utf8_encoding();
+    rb_enc_associate(str, enc);
+
+    while (p < e) {
+        int l, c;
+        int r = rb_enc_precise_mbclen(p, e, enc);
+        if (!MBCLEN_CHARFOUND_P(r)) {
+            /* invalid byte shall not happen but */
+            if (p - p1 > 0) {
+                rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+            }
+            rb_str_cat2(str, "\xEF\xBF\xBD");
+            p += 1;
+            /* restart the pending run after the bad byte; without this the
+             * already flushed run (and the bad byte) would be appended again,
+             * and r would be decoded below although no character was found */
+            p1 = p;
+            continue;
+        }
+        l = MBCLEN_CHARFOUND_LEN(r);
+        c = rb_enc_mbc_to_codepoint(p, e, enc);
+        if ((0x2000 <= c && c <= 0x2FFF) || (0xF900 <= c && c <= 0xFAFF) ||
+            (0x2F800 <= c && c <= 0x2FAFF)) {
+            /* excluded range: flush what precedes it, then copy verbatim */
+            if (p - p1 > 0) {
+                rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+            }
+            rb_str_cat(str, p, l);
+            p += l;
+            p1 = p;
+        }
+        else {
+            p += l;
+        }
+    }
+    /* flush the trailing run, if any */
+    if (p - p1 > 0) {
+        rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+    }
+
+    return str;
+}
#endif
static long
diff --git a/internal.h b/internal.h
index a4cf43b9391..d52b2e94422 100644
--- a/internal.h
+++ b/internal.h
@@ -513,6 +513,11 @@ VALUE rb_big_mul_karatsuba(VALUE x, VALUE y);
VALUE rb_big_mul_toom3(VALUE x, VALUE y);
VALUE rb_big_sq_fast(VALUE x);
+/* file.c */
+#ifdef __APPLE__
+VALUE rb_str_normalize_ospath(const char *ptr, long len);
+#endif
+
/* io.c */
void rb_maygvl_fd_fix_cloexec(int fd);
diff --git a/test/-ext-/string/test_normalize.rb b/test/-ext-/string/test_normalize.rb
new file mode 100644
index 00000000000..da24391110b
--- /dev/null
+++ b/test/-ext-/string/test_normalize.rb
@@ -0,0 +1,105 @@
+require 'test/unit'
+require "-test-/string/string"
+require "tempfile"
+
+# Exercises Bug::String#normalize_ospath, the test-only method registered by
+# ext/-test-/string/normalize.c.
+class Test_StringNormalize < Test::Unit::TestCase
+# The block below is disabled.  It creates a tempfile per code point in
+# U+0080..U+FFFD (surrogates skipped), globs it back, and prints the cases
+# where the globbed name or its normalize_ospath result differs from the
+# original path.
+=begin
+ def test_normalize_all
+ exclude = [
+ #0x340, 0x341, 0x343, 0x344
+ ]
+ (0x0080..0xFFFD).each do |n|
+ next if 0xD800 <= n && n <= 0xDFFF
+ next if exclude.include? n
+ code = n.to_s(16)
+ Tempfile.create("#{code}-#{n.chr(Encoding::UTF_8)}-") do |tempfile|
+ ary = Dir.glob(File.expand_path("../#{code}-*", tempfile.path))
+ assert_equal 1, ary.size
+ result = ary[0]
+ rn = result[/\/\h+-(.+?)-/, 1]
+ #assert_equal tempfile.path, result, "#{rn.dump} is not U+#{n.to_s(16)}"
+ r2 = Bug::String.new(result ).normalize_ospath
+ rn2 = r2[/\/\h+-(.+?)-/, 1]
+ if tempfile.path == result
+ if tempfile.path == r2
+ else
+ puts "U+#{n.to_s(16)} shouldn't be r2#{rn2.dump}"
+ end
+ else
+ if tempfile.path == r2
+ # puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump}"
+ elsif result == r2
+ puts "U+#{n.to_s(16)} shouldn't be #{rn.dump}"
+ else
+ puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump} r2#{rn2.dump}"
+ end
+ end
+ end
+ end
+ end
+=end
+
+ # Each table row is "expected source": normalize_ospath applied to the
+ # second column must equal the first column.
+ def test_normalize
+ %[
+ \u304C \u304B\u3099
+ \u3077 \u3075\u309A
+ \u308F\u3099 \u308F\u3099
+ \u30F4 \u30A6\u3099
+ \u30DD \u30DB\u309A
+ \u30AB\u303A \u30AB\u303A
+ \u00C1 A\u0301
+ B\u030A B\u030A
+ \u0386 \u0391\u0301
+ \u03D3 \u03D2\u0301
+ \u0401 \u0415\u0308
+ \u2260 =\u0338
+ ].scan(/(\S+)\s+(\S+)/) do |expected, src|
+ result = Bug::String.new(src).normalize_ospath
+ assert_equal expected, result,
+ "#{expected.dump} is expected but #{src.dump}"
+ end
+ # Where normalize_ospath is bound to rb_f_notimplement (builds without
+ # __APPLE__) the call raises and the test passes without checking anything.
+ rescue NotImplementedError
+ end
+
+ # Every whitespace-separated entry must come back unchanged; going by the
+ # test name, these guard against compatibility (KC-style) normalization.
+ def test_not_normalize_kc
+ %[
+ \u2460
+ \u2162
+ \u3349
+ \u33A1
+ \u337B
+ \u2116
+ \u33CD
+ \u2121
+ \u32A4
+ \u3231
+ ].split.each do |src|
+ result = Bug::String.new(src).normalize_ospath
+ assert_equal src, result,
+ "#{src.dump} is expected not to be normalized, but #{result.dump}"
+ end
+ # Skipped silently where normalize_ospath is not implemented.
+ rescue NotImplementedError
+ end
+
+ # Every whitespace-separated entry must come back unchanged.  Most entries
+ # lie in U+2000..U+2FFF or U+F900..U+FAFF, ranges the accompanying ChangeLog
+ # entry says HFS Plus does not decompose.
+ def test_dont_normalize_hfsplus
+ %[
+ \u2190\u0338
+ \u219A
+ \u212B
+ \uF90A
+ \uF9F4
+ \uF961 \uF9DB
+ \uF96F \uF3AA
+ \uF915 \uF95C \uF9BF
+ \uFA0C
+ \uFA10
+ \uFA19
+ \uFA26
+ ].split.each do |src|
+ result = Bug::String.new(src).normalize_ospath
+ assert_equal src, result,
+ "#{src.dump} is expected not to be normalized, but #{result.dump}"
+ end
+ # Skipped silently where normalize_ospath is not implemented.
+ rescue NotImplementedError
+ end
+end