diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-09-17 12:50:52 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-09-17 12:50:52 +0000 |
commit | 635b15d66214a441a23de6363015f1241dcb59fd (patch) | |
tree | 09ca97ac7a0391cc13274c97b5820b8a781eadb8 | |
parent | d965e99b04bc250e8129f5909383ffdff53c0149 (diff) |
* string.c (rb_str_casecmp): don't use rb_enc_codepoint.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19398 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | include/ruby/encoding.h | 6 | ||||
-rw-r--r-- | string.c | 53 | ||||
-rw-r--r-- | test/ruby/enc/test_utf16.rb | 12 |
4 files changed, 57 insertions, 18 deletions
@@ -1,3 +1,7 @@ +Wed Sep 17 21:50:14 2008 Tanaka Akira <akr@fsij.org> + + * string.c (rb_str_casecmp): don't use rb_enc_codepoint. + Wed Sep 17 19:55:33 2008 Tadayoshi Funaba <tadf@dotrb.org> * complex.c (nucomp_s_convert): accepts complex diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index ca9ddc5dc9..acf10cb072 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -125,10 +125,10 @@ unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); #define rb_enc_mbc_precise_codepoint(p, e, prec_ret, enc) ONIGENC_MBC_PRECISE_CODEPOINT(enc,(UChar*)(p),(UChar*)(e),(prec_ret)) /* -> codelen>0 or raise exception */ -int rb_enc_codelen(int code, rb_encoding *enc); +int rb_enc_codelen(int codepoint, rb_encoding *enc); -/* code,ptr,encoding -> write buf */ -#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf)) +/* codepoint,ptr,encoding -> write buf */ +#define rb_enc_mbcput(codepoint,buf,enc) ONIGENC_CODE_TO_MBC((enc),(codepoint),(UChar*)(buf)) /* start, ptr, end, encoding -> prev_char */ #define rb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) @@ -1998,7 +1998,6 @@ rb_str_cmp_m(VALUE str1, VALUE str2) static VALUE rb_str_casecmp(VALUE str1, VALUE str2) { - long len; rb_encoding *enc; char *p1, *p1end, *p2, *p2end; @@ -2013,8 +2012,8 @@ rb_str_casecmp(VALUE str1, VALUE str2) if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) { while (p1 < p1end && p2 < p2end) { if (*p1 != *p2) { - unsigned int c1 = rb_enc_toupper(*p1 & 0xff, enc); - unsigned int c2 = rb_enc_toupper(*p2 & 0xff, enc); + unsigned int c1 = TOUPPER(*p1 & 0xff); + unsigned int c2 = TOUPPER(*p2 & 0xff); if (c1 > c2) return INT2FIX(1); if (c1 < c2) return INT2FIX(-1); } @@ -2024,18 +2023,42 @@ rb_str_casecmp(VALUE str1, VALUE str2) } else { while (p1 < p1end && p2 < p2end) { - unsigned int c1 = rb_enc_codepoint(p1, p1end, enc); - unsigned int c2 = rb_enc_codepoint(p2, p2end, enc); - - if (c1 != c2) { - c1 = rb_enc_toupper(c1, enc); - c2 = rb_enc_toupper(c2, enc); - if (c1 > c2) return INT2FIX(1); - if (c1 < c2) return INT2FIX(-1); - } - len = rb_enc_codelen(c1, enc); - p1 += len; - p2 += len; + int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc); + int l2, c2 = rb_enc_ascget(p2, p1end, &l2, enc); + + if (0 <= c1) { + if (0 <= c2) { + if (c1 != c2) { + c1 = TOUPPER(c1); + c2 = TOUPPER(c2); + if (c1 > c2) return INT2FIX(1); + if (c1 < c2) return INT2FIX(-1); + } + } + else { + return INT2FIX(-1); + } + } + else { + if (0 <= c2) { + return INT2FIX(1); + } + else { + int l, r; + l1 = rb_enc_mbclen(p1, p1end, enc); + l2 = rb_enc_mbclen(p2, p2end, enc); + l = l1; + if (l2 < l) + l = l2; + r = memcmp(p1, p2, l); + if (r != 0) + return INT2FIX(r < 0 ? -1 : 1); + if (l1 != l2) + return INT2FIX(l1 < l2 ? -1 : 1); + } + } + p1 += l1; + p2 += l2; } } if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0); diff --git a/test/ruby/enc/test_utf16.rb b/test/ruby/enc/test_utf16.rb index 463b076c99..33ca45f90a 100644 --- a/test/ruby/enc/test_utf16.rb +++ b/test/ruby/enc/test_utf16.rb @@ -368,4 +368,16 @@ EOT r = Regexp.new(Regexp.escape(s)) assert(r =~ s, "#{encdump(r)} =~ #{encdump(s)}") end + + def test_casecmp + assert_equal(0, "\0A".force_encoding("UTF-16BE").casecmp("\0a".force_encoding("UTF-16BE"))) + assert_not_equal(0, "\0A".force_encoding("UTF-16LE").casecmp("\0a".force_encoding("UTF-16LE"))) + assert_not_equal(0, "A\0".force_encoding("UTF-16BE").casecmp("a\0".force_encoding("UTF-16BE"))) + assert_equal(0, "A\0".force_encoding("UTF-16LE").casecmp("a\0".force_encoding("UTF-16LE"))) + + ary = ["ab".force_encoding("UTF-16LE"), "ba".force_encoding("UTF-16LE")] + e = ary.sort {|x,y| x <=> y } + a = ary.sort {|x,y| x.casecmp(y) } + assert_equal(e, a) + end end |