summary refs log tree commit diff
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-16 04:05:58 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-16 04:05:58 +0000
commit 327673a43b8aaf0df6ca2a494ab4aeadb49802fb (patch)
tree 55047b32c93a7326a1e1ba3b516a2943b5e9ed44
parent af75cc01bc7ed39eafbc958936c51e0221313051 (diff)
* string.c (rb_enc_strlen): add search_nonascii like character
counter for UTF-8.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15499 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- string.c 29
2 files changed, 34 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 470a259df8..f161abcec3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Sat Feb 16 13:01:33 2008 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * string.c (rb_enc_strlen): add search_nonascii like character
+ counter for UTF-8.
+
Sat Feb 16 11:53:35 2008 Tanaka Akira <akr@fsij.org>
* encoding.c (rb_enc_strlen): moved to string.c.
diff --git a/string.c b/string.c
index 0fcf3fcc9e..d59a5d1993 100644
--- a/string.c
+++ b/string.c
@@ -597,6 +597,35 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
return (e - p) / rb_enc_mbminlen(enc);
}
+#ifdef NONASCII_MASK
+ else if (enc == rb_utf8_encoding()) {
+ if (sizeof(long) * 2 < e - p) {
+ const unsigned long *s, *t;
+ const VALUE lowbits = sizeof(unsigned long) - 1;
+ s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+ t = (const unsigned long*)(~lowbits & (VALUE)e);
+ for (c=0; p<(const char *)s; p++) {
+ if (((*p)&0xC0) != 0x80) c++;
+ }
+ while (s < t) {
+ unsigned long d = *s;
+ d = (~d ^ (d&(d<<1)))&NONASCII_MASK;
+ d = (d>>7) + (d>>15);
+ d = d + (d>>16);
+#if NONASCII_MASK == 0x8080808080808080UL
+ d = d + (d>>32);
+#endif
+ c += (long)(d&0xF);
+ s++;
+ }
+ p = (const char *)t;
+ }
+ for (; p<e; p++) {
+ if (((*p)&0xC0) != 0x80) c++;
+ }
+ return c;
+ }
+#endif
else if (rb_enc_asciicompat(enc)) {
c = 0;
while (p < e) {