summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-19 12:18:03 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-19 12:18:03 +0000
commit f1c975b87aa1ac6b50e53424ba8ff4a0c73985e9 (patch)
tree 69adee03c5c39b68f6072bcd945fc61fa34771a6 /string.c
parent e4021207fb0add1133b0c3e7f3b2acc2bc8d877c (diff)
* string.c (rb_enc_strlen_cr): get length with coderange scan.
* string.c (str_strlen): use rb_enc_strlen_cr. [ruby-dev:33849]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15550 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 61
1 files changed, 59 insertions, 2 deletions
diff --git a/string.c b/string.c
index 9bab8d4411..4540b86f74 100644
--- a/string.c
+++ b/string.c
@@ -128,12 +128,12 @@ search_nonascii(const char *p, const char *e)
const unsigned long *s, *t;
const VALUE lowbits = sizeof(unsigned long) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
- t = (const unsigned long*)(~lowbits & (VALUE)e);
while (p < (const char *)s) {
if (!ISASCII(*p))
return p;
p++;
}
+ t = (const unsigned long*)(~lowbits & (VALUE)e);
while (s < t) {
if (*s & NONASCII_MASK) {
t = s;
@@ -619,10 +619,63 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
return c;
}
+/*
+ * Counts the characters in the byte range [p, e) under encoding enc and
+ * reports, through *cr, the code range observed while scanning.
+ *
+ *   p, e - first byte and one-past-the-last byte of the range.
+ *   enc  - encoding used to delimit characters.
+ *   cr   - out parameter, always written:
+ *            0                     fixed-width encoding; the bytes are not
+ *                                  inspected, so nothing is reported.
+ *            ENC_CODERANGE_7BIT    the scan ended without handing any
+ *                                  character to rb_enc_precise_mbclen()
+ *                                  (empty range, or only ASCII bytes in an
+ *                                  ASCII-compatible encoding).
+ *            ENC_CODERANGE_VALID   OR-ed in for each character that
+ *                                  rb_enc_precise_mbclen() accepts.
+ *            ENC_CODERANGE_BROKEN  stored when rb_enc_precise_mbclen()
+ *                                  rejects the bytes at the current position.
+ *
+ * Returns the character count.  A rejected byte is counted as one
+ * character and the scan resumes at the following byte.
+ *
+ * NOTE(review): a valid character that follows a broken one does
+ * `*cr |= ENC_CODERANGE_VALID` on top of BROKEN; this presumably relies on
+ * BROKEN | VALID == BROKEN -- confirm against the ENC_CODERANGE_* values.
+ */
+long
+rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
+{
+    long c = 0;
+    const char *q;
+    int ret, ascii_compat;
+
+    *cr = 0;
+    if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
+        /* Fixed width: the count follows from the byte length alone. */
+        return (e - p) / rb_enc_mbminlen(enc);
+    }
+
+    ascii_compat = rb_enc_asciicompat(enc);
+    while (p < e) {
+        if (ascii_compat && ISASCII(*p)) {
+            /* Skip the whole ASCII run at once; each byte is one character. */
+            q = search_nonascii(p, e);
+            if (!q) {
+                /*
+                 * ASCII up to the end.  Leave through the common exit
+                 * below so that *cr is settled here as well; returning
+                 * directly would hand back 0 for a pure-ASCII range.
+                 */
+                c += e - p;
+                break;
+            }
+            c += q - p;
+            p = q;
+        }
+        ret = rb_enc_precise_mbclen(p, e, enc);
+        if (MBCLEN_CHARFOUND_P(ret)) {
+            *cr |= ENC_CODERANGE_VALID;
+            p += MBCLEN_CHARFOUND_LEN(ret);
+        }
+        else {
+            *cr = ENC_CODERANGE_BROKEN;
+            p++;
+        }
+        c++;
+    }
+    /* Nothing beyond plain ASCII (or nothing at all) was seen. */
+    if (!*cr) *cr = ENC_CODERANGE_7BIT;
+    return c;
+}
+
static long
str_strlen(VALUE str, rb_encoding *enc)
{
const char *p, *e;
+ int n, cr;
if (single_byte_optimizable(str)) return RSTRING_LEN(str);
if (!enc) enc = STR_ENC_GET(str);
@@ -661,7 +714,11 @@ str_strlen(VALUE str, rb_encoding *enc)
return len;
}
#endif
- return rb_enc_strlen(p, e, enc);
+ n = rb_enc_strlen_cr(p, e, enc, &cr);
+ if (cr) {
+ ENC_CODERANGE_SET(str, cr);
+ }
+ return n;
}
/*