From f1c975b87aa1ac6b50e53424ba8ff4a0c73985e9 Mon Sep 17 00:00:00 2001 From: naruse Date: Tue, 19 Feb 2008 12:18:03 +0000 Subject: * string.c (rb_enc_strlen_cr): get length with coderange scan. * string.c (str_strlen): use rb_enc_strlen_cr. [ruby-dev:33849] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15550 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- string.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) (limited to 'string.c') diff --git a/string.c b/string.c index 9bab8d4411..4540b86f74 100644 --- a/string.c +++ b/string.c @@ -128,12 +128,12 @@ search_nonascii(const char *p, const char *e) const unsigned long *s, *t; const VALUE lowbits = sizeof(unsigned long) - 1; s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); - t = (const unsigned long*)(~lowbits & (VALUE)e); while (p < (const char *)s) { if (!ISASCII(*p)) return p; p++; } + t = (const unsigned long*)(~lowbits & (VALUE)e); while (s < t) { if (*s & NONASCII_MASK) { t = s; @@ -619,10 +619,63 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc) return c; } +long +rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr) +{ + long c; + const char *q; + int ret; + + *cr = 0; + if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { + return (e - p) / rb_enc_mbminlen(enc); + } + else if (rb_enc_asciicompat(enc)) { + c = 0; + while (p < e) { + if (ISASCII(*p)) { + q = search_nonascii(p, e); + if (!q) { + return c + (e - p); + } + c += q - p; + p = q; + } + ret = rb_enc_precise_mbclen(p, e, enc); + if (MBCLEN_CHARFOUND_P(ret)) { + *cr |= ENC_CODERANGE_VALID; + p += MBCLEN_CHARFOUND_LEN(ret); + } + else { + *cr = ENC_CODERANGE_BROKEN; + p++; + } + c++; + } + if (!*cr) *cr = ENC_CODERANGE_7BIT; + return c; + } + + for (c=0; p