summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-02-27 15:19:22 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-02-27 15:19:22 +0000
commit4dd9fd71b34cd21613404a2900085868dfac5cd4 (patch)
treea47739c65aba4e2205cedbfc7c412d2c72fd171a /string.c
parent9d014dc2546bdcc9bf899e734e7df73ae5735a2b (diff)
* string.c (rb_str_coderange_scan_restartable): coderange scanning
for partial reads. * io.c (read_all): set the coderange when the encoding is not converted. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15617 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r--string.c55
1 files changed, 55 insertions, 0 deletions
diff --git a/string.c b/string.c
index 596459d919..26707ed0c5 100644
--- a/string.c
+++ b/string.c
@@ -201,6 +201,61 @@ coderange_scan(const char *p, long len, rb_encoding *enc)
return ENC_CODERANGE_VALID;
}
+/*
+ * Scan the bytes in [s, e) under encoding enc and update the running
+ * code range in *cr, so that a string can be classified chunk by chunk.
+ *
+ * s, e: start and one-past-the-end of the chunk to scan.
+ * enc:  encoding used to measure characters.
+ * cr:   in/out code range.  If it is already ENC_CODERANGE_BROKEN the
+ *       chunk is not inspected.  An incoming ENC_CODERANGE_VALID is
+ *       never downgraded to ENC_CODERANGE_7BIT by an all-ASCII chunk.
+ *
+ * Returns the number of bytes consumed.  This is e - s unless scanning
+ * stopped at a character that is not complete and valid, in which case
+ * it is the offset of that character and *cr is ENC_CODERANGE_BROKEN
+ * (invalid bytes) or ENC_CODERANGE_UNKNOWN (not invalid -- presumably a
+ * character truncated by the end of the chunk).  The caller is expected
+ * to resume the scan from s + return value once more bytes are available.
+ */
+long
+rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
+{
+    const char *p = s;
+
+    /* A broken string stays broken; treat the chunk as fully consumed. */
+    if (*cr == ENC_CODERANGE_BROKEN)
+        return e - s;
+
+    if (rb_enc_to_index(enc) == 0) {
+        /* enc is ASCII-8BIT.  An ASCII-8BIT string can never be broken. */
+        p = search_nonascii(p, e);
+        *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
+        return e - s;
+    }
+    else if (rb_enc_asciicompat(enc)) {
+        /* Skip the leading ASCII run; only non-ASCII bytes need measuring. */
+        p = search_nonascii(p, e);
+        if (!p) {
+            if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
+            return e - s;
+        }
+        while (p < e) {
+            int ret = rb_enc_precise_mbclen(p, e, enc);
+            if (!MBCLEN_CHARFOUND_P(ret)) {
+                /*
+                 * Report a real code range constant rather than the raw
+                 * mbclen result, and return the offset of the offending
+                 * character (as the non-ASCII-compatible branch does) so
+                 * that a restart does not skip its bytes.
+                 */
+                *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN : ENC_CODERANGE_UNKNOWN;
+                return p - s;
+            }
+            p += MBCLEN_CHARFOUND_LEN(ret);
+            if (p < e) {
+                /* Jump over the next ASCII run, if any. */
+                p = search_nonascii(p, e);
+                if (!p) {
+                    *cr = ENC_CODERANGE_VALID;
+                    return e - s;
+                }
+            }
+        }
+        /* Defensive: a character must not extend past the end of the chunk. */
+        *cr = e < p ? ENC_CODERANGE_BROKEN : ENC_CODERANGE_VALID;
+        return p - s;
+    }
+    else {
+        /* Not ASCII compatible: every character has to be measured. */
+        while (p < e) {
+            int ret = rb_enc_precise_mbclen(p, e, enc);
+            if (!MBCLEN_CHARFOUND_P(ret)) {
+                *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN : ENC_CODERANGE_UNKNOWN;
+                return p - s;
+            }
+            p += MBCLEN_CHARFOUND_LEN(ret);
+        }
+        /* Defensive: a character must not extend past the end of the chunk. */
+        *cr = e < p ? ENC_CODERANGE_BROKEN : ENC_CODERANGE_VALID;
+        return p - s;
+    }
+}
+
+
static void
rb_enc_str_copy(VALUE dest, VALUE src)
{