diff options
Diffstat (limited to 'encoding.c')
-rw-r--r-- | encoding.c | 32 |
1 files changed, 25 insertions, 7 deletions
diff --git a/encoding.c b/encoding.c
index 53ceac851d..540aa88701 100644
--- a/encoding.c
+++ b/encoding.c
@@ -459,7 +459,6 @@ rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc)
         for (c=0; p<e && nth--; c++) {
             int n = rb_enc_mbclen(p, e, enc);
 
-            if (n == 0) return 0;
             p += n;
         }
     }
@@ -478,7 +477,6 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
     for (c=0; p<e; c++) {
         int n = rb_enc_mbclen(p, e, enc);
 
-        if (n == 0) return -1;
         p += n;
     }
     return c;
@@ -487,19 +485,39 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
 int
 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
 {
-    int n = ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
-    if (n == 0) {
-        rb_raise(rb_eArgError, "invalid mbstring sequence");
-    }
-    return n;
+    int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
+    if (MBCLEN_CHARFOUND(n))
+        return n;
+    else
+        return 1;
 }
 
 int
 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
 {
+    if (e <= p)
+        return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
     return ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
 }
 
+int rb_enc_get_ascii(const char *p, const char *e, rb_encoding *enc)
+{
+    int c, l;
+    if (e <= p)
+        return -1;
+    if (rb_enc_asciicompat(enc)) {
+        c = (unsigned char)*p;
+        return ISASCII(c) ? c : -1;
+    }
+    l = rb_enc_precise_mbclen(p, e, enc);
+    if (!MBCLEN_CHARFOUND(l))
+        return -1;
+    c = rb_enc_codepoint(p, e, enc);
+    if (rb_enc_isascii(c, enc))
+        return c;
+    return -1;
+}
+
 int
 rb_enc_codelen(int c, rb_encoding *enc)
 {