From f81b6694ed7d004054e0d11e6362769461af8905 Mon Sep 17 00:00:00 2001 From: usa Date: Tue, 1 Dec 2015 08:00:58 +0000 Subject: merge revision(s) 51583,51594,51638: [Backport #11444] * io.c (rb_io_each_codepoint): read more data when read partially. [ruby-core:70379] [Bug #11444] * io.c (rb_io_each_codepoint): raise an exception at incomplete character before EOF when conversion takes place. [Bug #11444] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_1@52831 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- io.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'io.c') diff --git a/io.c b/io.c index 249218d2c5..bf859578cc 100644 --- a/io.c +++ b/io.c @@ -3630,6 +3630,7 @@ rb_io_each_codepoint(VALUE io) READ_CHECK(fptr); if (NEED_READCONV(fptr)) { SET_BINARY_MODE(fptr); + r = 1; /* no invalid char yet */ for (;;) { make_readconv(fptr, 0); for (;;) { @@ -3648,13 +3649,16 @@ rb_io_each_codepoint(VALUE io) } if (more_char(fptr) == MORE_CHAR_FINISHED) { clear_readconv(fptr); - /* ignore an incomplete character before EOF */ + if (!MBCLEN_CHARFOUND_P(r)) { + enc = fptr->encs.enc; + goto invalid; + } return io; } } if (MBCLEN_INVALID_P(r)) { - rb_raise(rb_eArgError, "invalid byte sequence in %s", - rb_enc_name(fptr->encs.enc)); + enc = fptr->encs.enc; + goto invalid; } n = MBCLEN_CHARFOUND_LEN(r); if (fptr->encs.enc) { @@ -3684,8 +3688,25 @@ rb_io_each_codepoint(VALUE io) rb_yield(UINT2NUM(c)); } else if (MBCLEN_INVALID_P(r)) { + invalid: rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc)); } + else if (MBCLEN_NEEDMORE_P(r)) { + char cbuf[8], *p = cbuf; + int more = MBCLEN_NEEDMORE_LEN(r); + if (more > numberof(cbuf)) goto invalid; + more += n = fptr->rbuf.len; + if (more > numberof(cbuf)) goto invalid; + while ((n = (int)read_buffered_data(p, more, fptr)) > 0 && + (p += n, (more -= n) > 0)) { + if (io_fillbuf(fptr) < 0) goto invalid; + if ((n = fptr->rbuf.len) > more) n = more; + } + r = rb_enc_precise_mbclen(cbuf, p, enc); + if (!MBCLEN_CHARFOUND_P(r)) goto invalid; + c = rb_enc_codepoint(cbuf, p, enc); + rb_yield(UINT2NUM(c)); + } else { continue; } -- cgit v1.2.3