summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2015-12-01 08:00:58 +0000
committer: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2015-12-01 08:00:58 +0000
commit: f81b6694ed7d004054e0d11e6362769461af8905 (patch)
tree: 9e26915a499d24800a65c8b401ec118addc8981b
parent: a1171a95b2b6f6d7e90d5dc50911ec12c02cb96d (diff)
merge revision(s) 51583,51594,51638: [Backport #11444]
* io.c (rb_io_each_codepoint): read more data when read partially.
  [ruby-core:70379] [Bug #11444]

* io.c (rb_io_each_codepoint): raise an exception at incomplete
  character before EOF when conversion takes place. [Bug #11444]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_1@52831 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 10
-rw-r--r-- NEWS 2
-rw-r--r-- io.c 27
-rw-r--r-- test/ruby/test_io_m17n.rb 39
-rw-r--r-- version.h 2
5 files changed, 76 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 2cf8194d9e..a57b69fd60 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Tue Dec 1 16:57:57 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * io.c (rb_io_each_codepoint): raise an exception at incomplete
+ character before EOF when conversion takes place. [Bug #11444]
+
+Tue Dec 1 16:57:57 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * io.c (rb_io_each_codepoint): read more data when read partially.
+ [ruby-core:70379] [Bug #11444]
+
Tue Dec 1 13:06:29 2015 NAKAMURA Usaku <usa@ruby-lang.org>
* ext/digest/sha1/sha1ossl.c: fixed build error introduced at r52797.
diff --git a/NEWS b/NEWS
index afd15faf3b..51503ebcc0 100644
--- a/NEWS
+++ b/NEWS
@@ -148,6 +148,8 @@ with all sufficient information, see the ChangeLog file.
* IO
* incompatible changes:
* open ignore internal encoding if external encoding is ASCII-8BIT.
+ * IO#each_codepoint raises an exception at incomplete character
+ before EOF when conversion takes place. [Bug #11444]
* Kernel#eval, Kernel#instance_eval, and Module#module_eval.
* Copies the scope information of the original environment, which means
diff --git a/io.c b/io.c
index 249218d2c5..bf859578cc 100644
--- a/io.c
+++ b/io.c
@@ -3630,6 +3630,7 @@ rb_io_each_codepoint(VALUE io)
READ_CHECK(fptr);
if (NEED_READCONV(fptr)) {
SET_BINARY_MODE(fptr);
+ r = 1; /* no invalid char yet */
for (;;) {
make_readconv(fptr, 0);
for (;;) {
@@ -3648,13 +3649,16 @@ rb_io_each_codepoint(VALUE io)
}
if (more_char(fptr) == MORE_CHAR_FINISHED) {
clear_readconv(fptr);
- /* ignore an incomplete character before EOF */
+ if (!MBCLEN_CHARFOUND_P(r)) {
+ enc = fptr->encs.enc;
+ goto invalid;
+ }
return io;
}
}
if (MBCLEN_INVALID_P(r)) {
- rb_raise(rb_eArgError, "invalid byte sequence in %s",
- rb_enc_name(fptr->encs.enc));
+ enc = fptr->encs.enc;
+ goto invalid;
}
n = MBCLEN_CHARFOUND_LEN(r);
if (fptr->encs.enc) {
@@ -3684,8 +3688,25 @@ rb_io_each_codepoint(VALUE io)
rb_yield(UINT2NUM(c));
}
else if (MBCLEN_INVALID_P(r)) {
+ invalid:
rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
}
+ else if (MBCLEN_NEEDMORE_P(r)) {
+ char cbuf[8], *p = cbuf;
+ int more = MBCLEN_NEEDMORE_LEN(r);
+ if (more > numberof(cbuf)) goto invalid;
+ more += n = fptr->rbuf.len;
+ if (more > numberof(cbuf)) goto invalid;
+ while ((n = (int)read_buffered_data(p, more, fptr)) > 0 &&
+ (p += n, (more -= n) > 0)) {
+ if (io_fillbuf(fptr) < 0) goto invalid;
+ if ((n = fptr->rbuf.len) > more) n = more;
+ }
+ r = rb_enc_precise_mbclen(cbuf, p, enc);
+ if (!MBCLEN_CHARFOUND_P(r)) goto invalid;
+ c = rb_enc_codepoint(cbuf, p, enc);
+ rb_yield(UINT2NUM(c));
+ }
else {
continue;
}
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 0b435dd8c1..5fbf056cd9 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -1,6 +1,7 @@
# coding: US-ASCII
require 'test/unit'
require 'tmpdir'
+require 'tempfile'
require 'timeout'
require_relative 'envutil'
@@ -2535,4 +2536,42 @@ EOT
end
}
end if /mswin|mingw/ =~ RUBY_PLATFORM
+
+ def test_each_codepoint_need_more
+ bug11444 = '[ruby-core:70379] [Bug #11444]'
+ tests = [
+ ["incomplete multibyte", "\u{1f376}".b[0,3], [], ["invalid byte sequence in UTF-8"]],
+ ["multibyte at boundary", "x"*8190+"\u{1f376}", ["1f376"], []],
+ ]
+ failure = []
+ ["bin", "text"].product(tests) do |mode, (test, data, out, err)|
+ code = <<-"end;"
+ c = nil
+ begin
+ open(ARGV[0], "r#{mode[0]}:utf-8") do |f|
+ f.each_codepoint{|i| c = i}
+ end
+ rescue ArgumentError => e
+ STDERR.puts e.message
+ else
+ printf "%x", c
+ end
+ end;
+ Tempfile.create("codepoint") do |f|
+ args = ['-e', code, f.path]
+ f.print data
+ f.close
+ begin
+ assert_in_out_err(args, "", out, err,
+ "#{bug11444}: #{test} in #{mode} mode",
+ timeout: 1)
+ rescue Exception => e
+ failure << e
+ end
+ end
+ end
+ unless failure.empty?
+ flunk failure.join("\n---\n")
+ end
+ end
end
diff --git a/version.h b/version.h
index 786445fb49..def22204e8 100644
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
#define RUBY_VERSION "2.1.8"
#define RUBY_RELEASE_DATE "2015-12-01"
-#define RUBY_PATCHLEVEL 429
+#define RUBY_PATCHLEVEL 430
#define RUBY_RELEASE_YEAR 2015
#define RUBY_RELEASE_MONTH 12