summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 10
-rw-r--r-- include/ruby/encoding.h 4
-rw-r--r-- io.c 7
-rw-r--r-- test/ruby/test_io_m17n.rb 10
-rw-r--r-- transcode.c 14
5 files changed, 44 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 6ab3a162bf..4c93c3ae0c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sun Aug 17 13:23:53 2008 Tanaka Akira <akr@fsij.org>
+
+ * include/ruby/encoding.h (rb_econv_putbackable): declared.
+ (rb_econv_putback): ditto.
+
+ * transcode.c (rb_econv_putbackable): implemented.
+ (rb_econv_putback): ditto.
+
+ * io.c (io_getc): put back bytes if possible.
+
Sun Aug 17 12:00:18 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (make_econv_exception): add several instance variables
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 18d32d87c6..c859c50559 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -253,6 +253,7 @@ rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
int flags);
+void rb_econv_close(rb_econv_t *ec);
/* result: 0:success -1:failure */
int rb_econv_insert_output(rb_econv_t *ec,
@@ -264,7 +265,8 @@ const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec);
/* raise an error if the last rb_econv_convert is error */
void rb_econv_check_error(rb_econv_t *ec);
-void rb_econv_close(rb_econv_t *ec);
+int rb_econv_putbackable(rb_econv_t *ec);
+void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
/* flags for rb_econv_open */
#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x100
diff --git a/io.c b/io.c
index 169beeaa3f..eb8c808a3e 100644
--- a/io.c
+++ b/io.c
@@ -2308,6 +2308,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
const unsigned char *ss, *sp, *se;
unsigned char *ds, *dp, *de;
rb_econv_result_t res;
+ int putbackable;
if (fptr->crbuf_len) {
r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
if (!MBCLEN_NEEDMORE_P(r))
@@ -2332,6 +2333,12 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
fptr->rbuf_off += sp - ss;
fptr->rbuf_len -= sp - ss;
fptr->crbuf_len += dp - ds;
+ putbackable = rb_econv_putbackable(fptr->readconv);
+ if (putbackable) {
+ rb_econv_putback(fptr->readconv, (unsigned char *)fptr->rbuf + fptr->rbuf_off - putbackable, putbackable);
+ fptr->rbuf_off -= putbackable;
+ fptr->rbuf_len += putbackable;
+ }
rb_econv_check_error(fptr->readconv);
}
if (MBCLEN_INVALID_P(r)) {
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 6d7b4f7f9b..6cec7a05bb 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -216,6 +216,16 @@ EOT
}
end
+ def test_getc_invalid # getc on undecodable input must raise, report the offending byte, and leave the following bytes readable
+ with_pipe("euc-jp:utf-8") {|r, w|
+ w << "\xa1xyz"
+ w.close
+ err = assert_raise(Encoding::InvalidByteSequence) { r.getc }
+ assert_equal("\xA1".force_encoding("ascii-8bit"), err.error_bytes) # only the \xA1 byte is expected to be reported as the error
+ assert_equal("xyz", r.read(10)) # the bytes written after \xA1 must still be returned by a later read
+ }
+ end
+
def test_getc_stateful_conversion
with_tmpdir {
src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
diff --git a/transcode.c b/transcode.c
index ed16eb3b0d..35547d1007 100644
--- a/transcode.c
+++ b/transcode.c
@@ -1202,6 +1202,20 @@ rb_econv_close(rb_econv_t *ec)
xfree(ec);
}
+int
+rb_econv_putbackable(rb_econv_t *ec)
+{ /* Returns the readagain_len counter of the transcoding state held in ec->elems[0]. */
+ return ec->elems[0].tc->readagain_len; /* io_getc treats a nonzero result as the byte count to pass to rb_econv_putback; presumably these are input bytes the converter wants re-read -- confirm against rb_transcoding */
+}
+
+void
+rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
+{ /* Copies n bytes from the read buffer of ec->elems[0] into p and reduces that element's readagain_len by n. */
+ rb_transcoding *tc = ec->elems[0].tc;
+ memcpy(p, TRANSCODING_READBUF(tc) + tc->recognized_len, n); /* source begins right after the first recognized_len bytes of the read buffer. NOTE(review): n is not checked against readagain_len, and this offset looks correct only when n equals the full readagain_len -- the io_getc caller passes exactly rb_econv_putbackable(); confirm before allowing partial put-back */
+ tc->readagain_len -= n;
+}
+
static VALUE
make_econv_exception(rb_econv_t *ec)
{