summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 30
-rw-r--r-- gc.c 4
-rw-r--r-- include/ruby/encoding.h 5
-rw-r--r-- include/ruby/io.h 8
-rw-r--r-- io.c 150
-rw-r--r-- test/ruby/test_io_m17n.rb 45
-rw-r--r-- transcode.c 72
7 files changed, 297 insertions, 17 deletions
diff --git a/ChangeLog b/ChangeLog
index d2f166aa56..9ead4833be 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,33 @@
+Mon Aug 18 21:02:08 2008 Tanaka Akira <akr@fsij.org>
+
+ * include/ruby/io.h (rb_io_t): new fields: writeconv,
+ writeconv_stateless and writeconv_initialized.
+ (MakeOpenFile): initialize them.
+
+ * include/ruby/encoding.h (rb_econv_stateless_encoding): declared.
+ (rb_econv_string): declared.
+
+ * io.c (make_writeconv): new function.
+ (io_fwrite): use econv.
+ (make_readconv): fix error message.
+ (finish_writeconv): new function.
+ (fptr_finalize): call finish_writeconv.
+ (clear_writeconv): new function.
+ (clear_codeconv): new function to call both clear_readconv and
+ clear_writeconv.
+ (rb_io_fptr_finalize): call clear_codeconv instead of
+ clear_readconv.
+ (mode_enc): ditto.
+ (io_set_encoding): ditto.
+ (argf_next_argv): ditto.
+ (io_encoding_set): ditto.
+
+ * gc.c (gc_mark_children): mark writeconv_stateless in T_FILE.
+
+ * transcode.c (stateless_encoding_i): new function.
+ (rb_econv_stateless_encoding): ditto.
+ (rb_econv_string): ditto.
+
Mon Aug 18 17:23:38 2008 Tanaka Akira <akr@fsij.org>
* io.c (clear_readconv): extracted from rb_io_fptr_finalize.
diff --git a/gc.c b/gc.c
index 2bfb27d355..551437ac44 100644
--- a/gc.c
+++ b/gc.c
@@ -1507,8 +1507,10 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
break;
case T_FILE:
- if (obj->as.file.fptr)
+ if (obj->as.file.fptr) {
gc_mark(objspace, obj->as.file.fptr->tied_io_for_writing, lev);
+ gc_mark(objspace, obj->as.file.fptr->writeconv_stateless, lev);
+ }
break;
case T_REGEXP:
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index c859c50559..6c443d6f0d 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -268,6 +268,11 @@ void rb_econv_check_error(rb_econv_t *ec);
int rb_econv_putbackable(rb_econv_t *ec);
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
+/* returns corresponding stateless encoding, or NULL if not stateful. */
+const char *rb_econv_stateless_encoding(const char *stateful_enc);
+
+VALUE rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags);
+
/* flags for rb_econv_open */
#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x100
#define ECONV_CRLF_NEWLINE_ENCODER 0x200
diff --git a/include/ruby/io.h b/include/ruby/io.h
index 2a2b991a6d..32830aaaea 100644
--- a/include/ruby/io.h
+++ b/include/ruby/io.h
@@ -63,6 +63,11 @@ typedef struct rb_io_t {
int crbuf_off;
int crbuf_len;
int crbuf_capa;
+
+ rb_econv_t *writeconv;
+ VALUE writeconv_stateless;
+ int writeconv_initialized;
+
} rb_io_t;
#define HAVE_RB_IO_T 1
@@ -110,6 +115,9 @@ typedef struct rb_io_t {
fp->crbuf_off = 0;\
fp->crbuf_len = 0;\
fp->crbuf_capa = 0;\
+ fp->writeconv = NULL;\
+ fp->writeconv_stateless = Qnil;\
+ fp->writeconv_initialized = 0;\
fp->tied_io_for_writing = 0;\
fp->enc = 0;\
fp->enc2 = 0;\
diff --git a/io.c b/io.c
index 92db7be8e2..aae3e433cb 100644
--- a/io.c
+++ b/io.c
@@ -689,6 +689,38 @@ rb_io_wait_writable(int f)
}
}
+static void
+make_writeconv(rb_io_t *fptr)
+{
+ if (!fptr->writeconv_initialized) {
+ const char *senc, *denc;
+ fptr->writeconv_stateless = Qnil;
+ if (fptr->enc2) {
+ senc = fptr->enc->name;
+ denc = fptr->enc2->name;
+ }
+ else {
+ senc = rb_econv_stateless_encoding(fptr->enc->name);
+ if (senc) {
+ denc = fptr->enc->name;
+ fptr->writeconv_stateless = rb_str_new2(senc);
+ }
+ else {
+ denc = NULL;
+ }
+ }
+ if (senc) {
+ fptr->writeconv = rb_econv_open(senc, denc, 0);
+ if (!fptr->writeconv)
+ rb_raise(rb_eIOError, "code converter open failed (%s to %s)", senc, denc);
+ }
+ else {
+ fptr->writeconv = NULL;
+ }
+ fptr->writeconv_initialized = 1;
+ }
+}
+
/* writing functions */
static long
io_fwrite(VALUE str, rb_io_t *fptr)
@@ -701,17 +733,18 @@ io_fwrite(VALUE str, rb_io_t *fptr)
* We must also transcode if two encodings were specified
*/
if (fptr->enc) {
- /* transcode str before output */
- /* the methods in transcode.c are static, so call indirectly */
- /* Can't use encode! because puts writes a frozen newline */
+ make_writeconv(fptr);
if (fptr->enc2) {
- str = rb_funcall(str, id_encode, 2,
- rb_enc_from_encoding(fptr->enc2),
- rb_enc_from_encoding(fptr->enc));
+ str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
}
else {
- str = rb_funcall(str, id_encode, 1,
- rb_enc_from_encoding(fptr->enc));
+ if (fptr->writeconv) {
+ str = rb_str_transcode(str, fptr->writeconv_stateless);
+ str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
+ }
+ else {
+ str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc));
+ }
}
}
@@ -1394,7 +1427,7 @@ make_readconv(rb_io_t *fptr)
if (!fptr->readconv) {
fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
if (!fptr->readconv)
- rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
+ rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc2->name, fptr->enc->name);
fptr->crbuf_off = 0;
fptr->crbuf_len = 0;
fptr->crbuf_capa = 1024;
@@ -2845,9 +2878,77 @@ rb_io_set_close_on_exec(VALUE io, VALUE arg)
#define PREP_STDIO_NAME(f) ((f)->path)
static void
+finish_writeconv(rb_io_t *fptr, int noraise)
+{
+ unsigned char *ds, *dp, *de;
+ rb_econv_result_t res;
+
+ if (!fptr->wbuf) {
+ unsigned char buf[1024];
+ int r;
+
+ res = econv_destination_buffer_full;
+ while (res == econv_destination_buffer_full) {
+ ds = dp = buf;
+ de = buf + sizeof(buf);
+ res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
+ while (dp-ds) {
+retry:
+ r = rb_write_internal(fptr->fd, ds, dp-ds);
+ if (r == dp-ds)
+ break;
+ if (0 <= r) {
+ ds += r;
+ }
+ if (rb_io_wait_writable(fptr->fd)) {
+ if (!noraise)
+ rb_io_check_closed(fptr);
+ else if (fptr->fd < 0)
+ return;
+ goto retry;
+ }
+ return;
+ }
+ if (!noraise) {
+ rb_econv_check_error(fptr->writeconv);
+ }
+ if (res == econv_invalid_byte_sequence ||
+ res == econv_undefined_conversion) {
+ break;
+ }
+ }
+
+ return;
+ }
+
+ res = econv_destination_buffer_full;
+ while (res == econv_destination_buffer_full) {
+ if (fptr->wbuf_len == fptr->wbuf_capa) {
+ io_fflush(fptr);
+ }
+
+ ds = dp = (unsigned char *)fptr->wbuf + fptr->wbuf_off + fptr->wbuf_len;
+ de = (unsigned char *)fptr->wbuf + fptr->wbuf_capa;
+ res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
+ fptr->wbuf_len += dp - ds;
+ if (!noraise) {
+ rb_econv_check_error(fptr->writeconv);
+ }
+ if (res == econv_invalid_byte_sequence ||
+ res == econv_undefined_conversion) {
+ break;
+ }
+ }
+
+}
+
+static void
fptr_finalize(rb_io_t *fptr, int noraise)
{
int ebadf = 0;
+ if (fptr->writeconv) {
+ finish_writeconv(fptr, noraise);
+ }
if (fptr->wbuf_len) {
io_fflush(fptr);
}
@@ -2907,6 +3008,23 @@ clear_readconv(rb_io_t *fptr)
}
}
+static void
+clear_writeconv(rb_io_t *fptr)
+{
+ if (fptr->writeconv) {
+ rb_econv_close(fptr->writeconv);
+ fptr->writeconv = NULL;
+ }
+ fptr->writeconv_initialized = 0;
+}
+
+static void
+clear_codeconv(rb_io_t *fptr)
+{
+ clear_readconv(fptr);
+ clear_writeconv(fptr);
+}
+
int
rb_io_fptr_finalize(rb_io_t *fptr)
{
@@ -2926,7 +3044,7 @@ rb_io_fptr_finalize(rb_io_t *fptr)
free(fptr->wbuf);
fptr->wbuf = 0;
}
- clear_readconv(fptr);
+ clear_codeconv(fptr);
free(fptr);
return 1;
}
@@ -3535,7 +3653,7 @@ mode_enc(rb_io_t *fptr, const char *estr)
fptr->enc = 0;
fptr->enc2 = 0;
- clear_readconv(fptr);
+ clear_codeconv(fptr);
p0 = strrchr(estr, ':');
if (!p0) p1 = estr;
@@ -4265,7 +4383,7 @@ io_set_encoding(VALUE io, VALUE opt)
GetOpenFile(io, fptr);
fptr->enc = 0;
fptr->enc2 = 0;
- clear_readconv(fptr);
+ clear_codeconv(fptr);
if (!NIL_P(encoding)) {
rb_warn("Ignoring encoding parameter '%s': external_encoding is used",
RSTRING_PTR(encoding));
@@ -5612,7 +5730,7 @@ argf_next_argv(VALUE argf)
GetOpenFile(current_file, fptr);
fptr->enc = argf_enc;
fptr->enc2 = argf_enc2;
- clear_readconv(fptr);
+ clear_codeconv(fptr);
}
}
else {
@@ -6340,13 +6458,13 @@ io_encoding_set(rb_io_t *fptr, int argc, VALUE v1, VALUE v2)
if (argc == 2) {
fptr->enc2 = rb_to_encoding(v1);
fptr->enc = rb_to_encoding(v2);
- clear_readconv(fptr);
+ clear_codeconv(fptr);
}
else if (argc == 1) {
if (NIL_P(v1)) {
fptr->enc = 0;
fptr->enc2 = 0;
- clear_readconv(fptr);
+ clear_codeconv(fptr);
}
else {
VALUE tmp = rb_check_string_type(v1);
@@ -6356,7 +6474,7 @@ io_encoding_set(rb_io_t *fptr, int argc, VALUE v1, VALUE v2)
else {
fptr->enc = rb_to_encoding(v1);
fptr->enc2 = 0;
- clear_readconv(fptr);
+ clear_codeconv(fptr);
}
}
}
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index e982722cfe..070987ad3c 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -601,5 +601,50 @@ EOT
}
end
+ def test_write_conversion_fixenc
+ with_pipe {|r, w|
+ w.set_encoding("iso-2022-jp:utf-8")
+ t = Thread.new { r.read.force_encoding("ascii-8bit") }
+ w << "\u3042"
+ w << "\u3044"
+ w.close
+ assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
+ }
+ end
+
+ def test_write_conversion_anyenc_stateful
+ with_pipe {|r, w|
+ w.set_encoding("iso-2022-jp")
+ t = Thread.new { r.read.force_encoding("ascii-8bit") }
+ w << "\u3042"
+ w << "\x82\xa2".force_encoding("sjis")
+ w.close
+ assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
+ }
+ end
+
+ def test_write_conversion_anyenc_stateless
+ with_pipe {|r, w|
+ w.set_encoding("euc-jp")
+ t = Thread.new { r.read.force_encoding("ascii-8bit") }
+ w << "\u3042"
+ w << "\x82\xa2".force_encoding("sjis")
+ w.close
+ assert_equal("\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit"), t.value)
+ }
+ end
+
+ def test_write_conversion_anyenc_stateful_nosync
+ with_pipe {|r, w|
+ w.sync = false
+ w.set_encoding("iso-2022-jp")
+ t = Thread.new { r.read.force_encoding("ascii-8bit") }
+ w << "\u3042"
+ w << "\x82\xa2".force_encoding("sjis")
+ w.close
+ assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
+ }
+ end
+
end
diff --git a/transcode.c b/transcode.c
index 33b1c7fc96..6ef4e84040 100644
--- a/transcode.c
+++ b/transcode.c
@@ -1219,6 +1219,78 @@ rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
tc->readagain_len -= n;
}
+struct stateless_encoding_t {
+ const char *stateless_enc;
+ const char *stateful_enc;
+};
+
+static int
+stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
+{
+ struct stateless_encoding_t *data = (struct stateless_encoding_t *)arg;
+ st_table *table2 = (st_table *)val;
+ st_data_t v;
+
+ if (st_lookup(table2, (st_data_t)data->stateful_enc, &v)) {
+ transcoder_entry_t *entry = (transcoder_entry_t *)v;
+ const rb_transcoder *tr = load_transcoder_entry(entry);
+ if (tr && tr->stateful_type == stateful_encoder) {
+ data->stateless_enc = tr->from_encoding;
+ return ST_STOP;
+ }
+ }
+ return ST_CONTINUE;
+}
+
+const char *
+rb_econv_stateless_encoding(const char *stateful_enc)
+{
+ struct stateless_encoding_t data;
+ data.stateful_enc = stateful_enc;
+ data.stateless_enc = NULL;
+ st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&data);
+ if (data.stateless_enc)
+ return data.stateless_enc;
+ return NULL;
+}
+
+VALUE
+rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
+{
+ unsigned const char *ss, *sp, *se;
+ unsigned char *ds, *dp, *de;
+ rb_econv_result_t res;
+
+ if (NIL_P(dst)) {
+ dst = rb_str_buf_new(len);
+ }
+
+ res = econv_destination_buffer_full;
+ while (res == econv_destination_buffer_full) {
+ long dlen = RSTRING_LEN(dst);
+ int max_output = ec->last_tc->transcoder->max_output;
+ if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
+ unsigned long new_capa = (unsigned long)dlen + len + max_output;
+ if (LONG_MAX < new_capa)
+ rb_raise(rb_eArgError, "too long string");
+ rb_str_resize(dst, new_capa);
+ rb_str_set_len(dst, dlen);
+ }
+ ss = sp = (const unsigned char *)RSTRING_PTR(src) + off;
+ se = ss + len;
+ ds = dp = (unsigned char *)RSTRING_PTR(dst) + dlen;
+ de = ds + rb_str_capacity(dst);
+ res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
+ off += sp - ss;
+ len -= sp - ss;
+ rb_str_set_len(dst, dlen + (dp - ds));
+ rb_econv_check_error(ec);
+ }
+
+ return dst;
+}
+
+
static VALUE
make_econv_exception(rb_econv_t *ec)
{