summary | refs | log | tree | commit | diff
path: root/io.c
diff options
context:
space:
mode:
author: luislavena <luislavena@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2011-12-04 01:10:06 +0000
committer: luislavena <luislavena@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2011-12-04 01:10:06 +0000
commit: f9a6a1dd0c687dfc6e63e5d20bc3812416def301 (patch)
tree: 2f77668d636ebbc6fd4deeb05dae91d97f71b38c /io.c
parent: 4a1cfe70dc31f9be8a12908d85b51297d61b8eae (diff)
Introduce NEED_READCONV and NEED_WRITECONV to replace universal newline decorator
Use CRLF only when required to improve file reading and writing under Windows.
Patch by Hiroshi Shirosaki. [ruby-core:40706] [Feature #5562]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@33937 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'io.c')
-rw-r--r-- io.c 133
1 files changed, 126 insertions, 7 deletions
diff --git a/io.c b/io.c
index 986e4dc084..d5ed5afd57 100644
--- a/io.c
+++ b/io.c
@@ -380,12 +380,66 @@ rb_cloexec_fcntl_dupfd(int fd, int minfd)
/* Windows */
# define DEFAULT_TEXTMODE FMODE_TEXTMODE
# define TEXTMODE_NEWLINE_DECORATOR_ON_WRITE ECONV_CRLF_NEWLINE_DECORATOR
+/*
+ * CRLF newline is set as default newline decorator.
+ * If only CRLF newline conversion is needed, we use binary IO process
+ * with OS's text mode for IO performance improvement.
+ * If encoding conversion is needed or a user sets text mode, we use encoding
+ * conversion IO process and universal newline decorator by default.
+ */
+#define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || (fptr)->encs.ecflags & ~ECONV_CRLF_NEWLINE_DECORATOR)
+#define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || ((fptr)->encs.ecflags & ((ECONV_DECORATOR_MASK & ~ECONV_CRLF_NEWLINE_DECORATOR)|ECONV_STATEFUL_DECORATOR_MASK)))
+#define SET_BINARY_MODE(fptr) setmode((fptr)->fd, O_BINARY)
+
+#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) do {\
+ if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) {\
+ if (((fptr)->mode & FMODE_READABLE) &&\
+ !((fptr)->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) {\
+ setmode((fptr)->fd, O_BINARY);\
+ }\
+ else {\
+ setmode((fptr)->fd, O_TEXT);\
+ }\
+ }\
+} while(0)
+
+#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) do {\
+ if ((enc2) && ((ecflags) & ECONV_DEFAULT_NEWLINE_DECORATOR)) {\
+ (ecflags) |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;\
+ }\
+} while(0)
+/*
+ * We use io_seek to back cursor position when changing mode from text to binary,
+ * but stdin and pipe cannot seek back. Stdin and pipe read should use encoding
+ * conversion for working properly with mode change.
+ */
+#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) do {\
+ if ((fptr)->rbuf.len > 0 && !((fptr)->mode & FMODE_DUPLEX)) {\
+ off_t r;\
+ errno = 0;\
+ r = io_seek((fptr), -(fptr)->rbuf.len, SEEK_CUR);\
+ if (r < 0 && errno) {\
+ if (errno == ESPIPE)\
+ (fptr)->mode |= FMODE_DUPLEX;\
+ }\
+ else {\
+ (fptr)->rbuf.off = 0;\
+ (fptr)->rbuf.len = 0;\
+ }\
+ }\
+ setmode((fptr)->fd, O_BINARY);\
+} while(0)
+
#else
/* Unix */
# define DEFAULT_TEXTMODE 0
-#endif
#define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || NEED_NEWLINE_DECORATOR_ON_READ(fptr))
#define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) || ((fptr)->encs.ecflags & (ECONV_DECORATOR_MASK|ECONV_STATEFUL_DECORATOR_MASK)))
+#define SET_BINARY_MODE(fptr) 0
+#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) 0
+#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) 0
+#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) 0
+#endif
#if !defined HAVE_SHUTDOWN && !defined shutdown
#define shutdown(a,b) 0
@@ -1051,6 +1105,7 @@ do_writeconv(VALUE str, rb_io_t *fptr)
{
if (NEED_WRITECONV(fptr)) {
VALUE common_encoding = Qnil;
+ SET_BINARY_MODE(fptr);
make_writeconv(fptr);
@@ -1080,6 +1135,20 @@ do_writeconv(VALUE str, rb_io_t *fptr)
str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT);
}
}
+#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
+#define fmode (fptr->mode)
+ else if (MODE_BTMODE(DEFAULT_TEXTMODE,0,1)) {
+ if ((fptr->mode & FMODE_READABLE) &&
+ !(fptr->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) {
+ setmode(fptr->fd, O_BINARY);
+ }
+ if (!rb_enc_asciicompat(rb_enc_get(str))) {
+ rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s",
+ rb_enc_name(rb_enc_get(str)));
+ }
+ }
+#undef fmode
+#endif
return str;
}
@@ -1992,6 +2061,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str)
int cr;
if (NEED_READCONV(fptr)) {
+ SET_BINARY_MODE(fptr);
io_setstrbuf(&str,0);
make_readconv(fptr, 0);
while (1) {
@@ -2013,6 +2083,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str)
}
}
+ NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
bytes = 0;
pos = 0;
@@ -2438,6 +2509,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
long limit = *lp;
if (NEED_READCONV(fptr)) {
+ SET_BINARY_MODE(fptr);
make_readconv(fptr, 0);
do {
const char *p, *e;
@@ -2480,6 +2552,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
return EOF;
}
+ NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
do {
long pending = READ_DATA_PENDING_COUNT(fptr);
if (pending > 0) {
@@ -2518,6 +2591,7 @@ swallow(rb_io_t *fptr, int term)
if (NEED_READCONV(fptr)) {
rb_encoding *enc = io_read_encoding(fptr);
int needconv = rb_enc_mbminlen(enc) != 1;
+ SET_BINARY_MODE(fptr);
make_readconv(fptr, 0);
do {
size_t cnt;
@@ -2541,6 +2615,7 @@ swallow(rb_io_t *fptr, int term)
return FALSE;
}
+ NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
do {
size_t cnt;
while ((cnt = READ_DATA_PENDING_COUNT(fptr)) > 0) {
@@ -2677,6 +2752,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
}
else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) &&
rb_enc_asciicompat(enc = io_read_encoding(fptr))) {
+ NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
return rb_io_getline_fast(fptr, enc, io);
}
else {
@@ -2686,6 +2762,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
int rspara = 0;
int extra_limit = 16;
+ SET_BINARY_MODE(fptr);
enc = io_read_encoding(fptr);
if (!NIL_P(rs)) {
@@ -3034,6 +3111,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
VALUE str = Qnil;
rb_encoding *read_enc = io_read_encoding(fptr);
+ SET_BINARY_MODE(fptr);
make_readconv(fptr, 0);
while (1) {
@@ -3078,6 +3156,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
return str;
}
+ NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
if (io_fillbuf(fptr) < 0) {
return Qnil;
}
@@ -3192,6 +3271,7 @@ rb_io_each_codepoint(VALUE io)
READ_CHECK(fptr);
if (NEED_READCONV(fptr)) {
+ SET_BINARY_MODE(fptr);
for (;;) {
make_readconv(fptr, 0);
for (;;) {
@@ -3232,6 +3312,7 @@ rb_io_each_codepoint(VALUE io)
rb_yield(UINT2NUM(c));
}
}
+ NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
enc = io_input_encoding(fptr);
for (;;) {
if (io_fillbuf(fptr) < 0) {
@@ -3435,6 +3516,7 @@ rb_io_ungetc(VALUE io, VALUE c)
SafeStringValue(c);
}
if (NEED_READCONV(fptr)) {
+ SET_BINARY_MODE(fptr);
len = RSTRING_LEN(c);
#if SIZEOF_LONG > SIZEOF_INT
if (len > INT_MAX)
@@ -3454,6 +3536,7 @@ rb_io_ungetc(VALUE io, VALUE c)
MEMMOVE(fptr->cbuf.ptr+fptr->cbuf.off, RSTRING_PTR(c), char, len);
}
else {
+ NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
io_ungetbyte(c, fptr);
}
return Qnil;
@@ -4162,6 +4245,14 @@ rb_io_binmode(VALUE io)
fptr->mode |= FMODE_BINMODE;
fptr->mode &= ~FMODE_TEXTMODE;
fptr->writeconv_pre_ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
+#ifdef O_BINARY
+ if (!fptr->readconv) {
+ SET_BINARY_MODE_WITH_SEEK_CUR(fptr);
+ }
+ else {
+ setmode(fptr->fd, O_BINARY);
+ }
+#endif
return io;
}
@@ -4181,6 +4272,7 @@ rb_io_ascii8bit_binmode(VALUE io)
}
fptr->mode |= FMODE_BINMODE;
fptr->mode &= ~FMODE_TEXTMODE;
+ SET_BINARY_MODE_WITH_SEEK_CUR(fptr);
fptr->encs.enc = rb_ascii8bit_encoding();
fptr->encs.enc2 = NULL;
@@ -4703,6 +4795,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE,
0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
#endif
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecopts = Qnil;
}
else {
@@ -4743,13 +4836,14 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE,
0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
#endif
- ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags);
if (rb_io_extract_encoding_option(opthash, &enc, &enc2, &fmode)) {
if (has_enc) {
rb_raise(rb_eArgError, "encoding specified twice");
}
}
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
+ ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags);
}
validate_enc_binmode(&fmode, ecflags, enc, enc2);
@@ -4794,9 +4888,6 @@ rb_sysopen(VALUE fname, int oflags, mode_t perm)
int fd;
struct sysopen_struct data;
-#ifdef O_BINARY
- oflags |= O_BINARY;
-#endif
data.fname = rb_str_encode_ospath(fname);
data.oflags = oflags;
data.perm = perm;
@@ -5482,6 +5573,11 @@ pipe_open(struct rb_exec_arg *eargp, VALUE prog, const char *modestr, int fmode,
fptr->mode = fmode | FMODE_SYNC|FMODE_DUPLEX;
if (convconfig) {
fptr->encs = *convconfig;
+#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
+ if (fptr->encs.ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) {
+ fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
+ }
+#endif
}
else {
if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) {
@@ -6673,6 +6769,9 @@ prep_stdio(FILE *f, int fmode, VALUE klass, const char *path)
fptr->encs.ecflags |= ECONV_DEFAULT_NEWLINE_DECORATOR;
#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE
fptr->encs.ecflags |= TEXTMODE_NEWLINE_DECORATOR_ON_WRITE;
+ if (fmode & FMODE_READABLE) {
+ fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
+ }
#endif
fptr->stdio_file = f;
@@ -8527,22 +8626,26 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
}
else
enc = rb_to_encoding(v2);
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
}
else {
if (NIL_P(v1)) {
/* Set to default encodings */
rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecopts = Qnil;
}
else {
tmp = rb_check_string_type(v1);
if (!NIL_P(tmp) && rb_enc_asciicompat(rb_enc_get(tmp))) {
parse_mode_enc(RSTRING_PTR(tmp), &enc, &enc2, NULL);
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
}
else {
rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2);
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecopts = Qnil;
}
}
@@ -8661,13 +8764,22 @@ rb_io_s_pipe(int argc, VALUE *argv, VALUE klass)
extract_binmode(opt, &fmode);
#if DEFAULT_TEXTMODE
- if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE))
+ if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) {
fptr->mode &= ~FMODE_TEXTMODE;
+ setmode(fptr->fd, O_BINARY);
+ }
+#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
+ if (fptr->encs.ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) {
+ fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
+ }
+#endif
#endif
fptr->mode |= fmode;
#if DEFAULT_TEXTMODE
- if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE))
+ if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) {
fptr2->mode &= ~FMODE_TEXTMODE;
+ setmode(fptr2->fd, O_BINARY);
+ }
#endif
fptr2->mode |= fmode;
@@ -9590,6 +9702,13 @@ copy_stream_body(VALUE arg)
}
stp->dst_fd = dst_fd;
+#ifdef O_BINARY
+ if (src_fptr)
+ SET_BINARY_MODE_WITH_SEEK_CUR(src_fptr);
+ if (dst_fptr)
+ setmode(dst_fd, O_BINARY);
+#endif
+
if (stp->src_offset == (off_t)-1 && src_fptr && src_fptr->rbuf.len) {
size_t len = src_fptr->rbuf.len;
VALUE str;