diff options
Diffstat (limited to 'io.c')
| -rw-r--r-- | io.c | 1508 |
1 files changed, 953 insertions, 555 deletions
@@ -104,6 +104,16 @@ #ifdef HAVE_COPYFILE_H # include <copyfile.h> + +# ifndef COPYFILE_STATE_COPIED +/* + * Some OSes (e.g., OSX < 10.6) implement fcopyfile() but not + * COPYFILE_STATE_COPIED. Since the only use of the former here + * requires the latter, we disable the former when the latter is undefined. + */ +# undef HAVE_FCOPYFILE +# endif + #endif #include "ruby/internal/stdbool.h" @@ -112,6 +122,7 @@ #include "encindex.h" #include "id.h" #include "internal.h" +#include "internal/class.h" #include "internal/encoding.h" #include "internal/error.h" #include "internal/inits.h" @@ -209,7 +220,18 @@ static VALUE sym_DATA; static VALUE sym_HOLE; #endif -static VALUE prep_io(int fd, int fmode, VALUE klass, const char *path); +static VALUE prep_io(int fd, enum rb_io_mode fmode, VALUE klass, const char *path); + +VALUE +rb_io_blocking_region_wait(struct rb_io *io, rb_blocking_function_t *function, void *argument, enum rb_io_event events) +{ + return rb_thread_io_blocking_call(io, function, argument, events); +} + +VALUE rb_io_blocking_region(struct rb_io *io, rb_blocking_function_t *function, void *argument) +{ + return rb_io_blocking_region_wait(io, function, argument, 0); +} struct argf { VALUE filename, current_file; @@ -478,6 +500,7 @@ rb_cloexec_fcntl_dupfd(int fd, int minfd) #define argf_of(obj) (*(struct argf *)DATA_PTR(obj)) #define ARGF argf_of(argf) +#define ARGF_SET(field, value) RB_OBJ_WRITE(argf, &ARGF.field, value) #define GetWriteIO(io) rb_io_get_write_io(io) @@ -519,7 +542,8 @@ rb_cloexec_fcntl_dupfd(int fd, int minfd) #endif static int io_fflush(rb_io_t *); -static rb_io_t *flush_before_seek(rb_io_t *fptr); +static rb_io_t *flush_before_seek(rb_io_t *fptr, bool discard_rbuf); +static void clear_codeconv(rb_io_t *fptr); #define FMODE_SIGNAL_ON_EPIPE (1<<17) @@ -533,10 +557,12 @@ static rb_io_t *flush_before_seek(rb_io_t *fptr); extern ID ruby_static_id_signo; -NORETURN(static void raise_on_write(rb_io_t *fptr, int e, VALUE errinfo)); +NORETURN(static 
void rb_sys_fail_on_write(rb_io_t *fptr)); static void -raise_on_write(rb_io_t *fptr, int e, VALUE errinfo) +rb_sys_fail_on_write(rb_io_t *fptr) { + int e = errno; + VALUE errinfo = rb_syserr_new_path(e, (fptr)->pathv); #if defined EPIPE if (fptr_signal_on_epipe(fptr) && (e == EPIPE)) { const VALUE sig = @@ -550,12 +576,6 @@ raise_on_write(rb_io_t *fptr, int e, VALUE errinfo) rb_exc_raise(errinfo); } -#define rb_sys_fail_on_write(fptr) \ - do { \ - int e = errno; \ - raise_on_write(fptr, e, rb_syserr_new_path(e, (fptr)->pathv)); \ - } while (0) - #define NEED_NEWLINE_DECORATOR_ON_READ(fptr) ((fptr)->mode & FMODE_TEXTMODE) #define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) ((fptr)->mode & FMODE_TEXTMODE) #if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) @@ -608,7 +628,7 @@ raise_on_write(rb_io_t *fptr, int e, VALUE errinfo) * IO unread with taking care of removed '\r' in text mode. */ static void -io_unread(rb_io_t *fptr) +io_unread(rb_io_t *fptr, bool discard_rbuf) { rb_off_t r, pos; ssize_t read_size; @@ -629,19 +649,17 @@ io_unread(rb_io_t *fptr) if (r < 0 && errno) { if (errno == ESPIPE) fptr->mode |= FMODE_DUPLEX; - return; + if (!discard_rbuf) return; } - fptr->rbuf.off = 0; - fptr->rbuf.len = 0; - return; + goto end; } pos = lseek(fptr->fd, 0, SEEK_CUR); if (pos < 0 && errno) { if (errno == ESPIPE) fptr->mode |= FMODE_DUPLEX; - return; + if (!discard_rbuf) goto end; } /* add extra offset for removed '\r' in rbuf */ @@ -682,8 +700,10 @@ io_unread(rb_io_t *fptr) } } free(buf); + end: fptr->rbuf.off = 0; fptr->rbuf.len = 0; + clear_codeconv(fptr); return; } @@ -702,7 +722,7 @@ set_binary_mode_with_seek_cur(rb_io_t *fptr) if (fptr->rbuf.len == 0 || fptr->mode & FMODE_DUPLEX) { return setmode(fptr->fd, O_BINARY); } - flush_before_seek(fptr); + flush_before_seek(fptr, false); return setmode(fptr->fd, O_BINARY); } #define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) set_binary_mode_with_seek_cur(fptr) @@ -898,7 +918,7 @@ rb_io_s_try_convert(VALUE dummy, VALUE io) #if 
!RUBY_CRLF_ENVIRONMENT static void -io_unread(rb_io_t *fptr) +io_unread(rb_io_t *fptr, bool discard_rbuf) { rb_off_t r; rb_io_check_closed(fptr); @@ -910,10 +930,11 @@ io_unread(rb_io_t *fptr) if (r < 0 && errno) { if (errno == ESPIPE) fptr->mode |= FMODE_DUPLEX; - return; + if (!discard_rbuf) return; } fptr->rbuf.off = 0; fptr->rbuf.len = 0; + clear_codeconv(fptr); return; } #endif @@ -954,17 +975,17 @@ io_ungetbyte(VALUE str, rb_io_t *fptr) } static rb_io_t * -flush_before_seek(rb_io_t *fptr) +flush_before_seek(rb_io_t *fptr, bool discard_rbuf) { if (io_fflush(fptr) < 0) rb_sys_fail_on_write(fptr); - io_unread(fptr); + io_unread(fptr, discard_rbuf); errno = 0; return fptr; } -#define io_seek(fptr, ofs, whence) (errno = 0, lseek(flush_before_seek(fptr)->fd, (ofs), (whence))) -#define io_tell(fptr) lseek(flush_before_seek(fptr)->fd, 0, SEEK_CUR) +#define io_seek(fptr, ofs, whence) (errno = 0, lseek(flush_before_seek(fptr, true)->fd, (ofs), (whence))) +#define io_tell(fptr) lseek(flush_before_seek(fptr, false)->fd, 0, SEEK_CUR) #ifndef SEEK_CUR # define SEEK_SET 0 @@ -1032,7 +1053,7 @@ rb_io_check_writable(rb_io_t *fptr) rb_raise(rb_eIOError, "not opened for writing"); } if (fptr->rbuf.len) { - io_unread(fptr); + io_unread(fptr, true); } } @@ -1090,7 +1111,7 @@ ruby_dup(int orig) static VALUE io_alloc(VALUE klass) { - NEWOBJ_OF(io, struct RFile, klass, T_FILE, sizeof(struct RFile), 0); + UNPROTECTED_NEWOBJ_OF(io, struct RFile, klass, T_FILE, sizeof(struct RFile)); io->fptr = 0; @@ -1146,6 +1167,11 @@ static int nogvl_wait_for(VALUE th, rb_io_t *fptr, short events, struct timeval static inline int io_internal_wait(VALUE thread, rb_io_t *fptr, int error, int events, struct timeval *timeout) { + if (!timeout && rb_thread_mn_schedulable(thread)) { + RUBY_ASSERT(errno == EWOULDBLOCK || errno == EAGAIN); + return -1; + } + int ready = nogvl_wait_for(thread, fptr, events, timeout); if (ready > 0) { @@ -1156,8 +1182,15 @@ io_internal_wait(VALUE thread, rb_io_t *fptr, int 
error, int events, struct time return -1; } - errno = error; - return -1; + // If there was an error BEFORE we started waiting, return it: + if (error) { + errno = error; + return -1; + } + else { + // Otherwise, whatever error was generated by `nogvl_wait_for` is the one we want: + return ready; + } } static VALUE @@ -1259,7 +1292,8 @@ internal_writev_func(void *ptr) static ssize_t rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count) { - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { VALUE result = rb_fiber_scheduler_io_read_memory(scheduler, fptr->self, buf, count, 0); @@ -1269,7 +1303,7 @@ rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count) } struct io_internal_read_struct iis = { - .th = rb_thread_current(), + .th = th->self, .fptr = fptr, .nonblock = 0, .fd = fptr->fd, @@ -1286,13 +1320,14 @@ rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count) iis.timeout = &timeout_storage; } - return (ssize_t)rb_thread_io_blocking_call(internal_read_func, &iis, fptr->fd, RB_WAITFD_IN); + return (ssize_t)rb_io_blocking_region_wait(fptr, internal_read_func, &iis, RUBY_IO_READABLE); } static ssize_t rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count) { - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { VALUE result = rb_fiber_scheduler_io_write_memory(scheduler, fptr->self, buf, count, 0); @@ -1302,7 +1337,7 @@ rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count) } struct io_internal_write_struct iis = { - .th = rb_thread_current(), + .th = th->self, .fptr = fptr, .nonblock = 0, .fd = fptr->fd, @@ -1319,7 +1354,7 @@ rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count) iis.timeout = &timeout_storage; } - return (ssize_t)rb_thread_io_blocking_call(internal_write_func, &iis, 
fptr->fd, RB_WAITFD_OUT); + return (ssize_t)rb_io_blocking_region_wait(fptr, internal_write_func, &iis, RUBY_IO_WRITABLE); } #ifdef HAVE_WRITEV @@ -1328,7 +1363,9 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt) { if (!iovcnt) return 0; - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { // This path assumes at least one `iov`: VALUE result = rb_fiber_scheduler_io_write_memory(scheduler, fptr->self, iov[0].iov_base, iov[0].iov_len, 0); @@ -1339,7 +1376,7 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt) } struct io_internal_writev_struct iis = { - .th = rb_thread_current(), + .th = th->self, .fptr = fptr, .nonblock = 0, .fd = fptr->fd, @@ -1356,7 +1393,7 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt) iis.timeout = &timeout_storage; } - return (ssize_t)rb_thread_io_blocking_call(internal_writev_func, &iis, fptr->fd, RB_WAITFD_OUT); + return (ssize_t)rb_io_blocking_region_wait(fptr, internal_writev_func, &iis, RUBY_IO_WRITABLE); } #endif @@ -1382,11 +1419,35 @@ io_flush_buffer_sync(void *arg) return (VALUE)-1; } +static inline VALUE +io_flush_buffer_fiber_scheduler(VALUE scheduler, rb_io_t *fptr) +{ + VALUE ret = rb_fiber_scheduler_io_write_memory(scheduler, fptr->self, fptr->wbuf.ptr+fptr->wbuf.off, fptr->wbuf.len, 0); + if (!UNDEF_P(ret)) { + ssize_t result = rb_fiber_scheduler_io_result_apply(ret); + if (result > 0) { + fptr->wbuf.off += result; + fptr->wbuf.len -= result; + } + return result >= 0 ? 
(VALUE)0 : (VALUE)-1; + } + return ret; +} + static VALUE io_flush_buffer_async(VALUE arg) { rb_io_t *fptr = (rb_io_t *)arg; - return rb_thread_io_blocking_call(io_flush_buffer_sync, fptr, fptr->fd, RB_WAITFD_OUT); + + VALUE scheduler = rb_fiber_scheduler_current(); + if (scheduler != Qnil) { + VALUE result = io_flush_buffer_fiber_scheduler(scheduler, fptr); + if (!UNDEF_P(result)) { + return result; + } + } + + return rb_io_blocking_region_wait(fptr, io_flush_buffer_sync, fptr, RUBY_IO_WRITABLE); } static inline int @@ -1421,7 +1482,8 @@ io_fflush(rb_io_t *fptr) VALUE rb_io_wait(VALUE io, VALUE events, VALUE timeout) { - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { return rb_fiber_scheduler_io_wait(scheduler, io, events, timeout); @@ -1442,7 +1504,7 @@ rb_io_wait(VALUE io, VALUE events, VALUE timeout) tv = &tv_storage; } - int ready = rb_thread_wait_for_single_fd(fptr->fd, RB_NUM2INT(events), tv); + int ready = rb_thread_io_wait(th, fptr, RB_NUM2INT(events), tv); if (ready < 0) { rb_sys_fail(0); @@ -1466,17 +1528,15 @@ io_from_fd(int fd) } static int -io_wait_for_single_fd(int fd, int events, struct timeval *timeout) +io_wait_for_single_fd(int fd, int events, struct timeval *timeout, rb_thread_t *th, VALUE scheduler) { - VALUE scheduler = rb_fiber_scheduler_current(); - if (scheduler != Qnil) { return RTEST( rb_fiber_scheduler_io_wait(scheduler, io_from_fd(fd), RB_INT2NUM(events), rb_fiber_scheduler_make_timeout(timeout)) ); } - return rb_thread_wait_for_single_fd(fd, events, timeout); + return rb_thread_wait_for_single_fd(th, fd, events, timeout); } int @@ -1484,7 +1544,8 @@ rb_io_wait_readable(int f) { io_fd_check_closed(f); - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); switch (errno) { case EINTR: @@ -1504,7 +1565,7 @@ 
rb_io_wait_readable(int f) ); } else { - io_wait_for_single_fd(f, RUBY_IO_READABLE, NULL); + io_wait_for_single_fd(f, RUBY_IO_READABLE, NULL, th, scheduler); } return TRUE; @@ -1518,7 +1579,8 @@ rb_io_wait_writable(int f) { io_fd_check_closed(f); - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); switch (errno) { case EINTR: @@ -1547,7 +1609,7 @@ rb_io_wait_writable(int f) ); } else { - io_wait_for_single_fd(f, RUBY_IO_WRITABLE, NULL); + io_wait_for_single_fd(f, RUBY_IO_WRITABLE, NULL, th, scheduler); } return TRUE; @@ -1559,7 +1621,9 @@ rb_io_wait_writable(int f) int rb_wait_for_single_fd(int fd, int events, struct timeval *timeout) { - return io_wait_for_single_fd(fd, events, timeout); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); + return io_wait_for_single_fd(fd, events, timeout, th, scheduler); } int @@ -1611,7 +1675,7 @@ rb_io_maybe_wait(int error, VALUE io, VALUE events, VALUE timeout) default: // Non-specific error, no event is ready: - return Qfalse; + return Qnil; } } @@ -1623,9 +1687,11 @@ rb_io_maybe_wait_readable(int error, VALUE io, VALUE timeout) if (RTEST(result)) { return RB_NUM2INT(result); } - else { - return 0; + else if (result == RUBY_Qfalse) { + rb_raise(rb_eIOTimeoutError, "Timed out waiting for IO to become readable!"); } + + return 0; } int @@ -1636,9 +1702,11 @@ rb_io_maybe_wait_writable(int error, VALUE io, VALUE timeout) if (RTEST(result)) { return RB_NUM2INT(result); } - else { - return 0; + else if (result == RUBY_Qfalse) { + rb_raise(rb_eIOTimeoutError, "Timed out waiting for IO to become writable!"); } + + return 0; } static void @@ -1699,7 +1767,6 @@ make_writeconv(rb_io_t *fptr) /* writing functions */ struct binwrite_arg { rb_io_t *fptr; - VALUE str; const char *ptr; long length; }; @@ -1849,7 +1916,7 @@ io_binwrite_requires_flush_write(rb_io_t *fptr, long len, int nosync) } 
static long -io_binwrite(VALUE str, const char *ptr, long len, rb_io_t *fptr, int nosync) +io_binwrite(const char *ptr, long len, rb_io_t *fptr, int nosync) { if (len <= 0) return len; @@ -1862,7 +1929,6 @@ io_binwrite(VALUE str, const char *ptr, long len, rb_io_t *fptr, int nosync) struct binwrite_arg arg; arg.fptr = fptr; - arg.str = str; arg.ptr = ptr; arg.length = len; @@ -1970,9 +2036,9 @@ io_fwrite(VALUE str, rb_io_t *fptr, int nosync) if (converted) OBJ_FREEZE(str); - tmp = rb_str_tmp_frozen_acquire(str); + tmp = rb_str_tmp_frozen_no_embed_acquire(str); RSTRING_GETMEM(tmp, ptr, len); - n = io_binwrite(tmp, ptr, len, fptr, nosync); + n = io_binwrite(ptr, len, fptr, nosync); rb_str_tmp_frozen_release(str, tmp); return n; @@ -1985,7 +2051,7 @@ rb_io_bufwrite(VALUE io, const void *buf, size_t size) GetOpenFile(io, fptr); rb_io_check_writable(fptr); - return (ssize_t)io_binwrite(0, buf, (long)size, fptr, 0); + return (ssize_t)io_binwrite(buf, (long)size, fptr, 0); } static VALUE @@ -2277,7 +2343,7 @@ rb_io_writev(VALUE io, int argc, const VALUE *argv) if (argc > 1 && rb_obj_method_arity(io, id_write) == 1) { if (io != rb_ractor_stderr() && RTEST(ruby_verbose)) { VALUE klass = CLASS_OF(io); - char sep = FL_TEST(klass, FL_SINGLETON) ? (klass = io, '.') : '#'; + char sep = RCLASS_SINGLETON_P(klass) ? 
(klass = io, '.') : '#'; rb_category_warning( RB_WARN_CATEGORY_DEPRECATED, "%+"PRIsVALUE"%c""write is outdated interface" " which accepts just one argument", @@ -2351,7 +2417,7 @@ rb_io_flush_raw(VALUE io, int sync) rb_sys_fail_on_write(fptr); } if (fptr->mode & FMODE_READABLE) { - io_unread(fptr); + io_unread(fptr, true); } return io; @@ -2470,7 +2536,7 @@ interpret_seek_whence(VALUE vwhence) * f.tell # => 12 * f.close * - * - +:SET+ or <tt>IO:SEEK_SET</tt>: + * - +:SET+ or <tt>IO::SEEK_SET</tt>: * Repositions the stream to the given +offset+: * * f = File.open('t.txt') @@ -2594,9 +2660,6 @@ io_fillbuf(rb_io_t *fptr) fptr->rbuf.len = 0; fptr->rbuf.capa = IO_RBUF_CAPA_FOR(fptr); fptr->rbuf.ptr = ALLOC_N(char, fptr->rbuf.capa); -#ifdef _WIN32 - fptr->rbuf.capa--; -#endif } if (fptr->rbuf.len == 0) { retry: @@ -2671,7 +2734,7 @@ rb_io_eof(VALUE io) READ_CHECK(fptr); #if RUBY_CRLF_ENVIRONMENT if (!NEED_READCONV(fptr) && NEED_NEWLINE_DECORATOR_ON_READ(fptr)) { - return RBOOL(eof(fptr->fd));; + return RBOOL(eof(fptr->fd)); } #endif return RBOOL(io_fillbuf(fptr) < 0); @@ -2774,8 +2837,10 @@ rb_io_fsync(VALUE io) if (io_fflush(fptr) < 0) rb_sys_fail_on_write(fptr); - if ((int)rb_thread_io_blocking_region(nogvl_fsync, fptr, fptr->fd) < 0) + + if ((int)rb_io_blocking_region(fptr, nogvl_fsync, fptr)) rb_sys_fail_path(fptr->pathv); + return INT2FIX(0); } #else @@ -2824,7 +2889,7 @@ rb_io_fdatasync(VALUE io) if (io_fflush(fptr) < 0) rb_sys_fail_on_write(fptr); - if ((int)rb_thread_io_blocking_region(nogvl_fdatasync, fptr, fptr->fd) == 0) + if ((int)rb_io_blocking_region(fptr, nogvl_fdatasync, fptr) == 0) return INT2FIX(0); /* fall back */ @@ -3120,8 +3185,6 @@ io_enc_str(VALUE str, rb_io_t *fptr) return str; } -static rb_encoding *io_read_encoding(rb_io_t *fptr); - static void make_readconv(rb_io_t *fptr, int size) { @@ -3264,10 +3327,6 @@ io_shift_cbuf(rb_io_t *fptr, int len, VALUE *strp) static int io_setstrbuf(VALUE *str, long len) { -#ifdef _WIN32 - if (len > 0) - len = 
(len + 1) & ~1L; /* round up for wide char */ -#endif if (NIL_P(*str)) { *str = rb_str_new(0, len); return TRUE; @@ -3409,10 +3468,10 @@ io_read_memory_call(VALUE arg) } if (iis->nonblock) { - return rb_thread_io_blocking_call(internal_read_func, iis, iis->fptr->fd, 0); + return rb_io_blocking_region(iis->fptr, internal_read_func, iis); } else { - return rb_thread_io_blocking_call(internal_read_func, iis, iis->fptr->fd, RB_WAITFD_IN); + return rb_io_blocking_region_wait(iis->fptr, internal_read_func, iis, RUBY_IO_READABLE); } } @@ -3820,8 +3879,33 @@ rscheck(const char *rsptr, long rslen, VALUE rs) rb_raise(rb_eRuntimeError, "rs modified"); } +static const char * +search_delim(const char *p, long len, int delim, rb_encoding *enc) +{ + if (rb_enc_mbminlen(enc) == 1) { + p = memchr(p, delim, len); + if (p) return p + 1; + } + else { + const char *end = p + len; + while (p < end) { + int r = rb_enc_precise_mbclen(p, end, enc); + if (!MBCLEN_CHARFOUND_P(r)) { + p += rb_enc_mbminlen(enc); + continue; + } + int n = MBCLEN_CHARFOUND_LEN(r); + if (rb_enc_mbc_to_codepoint(p, end, enc) == (unsigned int)delim) { + return p + n; + } + p += n; + } + } + return NULL; +} + static int -appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) +appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp, rb_encoding *enc) { VALUE str = *strp; long limit = *lp; @@ -3836,9 +3920,9 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) p = READ_CHAR_PENDING_PTR(fptr); if (0 < limit && limit < searchlen) searchlen = (int)limit; - e = memchr(p, delim, searchlen); + e = search_delim(p, searchlen, delim, enc); if (e) { - int len = (int)(e-p+1); + int len = (int)(e-p); if (NIL_P(str)) *strp = str = rb_str_new(p, len); else @@ -3878,8 +3962,8 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) long last; if (limit > 0 && pending > limit) pending = limit; - e = memchr(p, delim, pending); - if (e) pending = e - p + 1; + e = search_delim(p, pending, delim, enc); + if (e) 
pending = e - p; if (!NIL_P(str)) { last = RSTRING_LEN(str); rb_str_resize(str, last + pending); @@ -4134,21 +4218,31 @@ rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr) rs = 0; if (!rb_enc_asciicompat(enc)) { rs = rb_usascii_str_new(rsptr, rslen); - rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil); + rs = rb_str_conv_enc(rs, 0, enc); OBJ_FREEZE(rs); rsptr = RSTRING_PTR(rs); rslen = RSTRING_LEN(rs); } + newline = '\n'; + } + else if (rb_enc_mbminlen(enc) == 1) { + rsptr = RSTRING_PTR(rs); + newline = (unsigned char)rsptr[rslen - 1]; } else { + rs = rb_str_conv_enc(rs, 0, enc); rsptr = RSTRING_PTR(rs); + const char *e = rsptr + rslen; + const char *last = rb_enc_prev_char(rsptr, e, e, enc); + int n; + newline = rb_enc_codepoint_len(last, e, &n, enc); + if (last + n != e) rb_raise(rb_eArgError, "broken separator"); } - newline = (unsigned char)rsptr[rslen - 1]; - chomp_cr = chomp && rslen == 1 && newline == '\n'; + chomp_cr = chomp && newline == '\n' && rslen == rb_enc_mbminlen(enc); } /* MS - Optimization */ - while ((c = appendline(fptr, newline, &str, &limit)) != EOF) { + while ((c = appendline(fptr, newline, &str, &limit, enc)) != EOF) { const char *s, *p, *pp, *e; if (c == newline) { @@ -4169,8 +4263,8 @@ rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr) if (limit == 0) { s = RSTRING_PTR(str); p = RSTRING_END(str); - pp = rb_enc_left_char_head(s, p-1, p, enc); - if (extra_limit && + pp = rb_enc_prev_char(s, p, p, enc); + if (extra_limit && pp && MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(pp, p, enc))) { /* relax the limit while incomplete character. 
* extra_limit limits the relax length */ @@ -4236,11 +4330,17 @@ rb_io_gets(VALUE io) } VALUE -rb_io_gets_internal(VALUE io) +rb_io_gets_limit_internal(VALUE io, long limit) { rb_io_t *fptr; GetOpenFile(io, fptr); - return rb_io_getline_0(rb_default_rs, -1, FALSE, fptr); + return rb_io_getline_0(rb_default_rs, limit, FALSE, fptr); +} + +VALUE +rb_io_gets_internal(VALUE io) +{ + return rb_io_gets_limit_internal(io, -1); } /* @@ -4297,11 +4397,8 @@ rb_io_gets_internal(VALUE io) * File.open('t.txt') {|f| f.gets(12) } # => "First line\n" * * With arguments +sep+ and +limit+ given, - * combines the two behaviors: - * - * - Returns the next line as determined by line separator +sep+, - * or +nil+ if none. - * - But returns no more bytes than are allowed by the limit. + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword argument +chomp+ specifies whether line separators * are to be omitted: @@ -4372,23 +4469,31 @@ rb_io_set_lineno(VALUE io, VALUE lineno) static VALUE io_readline(rb_execution_context_t *ec, VALUE io, VALUE sep, VALUE lim, VALUE chomp) { + long limit = -1; if (NIL_P(lim)) { + VALUE tmp = Qnil; // If sep is specified, but it's not a string and not nil, then assume // it's the limit (it should be an integer) - if (!NIL_P(sep) && NIL_P(rb_check_string_type(sep))) { + if (!NIL_P(sep) && NIL_P(tmp = rb_check_string_type(sep))) { // If the user has specified a non-nil / non-string value // for the separator, we assume it's the limit and set the // separator to default: rb_rs. lim = sep; + limit = NUM2LONG(lim); sep = rb_rs; } + else { + sep = tmp; + } } - - if (!NIL_P(sep)) { - StringValue(sep); + else { + if (!NIL_P(sep)) StringValue(sep); + limit = NUM2LONG(lim); } - VALUE line = rb_io_getline_1(sep, NIL_P(lim) ? 
-1L : NUM2LONG(lim), RTEST(chomp), io); + check_getline_args(&sep, &limit, io); + + VALUE line = rb_io_getline_1(sep, limit, RTEST(chomp), io); rb_lastline_set_up(line, 1); if (NIL_P(line)) { @@ -4450,10 +4555,8 @@ static VALUE io_readlines(const struct getline_arg *arg, VALUE io); * f.close * * With arguments +sep+ and +limit+ given, - * combines the two behaviors: - * - * - Returns lines as determined by line separator +sep+. - * - But returns no more bytes in a line than are allowed by the limit. + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword argument +chomp+ specifies whether line separators * are to be omitted: @@ -4573,10 +4676,8 @@ io_readlines(const struct getline_arg *arg, VALUE io) * "ne\n" * * With arguments +sep+ and +limit+ given, - * combines the two behaviors: - * - * - Calls with the next line as determined by line separator +sep+. - * - But returns no more bytes than are allowed by the limit. + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword argument +chomp+ specifies whether line separators * are to be omitted: @@ -4620,10 +4721,11 @@ rb_io_each_line(int argc, VALUE *argv, VALUE io) * Calls the given block with each byte (0..255) in the stream; returns +self+. * See {Byte IO}[rdoc-ref:IO@Byte+IO]. * - * f = File.new('t.rus') + * File.read('t.ja') # => "こんにちは" + * f = File.new('t.ja') * a = [] * f.each_byte {|b| a << b } - * a # => [209, 130, 208, 181, 209, 129, 209, 130] + * a # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] * f.close * * Returns an Enumerator if no block is given. @@ -4768,10 +4870,11 @@ io_getc(rb_io_t *fptr, rb_encoding *enc) * Calls the given block with each character in the stream; returns +self+. * See {Character IO}[rdoc-ref:IO@Character+IO]. 
* - * f = File.new('t.rus') + * File.read('t.ja') # => "こんにちは" + * f = File.new('t.ja') * a = [] * f.each_char {|c| a << c.ord } - * a # => [1090, 1077, 1089, 1090] + * a # => [12371, 12435, 12395, 12385, 12399] * f.close * * Returns an Enumerator if no block is given. @@ -4806,10 +4909,11 @@ rb_io_each_char(VALUE io) * * Calls the given block with each codepoint in the stream; returns +self+: * - * f = File.new('t.rus') + * File.read('t.ja') # => "こんにちは" + * f = File.new('t.ja') * a = [] * f.each_codepoint {|c| a << c } - * a # => [1090, 1077, 1089, 1090] + * a # => [12371, 12435, 12395, 12385, 12399] * f.close * * Returns an Enumerator if no block is given. @@ -4831,6 +4935,7 @@ rb_io_each_codepoint(VALUE io) rb_io_check_char_readable(fptr); READ_CHECK(fptr); + enc = io_read_encoding(fptr); if (NEED_READCONV(fptr)) { SET_BINARY_MODE(fptr); r = 1; /* no invalid char yet */ @@ -4838,12 +4943,9 @@ rb_io_each_codepoint(VALUE io) make_readconv(fptr, 0); for (;;) { if (fptr->cbuf.len) { - if (fptr->encs.enc) - r = rb_enc_precise_mbclen(fptr->cbuf.ptr+fptr->cbuf.off, - fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len, - fptr->encs.enc); - else - r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); + r = rb_enc_precise_mbclen(fptr->cbuf.ptr+fptr->cbuf.off, + fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len, + enc); if (!MBCLEN_NEEDMORE_P(r)) break; if (fptr->cbuf.len == fptr->cbuf.capa) { @@ -4853,33 +4955,25 @@ rb_io_each_codepoint(VALUE io) if (more_char(fptr) == MORE_CHAR_FINISHED) { clear_readconv(fptr); if (!MBCLEN_CHARFOUND_P(r)) { - enc = fptr->encs.enc; goto invalid; } return io; } } if (MBCLEN_INVALID_P(r)) { - enc = fptr->encs.enc; goto invalid; } n = MBCLEN_CHARFOUND_LEN(r); - if (fptr->encs.enc) { - c = rb_enc_codepoint(fptr->cbuf.ptr+fptr->cbuf.off, - fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len, - fptr->encs.enc); - } - else { - c = (unsigned char)fptr->cbuf.ptr[fptr->cbuf.off]; - } + c = rb_enc_codepoint(fptr->cbuf.ptr+fptr->cbuf.off, + 
fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len, + enc); fptr->cbuf.off += n; fptr->cbuf.len -= n; rb_yield(UINT2NUM(c)); - rb_io_check_byte_readable(fptr); + rb_io_check_char_readable(fptr); } } NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); - enc = io_input_encoding(fptr); while (io_fillbuf(fptr) >= 0) { r = rb_enc_precise_mbclen(fptr->rbuf.ptr+fptr->rbuf.off, fptr->rbuf.ptr+fptr->rbuf.off+fptr->rbuf.len, enc); @@ -4933,8 +5027,9 @@ rb_io_each_codepoint(VALUE io) * f = File.open('t.txt') * f.getc # => "F" * f.close - * f = File.open('t.rus') - * f.getc.ord # => 1090 + * File.read('t.ja') # => "こんにちは" + * f = File.open('t.ja') + * f.getc.ord # => 12371 * f.close * * Related: IO#readchar (may raise EOFError). @@ -4966,8 +5061,9 @@ rb_io_getc(VALUE io) * f = File.open('t.txt') * f.readchar # => "F" * f.close - * f = File.open('t.rus') - * f.readchar.ord # => 1090 + * File.read('t.ja') # => "こんにちは" + * f = File.open('t.ja') + * f.readchar.ord # => 12371 * f.close * * Related: IO#getc (will not raise EOFError). @@ -4996,8 +5092,9 @@ rb_io_readchar(VALUE io) * f = File.open('t.txt') * f.getbyte # => 70 * f.close - * f = File.open('t.rus') - * f.getbyte # => 209 + * File.read('t.ja') # => "こんにちは" + * f = File.open('t.ja') + * f.getbyte # => 227 * f.close * * Related: IO#readbyte (may raise EOFError). @@ -5040,8 +5137,9 @@ rb_io_getbyte(VALUE io) * f = File.open('t.txt') * f.readbyte # => 70 * f.close - * f = File.open('t.rus') - * f.readbyte # => 209 + * File.read('t.ja') # => "こんにちは" + * f = File.open('t.ja') + * f.readbyte # => 227 * f.close * * Related: IO#getbyte (will not raise EOFError). 
@@ -5114,7 +5212,7 @@ rb_io_ungetbyte(VALUE io, VALUE b) b = rb_str_new((const char *)&c, 1); break; default: - SafeStringValue(b); + StringValue(b); } io_ungetbyte(b, fptr); return Qnil; @@ -5176,7 +5274,7 @@ rb_io_ungetc(VALUE io, VALUE c) c = rb_enc_uint_chr(NUM2UINT(c), io_read_encoding(fptr)); } else { - SafeStringValue(c); + StringValue(c); } if (NEED_READCONV(fptr)) { SET_BINARY_MODE(fptr); @@ -5422,7 +5520,7 @@ maygvl_close(int fd, int keepgvl) * close() may block for certain file types (NFS, SO_LINGER sockets, * inotify), so let other threads run. */ - return (int)(intptr_t)rb_thread_call_without_gvl(nogvl_close, &fd, RUBY_UBF_IO, 0); + return IO_WITHOUT_GVL_INT(nogvl_close, &fd); } static void* @@ -5439,15 +5537,13 @@ maygvl_fclose(FILE *file, int keepgvl) if (keepgvl) return fclose(file); - return (int)(intptr_t)rb_thread_call_without_gvl(nogvl_fclose, file, RUBY_UBF_IO, 0); + return IO_WITHOUT_GVL_INT(nogvl_fclose, file); } static void free_io_buffer(rb_io_buffer_t *buf); -static void clear_codeconv(rb_io_t *fptr); static void -fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl, - struct rb_io_close_wait_list *busy) +fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl) { VALUE error = Qnil; int fd = fptr->fd; @@ -5487,20 +5583,8 @@ fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl, fptr->stdio_file = 0; fptr->mode &= ~(FMODE_READABLE|FMODE_WRITABLE); - // Ensure waiting_fd users do not hit EBADF. - if (busy) { - // Wait for them to exit before we call close(). - rb_notify_fd_close_wait(busy); - } - - // Disable for now. 
- // if (!done && fd >= 0) { - // VALUE scheduler = rb_fiber_scheduler_current(); - // if (scheduler != Qnil) { - // VALUE result = rb_fiber_scheduler_io_close(scheduler, fptr->self); - // if (!UNDEF_P(result)) done = 1; - // } - // } + // Wait for blocking operations to ensure they do not hit EBADF: + rb_thread_io_close_wait(fptr); if (!done && stdio_file) { // stdio_file is deallocated anyway even if fclose failed. @@ -5513,6 +5597,15 @@ fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl, done = 1; } + VALUE scheduler = rb_fiber_scheduler_current(); + if (!done && fd >= 0 && scheduler != Qnil) { + VALUE result = rb_fiber_scheduler_io_close(scheduler, RB_INT2NUM(fd)); + + if (!UNDEF_P(result)) { + done = RTEST(result); + } + } + if (!done && fd >= 0) { // fptr->fd may be closed even if close fails. POSIX doesn't specify it. // We assumes it is closed. @@ -5539,7 +5632,7 @@ fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl, static void fptr_finalize(rb_io_t *fptr, int noraise) { - fptr_finalize_flush(fptr, noraise, FALSE, 0); + fptr_finalize_flush(fptr, noraise, FALSE); free_io_buffer(&fptr->rbuf); free_io_buffer(&fptr->wbuf); clear_codeconv(fptr); @@ -5560,7 +5653,7 @@ static void free_io_buffer(rb_io_buffer_t *buf) { if (buf->ptr) { - ruby_sized_xfree(buf->ptr, (size_t)buf->capa); + ruby_xfree_sized(buf->ptr, (size_t)buf->capa); buf->ptr = NULL; } } @@ -5604,37 +5697,45 @@ rb_io_fptr_cleanup_all(rb_io_t *fptr) clear_codeconv(fptr); } -void -rb_io_fptr_finalize_internal(void *ptr) +int +rb_io_fptr_finalize(struct rb_io *io) { - if (!ptr) return; - rb_io_fptr_cleanup_all(ptr); - free(ptr); + if (!io) return 0; + rb_io_fptr_cleanup_all(io); + free(io); + + return 1; } -#undef rb_io_fptr_finalize -int -rb_io_fptr_finalize(rb_io_t *fptr) +bool +rb_io_fptr_finalize_closed(struct rb_io *io) { - if (!fptr) { - return 0; - } - else { - rb_io_fptr_finalize_internal(fptr); - return 1; - } + if (!io) return true; + if (io->fd >= 0) return false; + 
rb_io_fptr_finalize(io); + return true; } -#define rb_io_fptr_finalize(fptr) rb_io_fptr_finalize_internal(fptr) -RUBY_FUNC_EXPORTED size_t -rb_io_memsize(const rb_io_t *fptr) +size_t +rb_io_memsize(const rb_io_t *io) { size_t size = sizeof(rb_io_t); - size += fptr->rbuf.capa; - size += fptr->wbuf.capa; - size += fptr->cbuf.capa; - if (fptr->readconv) size += rb_econv_memsize(fptr->readconv); - if (fptr->writeconv) size += rb_econv_memsize(fptr->writeconv); + size += io->rbuf.capa; + size += io->wbuf.capa; + size += io->cbuf.capa; + if (io->readconv) size += rb_econv_memsize(io->readconv); + if (io->writeconv) size += rb_econv_memsize(io->writeconv); + + struct rb_io_blocking_operation *blocking_operation = 0; + + // Validate the fork generation of the IO object. If the IO object fork generation is different, the list of blocking operations is not valid memory. See `rb_io_blocking_operations` for the exact semantics. + rb_serial_t fork_generation = GET_VM()->fork_gen; + if (io->fork_generation == fork_generation) { + ccan_list_for_each(&io->blocking_operations, blocking_operation, list) { + size += sizeof(struct rb_io_blocking_operation); + } + } + return size; } @@ -5651,7 +5752,6 @@ io_close_fptr(VALUE io) rb_io_t *fptr; VALUE write_io; rb_io_t *write_fptr; - struct rb_io_close_wait_list busy; write_io = GetWriteIO(io); if (io != write_io) { @@ -5665,10 +5765,12 @@ io_close_fptr(VALUE io) if (!fptr) return 0; if (fptr->fd < 0) return 0; - if (rb_notify_fd_close(fptr->fd, &busy)) { + // This guards against multiple threads closing the same IO object: + if (rb_thread_io_close_interrupt(fptr)) { /* calls close(fptr->fd): */ - fptr_finalize_flush(fptr, FALSE, KEEPGVL, &busy); + fptr_finalize_flush(fptr, FALSE, KEEPGVL); } + rb_io_fptr_cleanup(fptr, FALSE); return fptr; } @@ -5706,6 +5808,9 @@ rb_io_close(VALUE io) * If the stream was opened by IO.popen, sets global variable <tt>$?</tt> * (child exit status). 
* + * It is not an error to close an IO object that has already been closed. + * It just returns nil. + * * Example: * * IO.popen('ruby', 'r+') do |pipe| @@ -6089,7 +6194,7 @@ rb_io_sysread(int argc, VALUE *argv, VALUE io) } struct prdwr_internal_arg { - VALUE io; + struct rb_io *io; int fd; void *buf; size_t count; @@ -6111,14 +6216,14 @@ pread_internal_call(VALUE _arg) VALUE scheduler = rb_fiber_scheduler_current(); if (scheduler != Qnil) { - VALUE result = rb_fiber_scheduler_io_pread_memory(scheduler, arg->io, arg->offset, arg->buf, arg->count, 0); + VALUE result = rb_fiber_scheduler_io_pread_memory(scheduler, arg->io->self, arg->offset, arg->buf, arg->count, 0); if (!UNDEF_P(result)) { return rb_fiber_scheduler_io_result_apply(result); } } - return rb_thread_io_blocking_call(internal_pread_func, arg, arg->fd, RB_WAITFD_IN); + return rb_io_blocking_region_wait(arg->io, internal_pread_func, arg, RUBY_IO_READABLE); } /* @@ -6155,7 +6260,7 @@ rb_io_pread(int argc, VALUE *argv, VALUE io) VALUE len, offset, str; rb_io_t *fptr; ssize_t n; - struct prdwr_internal_arg arg = {.io = io}; + struct prdwr_internal_arg arg; int shrinkable; rb_scan_args(argc, argv, "21", &len, &offset, &str); @@ -6169,6 +6274,7 @@ rb_io_pread(int argc, VALUE *argv, VALUE io) GetOpenFile(io, fptr); rb_io_check_byte_readable(fptr); + arg.io = fptr; arg.fd = fptr->fd; rb_io_check_closed(fptr); @@ -6191,17 +6297,24 @@ internal_pwrite_func(void *_arg) { struct prdwr_internal_arg *arg = _arg; + return (VALUE)pwrite(arg->fd, arg->buf, arg->count, arg->offset); +} + +static VALUE +pwrite_internal_call(VALUE _arg) +{ + struct prdwr_internal_arg *arg = (struct prdwr_internal_arg *)_arg; + VALUE scheduler = rb_fiber_scheduler_current(); if (scheduler != Qnil) { - VALUE result = rb_fiber_scheduler_io_pwrite_memory(scheduler, arg->io, arg->offset, arg->buf, arg->count, 0); + VALUE result = rb_fiber_scheduler_io_pwrite_memory(scheduler, arg->io->self, arg->offset, arg->buf, arg->count, 0); if 
(!UNDEF_P(result)) { return rb_fiber_scheduler_io_result_apply(result); } } - - return (VALUE)pwrite(arg->fd, arg->buf, arg->count, arg->offset); + return rb_io_blocking_region_wait(arg->io, internal_pwrite_func, arg, RUBY_IO_WRITABLE); } /* @@ -6234,7 +6347,7 @@ rb_io_pwrite(VALUE io, VALUE str, VALUE offset) { rb_io_t *fptr; ssize_t n; - struct prdwr_internal_arg arg = {.io = io}; + struct prdwr_internal_arg arg; VALUE tmp; if (!RB_TYPE_P(str, T_STRING)) @@ -6245,13 +6358,15 @@ rb_io_pwrite(VALUE io, VALUE str, VALUE offset) io = GetWriteIO(io); GetOpenFile(io, fptr); rb_io_check_writable(fptr); + + arg.io = fptr; arg.fd = fptr->fd; tmp = rb_str_tmp_frozen_acquire(str); arg.buf = RSTRING_PTR(tmp); arg.count = (size_t)RSTRING_LEN(tmp); - n = (ssize_t)rb_thread_io_blocking_call(internal_pwrite_func, &arg, fptr->fd, RB_WAITFD_OUT); + n = (ssize_t)pwrite_internal_call((VALUE)&arg); if (n < 0) rb_sys_fail_path(fptr->pathv); rb_str_tmp_frozen_release(str, tmp); @@ -6356,7 +6471,7 @@ rb_io_binmode_p(VALUE io) } static const char* -rb_io_fmode_modestr(int fmode) +rb_io_fmode_modestr(enum rb_io_mode fmode) { if (fmode & FMODE_APPEND) { if ((fmode & FMODE_READWRITE) == FMODE_READWRITE) { @@ -6390,10 +6505,10 @@ io_encname_bom_p(const char *name, long len) return len > bom_prefix_len && STRNCASECMP(name, bom_prefix, bom_prefix_len) == 0; } -int +enum rb_io_mode rb_io_modestr_fmode(const char *modestr) { - int fmode = 0; + enum rb_io_mode fmode = 0; const char *m = modestr, *p = NULL; switch (*m++) { @@ -6450,7 +6565,7 @@ rb_io_modestr_fmode(const char *modestr) int rb_io_oflags_fmode(int oflags) { - int fmode = 0; + enum rb_io_mode fmode = 0; switch (oflags & O_ACCMODE) { case O_RDONLY: @@ -6486,7 +6601,7 @@ rb_io_oflags_fmode(int oflags) } static int -rb_io_fmode_oflags(int fmode) +rb_io_fmode_oflags(enum rb_io_mode fmode) { int oflags = 0; @@ -6571,7 +6686,7 @@ rb_io_oflags_modestr(int oflags) * Qnil => no encoding specified (internal only) */ static void 
-rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode) +rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, enum rb_io_mode fmode) { int default_ext = 0; @@ -6606,12 +6721,12 @@ unsupported_encoding(const char *name, rb_encoding *enc) static void parse_mode_enc(const char *estr, rb_encoding *estr_enc, - rb_encoding **enc_p, rb_encoding **enc2_p, int *fmode_p) + rb_encoding **enc_p, rb_encoding **enc2_p, enum rb_io_mode *fmode_p) { const char *p; char encname[ENCODING_MAXNAMELEN+1]; int idx, idx2; - int fmode = fmode_p ? *fmode_p : 0; + enum rb_io_mode fmode = fmode_p ? *fmode_p : 0; rb_encoding *ext_enc, *int_enc; long len; @@ -6673,7 +6788,7 @@ parse_mode_enc(const char *estr, rb_encoding *estr_enc, } int -rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p, int *fmode_p) +rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p, enum rb_io_mode *fmode_p) { VALUE encoding=Qnil, extenc=Qundef, intenc=Qundef, tmp; int extracted = 0; @@ -6742,9 +6857,9 @@ rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2 } static void -validate_enc_binmode(int *fmode_p, int ecflags, rb_encoding *enc, rb_encoding *enc2) +validate_enc_binmode(enum rb_io_mode *fmode_p, int ecflags, rb_encoding *enc, rb_encoding *enc2) { - int fmode = *fmode_p; + enum rb_io_mode fmode = *fmode_p; if ((fmode & FMODE_READABLE) && !enc2 && @@ -6769,7 +6884,7 @@ validate_enc_binmode(int *fmode_p, int ecflags, rb_encoding *enc, rb_encoding *e } static void -extract_binmode(VALUE opthash, int *fmode) +extract_binmode(VALUE opthash, enum rb_io_mode *fmode) { if (!NIL_P(opthash)) { VALUE v; @@ -6799,10 +6914,11 @@ extract_binmode(VALUE opthash, int *fmode) void rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, - int *oflags_p, int *fmode_p, struct rb_io_encoding *convconfig_p) + int *oflags_p, enum 
rb_io_mode *fmode_p, struct rb_io_encoding *convconfig_p) { VALUE vmode; - int oflags, fmode; + int oflags; + enum rb_io_mode fmode; rb_encoding *enc, *enc2; int ecflags; VALUE ecopts; @@ -6827,7 +6943,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, else { const char *p; - SafeStringValue(vmode); + StringValue(vmode); p = StringValueCStr(vmode); fmode = rb_io_modestr_fmode(p); oflags = rb_io_fmode_oflags(fmode); @@ -6961,7 +7077,9 @@ static inline int rb_sysopen_internal(struct sysopen_struct *data) { int fd; - fd = (int)(VALUE)rb_thread_call_without_gvl(sysopen_func, data, RUBY_UBF_IO, 0); + do { + fd = IO_WITHOUT_GVL_INT(sysopen_func, data); + } while (fd < 0 && errno == EINTR); if (0 <= fd) rb_update_max_fd(fd); return fd; @@ -7115,7 +7233,7 @@ io_set_encoding_by_bom(VALUE io) } static VALUE -rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, +rb_file_open_generic(VALUE io, VALUE filename, int oflags, enum rb_io_mode fmode, const struct rb_io_encoding *convconfig, mode_t perm) { VALUE pathv; @@ -7152,15 +7270,13 @@ rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, static VALUE rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) { - int fmode = rb_io_modestr_fmode(modestr); + enum rb_io_mode fmode = rb_io_modestr_fmode(modestr); const char *p = strchr(modestr, ':'); struct rb_io_encoding convconfig; if (p) { parse_mode_enc(p+1, rb_usascii_encoding(), &convconfig.enc, &convconfig.enc2, &fmode); - convconfig.ecflags = 0; - convconfig.ecopts = Qnil; } else { rb_encoding *e; @@ -7168,10 +7284,19 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); - convconfig.ecflags = 0; - convconfig.ecopts = Qnil; } + convconfig.ecflags = (fmode & FMODE_READABLE) ? 
+ MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + convconfig.ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags); + convconfig.ecopts = Qnil; + return rb_file_open_generic(io, filename, rb_io_fmode_oflags(fmode), fmode, @@ -7461,7 +7586,7 @@ char *rb_execarg_commandline(const struct rb_execarg *eargp, VALUE *prog); #ifndef __EMSCRIPTEN__ static VALUE -pipe_open(VALUE execarg_obj, const char *modestr, int fmode, +pipe_open(VALUE execarg_obj, const char *modestr, enum rb_io_mode fmode, const struct rb_io_encoding *convconfig) { struct rb_execarg *eargp = NIL_P(execarg_obj) ? NULL : rb_execarg_get(execarg_obj); @@ -7690,7 +7815,7 @@ pipe_open(VALUE execarg_obj, const char *modestr, int fmode, } #else static VALUE -pipe_open(VALUE execarg_obj, const char *modestr, int fmode, +pipe_open(VALUE execarg_obj, const char *modestr, enum rb_io_mode fmode, const struct rb_io_encoding *convconfig) { rb_raise(rb_eNotImpError, "popen() is not available"); @@ -7712,7 +7837,7 @@ is_popen_fork(VALUE prog) } static VALUE -pipe_open_s(VALUE prog, const char *modestr, int fmode, +pipe_open_s(VALUE prog, const char *modestr, enum rb_io_mode fmode, const struct rb_io_encoding *convconfig) { int argc = 1; @@ -7745,7 +7870,7 @@ static VALUE popen_finish(VALUE port, VALUE klass); * whose $stdin and $stdout are connected to a new stream +io+. * * This method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. + * see {Command Injection}[rdoc-ref:security/command_injection.rdoc]. * * If no block is given, returns the new stream, * which depending on given +mode+ may be open for reading, writing, or both. 
@@ -7754,7 +7879,8 @@ static VALUE popen_finish(VALUE port, VALUE klass); * If a block is given, the stream is passed to the block * (again, open for reading, writing, or both); * when the block exits, the stream is closed, - * and the block's value is assigned to global variable <tt>$?</tt> and returned. + * the block's value is returned, + * and the global variable <tt>$?</tt> is set to the child's exit status. * * Optional argument +mode+ may be any valid \IO mode. * See {Access Modes}[rdoc-ref:File@Access+Modes]. @@ -7783,7 +7909,7 @@ static VALUE popen_finish(VALUE port, VALUE klass); * - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. * - Options for Kernel#spawn. * - * <b>Forked \Process</b> + * <b>Forked Process</b> * * When argument +cmd+ is the 1-character string <tt>'-'</tt>, causes the process to fork: * IO.popen('-') do |pipe| @@ -7921,7 +8047,8 @@ rb_io_popen(VALUE pname, VALUE pmode, VALUE env, VALUE opt) { const char *modestr; VALUE tmp, execarg_obj = Qnil; - int oflags, fmode; + int oflags; + enum rb_io_mode fmode; struct rb_io_encoding convconfig; tmp = rb_check_array_type(pname); @@ -7936,7 +8063,7 @@ rb_io_popen(VALUE pname, VALUE pmode, VALUE env, VALUE opt) RB_GC_GUARD(tmp); } else { - SafeStringValue(pname); + StringValue(pname); execarg_obj = Qnil; if (!is_popen_fork(pname)) execarg_obj = rb_execarg_new(1, &pname, TRUE, FALSE); @@ -7959,10 +8086,10 @@ popen_finish(VALUE port, VALUE klass) if (NIL_P(port)) { /* child */ if (rb_block_given_p()) { - rb_yield(Qnil); + rb_protect(rb_yield, Qnil, NULL); rb_io_flush(rb_ractor_stdout()); rb_io_flush(rb_ractor_stderr()); - _exit(0); + _exit(EXIT_SUCCESS); } return Qnil; } @@ -8002,7 +8129,12 @@ ruby_popen_writer(char *const *argv, rb_pid_t *pid) int write_pair[2]; # endif - int result = rb_cloexec_pipe(write_pair); +#ifdef HAVE_PIPE2 + int result = pipe2(write_pair, O_CLOEXEC); +#else + int result = pipe(write_pair); +#endif + *pid = -1; if (result == 0) { # ifdef HAVE_WORKING_FORK @@ 
-8027,37 +8159,19 @@ ruby_popen_writer(char *const *argv, rb_pid_t *pid) return NULL; } -static void -rb_scan_open_args(int argc, const VALUE *argv, - VALUE *fname_p, int *oflags_p, int *fmode_p, - struct rb_io_encoding *convconfig_p, mode_t *perm_p) +static VALUE +rb_open_file(VALUE io, VALUE fname, VALUE vmode, VALUE vperm, VALUE opt) { - VALUE opt, fname, vmode, vperm; - int oflags, fmode; + int oflags; + enum rb_io_mode fmode; + struct rb_io_encoding convconfig; mode_t perm; - argc = rb_scan_args(argc, argv, "12:", &fname, &vmode, &vperm, &opt); FilePathValue(fname); - rb_io_extract_modeenc(&vmode, &vperm, opt, &oflags, &fmode, convconfig_p); - - perm = NIL_P(vperm) ? 0666 : NUM2MODET(vperm); - - *fname_p = fname; - *oflags_p = oflags; - *fmode_p = fmode; - *perm_p = perm; -} - -static VALUE -rb_open_file(int argc, const VALUE *argv, VALUE io) -{ - VALUE fname; - int oflags, fmode; - struct rb_io_encoding convconfig; - mode_t perm; + rb_io_extract_modeenc(&vmode, &vperm, opt, &oflags, &fmode, &convconfig); + perm = NIL_P(vperm) ? 
0666 : NUM2MODET(vperm); - rb_scan_open_args(argc, argv, &fname, &oflags, &fmode, &convconfig, &perm); rb_file_open_generic(io, fname, oflags, fmode, &convconfig, perm); return io; @@ -8141,7 +8255,7 @@ rb_io_s_sysopen(int argc, VALUE *argv, VALUE _) else if (!NIL_P(intmode = rb_check_to_integer(vmode, "to_int"))) oflags = NUM2INT(intmode); else { - SafeStringValue(vmode); + StringValue(vmode); oflags = rb_io_modestr_oflags(StringValueCStr(vmode)); } if (NIL_P(vperm)) perm = 0666; @@ -8152,21 +8266,6 @@ rb_io_s_sysopen(int argc, VALUE *argv, VALUE _) return INT2NUM(fd); } -static VALUE -check_pipe_command(VALUE filename_or_command) -{ - char *s = RSTRING_PTR(filename_or_command); - long l = RSTRING_LEN(filename_or_command); - char *e = s + l; - int chlen; - - if (rb_enc_ascget(s, e, &chlen, rb_enc_get(filename_or_command)) == '|') { - VALUE cmd = rb_str_new(s+chlen, l-chlen); - return cmd; - } - return Qnil; -} - /* * call-seq: * open(path, mode = 'r', perm = 0666, **opts) -> io or nil @@ -8174,9 +8273,6 @@ check_pipe_command(VALUE filename_or_command) * * Creates an IO object connected to the given file. * - * This method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. 
- * * With no block given, file stream is returned: * * open('t.txt') # => #<File:t.txt> @@ -8212,13 +8308,7 @@ rb_f_open(int argc, VALUE *argv, VALUE _) redirect = TRUE; } else { - VALUE cmd = check_pipe_command(tmp); - if (!NIL_P(cmd)) { - // TODO: when removed in 4.0, update command_injection.rdoc - rb_warn_deprecated_to_remove_at(4.0, "Calling Kernel#open with a leading '|'", "IO.popen"); - argv[0] = cmd; - return rb_io_s_popen(argc, argv, rb_cIO); - } + argv[0] = tmp; } } } @@ -8233,12 +8323,19 @@ rb_f_open(int argc, VALUE *argv, VALUE _) return rb_io_s_open(argc, argv, rb_cFile); } -static VALUE rb_io_open_generic(VALUE, VALUE, int, int, const struct rb_io_encoding *, mode_t); +static VALUE +rb_io_open_generic(VALUE klass, VALUE filename, int oflags, enum rb_io_mode fmode, + const struct rb_io_encoding *convconfig, mode_t perm) +{ + return rb_file_open_generic(io_alloc(klass), filename, + oflags, fmode, convconfig, perm); +} static VALUE rb_io_open(VALUE io, VALUE filename, VALUE vmode, VALUE vperm, VALUE opt) { - int oflags, fmode; + int oflags; + enum rb_io_mode fmode; struct rb_io_encoding convconfig; mode_t perm; @@ -8248,22 +8345,6 @@ rb_io_open(VALUE io, VALUE filename, VALUE vmode, VALUE vperm, VALUE opt) } static VALUE -rb_io_open_generic(VALUE klass, VALUE filename, int oflags, int fmode, - const struct rb_io_encoding *convconfig, mode_t perm) -{ - VALUE cmd; - if (klass == rb_cIO && !NIL_P(cmd = check_pipe_command(filename))) { - // TODO: when removed in 4.0, update command_injection.rdoc - rb_warn_deprecated_to_remove_at(4.0, "IO process creation with a leading '|'", "IO.popen"); - return pipe_open_s(cmd, rb_io_oflags_modestr(oflags), fmode, convconfig); - } - else { - return rb_file_open_generic(io_alloc(klass), filename, - oflags, fmode, convconfig, perm); - } -} - -static VALUE io_reopen(VALUE io, VALUE nfile) { rb_io_t *fptr, *orig; @@ -8290,7 +8371,7 @@ io_reopen(VALUE io, VALUE nfile) rb_sys_fail_on_write(fptr); } else { - 
flush_before_seek(fptr); + flush_before_seek(fptr, true); } if (orig->mode & FMODE_READABLE) { pos = io_tell(orig); @@ -8302,6 +8383,7 @@ io_reopen(VALUE io, VALUE nfile) /* copy rb_io_t structure */ fptr->mode = orig->mode | (fptr->mode & FMODE_EXTERNAL); + fptr->encs = orig->encs; fptr->pid = orig->pid; fptr->lineno = orig->lineno; if (RTEST(orig->pathv)) fptr->pathv = orig->pathv; @@ -8311,6 +8393,10 @@ io_reopen(VALUE io, VALUE nfile) fd = fptr->fd; fd2 = orig->fd; if (fd != fd2) { + // Interrupt all usage of the old file descriptor: + rb_thread_io_close_interrupt(fptr); + rb_thread_io_close_wait(fptr); + if (RUBY_IO_EXTERNAL_P(fptr) || fd <= 2 || !fptr->stdio_file) { /* need to keep FILE objects of stdin, stdout and stderr */ if (rb_cloexec_dup2(fd2, fd) < 0) @@ -8326,7 +8412,7 @@ io_reopen(VALUE io, VALUE nfile) rb_update_max_fd(fd); fptr->fd = fd; } - rb_thread_fd_close(fd); + if ((orig->mode & FMODE_READABLE) && pos >= 0) { if (io_seek(fptr, pos, SEEK_SET) < 0 && errno) { rb_sys_fail_path(fptr->pathv); @@ -8415,7 +8501,7 @@ rb_io_reopen(int argc, VALUE *argv, VALUE file) } if (!NIL_P(nmode) || !NIL_P(opt)) { - int fmode; + enum rb_io_mode fmode; struct rb_io_encoding convconfig; rb_io_extract_modeenc(&nmode, 0, opt, &oflags, &fmode, &convconfig); @@ -8503,6 +8589,12 @@ rb_io_init_copy(VALUE dest, VALUE io) fptr->pid = orig->pid; fptr->lineno = orig->lineno; fptr->timeout = orig->timeout; + + ccan_list_head_init(&fptr->blocking_operations); + fptr->closing_ec = NULL; + fptr->wakeup_mutex = Qnil; + fptr->fork_generation = GET_VM()->fork_gen; + if (!NIL_P(orig->pathv)) fptr->pathv = orig->pathv; fptr_copy_finalizer(fptr, orig); @@ -8532,7 +8624,7 @@ rb_io_init_copy(VALUE dest, VALUE io) * Formats and writes +objects+ to the stream. * * For details on +format_string+, see - * {Format Specifications}[rdoc-ref:format_specifications.rdoc]. + * {Format Specifications}[rdoc-ref:language/format_specifications.rdoc]. 
* */ @@ -8553,7 +8645,7 @@ rb_io_printf(int argc, const VALUE *argv, VALUE out) * io.write(sprintf(format_string, *objects)) * * For details on +format_string+, see - * {Format Specifications}[rdoc-ref:format_specifications.rdoc]. + * {Format Specifications}[rdoc-ref:language/format_specifications.rdoc]. * * With the single argument +format_string+, formats +objects+ into the string, * then writes the formatted string to $stdout: @@ -8596,12 +8688,19 @@ rb_f_printf(int argc, VALUE *argv, VALUE _) return Qnil; } +extern void rb_deprecated_str_setter(VALUE val, ID id, VALUE *var); + static void -deprecated_str_setter(VALUE val, ID id, VALUE *var) +deprecated_rs_setter(VALUE val, ID id, VALUE *var) { - rb_str_setter(val, id, &val); + rb_deprecated_str_setter(val, id, &val); if (!NIL_P(val)) { - rb_warn_deprecated("`%s'", NULL, rb_id2name(id)); + if (rb_str_equal(val, rb_default_rs)) { + val = rb_default_rs; + } + else { + val = rb_str_frozen_bare_string(val); + } } *var = val; } @@ -9221,6 +9320,11 @@ rb_io_open_descriptor(VALUE klass, int descriptor, int mode, VALUE path, VALUE t io->timeout = timeout; + ccan_list_head_init(&io->blocking_operations); + io->closing_ec = NULL; + io->wakeup_mutex = Qnil; + io->fork_generation = GET_VM()->fork_gen; + if (encoding) { io->encs = *encoding; } @@ -9231,14 +9335,30 @@ rb_io_open_descriptor(VALUE klass, int descriptor, int mode, VALUE path, VALUE t } static VALUE -prep_io(int fd, int fmode, VALUE klass, const char *path) +prep_io(int fd, enum rb_io_mode fmode, VALUE klass, const char *path) { VALUE path_value = Qnil; + rb_encoding *e; + struct rb_io_encoding convconfig; + if (path) { path_value = rb_obj_freeze(rb_str_new_cstr(path)); } - VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, NULL); + e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; + rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); + convconfig.ecflags = (fmode & FMODE_READABLE) ? 
+ MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + convconfig.ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags); + convconfig.ecopts = Qnil; + + VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, &convconfig); rb_io_t*io = RFILE(self)->fptr; if (!io_check_tty(io)) { @@ -9261,7 +9381,7 @@ rb_io_fdopen(int fd, int oflags, const char *path) } static VALUE -prep_stdio(FILE *f, int fmode, VALUE klass, const char *path) +prep_stdio(FILE *f, enum rb_io_mode fmode, VALUE klass, const char *path) { rb_io_t *fptr; VALUE io = prep_io(fileno(f), fmode|FMODE_EXTERNAL|DEFAULT_TEXTMODE, klass, path); @@ -9344,6 +9464,10 @@ rb_io_fptr_new(void) fp->encs.ecopts = Qnil; fp->write_lock = Qnil; fp->timeout = Qnil; + ccan_list_head_init(&fp->blocking_operations); + fp->closing_ec = NULL; + fp->wakeup_mutex = Qnil; + fp->fork_generation = GET_VM()->fork_gen; return fp; } @@ -9364,6 +9488,8 @@ rb_io_make_open_file(VALUE obj) return fp; } +static VALUE io_initialize(VALUE io, VALUE fnum, VALUE vmode, VALUE opt); + /* * call-seq: * IO.new(fd, mode = 'r', **opts) -> io @@ -9383,7 +9509,8 @@ rb_io_make_open_file(VALUE obj) * The new \IO object does not inherit encoding * (because the integer file descriptor does not have an encoding): * - * fd = IO.sysopen('t.rus', 'rb') + * File.read('t.ja') # => "こんにちは" + * fd = IO.sysopen('t.ja', 'rb') * io = IO.new(fd) * io.external_encoding # => #<Encoding:UTF-8> # Not ASCII-8BIT. 
* @@ -9409,18 +9536,25 @@ static VALUE rb_io_initialize(int argc, VALUE *argv, VALUE io) { VALUE fnum, vmode; + VALUE opt; + + rb_scan_args(argc, argv, "11:", &fnum, &vmode, &opt); + return io_initialize(io, fnum, vmode, opt); +} + +static VALUE +io_initialize(VALUE io, VALUE fnum, VALUE vmode, VALUE opt) +{ rb_io_t *fp; - int fd, fmode, oflags = O_RDONLY; + int fd, oflags = O_RDONLY; + enum rb_io_mode fmode; struct rb_io_encoding convconfig; - VALUE opt; #if defined(HAVE_FCNTL) && defined(F_GETFL) int ofmode; #else struct stat st; #endif - - argc = rb_scan_args(argc, argv, "11:", &fnum, &vmode, &opt); rb_io_extract_modeenc(&vmode, 0, opt, &oflags, &fmode, &convconfig); fd = NUM2INT(fnum); @@ -9465,6 +9599,10 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io) fp->encs = convconfig; fp->pathv = path; fp->timeout = Qnil; + ccan_list_head_init(&fp->blocking_operations); + fp->closing_ec = NULL; + fp->wakeup_mutex = Qnil; + fp->fork_generation = GET_VM()->fork_gen; clear_codeconv(fp); io_check_tty(fp); if (fileno(stdin) == fd) @@ -9569,17 +9707,16 @@ rb_file_initialize(int argc, VALUE *argv, VALUE io) if (RFILE(io)->fptr) { rb_raise(rb_eRuntimeError, "reinitializing File"); } - if (0 < argc && argc < 3) { - VALUE fd = rb_check_to_int(argv[0]); + VALUE fname, vmode, vperm, opt; + int posargc = rb_scan_args(argc, argv, "12:", &fname, &vmode, &vperm, &opt); + if (posargc < 3) { /* perm is File only */ + VALUE fd = rb_check_to_int(fname); if (!NIL_P(fd)) { - argv[0] = fd; - return rb_io_initialize(argc, argv, io); + return io_initialize(io, fd, vmode, opt); } } - rb_open_file(argc, argv, io); - - return io; + return rb_open_file(io, fname, vmode, vperm, opt); } /* :nodoc: */ @@ -9695,7 +9832,7 @@ io_wait_readable(int argc, VALUE *argv, VALUE io) rb_io_t *fptr; RB_IO_POINTER(io, fptr); - rb_io_check_readable(fptr); + rb_io_check_char_readable(fptr); if (rb_io_read_pending(fptr)) return Qtrue; @@ -9742,7 +9879,7 @@ io_wait_priority(int argc, VALUE *argv, VALUE io) rb_io_t 
*fptr = NULL; RB_IO_POINTER(io, fptr); - rb_io_check_readable(fptr); + rb_io_check_char_readable(fptr); if (rb_io_read_pending(fptr)) return Qtrue; @@ -9799,7 +9936,7 @@ io_event_from_value(VALUE value) /* * call-seq: * io.wait(events, timeout) -> event mask, false or nil - * io.wait(timeout = nil, mode = :read) -> self, true, or false + * io.wait(*event_symbols[, timeout]) -> self, true, or false * * Waits until the IO becomes ready for the specified events and returns the * subset of events that become ready, or a falsy value when times out. @@ -9807,10 +9944,14 @@ io_event_from_value(VALUE value) * The events can be a bit mask of +IO::READABLE+, +IO::WRITABLE+ or * +IO::PRIORITY+. * - * Returns an event mask (truthy value) immediately when buffered data is available. + * Returns an event mask (truthy value) immediately when buffered data is + * available. * - * Optional parameter +mode+ is one of +:read+, +:write+, or - * +:read_write+. + * The second form: if one or more event symbols (+:read+, +:write+, or + * +:read_write+) are passed, the event mask is the bit OR of the bitmask + * corresponding to those symbols. In this form, +timeout+ is optional, the + * order of the arguments is arbitrary, and returns +io+ if any of the + * events is ready. */ static VALUE @@ -9820,10 +9961,6 @@ io_wait(int argc, VALUE *argv, VALUE io) enum rb_io_event events = 0; int return_io = 0; - // The documented signature for this method is actually incorrect. - // A single timeout is allowed in any position, and multiple symbols can be given. - // Whether this is intentional or not, I don't know, and as such I consider this to - // be a legacy/slow path. 
if (argc != 2 || (RB_SYMBOL_P(argv[0]) || RB_SYMBOL_P(argv[1]))) { // We'd prefer to return the actual mask, but this form would return the io itself: return_io = 1; @@ -9869,14 +10006,14 @@ io_wait(int argc, VALUE *argv, VALUE io) } static void -argf_mark(void *ptr) +argf_mark_and_move(void *ptr) { struct argf *p = ptr; - rb_gc_mark(p->filename); - rb_gc_mark(p->current_file); - rb_gc_mark(p->argv); - rb_gc_mark(p->inplace); - rb_gc_mark(p->encs.ecopts); + rb_gc_mark_and_move(&p->filename); + rb_gc_mark_and_move(&p->current_file); + rb_gc_mark_and_move(&p->argv); + rb_gc_mark_and_move(&p->inplace); + rb_gc_mark_and_move(&p->encs.ecopts); } static size_t @@ -9889,17 +10026,17 @@ argf_memsize(const void *ptr) static const rb_data_type_t argf_type = { "ARGF", - {argf_mark, RUBY_TYPED_DEFAULT_FREE, argf_memsize}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + {argf_mark_and_move, RUBY_TYPED_DEFAULT_FREE, argf_memsize, argf_mark_and_move}, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; static inline void -argf_init(struct argf *p, VALUE v) +argf_init(VALUE argf, struct argf *p, VALUE v) { p->filename = Qnil; p->current_file = Qnil; p->lineno = 0; - p->argv = v; + RB_OBJ_WRITE(argf, &p->argv, v); } static VALUE @@ -9908,7 +10045,7 @@ argf_alloc(VALUE klass) struct argf *p; VALUE argf = TypedData_Make_Struct(klass, struct argf, &argf_type, p); - argf_init(p, Qnil); + argf_init(argf, p, Qnil); return argf; } @@ -9919,7 +10056,7 @@ static VALUE argf_initialize(VALUE argf, VALUE argv) { memset(&ARGF, 0, sizeof(ARGF)); - argf_init(&ARGF, argv); + argf_init(argf, &ARGF, argv); return argf; } @@ -9930,7 +10067,8 @@ argf_initialize_copy(VALUE argf, VALUE orig) { if (!OBJ_INIT_COPY(argf, orig)) return argf; ARGF = argf_of(orig); - ARGF.argv = rb_obj_dup(ARGF.argv); + rb_gc_writebarrier_remember(argf); + ARGF_SET(argv, rb_obj_dup(ARGF.argv)); return argf; } @@ -10015,7 +10153,7 @@ argf_next_argv(VALUE argf) char *fn; rb_io_t *fptr; int stdout_binmode = 0; - int fmode; + 
enum rb_io_mode fmode; VALUE r_stdout = rb_ractor_stdout(); @@ -10049,11 +10187,11 @@ argf_next_argv(VALUE argf) if (RARRAY_LEN(ARGF.argv) > 0) { VALUE filename = rb_ary_shift(ARGF.argv); FilePathValue(filename); - ARGF.filename = filename; + ARGF_SET(filename, filename); filename = rb_str_encode_ospath(filename); fn = StringValueCStr(filename); if (RSTRING_LEN(filename) == 1 && fn[0] == '-') { - ARGF.current_file = rb_stdin; + ARGF_SET(current_file, rb_stdin); if (ARGF.inplace) { rb_warn("Can't do inplace edit for stdio; skipping"); goto retry; @@ -10148,7 +10286,7 @@ argf_next_argv(VALUE argf) if (!ARGF.binmode) { fmode |= DEFAULT_TEXTMODE; } - ARGF.current_file = prep_io(fr, fmode, rb_cFile, fn); + ARGF_SET(current_file, prep_io(fr, fmode, rb_cFile, fn)); if (!NIL_P(write_io)) { rb_io_set_write_io(ARGF.current_file, write_io); } @@ -10177,8 +10315,8 @@ argf_next_argv(VALUE argf) } } else if (ARGF.next_p == -1) { - ARGF.current_file = rb_stdin; - ARGF.filename = rb_str_new2("-"); + ARGF_SET(current_file, rb_stdin); + ARGF_SET(filename, rb_str_new2("-")); if (ARGF.inplace) { rb_warn("Can't do inplace edit for stdio"); rb_ractor_stdout_set(orig_stdout); @@ -10446,8 +10584,9 @@ static VALUE argf_readlines(int, VALUE *, VALUE); * $cat t.txt | ruby -e "p readlines 12" * ["First line\n", "Second line\n", "\n", "Fourth line\n", "Fifth line\n"] * - * With arguments +sep+ and +limit+ given, combines the two behaviors; - * see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]. + * With arguments +sep+ and +limit+ given, + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword argument +chomp+ specifies whether line separators * are to be omitted: @@ -10519,14 +10658,14 @@ argf_readlines(int argc, VALUE *argv, VALUE argf) * sets global variable <tt>$?</tt> to the process status. 
* * This method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. + * see {Command Injection}[rdoc-ref:security/command_injection.rdoc]. * * Examples: * * $ `date` # => "Wed Apr 9 08:56:30 CDT 2003\n" * $ `echo oops && exit 99` # => "oops\n" * $ $? # => #<Process::Status: pid 17088 exit 99> - * $ $?.status # => 99> + * $ $?.exitstatus # => 99 * * The built-in syntax <tt>%x{...}</tt> uses this method. * @@ -10539,7 +10678,7 @@ rb_f_backquote(VALUE obj, VALUE str) VALUE result; rb_io_t *fptr; - SafeStringValue(str); + StringValue(str); rb_last_status_clear(); port = pipe_open_s(str, "r", FMODE_READABLE|DEFAULT_TEXTMODE, NULL); if (NIL_P(port)) return rb_str_new(0,0); @@ -10630,9 +10769,9 @@ select_internal(VALUE read, VALUE write, VALUE except, struct timeval *tp, rb_fd if (!pending && n == 0) return Qnil; /* returns nil on timeout */ res = rb_ary_new2(3); - rb_ary_push(res, rp?rb_ary_new():rb_ary_new2(0)); - rb_ary_push(res, wp?rb_ary_new():rb_ary_new2(0)); - rb_ary_push(res, ep?rb_ary_new():rb_ary_new2(0)); + rb_ary_push(res, rp ? rb_ary_new_capa(RARRAY_LEN(read)) : rb_ary_new()); + rb_ary_push(res, wp ? rb_ary_new_capa(RARRAY_LEN(write)) : rb_ary_new()); + rb_ary_push(res, ep ? rb_ary_new_capa(RARRAY_LEN(except)) : rb_ary_new()); if (rp) { list = RARRAY_AREF(res, 0); @@ -10782,7 +10921,7 @@ do_io_advise(rb_io_t *fptr, VALUE advice, rb_off_t offset, rb_off_t len) ias.offset = offset; ias.len = len; - rv = (int)rb_thread_io_blocking_region(io_advise_internal, &ias, fptr->fd); + rv = (int)rb_io_blocking_region(fptr, io_advise_internal, &ias); if (rv && rv != ENOSYS) { /* posix_fadvise(2) doesn't set errno. On success it returns 0; otherwise it returns the error code. 
*/ @@ -10820,7 +10959,7 @@ advice_arg_check(VALUE advice) * advise(advice, offset = 0, len = 0) -> nil * * Invokes Posix system call - * {posix_fadvise(2)}[https://linux.die.net/man/2/posix_fadvise], + * {posix_fadvise(2)}[https://man7.org/linux/man-pages/man2/posix_fadvise.2.html], * which announces an intention to access data from the current file * in a particular manner. * @@ -10872,11 +11011,21 @@ rb_io_advise(int argc, VALUE *argv, VALUE io) #endif } +static int +is_pos_inf(VALUE x) +{ + double f; + if (!RB_FLOAT_TYPE_P(x)) + return 0; + f = RFLOAT_VALUE(x); + return isinf(f) && 0 < f; +} + /* * call-seq: * IO.select(read_ios, write_ios = [], error_ios = [], timeout = nil) -> array or nil * - * Invokes system call {select(2)}[https://linux.die.net/man/2/select], + * Invokes system call {select(2)}[https://man7.org/linux/man-pages/man2/select.2.html], * which monitors multiple file descriptors, * waiting until one or more of the file descriptors * becomes ready for some class of I/O operation. @@ -10886,7 +11035,10 @@ rb_io_advise(int argc, VALUE *argv, VALUE io) * Each of the arguments +read_ios+, +write_ios+, and +error_ios+ * is an array of IO objects. * - * Argument +timeout+ is an integer timeout interval in seconds. + * Argument +timeout+ is a numeric value (such as integer or float) timeout + * interval in seconds. + * +timeout+ can also be +nil+ or +Float::INFINITY+. + * +nil+ and +Float::INFINITY+ mean no timeout. * * The method monitors the \IO objects given in all three arrays, * waiting for some to be ready; @@ -10960,7 +11112,7 @@ rb_io_advise(int argc, VALUE *argv, VALUE io) * Finally, Linux kernel developers don't guarantee that * readability of select(2) means readability of following read(2) even * for a single process; - * see {select(2)}[https://linux.die.net/man/2/select] + * see {select(2)}[https://man7.org/linux/man-pages/man2/select.2.html] * * Invoking \IO.select before IO#readpartial works well as usual.
* However it is not the best way to use \IO.select. @@ -11037,7 +11189,7 @@ rb_f_select(int argc, VALUE *argv, VALUE obj) int i; rb_scan_args(argc, argv, "13", &args.read, &args.write, &args.except, &timeout); - if (NIL_P(timeout)) { + if (NIL_P(timeout) || is_pos_inf(timeout)) { args.timeout = 0; } else { @@ -11074,16 +11226,16 @@ nogvl_ioctl(void *ptr) } static int -do_ioctl(int fd, ioctl_req_t cmd, long narg) +do_ioctl(struct rb_io *io, ioctl_req_t cmd, long narg) { int retval; struct ioctl_arg arg; - arg.fd = fd; + arg.fd = io->fd; arg.cmd = cmd; arg.narg = narg; - retval = (int)rb_thread_io_blocking_region(nogvl_ioctl, &arg, fd); + retval = (int)rb_io_blocking_region(io, nogvl_ioctl, &arg); return retval; } @@ -11346,7 +11498,7 @@ rb_ioctl(VALUE io, VALUE req, VALUE arg) narg = setup_narg(cmd, &arg, ioctl_narg_len); GetOpenFile(io, fptr); - retval = do_ioctl(fptr->fd, cmd, narg); + retval = do_ioctl(fptr, cmd, narg); return finish_narg(retval, arg, fptr); } @@ -11354,7 +11506,7 @@ rb_ioctl(VALUE io, VALUE req, VALUE arg) * call-seq: * ioctl(integer_cmd, argument) -> integer * - * Invokes Posix system call {ioctl(2)}[https://linux.die.net/man/2/ioctl], + * Invokes Posix system call {ioctl(2)}[https://man7.org/linux/man-pages/man2/ioctl.2.html], * which issues a low-level command to an I/O device. * * Issues a low-level command to an I/O device. 
@@ -11400,16 +11552,16 @@ nogvl_fcntl(void *ptr) } static int -do_fcntl(int fd, int cmd, long narg) +do_fcntl(struct rb_io *io, int cmd, long narg) { int retval; struct fcntl_arg arg; - arg.fd = fd; + arg.fd = io->fd; arg.cmd = cmd; arg.narg = narg; - retval = (int)rb_thread_io_blocking_region(nogvl_fcntl, &arg, fd); + retval = (int)rb_io_blocking_region(io, nogvl_fcntl, &arg); if (retval != -1) { switch (cmd) { #if defined(F_DUPFD) @@ -11435,7 +11587,7 @@ rb_fcntl(VALUE io, VALUE req, VALUE arg) narg = setup_narg(cmd, &arg, fcntl_narg_len); GetOpenFile(io, fptr); - retval = do_fcntl(fptr->fd, cmd, narg); + retval = do_fcntl(fptr, cmd, narg); return finish_narg(retval, arg, fptr); } @@ -11443,7 +11595,7 @@ rb_fcntl(VALUE io, VALUE req, VALUE arg) * call-seq: * fcntl(integer_cmd, argument) -> integer * - * Invokes Posix system call {fcntl(2)}[https://linux.die.net/man/2/fcntl], + * Invokes Posix system call {fcntl(2)}[https://man7.org/linux/man-pages/man2/fcntl.2.html], * which provides a mechanism for issuing low-level commands to control or query * a file-oriented I/O stream. Arguments and results are platform * dependent. @@ -11473,7 +11625,7 @@ rb_io_fcntl(int argc, VALUE *argv, VALUE io) * call-seq: * syscall(integer_callno, *arguments) -> integer * - * Invokes Posix system call {syscall(2)}[https://linux.die.net/man/2/syscall], + * Invokes Posix system call {syscall(2)}[https://man7.org/linux/man-pages/man2/syscall.2.html], * which calls a specified function. 
* * Calls the operating system function identified by +integer_callno+; @@ -11545,7 +11697,7 @@ rb_f_syscall(int argc, VALUE *argv, VALUE _) VALUE v = rb_check_string_type(argv[i]); if (!NIL_P(v)) { - SafeStringValue(v); + StringValue(v); rb_str_modify(v); arg[i] = (VALUE)StringValueCStr(v); } @@ -11783,7 +11935,7 @@ rb_io_s_pipe(int argc, VALUE *argv, VALUE klass) VALUE opt; rb_io_t *fptr, *fptr2; struct io_encoding_set_args ies_args; - int fmode = 0; + enum rb_io_mode fmode = 0; VALUE ret; argc = rb_scan_args(argc, argv, "02:", &v1, &v2, &opt); @@ -11918,10 +12070,6 @@ io_s_foreach(VALUE v) * * Calls the block with each successive line read from the stream. * - * When called from class \IO (but not subclasses of \IO), - * this method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. - * * The first argument must be a string that is the path to a file. * * With only argument +path+ given, parses lines from the file at the given +path+, @@ -11958,7 +12106,7 @@ io_s_foreach(VALUE v) * * With argument +limit+ given, parses lines as determined by the default * line separator and the given line-length limit - * (see {Line Limit}[rdoc-ref:IO@Line+Limit]): + * (see {Line Separator}[rdoc-ref:IO@Line+Separator] and {Line Limit}[rdoc-ref:IO@Line+Limit]): * * File.foreach('t.txt', 7) {|line| p line } * @@ -11974,10 +12122,9 @@ io_s_foreach(VALUE v) * "Fourth l" * "line\n" * - * With arguments +sep+ and +limit+ given, - * parses lines as determined by the given - * line separator and the given line-length limit - * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]): + * With arguments +sep+ and +limit+ given, + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). 
* * Optional keyword arguments +opts+ specify: * @@ -12022,10 +12169,6 @@ io_s_readlines(VALUE v) * * Returns an array of all lines read from the stream. * - * When called from class \IO (but not subclasses of \IO), - * this method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. - * * The first argument must be a string that is the path to a file. * * With only argument +path+ given, parses lines from the file at the given +path+, @@ -12050,15 +12193,14 @@ io_s_readlines(VALUE v) * * With argument +limit+ given, parses lines as determined by the default * line separator and the given line-length limit - * (see {Line Limit}[rdoc-ref:IO@Line+Limit]): + * (see {Line Separator}[rdoc-ref:IO@Line+Separator] and {Line Limit}[rdoc-ref:IO@Line+Limit]): * * IO.readlines('t.txt', 7) * # => ["First l", "ine\n", "Second ", "line\n", "\n", "Third l", "ine\n", "Fourth ", "line\n"] * - * With arguments +sep+ and +limit+ given, - * parses lines as determined by the given - * line separator and the given line-length limit - * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]): + * With arguments +sep+ and +limit+ given, + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword arguments +opts+ specify: * @@ -12112,17 +12254,17 @@ seek_before_access(VALUE argp) * Opens the stream, reads and returns some or all of its content, * and closes the stream; returns +nil+ if no bytes were read. * - * When called from class \IO (but not subclasses of \IO), - * this method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. - * * The first argument must be a string that is the path to a file.
* * With only argument +path+ given, reads in text mode and returns the entire content * of the file at the given path: * - * IO.read('t.txt') - * # => "First line\nSecond line\n\nThird line\nFourth line\n" + * File.read('t.txt') + * # => "First line\nSecond line\n\nFourth line\nFifth line\n" + * File.read('t.ja') + * # => "こんにちは" + * File.read('t.dat') + * # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" * * On Windows, text mode can terminate reading and leave bytes in the file * unread when encountering certain special bytes. Consider using @@ -12130,15 +12272,36 @@ seek_before_access(VALUE argp) * * With argument +length+, returns +length+ bytes if available: * - * IO.read('t.txt', 7) # => "First l" - * IO.read('t.txt', 700) + * File.read('t.txt', 7) + * # => "First l" + * File.read('t.ja', 7) + * # => "\xE3\x81\x93\xE3\x82\x93\xE3" + * File.read('t.dat', 7) + * # => "\xFE\xFF\x99\x90\x99\x91\x99" + * + * Returns all bytes if +length+ is larger than the file's size: + * + * File.read('t.txt', 700) * # => "First line\r\nSecond line\r\n\r\nFourth line\r\nFifth line\r\n" + * File.read('t.ja', 700) + * # => "\xE3\x81\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1\xE3\x81\xAF" + * File.read('t.dat', 700) + * # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" * * With arguments +length+ and +offset+, returns +length+ bytes * if available, beginning at the given +offset+: * - * IO.read('t.txt', 10, 2) # => "rst line\nS" - * IO.read('t.txt', 10, 200) # => nil + * File.read('t.txt', 10, 2) + * # => "rst line\r\n" + * File.read('t.ja', 10, 2) + * # => "\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1" + * File.read('t.dat', 10, 2) + * # => "\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" + * + * Returns +nil+ if +offset+ is past the end of the stream: + * + * File.read('t.txt', 10, 200) + * # => nil * * Optional keyword arguments +opts+ specify: * @@ -12183,10 +12346,6 @@ rb_io_s_read(int argc, VALUE *argv, VALUE io) * Behaves like IO.read, except that the stream is opened in
binary mode * with ASCII-8BIT encoding. * - * When called from class \IO (but not subclasses of \IO), - * this method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. - * */ static VALUE @@ -12194,8 +12353,8 @@ rb_io_s_binread(int argc, VALUE *argv, VALUE io) { VALUE offset; struct foreach_arg arg; + enum rb_io_mode fmode = FMODE_READABLE|FMODE_BINMODE; enum { - fmode = FMODE_READABLE|FMODE_BINMODE, oflags = O_RDONLY #ifdef O_BINARY |O_BINARY @@ -12282,40 +12441,50 @@ io_s_write(int argc, VALUE *argv, VALUE klass, int binary) /* * call-seq: - * IO.write(path, data, offset = 0, **opts) -> integer + * IO.write(path, data, offset = 0, **opts) -> nonnegative_integer * * Opens the stream, writes the given +data+ to it, * and closes the stream; returns the number of bytes written. * - * When called from class \IO (but not subclasses of \IO), - * this method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. - * * The first argument must be a string that is the path to a file. 
* * With only argument +path+ given, writes the given +data+ to the file at that path: + * With only arguments +path+ and +data+ given, + * writes the given data to the file at that path: + * + * path = 't.tmp' + * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") # => 47 + * File.write(path, 'こんにちは') # => 15 + * File.write(path, "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94") # => 12 + * + * When +offset+ is zero (the default), the entire file content is overwritten: * - * IO.write('t.tmp', 'abc') # => 3 - * File.read('t.tmp') # => "abc" + * File.read(path) # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" + * File.write(path, 'foo') + * File.read(path) # => "foo" * - * If +offset+ is zero (the default), the file is overwritten: + * When +offset+ is within the file content, the file content is partly overwritten, + * beginning at byte +offset+: * - * IO.write('t.tmp', 'A') # => 1 - * File.read('t.tmp') # => "A" + * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") + * File.write(path, 'LINE', 6) + * File.read(path) # => "First LINE\nSecond line\n\nFourth line\nFifth line\n" * - * If +offset+ in within the file content, the file is partly overwritten: + * When the file contains multi-byte characters, + * the effect of writing may disturb some characters: * - * IO.write('t.tmp', 'abcdef') # => 3 - * File.read('t.tmp') # => "abcdef" - * # Offset within content. - * IO.write('t.tmp', '012', 2) # => 3 - * File.read('t.tmp') # => "ab012f" + * File.write(path, "こんにちは") + * File.write(path, 'FOO', 3) # Replace one 3-byte character. + * File.read(path) # => "こFOOにちは" + * File.write(path, 'BAR', 7) # Replace bytes in two different 3-byte characters.
+ * File.read(path) # => "こFOO\xE3BAR\x81\xA1は" * * If +offset+ is outside the file content, * the file is padded with null characters <tt>"\u0000"</tt>: * - * IO.write('t.tmp', 'xyz', 10) # => 3 - * File.read('t.tmp') # => "ab012f\u0000\u0000\u0000\u0000xyz" + * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") + * File.write(path, 'FOO', 55) + * File.read(path) + * # => "First line\nSecond line\n\nFourth line\nFifth line\n\u0000\u0000\u0000FOO" * * Optional keyword arguments +opts+ specify: * @@ -12332,15 +12501,11 @@ rb_io_s_write(int argc, VALUE *argv, VALUE io) /* * call-seq: - * IO.binwrite(path, string, offset = 0) -> integer + * IO.binwrite(path, string, offset = 0, **opts) -> integer * * Behaves like IO.write, except that the stream is opened in binary mode * with ASCII-8BIT encoding. * - * When called from class \IO (but not subclasses of \IO), - * this method has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. 
- * */ static VALUE @@ -13055,6 +13220,7 @@ copy_stream_fallback_body(VALUE arg) while (1) { long numwrote; long l; + rb_str_make_independent(buf); if (stp->copy_length < (rb_off_t)0) { l = buflen; } @@ -13208,7 +13374,7 @@ copy_stream_body(VALUE arg) rb_str_resize(str,len); read_buffered_data(RSTRING_PTR(str), len, stp->src_fptr); if (stp->dst_fptr) { /* IO or filename */ - if (io_binwrite(str, RSTRING_PTR(str), RSTRING_LEN(str), stp->dst_fptr, 0) < 0) + if (io_binwrite(RSTRING_PTR(str), RSTRING_LEN(str), stp->dst_fptr, 0) < 0) rb_sys_fail_on_write(stp->dst_fptr); } else /* others such as StringIO */ @@ -13230,7 +13396,7 @@ copy_stream_body(VALUE arg) return copy_stream_fallback(stp); } - rb_thread_call_without_gvl(nogvl_copy_stream_func, (void*)stp, RUBY_UBF_IO, 0); + IO_WITHOUT_GVL(nogvl_copy_stream_func, stp); return Qnil; } @@ -13558,6 +13724,7 @@ argf_set_encoding(int argc, VALUE *argv, VALUE argf) rb_io_set_encoding(argc, argv, ARGF.current_file); GetOpenFile(ARGF.current_file, fptr); ARGF.encs = fptr->encs; + RB_OBJ_WRITTEN(argf, Qundef, ARGF.encs.ecopts); return argf; } @@ -14490,7 +14657,7 @@ argf_inplace_mode_set(VALUE argf, VALUE val) ARGF.inplace = Qnil; } else { - ARGF.inplace = rb_str_new_frozen(val); + ARGF_SET(inplace, rb_str_new_frozen(val)); } return argf; } @@ -14504,7 +14671,7 @@ opt_i_set(VALUE val, ID id, VALUE *var) void ruby_set_inplace_mode(const char *suffix) { - ARGF.inplace = !suffix ? Qfalse : !*suffix ? Qnil : rb_str_new(suffix, strlen(suffix)); + ARGF_SET(inplace, !suffix ? Qfalse : !*suffix ? Qnil : rb_str_new(suffix, strlen(suffix))); } /* @@ -14557,14 +14724,14 @@ argf_write_io(VALUE argf) /* * call-seq: - * ARGF.write(string) -> integer + * ARGF.write(*objects) -> integer * - * Writes _string_ if inplace mode. + * Writes each of the given +objects+ if inplace mode. 
*/ static VALUE -argf_write(VALUE argf, VALUE str) +argf_write(int argc, VALUE *argv, VALUE argf) { - return rb_io_write(argf_write_io(argf), str); + return rb_io_writev(argf_write_io(argf), argc, argv); } void @@ -14670,55 +14837,259 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) /* * Document-class: ARGF * - * ARGF is a stream designed for use in scripts that process files given as - * command-line arguments or passed in via STDIN. + * == \ARGF and +ARGV+ + * + * The \ARGF object works with the array at global variable +ARGV+ + * to make <tt>$stdin</tt> and file streams available in the Ruby program: + * + * - **ARGV** may be thought of as the <b>argument vector</b> array. + * + * Initially, it contains the command-line arguments and options + * that are passed to the Ruby program; + * the program can modify that array as it likes. + * + * - **ARGF** may be thought of as the <b>argument files</b> object. + * + * It can access file streams and/or the <tt>$stdin</tt> stream, + * based on what it finds in +ARGV+. + * This provides a convenient way for the command line + * to specify streams for a Ruby program to read. + * + * == Reading + * + * \ARGF may read from _source_ streams, + * which at any particular time are determined by the content of +ARGV+. + * + * === Simplest Case + * + * When the <i>very first</i> \ARGF read occurs with an empty +ARGV+ (<tt>[]</tt>), + * the source is <tt>$stdin</tt>: + * + * - \File +t.rb+: + * + * p ['ARGV', ARGV] + * p ['ARGF.read', ARGF.read] + * + * - Commands and outputs + * (see below for the content of files +foo.txt+ and +bar.txt+): + * + * $ echo "Open the pod bay doors, Hal." 
| ruby t.rb + * ["ARGV", []] + * ["ARGF.read", "Open the pod bay doors, Hal.\n"] + * + * $ cat foo.txt bar.txt | ruby t.rb + * ["ARGV", []] + * ["ARGF.read", "Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n"] + * + * === About the Examples + * + * Many examples here assume the existence of files +foo.txt+ and +bar.txt+: + * + * $ cat foo.txt + * Foo 0 + * Foo 1 + * $ cat bar.txt + * Bar 0 + * Bar 1 + * Bar 2 + * Bar 3 + * + * === Sources in +ARGV+ + * + * For any \ARGF read _except_ the {simplest case}[rdoc-ref:ARGF@Simplest+Case] + * (that is, _except_ for the <i>very first</i> \ARGF read with an empty +ARGV+), + * the sources are found in +ARGV+. + * + * \ARGF assumes that each element in array +ARGV+ is a potential source, + * and is one of: + * + * - The string path to a file that may be opened as a stream. + * - The character <tt>'-'</tt>, meaning stream <tt>$stdin</tt>. * - * The arguments passed to your script are stored in the +ARGV+ Array, one - * argument per element. ARGF assumes that any arguments that aren't - * filenames have been removed from +ARGV+. For example: + * Each element that is _not_ one of these + * should be removed from +ARGV+ before \ARGF accesses that source. * - * $ ruby argf.rb --verbose file1 file2 + * In the following example: * - * ARGV #=> ["--verbose", "file1", "file2"] - * option = ARGV.shift #=> "--verbose" - * ARGV #=> ["file1", "file2"] + * - Filepaths +foo.txt+ and +bar.txt+ may be retained as potential sources. + * - Options <tt>--xyzzy</tt> and <tt>--mojo</tt> should be removed. * - * You can now use ARGF to work with a concatenation of each of these named - * files. For instance, ARGF.read will return the contents of _file1_ - * followed by the contents of _file2_. + * Example: * - * After a file in +ARGV+ has been read ARGF removes it from the Array. - * Thus, after all files have been read +ARGV+ will be empty. + * - \File +t.rb+: * - * You can manipulate +ARGV+ yourself to control what ARGF operates on. 
If - * you remove a file from +ARGV+, it is ignored by ARGF; if you add files to - * +ARGV+, they are treated as if they were named on the command line. For - * example: + * # Print arguments (and options, if any) found on command line. + * p ['ARGV', ARGV] * - * ARGV.replace ["file1"] - * ARGF.readlines # Returns the contents of file1 as an Array - * ARGV #=> [] - * ARGV.replace ["file2", "file3"] - * ARGF.read # Returns the contents of file2 and file3 + * - Command and output: * - * If +ARGV+ is empty, ARGF acts as if it contained <tt>"-"</tt> that - * makes ARGF read from STDIN, i.e. the data piped or typed to your - * script. For example: + * $ ruby t.rb --xyzzy --mojo foo.txt bar.txt + * ["ARGV", ["--xyzzy", "--mojo", "foo.txt", "bar.txt"]] * - * $ echo "glark" | ruby -e 'p ARGF.read' - * "glark\n" + * \ARGF's stream access considers the elements of +ARGV+, left to right: + * + * - \File +t.rb+: + * + * p "ARGV: #{ARGV}" + * p "Read: #{ARGF.read}" # Read everything from all specified streams. + * + * - Command and output: + * + * $ ruby t.rb foo.txt bar.txt + * "ARGV: [\"foo.txt\", \"bar.txt\"]" + * "Read: Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n" + * + * Because the value at +ARGV+ is an ordinary array, + * you can manipulate it to control which sources \ARGF considers: + * + * - If you remove an element from +ARGV+, \ARGF will not consider the corresponding source. + * - If you add an element to +ARGV+, \ARGF will consider the corresponding source. + * + * Each element in +ARGV+ is removed when its corresponding source is accessed; + * when all sources have been accessed, the array is empty: + * + * - \File +t.rb+: + * + * until ARGV.empty? && ARGF.eof? + * p "ARGV: #{ARGV}" + * p "Line: #{ARGF.readline}" # Read each line from each specified stream. 
+ * end + * + * - Command and output: + * + * $ ruby t.rb foo.txt bar.txt + * "ARGV: [\"foo.txt\", \"bar.txt\"]" + * "Line: Foo 0\n" + * "ARGV: [\"bar.txt\"]" + * "Line: Foo 1\n" + * "ARGV: [\"bar.txt\"]" + * "Line: Bar 0\n" + * "ARGV: []" + * "Line: Bar 1\n" + * "ARGV: []" + * "Line: Bar 2\n" + * "ARGV: []" + * "Line: Bar 3\n" + * + * ==== Filepaths in +ARGV+ + * + * The +ARGV+ array may contain filepaths that specify sources for \ARGF reading. + * + * This program prints what it reads from files at the paths specified + * on the command line: + * + * - \File +t.rb+: + * + * p ['ARGV', ARGV] + * # Read and print all content from the specified sources. + * p ['ARGF.read', ARGF.read] + * + * - Command and output: + * + * $ ruby t.rb foo.txt bar.txt + * ["ARGV", ["foo.txt", "bar.txt"]] + * ["ARGF.read", "Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n"] + * + * ==== Specifying <tt>$stdin</tt> in +ARGV+ + * + * To specify stream <tt>$stdin</tt> in +ARGV+, use the character <tt>'-'</tt>: + * + * - \File +t.rb+: + * + * p ['ARGV', ARGV] + * p ['ARGF.read', ARGF.read] + * + * - Command and output: + * + * $ echo "Open the pod bay doors, Hal." | ruby t.rb - + * ["ARGV", ["-"]] + * ["ARGF.read", "Open the pod bay doors, Hal.\n"] + * + * When no character <tt>'-'</tt> is given, stream <tt>$stdin</tt> is ignored. + * + * - Command and output: + * + * $ echo "Open the pod bay doors, Hal." | ruby t.rb foo.txt bar.txt + * "ARGV: [\"foo.txt\", \"bar.txt\"]" + * "Read: Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n" + * + * ==== Mixtures and Repetitions in +ARGV+ + * + * For an \ARGF reader, +ARGV+ may contain any mixture of filepaths + * and character <tt>'-'</tt>, including repetitions. + * + * ==== Modifications to +ARGV+ + * + * The running Ruby program may make any modifications to the +ARGV+ array; + * the current value of +ARGV+ affects \ARGF reading.
+ * + * ==== Empty +ARGV+ + * + * For an empty +ARGV+, an \ARGF read method either returns +nil+ + * or raises an exception, depending on the specific method. + * + * === More Read Methods + * + * As seen above, method ARGF#read reads the content of all sources + * into a single string. + * Other \ARGF methods provide other ways to access that content; + * these include: + * + * - Byte access: #each_byte, #getbyte, #readbyte. + * - Character access: #each_char, #getc, #readchar. + * - Codepoint access: #each_codepoint. + * - Line access: #each_line, #gets, #readline, #readlines. + * - Source access: #read, #read_nonblock, #readpartial. + * + * === About \Enumerable + * + * \ARGF includes module Enumerable. + * Virtually all methods in \Enumerable call method <tt>#each</tt> in the including class. + * + * <b>Note well</b>: In \ARGF, method #each returns data from the _sources_, + * _not_ from +ARGV+; + * therefore, for example, <tt>ARGF#entries</tt> returns an array of lines from the sources, + * not an array of the strings from +ARGV+: + * + * - \File +t.rb+: + * + * p ['ARGV', ARGV] + * p ['ARGF.entries', ARGF.entries] + * + * - Command and output: + * + * $ ruby t.rb foo.txt bar.txt + * ["ARGV", ["foo.txt", "bar.txt"]] + * ["ARGF.entries", ["Foo 0\n", "Foo 1\n", "Bar 0\n", "Bar 1\n", "Bar 2\n", "Bar 3\n"]] + * + * == Writing + * + * If <i>inplace mode</i> is in effect, + * \ARGF may write to target streams, + * which at any particular time are determined by the content of ARGV. + * + * Methods about inplace mode: + * + * - #inplace_mode + * - #inplace_mode= + * - #to_write_io + * + * Methods for writing: + * + * - #print + * - #printf + * - #putc + * - #puts + * - #write * - * $ echo Glark > file1 - * $ echo "glark" | ruby -e 'p ARGF.read' -- - file1 - * "glark\nGlark\n" */ /* * An instance of class \IO (commonly called a _stream_) * represents an input/output stream in the underlying operating system. - * \Class \IO is the basis for input and output in Ruby. 
+ * Class \IO is the basis for input and output in Ruby. * - * \Class File is the only class in the Ruby core that is a subclass of \IO. + * Class File is the only class in the Ruby core that is a subclass of \IO. * Some classes in the Ruby standard library are also subclasses of \IO; * these include TCPSocket and UDPSocket. * @@ -14727,7 +15098,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * found in ARGV (or found in STDIN if ARGV is empty). * ARGF is not itself a subclass of \IO. * - * \Class StringIO provides an IO-like stream that handles a String. + * Class StringIO provides an IO-like stream that handles a String. * StringIO is not itself a subclass of \IO. * * Important objects based on \IO include: @@ -14749,7 +15120,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * Like a File stream, an \IO stream has: * * - A read/write mode, which may be read-only, write-only, or read/write; - * see {Read/Write Mode}[rdoc-ref:File@Read-2FWrite+Mode]. + * see {Read/Write Mode}[rdoc-ref:File@ReadWrite+Mode]. * - A data mode, which may be text-only or binary; * see {Data Mode}[rdoc-ref:File@Data+Mode]. * - Internal and external encodings; @@ -14793,7 +15164,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * - +:binmode+: If a truthy value, specifies the mode as binary, text-only otherwise. * - +:autoclose+: If a truthy value, specifies that the +fd+ will close * when the stream closes; otherwise it remains open. - * - +:path:+ If a string value is provided, it is used in #inspect and is available as + * - +:path+: If a string value is provided, it is used in #inspect and is available as * #path method. * * Also available are the options offered in String#encode, @@ -14815,6 +15186,9 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * A new stream has position zero (and line number zero); * method +rewind+ resets the position (and line number) to zero. 
* + * These methods discard {buffers}[rdoc-ref:IO@Buffering] and the + * Encoding::Converter instances used for that \IO. + * * The relevant methods: * * - IO#tell (aliased as +#pos+): Returns the current position (in bytes) in the stream. @@ -14862,56 +15236,64 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * * == Line \IO * - * You can read an \IO stream line-by-line using these methods: + * Class \IO supports line-oriented + * {input}[rdoc-ref:IO@Line+Input] and {output}[rdoc-ref:IO@Line+Output] * - * - IO#each_line: Reads each remaining line, passing it to the given block. - * - IO#gets: Returns the next line. - * - IO#readline: Like #gets, but raises an exception at end-of-stream. - * - IO#readlines: Returns all remaining lines in an array. + * === Line Input + * + * Class \IO supports line-oriented input for + * {files}[rdoc-ref:IO@File+Line+Input] and {IO streams}[rdoc-ref:IO@Stream+Line+Input] + * + * ==== \File Line Input * - * Each of these reader methods accepts: + * You can read lines from a file using these methods: * - * - An optional line separator, +sep+; + * - IO.foreach: Reads each line and passes it to the given block. + * - IO.readlines: Reads and returns all lines in an array. + * + * For each of these methods: + * + * - You can specify {open options}[rdoc-ref:IO@Open+Options]. + * - Line parsing depends on the effective <i>line separator</i>; * see {Line Separator}[rdoc-ref:IO@Line+Separator]. - * - An optional line-size limit, +limit+; + * - The length of each returned line depends on the effective <i>line limit</i>; * see {Line Limit}[rdoc-ref:IO@Line+Limit]. 
* - * For each of these reader methods, reading may begin mid-line, - * depending on the stream's position; - * see {Position}[rdoc-ref:IO@Position]: + * ==== Stream Line Input * - * f = File.new('t.txt') - * f.pos = 27 - * f.each_line {|line| p line } - * f.close + * You can read lines from an \IO stream using these methods: * - * Output: - * - * "rth line\n" - * "Fifth line\n" + * - IO#each_line: Reads each remaining line, passing it to the given block. + * - IO#gets: Returns the next line. + * - IO#readline: Like #gets, but raises an exception at end-of-stream. + * - IO#readlines: Returns all remaining lines in an array. * - * You can write to an \IO stream line-by-line using this method: + * For each of these methods: * - * - IO#puts: Writes objects to the stream. + * - Reading may begin mid-line, + * depending on the stream's _position_; + * see {Position}[rdoc-ref:IO@Position]. + * - Line parsing depends on the effective <i>line separator</i>; + * see {Line Separator}[rdoc-ref:IO@Line+Separator]. + * - The length of each returned line depends on the effective <i>line limit</i>; + * see {Line Limit}[rdoc-ref:IO@Line+Limit]. * - * === Line Separator + * ===== Line Separator * - * Each of these methods uses a <i>line separator</i>, - * which is the string that delimits lines: + * Each of the {line input methods}[rdoc-ref:IO@Line+Input] uses a <i>line separator</i>: + * the string that determines what is considered a line; + * it is sometimes called the <i>input record separator</i>. * - * - IO.foreach. - * - IO.readlines. - * - IO#each_line. - * - IO#gets. - * - IO#readline. - * - IO#readlines. + * The default line separator is taken from global variable <tt>$/</tt>, + * whose initial value is <tt>"\n"</tt>. * - * The default line separator is the given by the global variable <tt>$/</tt>, - * whose value is by default <tt>"\n"</tt>. 
- * The line to be read next is all data from the current position - * to the next line separator: + * Generally, the line to be read next is all data + * from the current {position}[rdoc-ref:IO@Position] + * to the next line separator + * (but see {Special Line Separator Values}[rdoc-ref:IO@Special+Line+Separator+Values]): * * f = File.new('t.txt') + * # Method gets with no sep argument returns the next line, according to $/. * f.gets # => "First line\n" * f.gets # => "Second line\n" * f.gets # => "\n" @@ -14919,7 +15301,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * f.gets # => "Fifth line\n" * f.close * - * You can specify a different line separator: + * You can use a different line separator by passing argument +sep+: * * f = File.new('t.txt') * f.gets('l') # => "First l" @@ -14928,15 +15310,27 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * f.gets # => "e\n" * f.close * - * There are two special line separators: + * Or by setting global variable <tt>$/</tt>: + * + * f = File.new('t.txt') + * $/ = 'l' + * f.gets # => "First l" + * f.gets # => "ine\nSecond l" + * f.gets # => "ine\n\nFourth l" + * f.close + * + * ===== Special Line Separator Values * - * - +nil+: The entire stream is read into a single string: + * Each of the {line input methods}[rdoc-ref:IO@Line+Input] + * accepts two special values for parameter +sep+: + * + * - +nil+: The entire stream is to be read ("slurped") into a single string: * * f = File.new('t.txt') * f.gets(nil) # => "First line\nSecond line\n\nFourth line\nFifth line\n" * f.close * - * - <tt>''</tt> (the empty string): The next "paragraph" is read + * - <tt>''</tt> (the empty string): The next "paragraph" is to be read * (paragraphs being separated by two consecutive line separators): * * f = File.new('t.txt') @@ -14944,23 +15338,18 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * f.gets('') # => "Fourth line\nFifth line\n" * f.close * - * === Line Limit - * - * Each of these methods uses a <i>line limit</i>, - * 
which specifies that the number of bytes returned may not be (much) longer - * than the given +limit+; + * ===== Line Limit * - * - IO.foreach. - * - IO.readlines. - * - IO#each_line. - * - IO#gets. - * - IO#readline. - * - IO#readlines. + * Each of the {line input methods}[rdoc-ref:IO@Line+Input] + * uses an integer <i>line limit</i>, + * which restricts the number of bytes that may be returned. + * (A multi-byte character will not be split, and so a returned line may be slightly longer + * than the limit.) * - * A multi-byte character will not be split, and so a line may be slightly longer - * than the given limit. + * The default limit value is <tt>-1</tt>; + * any negative limit value means that there is no limit. * - * If +limit+ is not given, the line is determined only by +sep+. + * If there is no limit, the line is determined only by +sep+. * * # Text with 1-byte characters. * File.open('t.txt') {|f| f.gets(1) } # => "F" @@ -14972,30 +15361,29 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * File.open('t.txt') {|f| f.gets(11) } # => "First line\n" * File.open('t.txt') {|f| f.gets(12) } # => "First line\n" * - * # Text with 2-byte characters, which will not be split. - * File.open('t.rus') {|f| f.gets(1).size } # => 1 - * File.open('t.rus') {|f| f.gets(2).size } # => 1 - * File.open('t.rus') {|f| f.gets(3).size } # => 2 - * File.open('t.rus') {|f| f.gets(4).size } # => 2 + * # Text with 3-byte characters, which will not be split. 
+ * File.read('t.ja') # => "こんにちは" + * File.open('t.ja') {|f| f.gets(1).size } # => 1 + * File.open('t.ja') {|f| f.gets(2).size } # => 1 + * File.open('t.ja') {|f| f.gets(3).size } # => 1 + * File.open('t.ja') {|f| f.gets(4).size } # => 2 + * File.open('t.ja') {|f| f.gets(5).size } # => 2 * - * === Line Separator and Line Limit + * ===== Line Separator and Line Limit * - * With arguments +sep+ and +limit+ given, - * combines the two behaviors: + * With arguments +sep+ and +limit+ given, combines the two behaviors: * * - Returns the next line as determined by line separator +sep+. - * - But returns no more bytes than are allowed by the limit. + * - But returns no more bytes than are allowed by the limit +limit+. * * Example: * * File.open('t.txt') {|f| f.gets('li', 20) } # => "First li" * File.open('t.txt') {|f| f.gets('li', 2) } # => "Fi" * - * === Line Number + * ===== Line Number * - * A readable \IO stream has a non-negative integer <i>line number</i>. - * - * The relevant methods: + * A readable \IO stream has a non-negative integer <i>line number</i>: * * - IO#lineno: Returns the line number. * - IO#lineno=: Resets and returns the line number. @@ -15003,7 +15391,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * Unless modified by a call to method IO#lineno=, * the line number is the number of lines read * by certain line-oriented methods, - * according to the given line separator +sep+: + * according to the effective {line separator}[rdoc-ref:IO@Line+Separator]: * * - IO.foreach: Increments the line number on each call to the block. * - IO#each_line: Increments the line number on each call to the block. @@ -15093,6 +15481,12 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * $. # => 5 * f.close * + * === Line Output + * + * You can write to an \IO stream line-by-line using this method: + * + * - IO#puts: Writes objects to the stream. 
+ * * == Character \IO * * You can process an \IO stream character-by-character using these methods: @@ -15103,6 +15497,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * - IO#putc: Writes a character to the stream. * - IO#each_char: Reads each remaining character in the stream, * passing the character to the given block. + * * == Byte \IO * * You can process an \IO stream byte-by-byte using these methods: @@ -15121,10 +15516,10 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * * == What's Here * - * First, what's elsewhere. \Class \IO: + * First, what's elsewhere. Class \IO: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * Here, class \IO provides methods that are useful for: @@ -15388,18 +15783,21 @@ Init_IO(void) rb_define_method(rb_cIO, "initialize", rb_io_initialize, -1); rb_output_fs = Qnil; - rb_define_hooked_variable("$,", &rb_output_fs, 0, deprecated_str_setter); + rb_define_hooked_variable("$,", &rb_output_fs, 0, rb_deprecated_str_setter); rb_default_rs = rb_fstring_lit("\n"); /* avoid modifying RS_default */ - rb_gc_register_mark_object(rb_default_rs); + rb_vm_register_global_object(rb_default_rs); rb_rs = rb_default_rs; rb_output_rs = Qnil; - rb_define_hooked_variable("$/", &rb_rs, 0, deprecated_str_setter); - rb_define_hooked_variable("$-0", &rb_rs, 0, deprecated_str_setter); - rb_define_hooked_variable("$\\", &rb_output_rs, 0, deprecated_str_setter); + rb_define_hooked_variable("$/", &rb_rs, 0, deprecated_rs_setter); + rb_gvar_ractor_local("$/"); // not local but ractor safe + rb_define_hooked_variable("$-0", &rb_rs, 0, deprecated_rs_setter); + rb_gvar_ractor_local("$-0"); // not local but ractor safe + rb_define_hooked_variable("$\\", &rb_output_rs, 0, 
rb_deprecated_str_setter); rb_define_virtual_variable("$_", get_LAST_READ_LINE, set_LAST_READ_LINE); rb_gvar_ractor_local("$_"); + rb_gvar_box_dynamic("$_"); rb_define_method(rb_cIO, "initialize_copy", rb_io_init_copy, 1); rb_define_method(rb_cIO, "reopen", rb_io_reopen, -1); @@ -15585,7 +15983,7 @@ Init_IO(void) rb_define_method(rb_cARGF, "binmode", argf_binmode_m, 0); rb_define_method(rb_cARGF, "binmode?", argf_binmode_p, 0); - rb_define_method(rb_cARGF, "write", argf_write, 1); + rb_define_method(rb_cARGF, "write", argf_write, -1); rb_define_method(rb_cARGF, "print", rb_io_print, -1); rb_define_method(rb_cARGF, "putc", rb_io_putc, 1); rb_define_method(rb_cARGF, "puts", rb_io_puts, -1); @@ -15621,7 +16019,7 @@ Init_IO(void) rb_define_hooked_variable("$.", &argf, argf_lineno_getter, argf_lineno_setter); rb_define_hooked_variable("$FILENAME", &argf, argf_filename_getter, rb_gvar_readonly_setter); - ARGF.filename = rb_str_new2("-"); + ARGF_SET(filename, rb_str_new2("-")); rb_define_hooked_variable("$-i", &argf, opt_i_get, opt_i_set); rb_gvar_ractor_local("$-i"); |
