summary | refs | log | tree | commit | diff
path: root/io.c
diff options
context:
space:
mode:
Diffstat (limited to 'io.c')
-rw-r--r-- io.c 1508
1 files changed, 953 insertions, 555 deletions
diff --git a/io.c b/io.c
index ca2cb904ee..effcb349c3 100644
--- a/io.c
+++ b/io.c
@@ -104,6 +104,16 @@
#ifdef HAVE_COPYFILE_H
# include <copyfile.h>
+
+# ifndef COPYFILE_STATE_COPIED
+/*
+ * Some OSes (e.g., OSX < 10.6) implement fcopyfile() but not
+ * COPYFILE_STATE_COPIED. Since the only use of the former here
+ * requires the latter, we disable the former when the latter is undefined.
+ */
+# undef HAVE_FCOPYFILE
+# endif
+
#endif
#include "ruby/internal/stdbool.h"
@@ -112,6 +122,7 @@
#include "encindex.h"
#include "id.h"
#include "internal.h"
+#include "internal/class.h"
#include "internal/encoding.h"
#include "internal/error.h"
#include "internal/inits.h"
@@ -209,7 +220,18 @@ static VALUE sym_DATA;
static VALUE sym_HOLE;
#endif
-static VALUE prep_io(int fd, int fmode, VALUE klass, const char *path);
+static VALUE prep_io(int fd, enum rb_io_mode fmode, VALUE klass, const char *path);
+
+VALUE /* Thin wrapper: forwards io, function, argument and events unchanged to rb_thread_io_blocking_call(). */
+rb_io_blocking_region_wait(struct rb_io *io, rb_blocking_function_t *function, void *argument, enum rb_io_event events)
+{
+ return rb_thread_io_blocking_call(io, function, argument, events); /* the callee's result is returned as-is */
+}
+
+VALUE rb_io_blocking_region(struct rb_io *io, rb_blocking_function_t *function, void *argument) /* Same as rb_io_blocking_region_wait() with an events value of 0. */
+{
+ return rb_io_blocking_region_wait(io, function, argument, 0); /* NOTE(review): 0 presumably means "no readiness events requested" -- confirm against rb_thread_io_blocking_call() */
+}
struct argf {
VALUE filename, current_file;
@@ -478,6 +500,7 @@ rb_cloexec_fcntl_dupfd(int fd, int minfd)
#define argf_of(obj) (*(struct argf *)DATA_PTR(obj))
#define ARGF argf_of(argf)
+#define ARGF_SET(field, value) RB_OBJ_WRITE(argf, &ARGF.field, value)
#define GetWriteIO(io) rb_io_get_write_io(io)
@@ -519,7 +542,8 @@ rb_cloexec_fcntl_dupfd(int fd, int minfd)
#endif
static int io_fflush(rb_io_t *);
-static rb_io_t *flush_before_seek(rb_io_t *fptr);
+static rb_io_t *flush_before_seek(rb_io_t *fptr, bool discard_rbuf);
+static void clear_codeconv(rb_io_t *fptr);
#define FMODE_SIGNAL_ON_EPIPE (1<<17)
@@ -533,10 +557,12 @@ static rb_io_t *flush_before_seek(rb_io_t *fptr);
extern ID ruby_static_id_signo;
-NORETURN(static void raise_on_write(rb_io_t *fptr, int e, VALUE errinfo));
+NORETURN(static void rb_sys_fail_on_write(rb_io_t *fptr));
static void
-raise_on_write(rb_io_t *fptr, int e, VALUE errinfo)
+rb_sys_fail_on_write(rb_io_t *fptr)
{
+ int e = errno;
+ VALUE errinfo = rb_syserr_new_path(e, (fptr)->pathv);
#if defined EPIPE
if (fptr_signal_on_epipe(fptr) && (e == EPIPE)) {
const VALUE sig =
@@ -550,12 +576,6 @@ raise_on_write(rb_io_t *fptr, int e, VALUE errinfo)
rb_exc_raise(errinfo);
}
-#define rb_sys_fail_on_write(fptr) \
- do { \
- int e = errno; \
- raise_on_write(fptr, e, rb_syserr_new_path(e, (fptr)->pathv)); \
- } while (0)
-
#define NEED_NEWLINE_DECORATOR_ON_READ(fptr) ((fptr)->mode & FMODE_TEXTMODE)
#define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) ((fptr)->mode & FMODE_TEXTMODE)
#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
@@ -608,7 +628,7 @@ raise_on_write(rb_io_t *fptr, int e, VALUE errinfo)
* IO unread with taking care of removed '\r' in text mode.
*/
static void
-io_unread(rb_io_t *fptr)
+io_unread(rb_io_t *fptr, bool discard_rbuf)
{
rb_off_t r, pos;
ssize_t read_size;
@@ -629,19 +649,17 @@ io_unread(rb_io_t *fptr)
if (r < 0 && errno) {
if (errno == ESPIPE)
fptr->mode |= FMODE_DUPLEX;
- return;
+ if (!discard_rbuf) return;
}
- fptr->rbuf.off = 0;
- fptr->rbuf.len = 0;
- return;
+ goto end;
}
pos = lseek(fptr->fd, 0, SEEK_CUR);
if (pos < 0 && errno) {
if (errno == ESPIPE)
fptr->mode |= FMODE_DUPLEX;
- return;
+ if (!discard_rbuf) goto end;
}
/* add extra offset for removed '\r' in rbuf */
@@ -682,8 +700,10 @@ io_unread(rb_io_t *fptr)
}
}
free(buf);
+ end:
fptr->rbuf.off = 0;
fptr->rbuf.len = 0;
+ clear_codeconv(fptr);
return;
}
@@ -702,7 +722,7 @@ set_binary_mode_with_seek_cur(rb_io_t *fptr)
if (fptr->rbuf.len == 0 || fptr->mode & FMODE_DUPLEX) {
return setmode(fptr->fd, O_BINARY);
}
- flush_before_seek(fptr);
+ flush_before_seek(fptr, false);
return setmode(fptr->fd, O_BINARY);
}
#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) set_binary_mode_with_seek_cur(fptr)
@@ -898,7 +918,7 @@ rb_io_s_try_convert(VALUE dummy, VALUE io)
#if !RUBY_CRLF_ENVIRONMENT
static void
-io_unread(rb_io_t *fptr)
+io_unread(rb_io_t *fptr, bool discard_rbuf)
{
rb_off_t r;
rb_io_check_closed(fptr);
@@ -910,10 +930,11 @@ io_unread(rb_io_t *fptr)
if (r < 0 && errno) {
if (errno == ESPIPE)
fptr->mode |= FMODE_DUPLEX;
- return;
+ if (!discard_rbuf) return;
}
fptr->rbuf.off = 0;
fptr->rbuf.len = 0;
+ clear_codeconv(fptr);
return;
}
#endif
@@ -954,17 +975,17 @@ io_ungetbyte(VALUE str, rb_io_t *fptr)
}
static rb_io_t *
-flush_before_seek(rb_io_t *fptr)
+flush_before_seek(rb_io_t *fptr, bool discard_rbuf)
{
if (io_fflush(fptr) < 0)
rb_sys_fail_on_write(fptr);
- io_unread(fptr);
+ io_unread(fptr, discard_rbuf);
errno = 0;
return fptr;
}
-#define io_seek(fptr, ofs, whence) (errno = 0, lseek(flush_before_seek(fptr)->fd, (ofs), (whence)))
-#define io_tell(fptr) lseek(flush_before_seek(fptr)->fd, 0, SEEK_CUR)
+#define io_seek(fptr, ofs, whence) (errno = 0, lseek(flush_before_seek(fptr, true)->fd, (ofs), (whence)))
+#define io_tell(fptr) lseek(flush_before_seek(fptr, false)->fd, 0, SEEK_CUR)
#ifndef SEEK_CUR
# define SEEK_SET 0
@@ -1032,7 +1053,7 @@ rb_io_check_writable(rb_io_t *fptr)
rb_raise(rb_eIOError, "not opened for writing");
}
if (fptr->rbuf.len) {
- io_unread(fptr);
+ io_unread(fptr, true);
}
}
@@ -1090,7 +1111,7 @@ ruby_dup(int orig)
static VALUE
io_alloc(VALUE klass)
{
- NEWOBJ_OF(io, struct RFile, klass, T_FILE, sizeof(struct RFile), 0);
+ UNPROTECTED_NEWOBJ_OF(io, struct RFile, klass, T_FILE, sizeof(struct RFile));
io->fptr = 0;
@@ -1146,6 +1167,11 @@ static int nogvl_wait_for(VALUE th, rb_io_t *fptr, short events, struct timeval
static inline int
io_internal_wait(VALUE thread, rb_io_t *fptr, int error, int events, struct timeval *timeout)
{
+ if (!timeout && rb_thread_mn_schedulable(thread)) {
+ RUBY_ASSERT(errno == EWOULDBLOCK || errno == EAGAIN);
+ return -1;
+ }
+
int ready = nogvl_wait_for(thread, fptr, events, timeout);
if (ready > 0) {
@@ -1156,8 +1182,15 @@ io_internal_wait(VALUE thread, rb_io_t *fptr, int error, int events, struct time
return -1;
}
- errno = error;
- return -1;
+ // If there was an error BEFORE we started waiting, return it:
+ if (error) {
+ errno = error;
+ return -1;
+ }
+ else {
+ // Otherwise, whatever error was generated by `nogvl_wait_for` is the one we want:
+ return ready;
+ }
}
static VALUE
@@ -1259,7 +1292,8 @@ internal_writev_func(void *ptr)
static ssize_t
rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count)
{
- VALUE scheduler = rb_fiber_scheduler_current();
+ rb_thread_t *th = GET_THREAD();
+ VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th);
if (scheduler != Qnil) {
VALUE result = rb_fiber_scheduler_io_read_memory(scheduler, fptr->self, buf, count, 0);
@@ -1269,7 +1303,7 @@ rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count)
}
struct io_internal_read_struct iis = {
- .th = rb_thread_current(),
+ .th = th->self,
.fptr = fptr,
.nonblock = 0,
.fd = fptr->fd,
@@ -1286,13 +1320,14 @@ rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count)
iis.timeout = &timeout_storage;
}
- return (ssize_t)rb_thread_io_blocking_call(internal_read_func, &iis, fptr->fd, RB_WAITFD_IN);
+ return (ssize_t)rb_io_blocking_region_wait(fptr, internal_read_func, &iis, RUBY_IO_READABLE);
}
static ssize_t
rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count)
{
- VALUE scheduler = rb_fiber_scheduler_current();
+ rb_thread_t *th = GET_THREAD();
+ VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th);
if (scheduler != Qnil) {
VALUE result = rb_fiber_scheduler_io_write_memory(scheduler, fptr->self, buf, count, 0);
@@ -1302,7 +1337,7 @@ rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count)
}
struct io_internal_write_struct iis = {
- .th = rb_thread_current(),
+ .th = th->self,
.fptr = fptr,
.nonblock = 0,
.fd = fptr->fd,
@@ -1319,7 +1354,7 @@ rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count)
iis.timeout = &timeout_storage;
}
- return (ssize_t)rb_thread_io_blocking_call(internal_write_func, &iis, fptr->fd, RB_WAITFD_OUT);
+ return (ssize_t)rb_io_blocking_region_wait(fptr, internal_write_func, &iis, RUBY_IO_WRITABLE);
}
#ifdef HAVE_WRITEV
@@ -1328,7 +1363,9 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt)
{
if (!iovcnt) return 0;
- VALUE scheduler = rb_fiber_scheduler_current();
+ rb_thread_t *th = GET_THREAD();
+
+ VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th);
if (scheduler != Qnil) {
// This path assumes at least one `iov`:
VALUE result = rb_fiber_scheduler_io_write_memory(scheduler, fptr->self, iov[0].iov_base, iov[0].iov_len, 0);
@@ -1339,7 +1376,7 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt)
}
struct io_internal_writev_struct iis = {
- .th = rb_thread_current(),
+ .th = th->self,
.fptr = fptr,
.nonblock = 0,
.fd = fptr->fd,
@@ -1356,7 +1393,7 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt)
iis.timeout = &timeout_storage;
}
- return (ssize_t)rb_thread_io_blocking_call(internal_writev_func, &iis, fptr->fd, RB_WAITFD_OUT);
+ return (ssize_t)rb_io_blocking_region_wait(fptr, internal_writev_func, &iis, RUBY_IO_WRITABLE);
}
#endif
@@ -1382,11 +1419,35 @@ io_flush_buffer_sync(void *arg)
return (VALUE)-1;
}
+static inline VALUE /* Offers the pending write-buffer bytes of fptr to the fiber scheduler via rb_fiber_scheduler_io_write_memory(). */
+io_flush_buffer_fiber_scheduler(VALUE scheduler, rb_io_t *fptr)
+{
+ VALUE ret = rb_fiber_scheduler_io_write_memory(scheduler, fptr->self, fptr->wbuf.ptr+fptr->wbuf.off, fptr->wbuf.len, 0); /* pending bytes are wbuf.len bytes starting at wbuf.ptr + wbuf.off */
+ if (!UNDEF_P(ret)) {
+ ssize_t result = rb_fiber_scheduler_io_result_apply(ret); /* convert the scheduler's answer into a signed byte count */
+ if (result > 0) { /* bytes were written: drop them from the front of the buffer */
+ fptr->wbuf.off += result;
+ fptr->wbuf.len -= result;
+ }
+ return result >= 0 ? (VALUE)0 : (VALUE)-1; /* 0 for any non-negative result (including zero bytes), (VALUE)-1 for a negative one */
+ }
+ return ret; /* still undef here; io_flush_buffer_async() tests UNDEF_P() on this and falls back to the blocking path */
+}
+
static VALUE
io_flush_buffer_async(VALUE arg)
{
rb_io_t *fptr = (rb_io_t *)arg;
- return rb_thread_io_blocking_call(io_flush_buffer_sync, fptr, fptr->fd, RB_WAITFD_OUT);
+
+ VALUE scheduler = rb_fiber_scheduler_current();
+ if (scheduler != Qnil) {
+ VALUE result = io_flush_buffer_fiber_scheduler(scheduler, fptr);
+ if (!UNDEF_P(result)) {
+ return result;
+ }
+ }
+
+ return rb_io_blocking_region_wait(fptr, io_flush_buffer_sync, fptr, RUBY_IO_WRITABLE);
}
static inline int
@@ -1421,7 +1482,8 @@ io_fflush(rb_io_t *fptr)
VALUE
rb_io_wait(VALUE io, VALUE events, VALUE timeout)
{
- VALUE scheduler = rb_fiber_scheduler_current();
+ rb_thread_t *th = GET_THREAD();
+ VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th);
if (scheduler != Qnil) {
return rb_fiber_scheduler_io_wait(scheduler, io, events, timeout);
@@ -1442,7 +1504,7 @@ rb_io_wait(VALUE io, VALUE events, VALUE timeout)
tv = &tv_storage;
}
- int ready = rb_thread_wait_for_single_fd(fptr->fd, RB_NUM2INT(events), tv);
+ int ready = rb_thread_io_wait(th, fptr, RB_NUM2INT(events), tv);
if (ready < 0) {
rb_sys_fail(0);
@@ -1466,17 +1528,15 @@ io_from_fd(int fd)
}
static int
-io_wait_for_single_fd(int fd, int events, struct timeval *timeout)
+io_wait_for_single_fd(int fd, int events, struct timeval *timeout, rb_thread_t *th, VALUE scheduler)
{
- VALUE scheduler = rb_fiber_scheduler_current();
-
if (scheduler != Qnil) {
return RTEST(
rb_fiber_scheduler_io_wait(scheduler, io_from_fd(fd), RB_INT2NUM(events), rb_fiber_scheduler_make_timeout(timeout))
);
}
- return rb_thread_wait_for_single_fd(fd, events, timeout);
+ return rb_thread_wait_for_single_fd(th, fd, events, timeout);
}
int
@@ -1484,7 +1544,8 @@ rb_io_wait_readable(int f)
{
io_fd_check_closed(f);
- VALUE scheduler = rb_fiber_scheduler_current();
+ rb_thread_t *th = GET_THREAD();
+ VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th);
switch (errno) {
case EINTR:
@@ -1504,7 +1565,7 @@ rb_io_wait_readable(int f)
);
}
else {
- io_wait_for_single_fd(f, RUBY_IO_READABLE, NULL);
+ io_wait_for_single_fd(f, RUBY_IO_READABLE, NULL, th, scheduler);
}
return TRUE;
@@ -1518,7 +1579,8 @@ rb_io_wait_writable(int f)
{
io_fd_check_closed(f);
- VALUE scheduler = rb_fiber_scheduler_current();
+ rb_thread_t *th = GET_THREAD();
+ VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th);
switch (errno) {
case EINTR:
@@ -1547,7 +1609,7 @@ rb_io_wait_writable(int f)
);
}
else {
- io_wait_for_single_fd(f, RUBY_IO_WRITABLE, NULL);
+ io_wait_for_single_fd(f, RUBY_IO_WRITABLE, NULL, th, scheduler);
}
return TRUE;
@@ -1559,7 +1621,9 @@ rb_io_wait_writable(int f)
int
rb_wait_for_single_fd(int fd, int events, struct timeval *timeout)
{
- return io_wait_for_single_fd(fd, events, timeout);
+ rb_thread_t *th = GET_THREAD();
+ VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th);
+ return io_wait_for_single_fd(fd, events, timeout, th, scheduler);
}
int
@@ -1611,7 +1675,7 @@ rb_io_maybe_wait(int error, VALUE io, VALUE events, VALUE timeout)
default:
// Non-specific error, no event is ready:
- return Qfalse;
+ return Qnil;
}
}
@@ -1623,9 +1687,11 @@ rb_io_maybe_wait_readable(int error, VALUE io, VALUE timeout)
if (RTEST(result)) {
return RB_NUM2INT(result);
}
- else {
- return 0;
+ else if (result == RUBY_Qfalse) {
+ rb_raise(rb_eIOTimeoutError, "Timed out waiting for IO to become readable!");
}
+
+ return 0;
}
int
@@ -1636,9 +1702,11 @@ rb_io_maybe_wait_writable(int error, VALUE io, VALUE timeout)
if (RTEST(result)) {
return RB_NUM2INT(result);
}
- else {
- return 0;
+ else if (result == RUBY_Qfalse) {
+ rb_raise(rb_eIOTimeoutError, "Timed out waiting for IO to become writable!");
}
+
+ return 0;
}
static void
@@ -1699,7 +1767,6 @@ make_writeconv(rb_io_t *fptr)
/* writing functions */
struct binwrite_arg {
rb_io_t *fptr;
- VALUE str;
const char *ptr;
long length;
};
@@ -1849,7 +1916,7 @@ io_binwrite_requires_flush_write(rb_io_t *fptr, long len, int nosync)
}
static long
-io_binwrite(VALUE str, const char *ptr, long len, rb_io_t *fptr, int nosync)
+io_binwrite(const char *ptr, long len, rb_io_t *fptr, int nosync)
{
if (len <= 0) return len;
@@ -1862,7 +1929,6 @@ io_binwrite(VALUE str, const char *ptr, long len, rb_io_t *fptr, int nosync)
struct binwrite_arg arg;
arg.fptr = fptr;
- arg.str = str;
arg.ptr = ptr;
arg.length = len;
@@ -1970,9 +2036,9 @@ io_fwrite(VALUE str, rb_io_t *fptr, int nosync)
if (converted)
OBJ_FREEZE(str);
- tmp = rb_str_tmp_frozen_acquire(str);
+ tmp = rb_str_tmp_frozen_no_embed_acquire(str);
RSTRING_GETMEM(tmp, ptr, len);
- n = io_binwrite(tmp, ptr, len, fptr, nosync);
+ n = io_binwrite(ptr, len, fptr, nosync);
rb_str_tmp_frozen_release(str, tmp);
return n;
@@ -1985,7 +2051,7 @@ rb_io_bufwrite(VALUE io, const void *buf, size_t size)
GetOpenFile(io, fptr);
rb_io_check_writable(fptr);
- return (ssize_t)io_binwrite(0, buf, (long)size, fptr, 0);
+ return (ssize_t)io_binwrite(buf, (long)size, fptr, 0);
}
static VALUE
@@ -2277,7 +2343,7 @@ rb_io_writev(VALUE io, int argc, const VALUE *argv)
if (argc > 1 && rb_obj_method_arity(io, id_write) == 1) {
if (io != rb_ractor_stderr() && RTEST(ruby_verbose)) {
VALUE klass = CLASS_OF(io);
- char sep = FL_TEST(klass, FL_SINGLETON) ? (klass = io, '.') : '#';
+ char sep = RCLASS_SINGLETON_P(klass) ? (klass = io, '.') : '#';
rb_category_warning(
RB_WARN_CATEGORY_DEPRECATED, "%+"PRIsVALUE"%c""write is outdated interface"
" which accepts just one argument",
@@ -2351,7 +2417,7 @@ rb_io_flush_raw(VALUE io, int sync)
rb_sys_fail_on_write(fptr);
}
if (fptr->mode & FMODE_READABLE) {
- io_unread(fptr);
+ io_unread(fptr, true);
}
return io;
@@ -2470,7 +2536,7 @@ interpret_seek_whence(VALUE vwhence)
* f.tell # => 12
* f.close
*
- * - +:SET+ or <tt>IO:SEEK_SET</tt>:
+ * - +:SET+ or <tt>IO::SEEK_SET</tt>:
* Repositions the stream to the given +offset+:
*
* f = File.open('t.txt')
@@ -2594,9 +2660,6 @@ io_fillbuf(rb_io_t *fptr)
fptr->rbuf.len = 0;
fptr->rbuf.capa = IO_RBUF_CAPA_FOR(fptr);
fptr->rbuf.ptr = ALLOC_N(char, fptr->rbuf.capa);
-#ifdef _WIN32
- fptr->rbuf.capa--;
-#endif
}
if (fptr->rbuf.len == 0) {
retry:
@@ -2671,7 +2734,7 @@ rb_io_eof(VALUE io)
READ_CHECK(fptr);
#if RUBY_CRLF_ENVIRONMENT
if (!NEED_READCONV(fptr) && NEED_NEWLINE_DECORATOR_ON_READ(fptr)) {
- return RBOOL(eof(fptr->fd));;
+ return RBOOL(eof(fptr->fd));
}
#endif
return RBOOL(io_fillbuf(fptr) < 0);
@@ -2774,8 +2837,10 @@ rb_io_fsync(VALUE io)
if (io_fflush(fptr) < 0)
rb_sys_fail_on_write(fptr);
- if ((int)rb_thread_io_blocking_region(nogvl_fsync, fptr, fptr->fd) < 0)
+
+ if ((int)rb_io_blocking_region(fptr, nogvl_fsync, fptr))
rb_sys_fail_path(fptr->pathv);
+
return INT2FIX(0);
}
#else
@@ -2824,7 +2889,7 @@ rb_io_fdatasync(VALUE io)
if (io_fflush(fptr) < 0)
rb_sys_fail_on_write(fptr);
- if ((int)rb_thread_io_blocking_region(nogvl_fdatasync, fptr, fptr->fd) == 0)
+ if ((int)rb_io_blocking_region(fptr, nogvl_fdatasync, fptr) == 0)
return INT2FIX(0);
/* fall back */
@@ -3120,8 +3185,6 @@ io_enc_str(VALUE str, rb_io_t *fptr)
return str;
}
-static rb_encoding *io_read_encoding(rb_io_t *fptr);
-
static void
make_readconv(rb_io_t *fptr, int size)
{
@@ -3264,10 +3327,6 @@ io_shift_cbuf(rb_io_t *fptr, int len, VALUE *strp)
static int
io_setstrbuf(VALUE *str, long len)
{
-#ifdef _WIN32
- if (len > 0)
- len = (len + 1) & ~1L; /* round up for wide char */
-#endif
if (NIL_P(*str)) {
*str = rb_str_new(0, len);
return TRUE;
@@ -3409,10 +3468,10 @@ io_read_memory_call(VALUE arg)
}
if (iis->nonblock) {
- return rb_thread_io_blocking_call(internal_read_func, iis, iis->fptr->fd, 0);
+ return rb_io_blocking_region(iis->fptr, internal_read_func, iis);
}
else {
- return rb_thread_io_blocking_call(internal_read_func, iis, iis->fptr->fd, RB_WAITFD_IN);
+ return rb_io_blocking_region_wait(iis->fptr, internal_read_func, iis, RUBY_IO_READABLE);
}
}
@@ -3820,8 +3879,33 @@ rscheck(const char *rsptr, long rslen, VALUE rs)
rb_raise(rb_eRuntimeError, "rs modified");
}
+static const char * /* Searches the len bytes at p for delim; returns a pointer just PAST the delimiter, or NULL if it is absent. */
+search_delim(const char *p, long len, int delim, rb_encoding *enc)
+{
+ if (rb_enc_mbminlen(enc) == 1) { /* minimum character length is one byte: a plain byte search is used */
+ p = memchr(p, delim, len);
+ if (p) return p + 1; /* one past the matching byte */
+ }
+ else { /* wider minimum character length: walk character by character and compare codepoints */
+ const char *end = p + len;
+ while (p < end) {
+ int r = rb_enc_precise_mbclen(p, end, enc);
+ if (!MBCLEN_CHARFOUND_P(r)) { /* no complete valid character here: skip one minimum-length unit and retry */
+ p += rb_enc_mbminlen(enc);
+ continue;
+ }
+ int n = MBCLEN_CHARFOUND_LEN(r); /* byte length of the character at p */
+ if (rb_enc_mbc_to_codepoint(p, end, enc) == (unsigned int)delim) {
+ return p + n; /* one past the whole delimiter character */
+ }
+ p += n;
+ }
+ }
+ return NULL; /* reached when memchr() found nothing or the loop ran to end without a match */
+}
+
static int
-appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
+appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp, rb_encoding *enc)
{
VALUE str = *strp;
long limit = *lp;
@@ -3836,9 +3920,9 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
p = READ_CHAR_PENDING_PTR(fptr);
if (0 < limit && limit < searchlen)
searchlen = (int)limit;
- e = memchr(p, delim, searchlen);
+ e = search_delim(p, searchlen, delim, enc);
if (e) {
- int len = (int)(e-p+1);
+ int len = (int)(e-p);
if (NIL_P(str))
*strp = str = rb_str_new(p, len);
else
@@ -3878,8 +3962,8 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
long last;
if (limit > 0 && pending > limit) pending = limit;
- e = memchr(p, delim, pending);
- if (e) pending = e - p + 1;
+ e = search_delim(p, pending, delim, enc);
+ if (e) pending = e - p;
if (!NIL_P(str)) {
last = RSTRING_LEN(str);
rb_str_resize(str, last + pending);
@@ -4134,21 +4218,31 @@ rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr)
rs = 0;
if (!rb_enc_asciicompat(enc)) {
rs = rb_usascii_str_new(rsptr, rslen);
- rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
+ rs = rb_str_conv_enc(rs, 0, enc);
OBJ_FREEZE(rs);
rsptr = RSTRING_PTR(rs);
rslen = RSTRING_LEN(rs);
}
+ newline = '\n';
+ }
+ else if (rb_enc_mbminlen(enc) == 1) {
+ rsptr = RSTRING_PTR(rs);
+ newline = (unsigned char)rsptr[rslen - 1];
}
else {
+ rs = rb_str_conv_enc(rs, 0, enc);
rsptr = RSTRING_PTR(rs);
+ const char *e = rsptr + rslen;
+ const char *last = rb_enc_prev_char(rsptr, e, e, enc);
+ int n;
+ newline = rb_enc_codepoint_len(last, e, &n, enc);
+ if (last + n != e) rb_raise(rb_eArgError, "broken separator");
}
- newline = (unsigned char)rsptr[rslen - 1];
- chomp_cr = chomp && rslen == 1 && newline == '\n';
+ chomp_cr = chomp && newline == '\n' && rslen == rb_enc_mbminlen(enc);
}
/* MS - Optimization */
- while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
+ while ((c = appendline(fptr, newline, &str, &limit, enc)) != EOF) {
const char *s, *p, *pp, *e;
if (c == newline) {
@@ -4169,8 +4263,8 @@ rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr)
if (limit == 0) {
s = RSTRING_PTR(str);
p = RSTRING_END(str);
- pp = rb_enc_left_char_head(s, p-1, p, enc);
- if (extra_limit &&
+ pp = rb_enc_prev_char(s, p, p, enc);
+ if (extra_limit && pp &&
MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(pp, p, enc))) {
/* relax the limit while incomplete character.
* extra_limit limits the relax length */
@@ -4236,11 +4330,17 @@ rb_io_gets(VALUE io)
}
VALUE
-rb_io_gets_internal(VALUE io)
+rb_io_gets_limit_internal(VALUE io, long limit)
{
rb_io_t *fptr;
GetOpenFile(io, fptr);
- return rb_io_getline_0(rb_default_rs, -1, FALSE, fptr);
+ return rb_io_getline_0(rb_default_rs, limit, FALSE, fptr);
+}
+
+VALUE /* Reads one line from io by delegating to rb_io_gets_limit_internal() with a limit of -1. */
+rb_io_gets_internal(VALUE io)
+{
+ return rb_io_gets_limit_internal(io, -1); /* -1 is the value io_readline() in this diff starts from when no limit argument is given */
+}
/*
@@ -4297,11 +4397,8 @@ rb_io_gets_internal(VALUE io)
* File.open('t.txt') {|f| f.gets(12) } # => "First line\n"
*
* With arguments +sep+ and +limit+ given,
- * combines the two behaviors:
- *
- * - Returns the next line as determined by line separator +sep+,
- * or +nil+ if none.
- * - But returns no more bytes than are allowed by the limit.
+ * combines the two behaviors
+ * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]).
*
* Optional keyword argument +chomp+ specifies whether line separators
* are to be omitted:
@@ -4372,23 +4469,31 @@ rb_io_set_lineno(VALUE io, VALUE lineno)
static VALUE
io_readline(rb_execution_context_t *ec, VALUE io, VALUE sep, VALUE lim, VALUE chomp)
{
+ long limit = -1;
if (NIL_P(lim)) {
+ VALUE tmp = Qnil;
// If sep is specified, but it's not a string and not nil, then assume
// it's the limit (it should be an integer)
- if (!NIL_P(sep) && NIL_P(rb_check_string_type(sep))) {
+ if (!NIL_P(sep) && NIL_P(tmp = rb_check_string_type(sep))) {
// If the user has specified a non-nil / non-string value
// for the separator, we assume it's the limit and set the
// separator to default: rb_rs.
lim = sep;
+ limit = NUM2LONG(lim);
sep = rb_rs;
}
+ else {
+ sep = tmp;
+ }
}
-
- if (!NIL_P(sep)) {
- StringValue(sep);
+ else {
+ if (!NIL_P(sep)) StringValue(sep);
+ limit = NUM2LONG(lim);
}
- VALUE line = rb_io_getline_1(sep, NIL_P(lim) ? -1L : NUM2LONG(lim), RTEST(chomp), io);
+ check_getline_args(&sep, &limit, io);
+
+ VALUE line = rb_io_getline_1(sep, limit, RTEST(chomp), io);
rb_lastline_set_up(line, 1);
if (NIL_P(line)) {
@@ -4450,10 +4555,8 @@ static VALUE io_readlines(const struct getline_arg *arg, VALUE io);
* f.close
*
* With arguments +sep+ and +limit+ given,
- * combines the two behaviors:
- *
- * - Returns lines as determined by line separator +sep+.
- * - But returns no more bytes in a line than are allowed by the limit.
+ * combines the two behaviors
+ * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]).
*
* Optional keyword argument +chomp+ specifies whether line separators
* are to be omitted:
@@ -4573,10 +4676,8 @@ io_readlines(const struct getline_arg *arg, VALUE io)
* "ne\n"
*
* With arguments +sep+ and +limit+ given,
- * combines the two behaviors:
- *
- * - Calls with the next line as determined by line separator +sep+.
- * - But returns no more bytes than are allowed by the limit.
+ * combines the two behaviors
+ * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]).
*
* Optional keyword argument +chomp+ specifies whether line separators
* are to be omitted:
@@ -4620,10 +4721,11 @@ rb_io_each_line(int argc, VALUE *argv, VALUE io)
* Calls the given block with each byte (0..255) in the stream; returns +self+.
* See {Byte IO}[rdoc-ref:IO@Byte+IO].
*
- * f = File.new('t.rus')
+ * File.read('t.ja') # => "こんにちは"
+ * f = File.new('t.ja')
* a = []
* f.each_byte {|b| a << b }
- * a # => [209, 130, 208, 181, 209, 129, 209, 130]
+ * a # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175]
* f.close
*
* Returns an Enumerator if no block is given.
@@ -4768,10 +4870,11 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
* Calls the given block with each character in the stream; returns +self+.
* See {Character IO}[rdoc-ref:IO@Character+IO].
*
- * f = File.new('t.rus')
+ * File.read('t.ja') # => "こんにちは"
+ * f = File.new('t.ja')
* a = []
* f.each_char {|c| a << c.ord }
- * a # => [1090, 1077, 1089, 1090]
+ * a # => [12371, 12435, 12395, 12385, 12399]
* f.close
*
* Returns an Enumerator if no block is given.
@@ -4806,10 +4909,11 @@ rb_io_each_char(VALUE io)
*
* Calls the given block with each codepoint in the stream; returns +self+:
*
- * f = File.new('t.rus')
+ * File.read('t.ja') # => "こんにちは"
+ * f = File.new('t.ja')
* a = []
* f.each_codepoint {|c| a << c }
- * a # => [1090, 1077, 1089, 1090]
+ * a # => [12371, 12435, 12395, 12385, 12399]
* f.close
*
* Returns an Enumerator if no block is given.
@@ -4831,6 +4935,7 @@ rb_io_each_codepoint(VALUE io)
rb_io_check_char_readable(fptr);
READ_CHECK(fptr);
+ enc = io_read_encoding(fptr);
if (NEED_READCONV(fptr)) {
SET_BINARY_MODE(fptr);
r = 1; /* no invalid char yet */
@@ -4838,12 +4943,9 @@ rb_io_each_codepoint(VALUE io)
make_readconv(fptr, 0);
for (;;) {
if (fptr->cbuf.len) {
- if (fptr->encs.enc)
- r = rb_enc_precise_mbclen(fptr->cbuf.ptr+fptr->cbuf.off,
- fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len,
- fptr->encs.enc);
- else
- r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
+ r = rb_enc_precise_mbclen(fptr->cbuf.ptr+fptr->cbuf.off,
+ fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len,
+ enc);
if (!MBCLEN_NEEDMORE_P(r))
break;
if (fptr->cbuf.len == fptr->cbuf.capa) {
@@ -4853,33 +4955,25 @@ rb_io_each_codepoint(VALUE io)
if (more_char(fptr) == MORE_CHAR_FINISHED) {
clear_readconv(fptr);
if (!MBCLEN_CHARFOUND_P(r)) {
- enc = fptr->encs.enc;
goto invalid;
}
return io;
}
}
if (MBCLEN_INVALID_P(r)) {
- enc = fptr->encs.enc;
goto invalid;
}
n = MBCLEN_CHARFOUND_LEN(r);
- if (fptr->encs.enc) {
- c = rb_enc_codepoint(fptr->cbuf.ptr+fptr->cbuf.off,
- fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len,
- fptr->encs.enc);
- }
- else {
- c = (unsigned char)fptr->cbuf.ptr[fptr->cbuf.off];
- }
+ c = rb_enc_codepoint(fptr->cbuf.ptr+fptr->cbuf.off,
+ fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len,
+ enc);
fptr->cbuf.off += n;
fptr->cbuf.len -= n;
rb_yield(UINT2NUM(c));
- rb_io_check_byte_readable(fptr);
+ rb_io_check_char_readable(fptr);
}
}
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
- enc = io_input_encoding(fptr);
while (io_fillbuf(fptr) >= 0) {
r = rb_enc_precise_mbclen(fptr->rbuf.ptr+fptr->rbuf.off,
fptr->rbuf.ptr+fptr->rbuf.off+fptr->rbuf.len, enc);
@@ -4933,8 +5027,9 @@ rb_io_each_codepoint(VALUE io)
* f = File.open('t.txt')
* f.getc # => "F"
* f.close
- * f = File.open('t.rus')
- * f.getc.ord # => 1090
+ * File.read('t.ja') # => "こんにちは"
+ * f = File.open('t.ja')
+ * f.getc.ord # => 12371
* f.close
*
* Related: IO#readchar (may raise EOFError).
@@ -4966,8 +5061,9 @@ rb_io_getc(VALUE io)
* f = File.open('t.txt')
* f.readchar # => "F"
* f.close
- * f = File.open('t.rus')
- * f.readchar.ord # => 1090
+ * File.read('t.ja') # => "こんにちは"
+ * f = File.open('t.ja')
+ * f.readchar.ord # => 12371
* f.close
*
* Related: IO#getc (will not raise EOFError).
@@ -4996,8 +5092,9 @@ rb_io_readchar(VALUE io)
* f = File.open('t.txt')
* f.getbyte # => 70
* f.close
- * f = File.open('t.rus')
- * f.getbyte # => 209
+ * File.read('t.ja') # => "こんにちは"
+ * f = File.open('t.ja')
+ * f.getbyte # => 227
* f.close
*
* Related: IO#readbyte (may raise EOFError).
@@ -5040,8 +5137,9 @@ rb_io_getbyte(VALUE io)
* f = File.open('t.txt')
* f.readbyte # => 70
* f.close
- * f = File.open('t.rus')
- * f.readbyte # => 209
+ * File.read('t.ja') # => "こんにちは"
+ * f = File.open('t.ja')
+ * f.readbyte # => 227
* f.close
*
* Related: IO#getbyte (will not raise EOFError).
@@ -5114,7 +5212,7 @@ rb_io_ungetbyte(VALUE io, VALUE b)
b = rb_str_new((const char *)&c, 1);
break;
default:
- SafeStringValue(b);
+ StringValue(b);
}
io_ungetbyte(b, fptr);
return Qnil;
@@ -5176,7 +5274,7 @@ rb_io_ungetc(VALUE io, VALUE c)
c = rb_enc_uint_chr(NUM2UINT(c), io_read_encoding(fptr));
}
else {
- SafeStringValue(c);
+ StringValue(c);
}
if (NEED_READCONV(fptr)) {
SET_BINARY_MODE(fptr);
@@ -5422,7 +5520,7 @@ maygvl_close(int fd, int keepgvl)
* close() may block for certain file types (NFS, SO_LINGER sockets,
* inotify), so let other threads run.
*/
- return (int)(intptr_t)rb_thread_call_without_gvl(nogvl_close, &fd, RUBY_UBF_IO, 0);
+ return IO_WITHOUT_GVL_INT(nogvl_close, &fd);
}
static void*
@@ -5439,15 +5537,13 @@ maygvl_fclose(FILE *file, int keepgvl)
if (keepgvl)
return fclose(file);
- return (int)(intptr_t)rb_thread_call_without_gvl(nogvl_fclose, file, RUBY_UBF_IO, 0);
+ return IO_WITHOUT_GVL_INT(nogvl_fclose, file);
}
static void free_io_buffer(rb_io_buffer_t *buf);
-static void clear_codeconv(rb_io_t *fptr);
static void
-fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl,
- struct rb_io_close_wait_list *busy)
+fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl)
{
VALUE error = Qnil;
int fd = fptr->fd;
@@ -5487,20 +5583,8 @@ fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl,
fptr->stdio_file = 0;
fptr->mode &= ~(FMODE_READABLE|FMODE_WRITABLE);
- // Ensure waiting_fd users do not hit EBADF.
- if (busy) {
- // Wait for them to exit before we call close().
- rb_notify_fd_close_wait(busy);
- }
-
- // Disable for now.
- // if (!done && fd >= 0) {
- // VALUE scheduler = rb_fiber_scheduler_current();
- // if (scheduler != Qnil) {
- // VALUE result = rb_fiber_scheduler_io_close(scheduler, fptr->self);
- // if (!UNDEF_P(result)) done = 1;
- // }
- // }
+ // Wait for blocking operations to ensure they do not hit EBADF:
+ rb_thread_io_close_wait(fptr);
if (!done && stdio_file) {
// stdio_file is deallocated anyway even if fclose failed.
@@ -5513,6 +5597,15 @@ fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl,
done = 1;
}
+ VALUE scheduler = rb_fiber_scheduler_current();
+ if (!done && fd >= 0 && scheduler != Qnil) {
+ VALUE result = rb_fiber_scheduler_io_close(scheduler, RB_INT2NUM(fd));
+
+ if (!UNDEF_P(result)) {
+ done = RTEST(result);
+ }
+ }
+
if (!done && fd >= 0) {
// fptr->fd may be closed even if close fails. POSIX doesn't specify it.
// We assumes it is closed.
@@ -5539,7 +5632,7 @@ fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl,
static void
fptr_finalize(rb_io_t *fptr, int noraise)
{
- fptr_finalize_flush(fptr, noraise, FALSE, 0);
+ fptr_finalize_flush(fptr, noraise, FALSE);
free_io_buffer(&fptr->rbuf);
free_io_buffer(&fptr->wbuf);
clear_codeconv(fptr);
@@ -5560,7 +5653,7 @@ static void
free_io_buffer(rb_io_buffer_t *buf)
{
if (buf->ptr) {
- ruby_sized_xfree(buf->ptr, (size_t)buf->capa);
+ ruby_xfree_sized(buf->ptr, (size_t)buf->capa);
buf->ptr = NULL;
}
}
@@ -5604,37 +5697,45 @@ rb_io_fptr_cleanup_all(rb_io_t *fptr)
clear_codeconv(fptr);
}
-void
-rb_io_fptr_finalize_internal(void *ptr)
+int
+rb_io_fptr_finalize(struct rb_io *io)
{
- if (!ptr) return;
- rb_io_fptr_cleanup_all(ptr);
- free(ptr);
+ if (!io) return 0;
+ rb_io_fptr_cleanup_all(io);
+ free(io);
+
+ return 1;
}
-#undef rb_io_fptr_finalize
-int
-rb_io_fptr_finalize(rb_io_t *fptr)
+bool
+rb_io_fptr_finalize_closed(struct rb_io *io)
{
- if (!fptr) {
- return 0;
- }
- else {
- rb_io_fptr_finalize_internal(fptr);
- return 1;
- }
+ if (!io) return true;
+ if (io->fd >= 0) return false;
+ rb_io_fptr_finalize(io);
+ return true;
}
-#define rb_io_fptr_finalize(fptr) rb_io_fptr_finalize_internal(fptr)
-RUBY_FUNC_EXPORTED size_t
-rb_io_memsize(const rb_io_t *fptr)
+size_t
+rb_io_memsize(const rb_io_t *io)
{
size_t size = sizeof(rb_io_t);
- size += fptr->rbuf.capa;
- size += fptr->wbuf.capa;
- size += fptr->cbuf.capa;
- if (fptr->readconv) size += rb_econv_memsize(fptr->readconv);
- if (fptr->writeconv) size += rb_econv_memsize(fptr->writeconv);
+ size += io->rbuf.capa;
+ size += io->wbuf.capa;
+ size += io->cbuf.capa;
+ if (io->readconv) size += rb_econv_memsize(io->readconv);
+ if (io->writeconv) size += rb_econv_memsize(io->writeconv);
+
+ struct rb_io_blocking_operation *blocking_operation = 0;
+
+ // Validate the fork generation of the IO object. If the IO object fork generation is different, the list of blocking operations is not valid memory. See `rb_io_blocking_operations` for the exact semantics.
+ rb_serial_t fork_generation = GET_VM()->fork_gen;
+ if (io->fork_generation == fork_generation) {
+ ccan_list_for_each(&io->blocking_operations, blocking_operation, list) {
+ size += sizeof(struct rb_io_blocking_operation);
+ }
+ }
+
return size;
}
@@ -5651,7 +5752,6 @@ io_close_fptr(VALUE io)
rb_io_t *fptr;
VALUE write_io;
rb_io_t *write_fptr;
- struct rb_io_close_wait_list busy;
write_io = GetWriteIO(io);
if (io != write_io) {
@@ -5665,10 +5765,12 @@ io_close_fptr(VALUE io)
if (!fptr) return 0;
if (fptr->fd < 0) return 0;
- if (rb_notify_fd_close(fptr->fd, &busy)) {
+ // This guards against multiple threads closing the same IO object:
+ if (rb_thread_io_close_interrupt(fptr)) {
/* calls close(fptr->fd): */
- fptr_finalize_flush(fptr, FALSE, KEEPGVL, &busy);
+ fptr_finalize_flush(fptr, FALSE, KEEPGVL);
}
+
rb_io_fptr_cleanup(fptr, FALSE);
return fptr;
}
@@ -5706,6 +5808,9 @@ rb_io_close(VALUE io)
* If the stream was opened by IO.popen, sets global variable <tt>$?</tt>
* (child exit status).
*
+ * It is not an error to close an IO object that has already been closed;
+ * in that case the method simply returns +nil+.
+ *
* Example:
*
* IO.popen('ruby', 'r+') do |pipe|
@@ -6089,7 +6194,7 @@ rb_io_sysread(int argc, VALUE *argv, VALUE io)
}
struct prdwr_internal_arg {
- VALUE io;
+ struct rb_io *io;
int fd;
void *buf;
size_t count;
@@ -6111,14 +6216,14 @@ pread_internal_call(VALUE _arg)
VALUE scheduler = rb_fiber_scheduler_current();
if (scheduler != Qnil) {
- VALUE result = rb_fiber_scheduler_io_pread_memory(scheduler, arg->io, arg->offset, arg->buf, arg->count, 0);
+ VALUE result = rb_fiber_scheduler_io_pread_memory(scheduler, arg->io->self, arg->offset, arg->buf, arg->count, 0);
if (!UNDEF_P(result)) {
return rb_fiber_scheduler_io_result_apply(result);
}
}
- return rb_thread_io_blocking_call(internal_pread_func, arg, arg->fd, RB_WAITFD_IN);
+ return rb_io_blocking_region_wait(arg->io, internal_pread_func, arg, RUBY_IO_READABLE);
}
/*
@@ -6155,7 +6260,7 @@ rb_io_pread(int argc, VALUE *argv, VALUE io)
VALUE len, offset, str;
rb_io_t *fptr;
ssize_t n;
- struct prdwr_internal_arg arg = {.io = io};
+ struct prdwr_internal_arg arg;
int shrinkable;
rb_scan_args(argc, argv, "21", &len, &offset, &str);
@@ -6169,6 +6274,7 @@ rb_io_pread(int argc, VALUE *argv, VALUE io)
GetOpenFile(io, fptr);
rb_io_check_byte_readable(fptr);
+ arg.io = fptr;
arg.fd = fptr->fd;
rb_io_check_closed(fptr);
@@ -6191,17 +6297,24 @@ internal_pwrite_func(void *_arg)
{
struct prdwr_internal_arg *arg = _arg;
+ return (VALUE)pwrite(arg->fd, arg->buf, arg->count, arg->offset);
+}
+
+static VALUE
+pwrite_internal_call(VALUE _arg)
+{
+ struct prdwr_internal_arg *arg = (struct prdwr_internal_arg *)_arg;
+
VALUE scheduler = rb_fiber_scheduler_current();
if (scheduler != Qnil) {
- VALUE result = rb_fiber_scheduler_io_pwrite_memory(scheduler, arg->io, arg->offset, arg->buf, arg->count, 0);
+ VALUE result = rb_fiber_scheduler_io_pwrite_memory(scheduler, arg->io->self, arg->offset, arg->buf, arg->count, 0);
if (!UNDEF_P(result)) {
return rb_fiber_scheduler_io_result_apply(result);
}
}
-
- return (VALUE)pwrite(arg->fd, arg->buf, arg->count, arg->offset);
+ return rb_io_blocking_region_wait(arg->io, internal_pwrite_func, arg, RUBY_IO_WRITABLE);
}
/*
@@ -6234,7 +6347,7 @@ rb_io_pwrite(VALUE io, VALUE str, VALUE offset)
{
rb_io_t *fptr;
ssize_t n;
- struct prdwr_internal_arg arg = {.io = io};
+ struct prdwr_internal_arg arg;
VALUE tmp;
if (!RB_TYPE_P(str, T_STRING))
@@ -6245,13 +6358,15 @@ rb_io_pwrite(VALUE io, VALUE str, VALUE offset)
io = GetWriteIO(io);
GetOpenFile(io, fptr);
rb_io_check_writable(fptr);
+
+ arg.io = fptr;
arg.fd = fptr->fd;
tmp = rb_str_tmp_frozen_acquire(str);
arg.buf = RSTRING_PTR(tmp);
arg.count = (size_t)RSTRING_LEN(tmp);
- n = (ssize_t)rb_thread_io_blocking_call(internal_pwrite_func, &arg, fptr->fd, RB_WAITFD_OUT);
+ n = (ssize_t)pwrite_internal_call((VALUE)&arg);
if (n < 0) rb_sys_fail_path(fptr->pathv);
rb_str_tmp_frozen_release(str, tmp);
@@ -6356,7 +6471,7 @@ rb_io_binmode_p(VALUE io)
}
static const char*
-rb_io_fmode_modestr(int fmode)
+rb_io_fmode_modestr(enum rb_io_mode fmode)
{
if (fmode & FMODE_APPEND) {
if ((fmode & FMODE_READWRITE) == FMODE_READWRITE) {
@@ -6390,10 +6505,10 @@ io_encname_bom_p(const char *name, long len)
return len > bom_prefix_len && STRNCASECMP(name, bom_prefix, bom_prefix_len) == 0;
}
-int
+enum rb_io_mode
rb_io_modestr_fmode(const char *modestr)
{
- int fmode = 0;
+ enum rb_io_mode fmode = 0;
const char *m = modestr, *p = NULL;
switch (*m++) {
@@ -6450,7 +6565,7 @@ rb_io_modestr_fmode(const char *modestr)
int
rb_io_oflags_fmode(int oflags)
{
- int fmode = 0;
+ enum rb_io_mode fmode = 0;
switch (oflags & O_ACCMODE) {
case O_RDONLY:
@@ -6486,7 +6601,7 @@ rb_io_oflags_fmode(int oflags)
}
static int
-rb_io_fmode_oflags(int fmode)
+rb_io_fmode_oflags(enum rb_io_mode fmode)
{
int oflags = 0;
@@ -6571,7 +6686,7 @@ rb_io_oflags_modestr(int oflags)
* Qnil => no encoding specified (internal only)
*/
static void
-rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
+rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, enum rb_io_mode fmode)
{
int default_ext = 0;
@@ -6606,12 +6721,12 @@ unsupported_encoding(const char *name, rb_encoding *enc)
static void
parse_mode_enc(const char *estr, rb_encoding *estr_enc,
- rb_encoding **enc_p, rb_encoding **enc2_p, int *fmode_p)
+ rb_encoding **enc_p, rb_encoding **enc2_p, enum rb_io_mode *fmode_p)
{
const char *p;
char encname[ENCODING_MAXNAMELEN+1];
int idx, idx2;
- int fmode = fmode_p ? *fmode_p : 0;
+ enum rb_io_mode fmode = fmode_p ? *fmode_p : 0;
rb_encoding *ext_enc, *int_enc;
long len;
@@ -6673,7 +6788,7 @@ parse_mode_enc(const char *estr, rb_encoding *estr_enc,
}
int
-rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p, int *fmode_p)
+rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p, enum rb_io_mode *fmode_p)
{
VALUE encoding=Qnil, extenc=Qundef, intenc=Qundef, tmp;
int extracted = 0;
@@ -6742,9 +6857,9 @@ rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2
}
static void
-validate_enc_binmode(int *fmode_p, int ecflags, rb_encoding *enc, rb_encoding *enc2)
+validate_enc_binmode(enum rb_io_mode *fmode_p, int ecflags, rb_encoding *enc, rb_encoding *enc2)
{
- int fmode = *fmode_p;
+ enum rb_io_mode fmode = *fmode_p;
if ((fmode & FMODE_READABLE) &&
!enc2 &&
@@ -6769,7 +6884,7 @@ validate_enc_binmode(int *fmode_p, int ecflags, rb_encoding *enc, rb_encoding *e
}
static void
-extract_binmode(VALUE opthash, int *fmode)
+extract_binmode(VALUE opthash, enum rb_io_mode *fmode)
{
if (!NIL_P(opthash)) {
VALUE v;
@@ -6799,10 +6914,11 @@ extract_binmode(VALUE opthash, int *fmode)
void
rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
- int *oflags_p, int *fmode_p, struct rb_io_encoding *convconfig_p)
+ int *oflags_p, enum rb_io_mode *fmode_p, struct rb_io_encoding *convconfig_p)
{
VALUE vmode;
- int oflags, fmode;
+ int oflags;
+ enum rb_io_mode fmode;
rb_encoding *enc, *enc2;
int ecflags;
VALUE ecopts;
@@ -6827,7 +6943,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
else {
const char *p;
- SafeStringValue(vmode);
+ StringValue(vmode);
p = StringValueCStr(vmode);
fmode = rb_io_modestr_fmode(p);
oflags = rb_io_fmode_oflags(fmode);
@@ -6961,7 +7077,9 @@ static inline int
rb_sysopen_internal(struct sysopen_struct *data)
{
int fd;
- fd = (int)(VALUE)rb_thread_call_without_gvl(sysopen_func, data, RUBY_UBF_IO, 0);
+ do {
+ fd = IO_WITHOUT_GVL_INT(sysopen_func, data);
+ } while (fd < 0 && errno == EINTR);
if (0 <= fd)
rb_update_max_fd(fd);
return fd;
@@ -7115,7 +7233,7 @@ io_set_encoding_by_bom(VALUE io)
}
static VALUE
-rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode,
+rb_file_open_generic(VALUE io, VALUE filename, int oflags, enum rb_io_mode fmode,
const struct rb_io_encoding *convconfig, mode_t perm)
{
VALUE pathv;
@@ -7152,15 +7270,13 @@ rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode,
static VALUE
rb_file_open_internal(VALUE io, VALUE filename, const char *modestr)
{
- int fmode = rb_io_modestr_fmode(modestr);
+ enum rb_io_mode fmode = rb_io_modestr_fmode(modestr);
const char *p = strchr(modestr, ':');
struct rb_io_encoding convconfig;
if (p) {
parse_mode_enc(p+1, rb_usascii_encoding(),
&convconfig.enc, &convconfig.enc2, &fmode);
- convconfig.ecflags = 0;
- convconfig.ecopts = Qnil;
}
else {
rb_encoding *e;
@@ -7168,10 +7284,19 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr)
e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL;
rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode);
- convconfig.ecflags = 0;
- convconfig.ecopts = Qnil;
}
+ convconfig.ecflags = (fmode & FMODE_READABLE) ?
+ MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR,
+ 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0;
+#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE
+ convconfig.ecflags |= (fmode & FMODE_WRITABLE) ?
+ MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE,
+ 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
+#endif
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags);
+ convconfig.ecopts = Qnil;
+
return rb_file_open_generic(io, filename,
rb_io_fmode_oflags(fmode),
fmode,
@@ -7461,7 +7586,7 @@ char *rb_execarg_commandline(const struct rb_execarg *eargp, VALUE *prog);
#ifndef __EMSCRIPTEN__
static VALUE
-pipe_open(VALUE execarg_obj, const char *modestr, int fmode,
+pipe_open(VALUE execarg_obj, const char *modestr, enum rb_io_mode fmode,
const struct rb_io_encoding *convconfig)
{
struct rb_execarg *eargp = NIL_P(execarg_obj) ? NULL : rb_execarg_get(execarg_obj);
@@ -7690,7 +7815,7 @@ pipe_open(VALUE execarg_obj, const char *modestr, int fmode,
}
#else
static VALUE
-pipe_open(VALUE execarg_obj, const char *modestr, int fmode,
+pipe_open(VALUE execarg_obj, const char *modestr, enum rb_io_mode fmode,
const struct rb_io_encoding *convconfig)
{
rb_raise(rb_eNotImpError, "popen() is not available");
@@ -7712,7 +7837,7 @@ is_popen_fork(VALUE prog)
}
static VALUE
-pipe_open_s(VALUE prog, const char *modestr, int fmode,
+pipe_open_s(VALUE prog, const char *modestr, enum rb_io_mode fmode,
const struct rb_io_encoding *convconfig)
{
int argc = 1;
@@ -7745,7 +7870,7 @@ static VALUE popen_finish(VALUE port, VALUE klass);
* whose $stdin and $stdout are connected to a new stream +io+.
*
* This method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
+ * see {Command Injection}[rdoc-ref:security/command_injection.rdoc].
*
* If no block is given, returns the new stream,
* which depending on given +mode+ may be open for reading, writing, or both.
@@ -7754,7 +7879,8 @@ static VALUE popen_finish(VALUE port, VALUE klass);
* If a block is given, the stream is passed to the block
* (again, open for reading, writing, or both);
* when the block exits, the stream is closed,
- * and the block's value is assigned to global variable <tt>$?</tt> and returned.
+ * the block's value is returned,
+ * and the global variable <tt>$?</tt> is set to the child's exit status.
*
* Optional argument +mode+ may be any valid \IO mode.
* See {Access Modes}[rdoc-ref:File@Access+Modes].
@@ -7783,7 +7909,7 @@ static VALUE popen_finish(VALUE port, VALUE klass);
* - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options].
* - Options for Kernel#spawn.
*
- * <b>Forked \Process</b>
+ * <b>Forked Process</b>
*
* When argument +cmd+ is the 1-character string <tt>'-'</tt>, causes the process to fork:
* IO.popen('-') do |pipe|
@@ -7921,7 +8047,8 @@ rb_io_popen(VALUE pname, VALUE pmode, VALUE env, VALUE opt)
{
const char *modestr;
VALUE tmp, execarg_obj = Qnil;
- int oflags, fmode;
+ int oflags;
+ enum rb_io_mode fmode;
struct rb_io_encoding convconfig;
tmp = rb_check_array_type(pname);
@@ -7936,7 +8063,7 @@ rb_io_popen(VALUE pname, VALUE pmode, VALUE env, VALUE opt)
RB_GC_GUARD(tmp);
}
else {
- SafeStringValue(pname);
+ StringValue(pname);
execarg_obj = Qnil;
if (!is_popen_fork(pname))
execarg_obj = rb_execarg_new(1, &pname, TRUE, FALSE);
@@ -7959,10 +8086,10 @@ popen_finish(VALUE port, VALUE klass)
if (NIL_P(port)) {
/* child */
if (rb_block_given_p()) {
- rb_yield(Qnil);
+ rb_protect(rb_yield, Qnil, NULL);
rb_io_flush(rb_ractor_stdout());
rb_io_flush(rb_ractor_stderr());
- _exit(0);
+ _exit(EXIT_SUCCESS);
}
return Qnil;
}
@@ -8002,7 +8129,12 @@ ruby_popen_writer(char *const *argv, rb_pid_t *pid)
int write_pair[2];
# endif
- int result = rb_cloexec_pipe(write_pair);
+#ifdef HAVE_PIPE2
+ int result = pipe2(write_pair, O_CLOEXEC);
+#else
+ int result = pipe(write_pair);
+#endif
+
*pid = -1;
if (result == 0) {
# ifdef HAVE_WORKING_FORK
@@ -8027,37 +8159,19 @@ ruby_popen_writer(char *const *argv, rb_pid_t *pid)
return NULL;
}
-static void
-rb_scan_open_args(int argc, const VALUE *argv,
- VALUE *fname_p, int *oflags_p, int *fmode_p,
- struct rb_io_encoding *convconfig_p, mode_t *perm_p)
+static VALUE
+rb_open_file(VALUE io, VALUE fname, VALUE vmode, VALUE vperm, VALUE opt)
{
- VALUE opt, fname, vmode, vperm;
- int oflags, fmode;
+ int oflags;
+ enum rb_io_mode fmode;
+ struct rb_io_encoding convconfig;
mode_t perm;
- argc = rb_scan_args(argc, argv, "12:", &fname, &vmode, &vperm, &opt);
FilePathValue(fname);
- rb_io_extract_modeenc(&vmode, &vperm, opt, &oflags, &fmode, convconfig_p);
-
- perm = NIL_P(vperm) ? 0666 : NUM2MODET(vperm);
-
- *fname_p = fname;
- *oflags_p = oflags;
- *fmode_p = fmode;
- *perm_p = perm;
-}
-
-static VALUE
-rb_open_file(int argc, const VALUE *argv, VALUE io)
-{
- VALUE fname;
- int oflags, fmode;
- struct rb_io_encoding convconfig;
- mode_t perm;
+ rb_io_extract_modeenc(&vmode, &vperm, opt, &oflags, &fmode, &convconfig);
+ perm = NIL_P(vperm) ? 0666 : NUM2MODET(vperm);
- rb_scan_open_args(argc, argv, &fname, &oflags, &fmode, &convconfig, &perm);
rb_file_open_generic(io, fname, oflags, fmode, &convconfig, perm);
return io;
@@ -8141,7 +8255,7 @@ rb_io_s_sysopen(int argc, VALUE *argv, VALUE _)
else if (!NIL_P(intmode = rb_check_to_integer(vmode, "to_int")))
oflags = NUM2INT(intmode);
else {
- SafeStringValue(vmode);
+ StringValue(vmode);
oflags = rb_io_modestr_oflags(StringValueCStr(vmode));
}
if (NIL_P(vperm)) perm = 0666;
@@ -8152,21 +8266,6 @@ rb_io_s_sysopen(int argc, VALUE *argv, VALUE _)
return INT2NUM(fd);
}
-static VALUE
-check_pipe_command(VALUE filename_or_command)
-{
- char *s = RSTRING_PTR(filename_or_command);
- long l = RSTRING_LEN(filename_or_command);
- char *e = s + l;
- int chlen;
-
- if (rb_enc_ascget(s, e, &chlen, rb_enc_get(filename_or_command)) == '|') {
- VALUE cmd = rb_str_new(s+chlen, l-chlen);
- return cmd;
- }
- return Qnil;
-}
-
/*
* call-seq:
* open(path, mode = 'r', perm = 0666, **opts) -> io or nil
@@ -8174,9 +8273,6 @@ check_pipe_command(VALUE filename_or_command)
*
* Creates an IO object connected to the given file.
*
- * This method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
- *
* With no block given, file stream is returned:
*
* open('t.txt') # => #<File:t.txt>
@@ -8212,13 +8308,7 @@ rb_f_open(int argc, VALUE *argv, VALUE _)
redirect = TRUE;
}
else {
- VALUE cmd = check_pipe_command(tmp);
- if (!NIL_P(cmd)) {
- // TODO: when removed in 4.0, update command_injection.rdoc
- rb_warn_deprecated_to_remove_at(4.0, "Calling Kernel#open with a leading '|'", "IO.popen");
- argv[0] = cmd;
- return rb_io_s_popen(argc, argv, rb_cIO);
- }
+ argv[0] = tmp;
}
}
}
@@ -8233,12 +8323,19 @@ rb_f_open(int argc, VALUE *argv, VALUE _)
return rb_io_s_open(argc, argv, rb_cFile);
}
-static VALUE rb_io_open_generic(VALUE, VALUE, int, int, const struct rb_io_encoding *, mode_t);
+static VALUE
+rb_io_open_generic(VALUE klass, VALUE filename, int oflags, enum rb_io_mode fmode,
+ const struct rb_io_encoding *convconfig, mode_t perm)
+{
+ return rb_file_open_generic(io_alloc(klass), filename,
+ oflags, fmode, convconfig, perm);
+}
static VALUE
rb_io_open(VALUE io, VALUE filename, VALUE vmode, VALUE vperm, VALUE opt)
{
- int oflags, fmode;
+ int oflags;
+ enum rb_io_mode fmode;
struct rb_io_encoding convconfig;
mode_t perm;
@@ -8248,22 +8345,6 @@ rb_io_open(VALUE io, VALUE filename, VALUE vmode, VALUE vperm, VALUE opt)
}
static VALUE
-rb_io_open_generic(VALUE klass, VALUE filename, int oflags, int fmode,
- const struct rb_io_encoding *convconfig, mode_t perm)
-{
- VALUE cmd;
- if (klass == rb_cIO && !NIL_P(cmd = check_pipe_command(filename))) {
- // TODO: when removed in 4.0, update command_injection.rdoc
- rb_warn_deprecated_to_remove_at(4.0, "IO process creation with a leading '|'", "IO.popen");
- return pipe_open_s(cmd, rb_io_oflags_modestr(oflags), fmode, convconfig);
- }
- else {
- return rb_file_open_generic(io_alloc(klass), filename,
- oflags, fmode, convconfig, perm);
- }
-}
-
-static VALUE
io_reopen(VALUE io, VALUE nfile)
{
rb_io_t *fptr, *orig;
@@ -8290,7 +8371,7 @@ io_reopen(VALUE io, VALUE nfile)
rb_sys_fail_on_write(fptr);
}
else {
- flush_before_seek(fptr);
+ flush_before_seek(fptr, true);
}
if (orig->mode & FMODE_READABLE) {
pos = io_tell(orig);
@@ -8302,6 +8383,7 @@ io_reopen(VALUE io, VALUE nfile)
/* copy rb_io_t structure */
fptr->mode = orig->mode | (fptr->mode & FMODE_EXTERNAL);
+ fptr->encs = orig->encs;
fptr->pid = orig->pid;
fptr->lineno = orig->lineno;
if (RTEST(orig->pathv)) fptr->pathv = orig->pathv;
@@ -8311,6 +8393,10 @@ io_reopen(VALUE io, VALUE nfile)
fd = fptr->fd;
fd2 = orig->fd;
if (fd != fd2) {
+ // Interrupt all usage of the old file descriptor:
+ rb_thread_io_close_interrupt(fptr);
+ rb_thread_io_close_wait(fptr);
+
if (RUBY_IO_EXTERNAL_P(fptr) || fd <= 2 || !fptr->stdio_file) {
/* need to keep FILE objects of stdin, stdout and stderr */
if (rb_cloexec_dup2(fd2, fd) < 0)
@@ -8326,7 +8412,7 @@ io_reopen(VALUE io, VALUE nfile)
rb_update_max_fd(fd);
fptr->fd = fd;
}
- rb_thread_fd_close(fd);
+
if ((orig->mode & FMODE_READABLE) && pos >= 0) {
if (io_seek(fptr, pos, SEEK_SET) < 0 && errno) {
rb_sys_fail_path(fptr->pathv);
@@ -8415,7 +8501,7 @@ rb_io_reopen(int argc, VALUE *argv, VALUE file)
}
if (!NIL_P(nmode) || !NIL_P(opt)) {
- int fmode;
+ enum rb_io_mode fmode;
struct rb_io_encoding convconfig;
rb_io_extract_modeenc(&nmode, 0, opt, &oflags, &fmode, &convconfig);
@@ -8503,6 +8589,12 @@ rb_io_init_copy(VALUE dest, VALUE io)
fptr->pid = orig->pid;
fptr->lineno = orig->lineno;
fptr->timeout = orig->timeout;
+
+ ccan_list_head_init(&fptr->blocking_operations);
+ fptr->closing_ec = NULL;
+ fptr->wakeup_mutex = Qnil;
+ fptr->fork_generation = GET_VM()->fork_gen;
+
if (!NIL_P(orig->pathv)) fptr->pathv = orig->pathv;
fptr_copy_finalizer(fptr, orig);
@@ -8532,7 +8624,7 @@ rb_io_init_copy(VALUE dest, VALUE io)
* Formats and writes +objects+ to the stream.
*
* For details on +format_string+, see
- * {Format Specifications}[rdoc-ref:format_specifications.rdoc].
+ * {Format Specifications}[rdoc-ref:language/format_specifications.rdoc].
*
*/
@@ -8553,7 +8645,7 @@ rb_io_printf(int argc, const VALUE *argv, VALUE out)
* io.write(sprintf(format_string, *objects))
*
* For details on +format_string+, see
- * {Format Specifications}[rdoc-ref:format_specifications.rdoc].
+ * {Format Specifications}[rdoc-ref:language/format_specifications.rdoc].
*
* With the single argument +format_string+, formats +objects+ into the string,
* then writes the formatted string to $stdout:
@@ -8596,12 +8688,19 @@ rb_f_printf(int argc, VALUE *argv, VALUE _)
return Qnil;
}
+extern void rb_deprecated_str_setter(VALUE val, ID id, VALUE *var);
+
static void
-deprecated_str_setter(VALUE val, ID id, VALUE *var)
+deprecated_rs_setter(VALUE val, ID id, VALUE *var)
{
- rb_str_setter(val, id, &val);
+ rb_deprecated_str_setter(val, id, &val);
if (!NIL_P(val)) {
- rb_warn_deprecated("`%s'", NULL, rb_id2name(id));
+ if (rb_str_equal(val, rb_default_rs)) {
+ val = rb_default_rs;
+ }
+ else {
+ val = rb_str_frozen_bare_string(val);
+ }
}
*var = val;
}
@@ -9221,6 +9320,11 @@ rb_io_open_descriptor(VALUE klass, int descriptor, int mode, VALUE path, VALUE t
io->timeout = timeout;
+ ccan_list_head_init(&io->blocking_operations);
+ io->closing_ec = NULL;
+ io->wakeup_mutex = Qnil;
+ io->fork_generation = GET_VM()->fork_gen;
+
if (encoding) {
io->encs = *encoding;
}
@@ -9231,14 +9335,30 @@ rb_io_open_descriptor(VALUE klass, int descriptor, int mode, VALUE path, VALUE t
}
static VALUE
-prep_io(int fd, int fmode, VALUE klass, const char *path)
+prep_io(int fd, enum rb_io_mode fmode, VALUE klass, const char *path)
{
VALUE path_value = Qnil;
+ rb_encoding *e;
+ struct rb_io_encoding convconfig;
+
if (path) {
path_value = rb_obj_freeze(rb_str_new_cstr(path));
}
- VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, NULL);
+ e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL;
+ rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode);
+ convconfig.ecflags = (fmode & FMODE_READABLE) ?
+ MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR,
+ 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0;
+#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE
+ convconfig.ecflags |= (fmode & FMODE_WRITABLE) ?
+ MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE,
+ 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
+#endif
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags);
+ convconfig.ecopts = Qnil;
+
+ VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, &convconfig);
rb_io_t*io = RFILE(self)->fptr;
if (!io_check_tty(io)) {
@@ -9261,7 +9381,7 @@ rb_io_fdopen(int fd, int oflags, const char *path)
}
static VALUE
-prep_stdio(FILE *f, int fmode, VALUE klass, const char *path)
+prep_stdio(FILE *f, enum rb_io_mode fmode, VALUE klass, const char *path)
{
rb_io_t *fptr;
VALUE io = prep_io(fileno(f), fmode|FMODE_EXTERNAL|DEFAULT_TEXTMODE, klass, path);
@@ -9344,6 +9464,10 @@ rb_io_fptr_new(void)
fp->encs.ecopts = Qnil;
fp->write_lock = Qnil;
fp->timeout = Qnil;
+ ccan_list_head_init(&fp->blocking_operations);
+ fp->closing_ec = NULL;
+ fp->wakeup_mutex = Qnil;
+ fp->fork_generation = GET_VM()->fork_gen;
return fp;
}
@@ -9364,6 +9488,8 @@ rb_io_make_open_file(VALUE obj)
return fp;
}
+static VALUE io_initialize(VALUE io, VALUE fnum, VALUE vmode, VALUE opt);
+
/*
* call-seq:
* IO.new(fd, mode = 'r', **opts) -> io
@@ -9383,7 +9509,8 @@ rb_io_make_open_file(VALUE obj)
* The new \IO object does not inherit encoding
* (because the integer file descriptor does not have an encoding):
*
- * fd = IO.sysopen('t.rus', 'rb')
+ * File.read('t.ja') # => "こんにちは"
+ * fd = IO.sysopen('t.ja', 'rb')
* io = IO.new(fd)
* io.external_encoding # => #<Encoding:UTF-8> # Not ASCII-8BIT.
*
@@ -9409,18 +9536,25 @@ static VALUE
rb_io_initialize(int argc, VALUE *argv, VALUE io)
{
VALUE fnum, vmode;
+ VALUE opt;
+
+ rb_scan_args(argc, argv, "11:", &fnum, &vmode, &opt);
+ return io_initialize(io, fnum, vmode, opt);
+}
+
+static VALUE
+io_initialize(VALUE io, VALUE fnum, VALUE vmode, VALUE opt)
+{
rb_io_t *fp;
- int fd, fmode, oflags = O_RDONLY;
+ int fd, oflags = O_RDONLY;
+ enum rb_io_mode fmode;
struct rb_io_encoding convconfig;
- VALUE opt;
#if defined(HAVE_FCNTL) && defined(F_GETFL)
int ofmode;
#else
struct stat st;
#endif
-
- argc = rb_scan_args(argc, argv, "11:", &fnum, &vmode, &opt);
rb_io_extract_modeenc(&vmode, 0, opt, &oflags, &fmode, &convconfig);
fd = NUM2INT(fnum);
@@ -9465,6 +9599,10 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io)
fp->encs = convconfig;
fp->pathv = path;
fp->timeout = Qnil;
+ ccan_list_head_init(&fp->blocking_operations);
+ fp->closing_ec = NULL;
+ fp->wakeup_mutex = Qnil;
+ fp->fork_generation = GET_VM()->fork_gen;
clear_codeconv(fp);
io_check_tty(fp);
if (fileno(stdin) == fd)
@@ -9569,17 +9707,16 @@ rb_file_initialize(int argc, VALUE *argv, VALUE io)
if (RFILE(io)->fptr) {
rb_raise(rb_eRuntimeError, "reinitializing File");
}
- if (0 < argc && argc < 3) {
- VALUE fd = rb_check_to_int(argv[0]);
+ VALUE fname, vmode, vperm, opt;
+ int posargc = rb_scan_args(argc, argv, "12:", &fname, &vmode, &vperm, &opt);
+ if (posargc < 3) { /* perm is File only */
+ VALUE fd = rb_check_to_int(fname);
if (!NIL_P(fd)) {
- argv[0] = fd;
- return rb_io_initialize(argc, argv, io);
+ return io_initialize(io, fd, vmode, opt);
}
}
- rb_open_file(argc, argv, io);
-
- return io;
+ return rb_open_file(io, fname, vmode, vperm, opt);
}
/* :nodoc: */
@@ -9695,7 +9832,7 @@ io_wait_readable(int argc, VALUE *argv, VALUE io)
rb_io_t *fptr;
RB_IO_POINTER(io, fptr);
- rb_io_check_readable(fptr);
+ rb_io_check_char_readable(fptr);
if (rb_io_read_pending(fptr)) return Qtrue;
@@ -9742,7 +9879,7 @@ io_wait_priority(int argc, VALUE *argv, VALUE io)
rb_io_t *fptr = NULL;
RB_IO_POINTER(io, fptr);
- rb_io_check_readable(fptr);
+ rb_io_check_char_readable(fptr);
if (rb_io_read_pending(fptr)) return Qtrue;
@@ -9799,7 +9936,7 @@ io_event_from_value(VALUE value)
/*
* call-seq:
* io.wait(events, timeout) -> event mask, false or nil
- * io.wait(timeout = nil, mode = :read) -> self, true, or false
+ * io.wait(*event_symbols[, timeout]) -> self, true, or false
*
* Waits until the IO becomes ready for the specified events and returns the
* subset of events that become ready, or a falsy value when times out.
@@ -9807,10 +9944,14 @@ io_event_from_value(VALUE value)
* The events can be a bit mask of +IO::READABLE+, +IO::WRITABLE+ or
* +IO::PRIORITY+.
*
- * Returns an event mask (truthy value) immediately when buffered data is available.
+ * Returns an event mask (truthy value) immediately when buffered data is
+ * available.
*
- * Optional parameter +mode+ is one of +:read+, +:write+, or
- * +:read_write+.
+ * The second form: if one or more event symbols (+:read+, +:write+, or
+ * +:read_write+) are passed, the event mask is the bitwise OR of the bitmasks
+ * corresponding to those symbols. In this form, +timeout+ is optional, the
+ * order of the arguments is arbitrary, and the method returns +io+ if any
+ * of the events is ready.
*/
static VALUE
@@ -9820,10 +9961,6 @@ io_wait(int argc, VALUE *argv, VALUE io)
enum rb_io_event events = 0;
int return_io = 0;
- // The documented signature for this method is actually incorrect.
- // A single timeout is allowed in any position, and multiple symbols can be given.
- // Whether this is intentional or not, I don't know, and as such I consider this to
- // be a legacy/slow path.
if (argc != 2 || (RB_SYMBOL_P(argv[0]) || RB_SYMBOL_P(argv[1]))) {
// We'd prefer to return the actual mask, but this form would return the io itself:
return_io = 1;
@@ -9869,14 +10006,14 @@ io_wait(int argc, VALUE *argv, VALUE io)
}
static void
-argf_mark(void *ptr)
+argf_mark_and_move(void *ptr)
{
struct argf *p = ptr;
- rb_gc_mark(p->filename);
- rb_gc_mark(p->current_file);
- rb_gc_mark(p->argv);
- rb_gc_mark(p->inplace);
- rb_gc_mark(p->encs.ecopts);
+ rb_gc_mark_and_move(&p->filename);
+ rb_gc_mark_and_move(&p->current_file);
+ rb_gc_mark_and_move(&p->argv);
+ rb_gc_mark_and_move(&p->inplace);
+ rb_gc_mark_and_move(&p->encs.ecopts);
}
static size_t
@@ -9889,17 +10026,17 @@ argf_memsize(const void *ptr)
static const rb_data_type_t argf_type = {
"ARGF",
- {argf_mark, RUBY_TYPED_DEFAULT_FREE, argf_memsize},
- 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
+ {argf_mark_and_move, RUBY_TYPED_DEFAULT_FREE, argf_memsize, argf_mark_and_move},
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
static inline void
-argf_init(struct argf *p, VALUE v)
+argf_init(VALUE argf, struct argf *p, VALUE v)
{
p->filename = Qnil;
p->current_file = Qnil;
p->lineno = 0;
- p->argv = v;
+ RB_OBJ_WRITE(argf, &p->argv, v);
}
static VALUE
@@ -9908,7 +10045,7 @@ argf_alloc(VALUE klass)
struct argf *p;
VALUE argf = TypedData_Make_Struct(klass, struct argf, &argf_type, p);
- argf_init(p, Qnil);
+ argf_init(argf, p, Qnil);
return argf;
}
@@ -9919,7 +10056,7 @@ static VALUE
argf_initialize(VALUE argf, VALUE argv)
{
memset(&ARGF, 0, sizeof(ARGF));
- argf_init(&ARGF, argv);
+ argf_init(argf, &ARGF, argv);
return argf;
}
@@ -9930,7 +10067,8 @@ argf_initialize_copy(VALUE argf, VALUE orig)
{
if (!OBJ_INIT_COPY(argf, orig)) return argf;
ARGF = argf_of(orig);
- ARGF.argv = rb_obj_dup(ARGF.argv);
+ rb_gc_writebarrier_remember(argf);
+ ARGF_SET(argv, rb_obj_dup(ARGF.argv));
return argf;
}
@@ -10015,7 +10153,7 @@ argf_next_argv(VALUE argf)
char *fn;
rb_io_t *fptr;
int stdout_binmode = 0;
- int fmode;
+ enum rb_io_mode fmode;
VALUE r_stdout = rb_ractor_stdout();
@@ -10049,11 +10187,11 @@ argf_next_argv(VALUE argf)
if (RARRAY_LEN(ARGF.argv) > 0) {
VALUE filename = rb_ary_shift(ARGF.argv);
FilePathValue(filename);
- ARGF.filename = filename;
+ ARGF_SET(filename, filename);
filename = rb_str_encode_ospath(filename);
fn = StringValueCStr(filename);
if (RSTRING_LEN(filename) == 1 && fn[0] == '-') {
- ARGF.current_file = rb_stdin;
+ ARGF_SET(current_file, rb_stdin);
if (ARGF.inplace) {
rb_warn("Can't do inplace edit for stdio; skipping");
goto retry;
@@ -10148,7 +10286,7 @@ argf_next_argv(VALUE argf)
if (!ARGF.binmode) {
fmode |= DEFAULT_TEXTMODE;
}
- ARGF.current_file = prep_io(fr, fmode, rb_cFile, fn);
+ ARGF_SET(current_file, prep_io(fr, fmode, rb_cFile, fn));
if (!NIL_P(write_io)) {
rb_io_set_write_io(ARGF.current_file, write_io);
}
@@ -10177,8 +10315,8 @@ argf_next_argv(VALUE argf)
}
}
else if (ARGF.next_p == -1) {
- ARGF.current_file = rb_stdin;
- ARGF.filename = rb_str_new2("-");
+ ARGF_SET(current_file, rb_stdin);
+ ARGF_SET(filename, rb_str_new2("-"));
if (ARGF.inplace) {
rb_warn("Can't do inplace edit for stdio");
rb_ractor_stdout_set(orig_stdout);
@@ -10446,8 +10584,9 @@ static VALUE argf_readlines(int, VALUE *, VALUE);
* $cat t.txt | ruby -e "p readlines 12"
* ["First line\n", "Second line\n", "\n", "Fourth line\n", "Fifth line\n"]
*
- * With arguments +sep+ and +limit+ given, combines the two behaviors;
- * see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit].
+ * With arguments +sep+ and +limit+ given,
+ * combines the two behaviors
+ * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]).
*
* Optional keyword argument +chomp+ specifies whether line separators
* are to be omitted:
@@ -10519,14 +10658,14 @@ argf_readlines(int argc, VALUE *argv, VALUE argf)
* sets global variable <tt>$?</tt> to the process status.
*
* This method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
+ * see {Command Injection}[rdoc-ref:security/command_injection.rdoc].
*
* Examples:
*
* $ `date` # => "Wed Apr 9 08:56:30 CDT 2003\n"
* $ `echo oops && exit 99` # => "oops\n"
* $ $? # => #<Process::Status: pid 17088 exit 99>
- * $ $?.status # => 99>
+ * $ $?.exitstatus # => 99
*
* The built-in syntax <tt>%x{...}</tt> uses this method.
*
@@ -10539,7 +10678,7 @@ rb_f_backquote(VALUE obj, VALUE str)
VALUE result;
rb_io_t *fptr;
- SafeStringValue(str);
+ StringValue(str);
rb_last_status_clear();
port = pipe_open_s(str, "r", FMODE_READABLE|DEFAULT_TEXTMODE, NULL);
if (NIL_P(port)) return rb_str_new(0,0);
@@ -10630,9 +10769,9 @@ select_internal(VALUE read, VALUE write, VALUE except, struct timeval *tp, rb_fd
if (!pending && n == 0) return Qnil; /* returns nil on timeout */
res = rb_ary_new2(3);
- rb_ary_push(res, rp?rb_ary_new():rb_ary_new2(0));
- rb_ary_push(res, wp?rb_ary_new():rb_ary_new2(0));
- rb_ary_push(res, ep?rb_ary_new():rb_ary_new2(0));
+ rb_ary_push(res, rp ? rb_ary_new_capa(RARRAY_LEN(read)) : rb_ary_new());
+ rb_ary_push(res, wp ? rb_ary_new_capa(RARRAY_LEN(write)) : rb_ary_new());
+ rb_ary_push(res, ep ? rb_ary_new_capa(RARRAY_LEN(except)) : rb_ary_new());
if (rp) {
list = RARRAY_AREF(res, 0);
@@ -10782,7 +10921,7 @@ do_io_advise(rb_io_t *fptr, VALUE advice, rb_off_t offset, rb_off_t len)
ias.offset = offset;
ias.len = len;
- rv = (int)rb_thread_io_blocking_region(io_advise_internal, &ias, fptr->fd);
+ rv = (int)rb_io_blocking_region(fptr, io_advise_internal, &ias);
if (rv && rv != ENOSYS) {
/* posix_fadvise(2) doesn't set errno. On success it returns 0; otherwise
it returns the error code. */
@@ -10820,7 +10959,7 @@ advice_arg_check(VALUE advice)
* advise(advice, offset = 0, len = 0) -> nil
*
* Invokes Posix system call
- * {posix_fadvise(2)}[https://linux.die.net/man/2/posix_fadvise],
+ * {posix_fadvise(2)}[https://man7.org/linux/man-pages/man2/posix_fadvise.2.html],
* which announces an intention to access data from the current file
* in a particular manner.
*
@@ -10872,11 +11011,21 @@ rb_io_advise(int argc, VALUE *argv, VALUE io)
#endif
}
+static int
+is_pos_inf(VALUE x)
+{
+ double f;
+ if (!RB_FLOAT_TYPE_P(x))
+ return 0;
+ f = RFLOAT_VALUE(x);
+ return isinf(f) && 0 < f;
+}
+
/*
* call-seq:
* IO.select(read_ios, write_ios = [], error_ios = [], timeout = nil) -> array or nil
*
- * Invokes system call {select(2)}[https://linux.die.net/man/2/select],
+ * Invokes system call {select(2)}[https://man7.org/linux/man-pages/man2/select.2.html],
* which monitors multiple file descriptors,
* waiting until one or more of the file descriptors
* becomes ready for some class of I/O operation.
@@ -10886,7 +11035,10 @@ rb_io_advise(int argc, VALUE *argv, VALUE io)
* Each of the arguments +read_ios+, +write_ios+, and +error_ios+
* is an array of IO objects.
*
- * Argument +timeout+ is an integer timeout interval in seconds.
+ * Argument +timeout+ is a numeric value (such as integer or float) timeout
+ * interval in seconds.
+ * +timeout+ can also be +nil+ or +Float::INFINITY+.
+ * +nil+ and +Float::INFINITY+ both mean no timeout.
*
* The method monitors the \IO objects given in all three arrays,
* waiting for some to be ready;
@@ -10960,7 +11112,7 @@ rb_io_advise(int argc, VALUE *argv, VALUE io)
* Finally, Linux kernel developers don't guarantee that
* readability of select(2) means readability of following read(2) even
* for a single process;
- * see {select(2)}[https://linux.die.net/man/2/select]
+ * see {select(2)}[https://man7.org/linux/man-pages/man2/select.2.html]
*
* Invoking \IO.select before IO#readpartial works well as usual.
* However it is not the best way to use \IO.select.
@@ -11037,7 +11189,7 @@ rb_f_select(int argc, VALUE *argv, VALUE obj)
int i;
rb_scan_args(argc, argv, "13", &args.read, &args.write, &args.except, &timeout);
- if (NIL_P(timeout)) {
+ if (NIL_P(timeout) || is_pos_inf(timeout)) {
args.timeout = 0;
}
else {
@@ -11074,16 +11226,16 @@ nogvl_ioctl(void *ptr)
}
static int
-do_ioctl(int fd, ioctl_req_t cmd, long narg)
+do_ioctl(struct rb_io *io, ioctl_req_t cmd, long narg)
{
int retval;
struct ioctl_arg arg;
- arg.fd = fd;
+ arg.fd = io->fd;
arg.cmd = cmd;
arg.narg = narg;
- retval = (int)rb_thread_io_blocking_region(nogvl_ioctl, &arg, fd);
+ retval = (int)rb_io_blocking_region(io, nogvl_ioctl, &arg);
return retval;
}
@@ -11346,7 +11498,7 @@ rb_ioctl(VALUE io, VALUE req, VALUE arg)
narg = setup_narg(cmd, &arg, ioctl_narg_len);
GetOpenFile(io, fptr);
- retval = do_ioctl(fptr->fd, cmd, narg);
+ retval = do_ioctl(fptr, cmd, narg);
return finish_narg(retval, arg, fptr);
}
@@ -11354,7 +11506,7 @@ rb_ioctl(VALUE io, VALUE req, VALUE arg)
* call-seq:
* ioctl(integer_cmd, argument) -> integer
*
- * Invokes Posix system call {ioctl(2)}[https://linux.die.net/man/2/ioctl],
+ * Invokes Posix system call {ioctl(2)}[https://man7.org/linux/man-pages/man2/ioctl.2.html],
* which issues a low-level command to an I/O device.
*
* Issues a low-level command to an I/O device.
@@ -11400,16 +11552,16 @@ nogvl_fcntl(void *ptr)
}
static int
-do_fcntl(int fd, int cmd, long narg)
+do_fcntl(struct rb_io *io, int cmd, long narg)
{
int retval;
struct fcntl_arg arg;
- arg.fd = fd;
+ arg.fd = io->fd;
arg.cmd = cmd;
arg.narg = narg;
- retval = (int)rb_thread_io_blocking_region(nogvl_fcntl, &arg, fd);
+ retval = (int)rb_io_blocking_region(io, nogvl_fcntl, &arg);
if (retval != -1) {
switch (cmd) {
#if defined(F_DUPFD)
@@ -11435,7 +11587,7 @@ rb_fcntl(VALUE io, VALUE req, VALUE arg)
narg = setup_narg(cmd, &arg, fcntl_narg_len);
GetOpenFile(io, fptr);
- retval = do_fcntl(fptr->fd, cmd, narg);
+ retval = do_fcntl(fptr, cmd, narg);
return finish_narg(retval, arg, fptr);
}
@@ -11443,7 +11595,7 @@ rb_fcntl(VALUE io, VALUE req, VALUE arg)
* call-seq:
* fcntl(integer_cmd, argument) -> integer
*
- * Invokes Posix system call {fcntl(2)}[https://linux.die.net/man/2/fcntl],
+ * Invokes Posix system call {fcntl(2)}[https://man7.org/linux/man-pages/man2/fcntl.2.html],
* which provides a mechanism for issuing low-level commands to control or query
* a file-oriented I/O stream. Arguments and results are platform
* dependent.
@@ -11473,7 +11625,7 @@ rb_io_fcntl(int argc, VALUE *argv, VALUE io)
* call-seq:
* syscall(integer_callno, *arguments) -> integer
*
- * Invokes Posix system call {syscall(2)}[https://linux.die.net/man/2/syscall],
+ * Invokes Posix system call {syscall(2)}[https://man7.org/linux/man-pages/man2/syscall.2.html],
* which calls a specified function.
*
* Calls the operating system function identified by +integer_callno+;
@@ -11545,7 +11697,7 @@ rb_f_syscall(int argc, VALUE *argv, VALUE _)
VALUE v = rb_check_string_type(argv[i]);
if (!NIL_P(v)) {
- SafeStringValue(v);
+ StringValue(v);
rb_str_modify(v);
arg[i] = (VALUE)StringValueCStr(v);
}
@@ -11783,7 +11935,7 @@ rb_io_s_pipe(int argc, VALUE *argv, VALUE klass)
VALUE opt;
rb_io_t *fptr, *fptr2;
struct io_encoding_set_args ies_args;
- int fmode = 0;
+ enum rb_io_mode fmode = 0;
VALUE ret;
argc = rb_scan_args(argc, argv, "02:", &v1, &v2, &opt);
@@ -11918,10 +12070,6 @@ io_s_foreach(VALUE v)
*
* Calls the block with each successive line read from the stream.
*
- * When called from class \IO (but not subclasses of \IO),
- * this method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
- *
* The first argument must be a string that is the path to a file.
*
* With only argument +path+ given, parses lines from the file at the given +path+,
@@ -11958,7 +12106,7 @@ io_s_foreach(VALUE v)
*
* With argument +limit+ given, parses lines as determined by the default
* line separator and the given line-length limit
- * (see {Line Limit}[rdoc-ref:IO@Line+Limit]):
+ * (see {Line Separator}[rdoc-ref:IO@Line+Separator] and {Line Limit}[rdoc-ref:IO@Line+Limit]):
*
* File.foreach('t.txt', 7) {|line| p line }
*
@@ -11974,10 +12122,9 @@ io_s_foreach(VALUE v)
* "Fourth l"
* "line\n"
*
- * With arguments +sep+ and +limit+ given,
- * parses lines as determined by the given
- * line separator and the given line-length limit
- * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]):
+ * With arguments +sep+ and +limit+ given,
+ * combines the two behaviors
+ * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]).
*
* Optional keyword arguments +opts+ specify:
*
@@ -12022,10 +12169,6 @@ io_s_readlines(VALUE v)
*
* Returns an array of all lines read from the stream.
*
- * When called from class \IO (but not subclasses of \IO),
- * this method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
- *
* The first argument must be a string that is the path to a file.
*
* With only argument +path+ given, parses lines from the file at the given +path+,
@@ -12050,15 +12193,14 @@ io_s_readlines(VALUE v)
*
* With argument +limit+ given, parses lines as determined by the default
* line separator and the given line-length limit
- * (see {Line Limit}[rdoc-ref:IO@Line+Limit]):
+ * (see {Line Separator}[rdoc-ref:IO@Line+Separator] and {Line Limit}[rdoc-ref:IO@Line+Limit]):
*
* IO.readlines('t.txt', 7)
* # => ["First l", "ine\n", "Second ", "line\n", "\n", "Third l", "ine\n", "Fourth ", "line\n"]
*
- * With arguments +sep+ and +limit+ given,
- * parses lines as determined by the given
- * line separator and the given line-length limit
- * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]):
+ * With arguments +sep+ and +limit+ given,
+ * combines the two behaviors
+ * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]).
*
* Optional keyword arguments +opts+ specify:
*
@@ -12112,17 +12254,17 @@ seek_before_access(VALUE argp)
* Opens the stream, reads and returns some or all of its content,
* and closes the stream; returns +nil+ if no bytes were read.
*
- * When called from class \IO (but not subclasses of \IO),
- * this method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
- *
* The first argument must be a string that is the path to a file.
*
* With only argument +path+ given, reads in text mode and returns the entire content
* of the file at the given path:
*
- * IO.read('t.txt')
- * # => "First line\nSecond line\n\nThird line\nFourth line\n"
+ * File.read('t.txt')
+ * # => "First line\nSecond line\n\nFourth line\nFifth line\n"
+ * File.read('t.ja')
+ * # => "こんにちは"
+ * File.read('t.dat')
+ * # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
*
* On Windows, text mode can terminate reading and leave bytes in the file
* unread when encountering certain special bytes. Consider using
@@ -12130,15 +12272,36 @@ seek_before_access(VALUE argp)
*
* With argument +length+, returns +length+ bytes if available:
*
- * IO.read('t.txt', 7) # => "First l"
- * IO.read('t.txt', 700)
+ * File.read('t.txt', 7)
+ * # => "First l"
+ * File.read('t.ja', 7)
+ * # => "\xE3\x81\x93\xE3\x82\x93\xE3"
+ * File.read('t.dat', 7)
+ * # => "\xFE\xFF\x99\x90\x99\x91\x99"
+ *
+ * Returns all bytes if +length+ is larger than the file's size:
+ *
+ * File.read('t.txt', 700)
* # => "First line\r\nSecond line\r\n\r\nFourth line\r\nFifth line\r\n"
+ * File.read('t.ja', 700)
+ * # => "\xE3\x81\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1\xE3\x81\xAF"
+ * File.read('t.dat', 700)
+ * # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
*
* With arguments +length+ and +offset+, returns +length+ bytes
* if available, beginning at the given +offset+:
*
- * IO.read('t.txt', 10, 2) # => "rst line\nS"
- * IO.read('t.txt', 10, 200) # => nil
+ * File.read('t.txt', 10, 2)
+ * # => "rst line\r\n"
+ * File.read('t.ja', 10, 2)
+ * # => "\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1"
+ * File.read('t.dat', 10, 2)
+ * # => "\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
+ *
+ * Returns +nil+ if +offset+ is past the end of the stream:
+ *
+ * File.read('t.txt', 10, 200)
+ * # => nil
*
* Optional keyword arguments +opts+ specify:
*
@@ -12183,10 +12346,6 @@ rb_io_s_read(int argc, VALUE *argv, VALUE io)
* Behaves like IO.read, except that the stream is opened in binary mode
* with ASCII-8BIT encoding.
*
- * When called from class \IO (but not subclasses of \IO),
- * this method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
- *
*/
static VALUE
@@ -12194,8 +12353,8 @@ rb_io_s_binread(int argc, VALUE *argv, VALUE io)
{
VALUE offset;
struct foreach_arg arg;
+ enum rb_io_mode fmode = FMODE_READABLE|FMODE_BINMODE;
enum {
- fmode = FMODE_READABLE|FMODE_BINMODE,
oflags = O_RDONLY
#ifdef O_BINARY
|O_BINARY
@@ -12282,40 +12441,50 @@ io_s_write(int argc, VALUE *argv, VALUE klass, int binary)
/*
* call-seq:
- * IO.write(path, data, offset = 0, **opts) -> integer
+ * IO.write(path, data, offset = 0, **opts) -> nonnegative_integer
*
* Opens the stream, writes the given +data+ to it,
* and closes the stream; returns the number of bytes written.
*
- * When called from class \IO (but not subclasses of \IO),
- * this method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
- *
* The first argument must be a string that is the path to a file.
*
- * With only argument +path+ given, writes the given +data+ to the file at that path:
+ * With only arguments +path+ and +data+ given,
+ * writes the given data to the file at that path:
+ *
+ * path = 't.tmp'
+ * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") # => 47
+ * File.write(path, 'こんにちは') # => 15
+ * File.write(path, "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94") # => 12
+ *
+ * When +offset+ is zero (the default), the entire file content is overwritten:
*
- * IO.write('t.tmp', 'abc') # => 3
- * File.read('t.tmp') # => "abc"
+ * File.read(path) # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
+ * File.write(path, 'foo')
+ * File.read(path) # => "foo"
*
- * If +offset+ is zero (the default), the file is overwritten:
+ * When +offset+ is within the file content, the file content is partly overwritten,
+ * beginning at byte +offset+:
*
- * IO.write('t.tmp', 'A') # => 1
- * File.read('t.tmp') # => "A"
+ * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n")
+ * File.write(path, 'LINE', 6)
+ * File.read(path) # => "First LINE\nSecond line\n\nFourth line\nFifth line\n"
*
- * If +offset+ in within the file content, the file is partly overwritten:
+ * When the file contains multi-byte characters,
+ * the effect of writing may disturb some characters:
*
- * IO.write('t.tmp', 'abcdef') # => 3
- * File.read('t.tmp') # => "abcdef"
- * # Offset within content.
- * IO.write('t.tmp', '012', 2) # => 3
- * File.read('t.tmp') # => "ab012f"
+ * File.write(path, "こんにちは")
+ * File.write(path, 'FOO', 3) # Replace one 3-byte character.
+ * File.read(path) # => "こFOOにちは"
+ * File.write(path, 'BAR', 7) # Replace bytes in two different 3-byte characters.
+ * File.read(path) # => "こFOO\xE3BAR\x81\xA1は"
*
* If +offset+ is outside the file content,
* the file is padded with null characters <tt>"\u0000"</tt>:
*
- * IO.write('t.tmp', 'xyz', 10) # => 3
- * File.read('t.tmp') # => "ab012f\u0000\u0000\u0000\u0000xyz"
+ * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n")
+ * File.write(path, 'FOO', 50)
+ * File.read(path)
+ * # => "First line\nSecond line\n\nFourth line\nFifth line\n\u0000\u0000\u0000FOO"
*
* Optional keyword arguments +opts+ specify:
*
@@ -12332,15 +12501,11 @@ rb_io_s_write(int argc, VALUE *argv, VALUE io)
/*
* call-seq:
- * IO.binwrite(path, string, offset = 0) -> integer
+ * IO.binwrite(path, string, offset = 0, **opts) -> integer
*
* Behaves like IO.write, except that the stream is opened in binary mode
* with ASCII-8BIT encoding.
*
- * When called from class \IO (but not subclasses of \IO),
- * this method has potential security vulnerabilities if called with untrusted input;
- * see {Command Injection}[rdoc-ref:command_injection.rdoc].
- *
*/
static VALUE
@@ -13055,6 +13220,7 @@ copy_stream_fallback_body(VALUE arg)
while (1) {
long numwrote;
long l;
+ rb_str_make_independent(buf);
if (stp->copy_length < (rb_off_t)0) {
l = buflen;
}
@@ -13208,7 +13374,7 @@ copy_stream_body(VALUE arg)
rb_str_resize(str,len);
read_buffered_data(RSTRING_PTR(str), len, stp->src_fptr);
if (stp->dst_fptr) { /* IO or filename */
- if (io_binwrite(str, RSTRING_PTR(str), RSTRING_LEN(str), stp->dst_fptr, 0) < 0)
+ if (io_binwrite(RSTRING_PTR(str), RSTRING_LEN(str), stp->dst_fptr, 0) < 0)
rb_sys_fail_on_write(stp->dst_fptr);
}
else /* others such as StringIO */
@@ -13230,7 +13396,7 @@ copy_stream_body(VALUE arg)
return copy_stream_fallback(stp);
}
- rb_thread_call_without_gvl(nogvl_copy_stream_func, (void*)stp, RUBY_UBF_IO, 0);
+ IO_WITHOUT_GVL(nogvl_copy_stream_func, stp);
return Qnil;
}
@@ -13558,6 +13724,7 @@ argf_set_encoding(int argc, VALUE *argv, VALUE argf)
rb_io_set_encoding(argc, argv, ARGF.current_file);
GetOpenFile(ARGF.current_file, fptr);
ARGF.encs = fptr->encs;
+ RB_OBJ_WRITTEN(argf, Qundef, ARGF.encs.ecopts);
return argf;
}
@@ -14490,7 +14657,7 @@ argf_inplace_mode_set(VALUE argf, VALUE val)
ARGF.inplace = Qnil;
}
else {
- ARGF.inplace = rb_str_new_frozen(val);
+ ARGF_SET(inplace, rb_str_new_frozen(val));
}
return argf;
}
@@ -14504,7 +14671,7 @@ opt_i_set(VALUE val, ID id, VALUE *var)
void
ruby_set_inplace_mode(const char *suffix)
{
- ARGF.inplace = !suffix ? Qfalse : !*suffix ? Qnil : rb_str_new(suffix, strlen(suffix));
+ ARGF_SET(inplace, !suffix ? Qfalse : !*suffix ? Qnil : rb_str_new(suffix, strlen(suffix)));
}
/*
@@ -14557,14 +14724,14 @@ argf_write_io(VALUE argf)
/*
* call-seq:
- * ARGF.write(string) -> integer
+ * ARGF.write(*objects) -> integer
*
- * Writes _string_ if inplace mode.
+ * Writes each of the given +objects+ if inplace mode.
*/
static VALUE
-argf_write(VALUE argf, VALUE str)
+argf_write(int argc, VALUE *argv, VALUE argf)
{
- return rb_io_write(argf_write_io(argf), str);
+ return rb_io_writev(argf_write_io(argf), argc, argv);
}
void
@@ -14670,55 +14837,259 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
/*
* Document-class: ARGF
*
- * ARGF is a stream designed for use in scripts that process files given as
- * command-line arguments or passed in via STDIN.
+ * == \ARGF and +ARGV+
+ *
+ * The \ARGF object works with the array at global variable +ARGV+
+ * to make <tt>$stdin</tt> and file streams available in the Ruby program:
+ *
+ * - <b>ARGV</b> may be thought of as the <b>argument vector</b> array.
+ *
+ * Initially, it contains the command-line arguments and options
+ * that are passed to the Ruby program;
+ * the program can modify that array as it likes.
+ *
+ * - <b>ARGF</b> may be thought of as the <b>argument files</b> object.
+ *
+ * It can access file streams and/or the <tt>$stdin</tt> stream,
+ * based on what it finds in +ARGV+.
+ * This provides a convenient way for the command line
+ * to specify streams for a Ruby program to read.
+ *
+ * == Reading
+ *
+ * \ARGF may read from _source_ streams,
+ * which at any particular time are determined by the content of +ARGV+.
+ *
+ * === Simplest Case
+ *
+ * When the <i>very first</i> \ARGF read occurs with an empty +ARGV+ (<tt>[]</tt>),
+ * the source is <tt>$stdin</tt>:
+ *
+ * - \File +t.rb+:
+ *
+ * p ['ARGV', ARGV]
+ * p ['ARGF.read', ARGF.read]
+ *
+ * - Commands and outputs
+ * (see below for the content of files +foo.txt+ and +bar.txt+):
+ *
+ * $ echo "Open the pod bay doors, Hal." | ruby t.rb
+ * ["ARGV", []]
+ * ["ARGF.read", "Open the pod bay doors, Hal.\n"]
+ *
+ * $ cat foo.txt bar.txt | ruby t.rb
+ * ["ARGV", []]
+ * ["ARGF.read", "Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n"]
+ *
+ * === About the Examples
+ *
+ * Many examples here assume the existence of files +foo.txt+ and +bar.txt+:
+ *
+ * $ cat foo.txt
+ * Foo 0
+ * Foo 1
+ * $ cat bar.txt
+ * Bar 0
+ * Bar 1
+ * Bar 2
+ * Bar 3
+ *
+ * === Sources in +ARGV+
+ *
+ * For any \ARGF read _except_ the {simplest case}[rdoc-ref:ARGF@Simplest+Case]
+ * (that is, _except_ for the <i>very first</i> \ARGF read with an empty +ARGV+),
+ * the sources are found in +ARGV+.
+ *
+ * \ARGF assumes that each element in array +ARGV+ is a potential source,
+ * and is one of:
+ *
+ * - The string path to a file that may be opened as a stream.
+ * - The character <tt>'-'</tt>, meaning stream <tt>$stdin</tt>.
*
- * The arguments passed to your script are stored in the +ARGV+ Array, one
- * argument per element. ARGF assumes that any arguments that aren't
- * filenames have been removed from +ARGV+. For example:
+ * Each element that is _not_ one of these
+ * should be removed from +ARGV+ before \ARGF accesses that source.
*
- * $ ruby argf.rb --verbose file1 file2
+ * In the following example:
*
- * ARGV #=> ["--verbose", "file1", "file2"]
- * option = ARGV.shift #=> "--verbose"
- * ARGV #=> ["file1", "file2"]
+ * - Filepaths +foo.txt+ and +bar.txt+ may be retained as potential sources.
+ * - Options <tt>--xyzzy</tt> and <tt>--mojo</tt> should be removed.
*
- * You can now use ARGF to work with a concatenation of each of these named
- * files. For instance, ARGF.read will return the contents of _file1_
- * followed by the contents of _file2_.
+ * Example:
*
- * After a file in +ARGV+ has been read ARGF removes it from the Array.
- * Thus, after all files have been read +ARGV+ will be empty.
+ * - \File +t.rb+:
*
- * You can manipulate +ARGV+ yourself to control what ARGF operates on. If
- * you remove a file from +ARGV+, it is ignored by ARGF; if you add files to
- * +ARGV+, they are treated as if they were named on the command line. For
- * example:
+ * # Print arguments (and options, if any) found on command line.
+ * p ['ARGV', ARGV]
*
- * ARGV.replace ["file1"]
- * ARGF.readlines # Returns the contents of file1 as an Array
- * ARGV #=> []
- * ARGV.replace ["file2", "file3"]
- * ARGF.read # Returns the contents of file2 and file3
+ * - Command and output:
*
- * If +ARGV+ is empty, ARGF acts as if it contained <tt>"-"</tt> that
- * makes ARGF read from STDIN, i.e. the data piped or typed to your
- * script. For example:
+ * $ ruby t.rb --xyzzy --mojo foo.txt bar.txt
+ * ["ARGV", ["--xyzzy", "--mojo", "foo.txt", "bar.txt"]]
*
- * $ echo "glark" | ruby -e 'p ARGF.read'
- * "glark\n"
+ * \ARGF's stream access considers the elements of +ARGV+, left to right:
+ *
+ * - \File +t.rb+:
+ *
+ * p "ARGV: #{ARGV}"
+ * p "Read: #{ARGF.read}" # Read everything from all specified streams.
+ *
+ * - Command and output:
+ *
+ * $ ruby t.rb foo.txt bar.txt
+ * "ARGV: [\"foo.txt\", \"bar.txt\"]"
+ * "Read: Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n"
+ *
+ * Because the value at +ARGV+ is an ordinary array,
+ * you can manipulate it to control which sources \ARGF considers:
+ *
+ * - If you remove an element from +ARGV+, \ARGF will not consider the corresponding source.
+ * - If you add an element to +ARGV+, \ARGF will consider the corresponding source.
+ *
+ * Each element in +ARGV+ is removed when its corresponding source is accessed;
+ * when all sources have been accessed, the array is empty:
+ *
+ * - \File +t.rb+:
+ *
+ * until ARGV.empty? && ARGF.eof?
+ * p "ARGV: #{ARGV}"
+ * p "Line: #{ARGF.readline}" # Read each line from each specified stream.
+ * end
+ *
+ * - Command and output:
+ *
+ * $ ruby t.rb foo.txt bar.txt
+ * "ARGV: [\"foo.txt\", \"bar.txt\"]"
+ * "Line: Foo 0\n"
+ * "ARGV: [\"bar.txt\"]"
+ * "Line: Foo 1\n"
+ * "ARGV: [\"bar.txt\"]"
+ * "Line: Bar 0\n"
+ * "ARGV: []"
+ * "Line: Bar 1\n"
+ * "ARGV: []"
+ * "Line: Bar 2\n"
+ * "ARGV: []"
+ * "Line: Bar 3\n"
+ *
+ * ==== Filepaths in +ARGV+
+ *
+ * The +ARGV+ array may contain filepaths that specify sources for \ARGF reading.
+ *
+ * This program prints what it reads from files at the paths specified
+ * on the command line:
+ *
+ * - \File +t.rb+:
+ *
+ * p ['ARGV', ARGV]
+ * # Read and print all content from the specified sources.
+ * p ['ARGF.read', ARGF.read]
+ *
+ * - Command and output:
+ *
+ * $ ruby t.rb foo.txt bar.txt
+ * ["ARGV", ["foo.txt", "bar.txt"]]
+ * ["ARGF.read", "Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n"]
+ *
+ * ==== Specifying <tt>$stdin</tt> in +ARGV+
+ *
+ * To specify stream <tt>$stdin</tt> in +ARGV+, use the character <tt>'-'</tt>:
+ *
+ * - \File +t.rb+:
+ *
+ * p ['ARGV', ARGV]
+ * p ['ARGF.read', ARGF.read]
+ *
+ * - Command and output:
+ *
+ * $ echo "Open the pod bay doors, Hal." | ruby t.rb -
+ * ["ARGV", ["-"]]
+ * ["ARGF.read", "Open the pod bay doors, Hal.\n"]
+ *
+ * When no character <tt>'-'</tt> is given, stream <tt>$stdin</tt> is ignored.
+ *
+ * - Command and output:
+ *
+ * $ echo "Open the pod bay doors, Hal." | ruby t.rb foo.txt bar.txt
+ * ["ARGV", ["foo.txt", "bar.txt"]]
+ * ["ARGF.read", "Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n"]
+ *
+ * ==== Mixtures and Repetitions in +ARGV+
+ *
+ * For an \ARGF reader, +ARGV+ may contain any mixture of filepaths
+ * and character <tt>'-'</tt>, including repetitions.
+ *
+ * ==== Modifications to +ARGV+
+ *
+ * The running Ruby program may make any modifications to the +ARGV+ array;
+ * the current value of +ARGV+ affects \ARGF reading.
+ *
+ * ==== Empty +ARGV+
+ *
+ * For an empty +ARGV+, an \ARGF read method either returns +nil+
+ * or raises an exception, depending on the specific method.
+ *
+ * === More Read Methods
+ *
+ * As seen above, method ARGF#read reads the content of all sources
+ * into a single string.
+ * Other \ARGF methods provide other ways to access that content;
+ * these include:
+ *
+ * - Byte access: #each_byte, #getbyte, #readbyte.
+ * - Character access: #each_char, #getc, #readchar.
+ * - Codepoint access: #each_codepoint.
+ * - Line access: #each_line, #gets, #readline, #readlines.
+ * - Source access: #read, #read_nonblock, #readpartial.
+ *
+ * === About \Enumerable
+ *
+ * \ARGF includes module Enumerable.
+ * Virtually all methods in \Enumerable call method <tt>#each</tt> in the including class.
+ *
+ * <b>Note well</b>: In \ARGF, method #each returns data from the _sources_,
+ * _not_ from +ARGV+;
+ * therefore, for example, <tt>ARGF#entries</tt> returns an array of lines from the sources,
+ * not an array of the strings from +ARGV+:
+ *
+ * - \File +t.rb+:
+ *
+ * p ['ARGV', ARGV]
+ * p ['ARGF.entries', ARGF.entries]
+ *
+ * - Command and output:
+ *
+ * $ ruby t.rb foo.txt bar.txt
+ * ["ARGV", ["foo.txt", "bar.txt"]]
+ * ["ARGF.entries", ["Foo 0\n", "Foo 1\n", "Bar 0\n", "Bar 1\n", "Bar 2\n", "Bar 3\n"]]
+ *
+ * == Writing
+ *
+ * If <i>inplace mode</i> is in effect,
+ * \ARGF may write to target streams,
+ * which at any particular time are determined by the content of ARGV.
+ *
+ * Methods about inplace mode:
+ *
+ * - #inplace_mode
+ * - #inplace_mode=
+ * - #to_write_io
+ *
+ * Methods for writing:
+ *
+ * - #print
+ * - #printf
+ * - #putc
+ * - #puts
+ * - #write
*
- * $ echo Glark > file1
- * $ echo "glark" | ruby -e 'p ARGF.read' -- - file1
- * "glark\nGlark\n"
*/
/*
* An instance of class \IO (commonly called a _stream_)
* represents an input/output stream in the underlying operating system.
- * \Class \IO is the basis for input and output in Ruby.
+ * Class \IO is the basis for input and output in Ruby.
*
- * \Class File is the only class in the Ruby core that is a subclass of \IO.
+ * Class File is the only class in the Ruby core that is a subclass of \IO.
* Some classes in the Ruby standard library are also subclasses of \IO;
* these include TCPSocket and UDPSocket.
*
@@ -14727,7 +15098,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* found in ARGV (or found in STDIN if ARGV is empty).
* ARGF is not itself a subclass of \IO.
*
- * \Class StringIO provides an IO-like stream that handles a String.
+ * Class StringIO provides an IO-like stream that handles a String.
* StringIO is not itself a subclass of \IO.
*
* Important objects based on \IO include:
@@ -14749,7 +15120,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* Like a File stream, an \IO stream has:
*
* - A read/write mode, which may be read-only, write-only, or read/write;
- * see {Read/Write Mode}[rdoc-ref:File@Read-2FWrite+Mode].
+ * see {Read/Write Mode}[rdoc-ref:File@ReadWrite+Mode].
* - A data mode, which may be text-only or binary;
* see {Data Mode}[rdoc-ref:File@Data+Mode].
* - Internal and external encodings;
@@ -14793,7 +15164,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* - +:binmode+: If a truthy value, specifies the mode as binary, text-only otherwise.
* - +:autoclose+: If a truthy value, specifies that the +fd+ will close
* when the stream closes; otherwise it remains open.
- * - +:path:+ If a string value is provided, it is used in #inspect and is available as
+ * - +:path+: If a string value is provided, it is used in #inspect and is available as
* #path method.
*
* Also available are the options offered in String#encode,
@@ -14815,6 +15186,9 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* A new stream has position zero (and line number zero);
* method +rewind+ resets the position (and line number) to zero.
*
+ * These methods discard {buffers}[rdoc-ref:IO@Buffering] and the
+ * Encoding::Converter instances used for that \IO.
+ *
* The relevant methods:
*
* - IO#tell (aliased as +#pos+): Returns the current position (in bytes) in the stream.
@@ -14862,56 +15236,64 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
*
* == Line \IO
*
- * You can read an \IO stream line-by-line using these methods:
+ * Class \IO supports line-oriented
+ * {input}[rdoc-ref:IO@Line+Input] and {output}[rdoc-ref:IO@Line+Output].
*
- * - IO#each_line: Reads each remaining line, passing it to the given block.
- * - IO#gets: Returns the next line.
- * - IO#readline: Like #gets, but raises an exception at end-of-stream.
- * - IO#readlines: Returns all remaining lines in an array.
+ * === Line Input
+ *
+ * Class \IO supports line-oriented input for
+ * {files}[rdoc-ref:IO@File+Line+Input] and {IO streams}[rdoc-ref:IO@Stream+Line+Input].
+ *
+ * ==== \File Line Input
*
- * Each of these reader methods accepts:
+ * You can read lines from a file using these methods:
*
- * - An optional line separator, +sep+;
+ * - IO.foreach: Reads each line and passes it to the given block.
+ * - IO.readlines: Reads and returns all lines in an array.
+ *
+ * For each of these methods:
+ *
+ * - You can specify {open options}[rdoc-ref:IO@Open+Options].
+ * - Line parsing depends on the effective <i>line separator</i>;
* see {Line Separator}[rdoc-ref:IO@Line+Separator].
- * - An optional line-size limit, +limit+;
+ * - The length of each returned line depends on the effective <i>line limit</i>;
* see {Line Limit}[rdoc-ref:IO@Line+Limit].
*
- * For each of these reader methods, reading may begin mid-line,
- * depending on the stream's position;
- * see {Position}[rdoc-ref:IO@Position]:
+ * ==== Stream Line Input
*
- * f = File.new('t.txt')
- * f.pos = 27
- * f.each_line {|line| p line }
- * f.close
+ * You can read lines from an \IO stream using these methods:
*
- * Output:
- *
- * "rth line\n"
- * "Fifth line\n"
+ * - IO#each_line: Reads each remaining line, passing it to the given block.
+ * - IO#gets: Returns the next line.
+ * - IO#readline: Like #gets, but raises an exception at end-of-stream.
+ * - IO#readlines: Returns all remaining lines in an array.
*
- * You can write to an \IO stream line-by-line using this method:
+ * For each of these methods:
*
- * - IO#puts: Writes objects to the stream.
+ * - Reading may begin mid-line,
+ * depending on the stream's _position_;
+ * see {Position}[rdoc-ref:IO@Position].
+ * - Line parsing depends on the effective <i>line separator</i>;
+ * see {Line Separator}[rdoc-ref:IO@Line+Separator].
+ * - The length of each returned line depends on the effective <i>line limit</i>;
+ * see {Line Limit}[rdoc-ref:IO@Line+Limit].
*
- * === Line Separator
+ * ===== Line Separator
*
- * Each of these methods uses a <i>line separator</i>,
- * which is the string that delimits lines:
+ * Each of the {line input methods}[rdoc-ref:IO@Line+Input] uses a <i>line separator</i>:
+ * the string that determines what is considered a line;
+ * it is sometimes called the <i>input record separator</i>.
*
- * - IO.foreach.
- * - IO.readlines.
- * - IO#each_line.
- * - IO#gets.
- * - IO#readline.
- * - IO#readlines.
+ * The default line separator is taken from global variable <tt>$/</tt>,
+ * whose initial value is <tt>"\n"</tt>.
*
- * The default line separator is the given by the global variable <tt>$/</tt>,
- * whose value is by default <tt>"\n"</tt>.
- * The line to be read next is all data from the current position
- * to the next line separator:
+ * Generally, the line to be read next is all data
+ * from the current {position}[rdoc-ref:IO@Position]
+ * to the next line separator
+ * (but see {Special Line Separator Values}[rdoc-ref:IO@Special+Line+Separator+Values]):
*
* f = File.new('t.txt')
+ * # Method gets with no sep argument returns the next line, according to $/.
* f.gets # => "First line\n"
* f.gets # => "Second line\n"
* f.gets # => "\n"
@@ -14919,7 +15301,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* f.gets # => "Fifth line\n"
* f.close
*
- * You can specify a different line separator:
+ * You can use a different line separator by passing argument +sep+:
*
* f = File.new('t.txt')
* f.gets('l') # => "First l"
@@ -14928,15 +15310,27 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* f.gets # => "e\n"
* f.close
*
- * There are two special line separators:
+ * Or by setting global variable <tt>$/</tt>:
+ *
+ * f = File.new('t.txt')
+ * $/ = 'l'
+ * f.gets # => "First l"
+ * f.gets # => "ine\nSecond l"
+ * f.gets # => "ine\n\nFourth l"
+ * f.close
+ *
+ * ===== Special Line Separator Values
*
- * - +nil+: The entire stream is read into a single string:
+ * Each of the {line input methods}[rdoc-ref:IO@Line+Input]
+ * accepts two special values for parameter +sep+:
+ *
+ * - +nil+: The entire stream is to be read ("slurped") into a single string:
*
* f = File.new('t.txt')
* f.gets(nil) # => "First line\nSecond line\n\nFourth line\nFifth line\n"
* f.close
*
- * - <tt>''</tt> (the empty string): The next "paragraph" is read
+ * - <tt>''</tt> (the empty string): The next "paragraph" is to be read
* (paragraphs being separated by two consecutive line separators):
*
* f = File.new('t.txt')
@@ -14944,23 +15338,18 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* f.gets('') # => "Fourth line\nFifth line\n"
* f.close
*
- * === Line Limit
- *
- * Each of these methods uses a <i>line limit</i>,
- * which specifies that the number of bytes returned may not be (much) longer
- * than the given +limit+;
+ * ===== Line Limit
*
- * - IO.foreach.
- * - IO.readlines.
- * - IO#each_line.
- * - IO#gets.
- * - IO#readline.
- * - IO#readlines.
+ * Each of the {line input methods}[rdoc-ref:IO@Line+Input]
+ * uses an integer <i>line limit</i>,
+ * which restricts the number of bytes that may be returned.
+ * (A multi-byte character will not be split, and so a returned line may be slightly longer
+ * than the limit.)
*
- * A multi-byte character will not be split, and so a line may be slightly longer
- * than the given limit.
+ * The default limit value is <tt>-1</tt>;
+ * any negative limit value means that there is no limit.
*
- * If +limit+ is not given, the line is determined only by +sep+.
+ * If there is no limit, the line is determined only by +sep+.
*
* # Text with 1-byte characters.
* File.open('t.txt') {|f| f.gets(1) } # => "F"
@@ -14972,30 +15361,29 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* File.open('t.txt') {|f| f.gets(11) } # => "First line\n"
* File.open('t.txt') {|f| f.gets(12) } # => "First line\n"
*
- * # Text with 2-byte characters, which will not be split.
- * File.open('t.rus') {|f| f.gets(1).size } # => 1
- * File.open('t.rus') {|f| f.gets(2).size } # => 1
- * File.open('t.rus') {|f| f.gets(3).size } # => 2
- * File.open('t.rus') {|f| f.gets(4).size } # => 2
+ * # Text with 3-byte characters, which will not be split.
+ * File.read('t.ja') # => "こんにちは"
+ * File.open('t.ja') {|f| f.gets(1).size } # => 1
+ * File.open('t.ja') {|f| f.gets(2).size } # => 1
+ * File.open('t.ja') {|f| f.gets(3).size } # => 1
+ * File.open('t.ja') {|f| f.gets(4).size } # => 2
+ * File.open('t.ja') {|f| f.gets(5).size } # => 2
*
- * === Line Separator and Line Limit
+ * ===== Line Separator and Line Limit
*
- * With arguments +sep+ and +limit+ given,
- * combines the two behaviors:
+ * With arguments +sep+ and +limit+ given, combines the two behaviors:
*
* - Returns the next line as determined by line separator +sep+.
- * - But returns no more bytes than are allowed by the limit.
+ * - But returns no more bytes than are allowed by the limit +limit+.
*
* Example:
*
* File.open('t.txt') {|f| f.gets('li', 20) } # => "First li"
* File.open('t.txt') {|f| f.gets('li', 2) } # => "Fi"
*
- * === Line Number
+ * ===== Line Number
*
- * A readable \IO stream has a non-negative integer <i>line number</i>.
- *
- * The relevant methods:
+ * A readable \IO stream has a non-negative integer <i>line number</i>, accessed and set by these methods:
*
* - IO#lineno: Returns the line number.
* - IO#lineno=: Resets and returns the line number.
@@ -15003,7 +15391,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* Unless modified by a call to method IO#lineno=,
* the line number is the number of lines read
* by certain line-oriented methods,
- * according to the given line separator +sep+:
+ * according to the effective {line separator}[rdoc-ref:IO@Line+Separator]:
*
* - IO.foreach: Increments the line number on each call to the block.
* - IO#each_line: Increments the line number on each call to the block.
@@ -15093,6 +15481,12 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* $. # => 5
* f.close
*
+ * === Line Output
+ *
+ * You can write to an \IO stream line-by-line using this method:
+ *
+ * - IO#puts: Writes objects to the stream.
+ *
* == Character \IO
*
* You can process an \IO stream character-by-character using these methods:
@@ -15103,6 +15497,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* - IO#putc: Writes a character to the stream.
* - IO#each_char: Reads each remaining character in the stream,
* passing the character to the given block.
+ *
* == Byte \IO
*
* You can process an \IO stream byte-by-byte using these methods:
@@ -15121,10 +15516,10 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
*
* == What's Here
*
- * First, what's elsewhere. \Class \IO:
+ * First, what's elsewhere. Class \IO:
*
- * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here].
- * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here],
+ * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here].
+ * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here],
* which provides dozens of additional methods.
*
* Here, class \IO provides methods that are useful for:
@@ -15388,18 +15783,21 @@ Init_IO(void)
rb_define_method(rb_cIO, "initialize", rb_io_initialize, -1);
rb_output_fs = Qnil;
- rb_define_hooked_variable("$,", &rb_output_fs, 0, deprecated_str_setter);
+ rb_define_hooked_variable("$,", &rb_output_fs, 0, rb_deprecated_str_setter);
rb_default_rs = rb_fstring_lit("\n"); /* avoid modifying RS_default */
- rb_gc_register_mark_object(rb_default_rs);
+ rb_vm_register_global_object(rb_default_rs);
rb_rs = rb_default_rs;
rb_output_rs = Qnil;
- rb_define_hooked_variable("$/", &rb_rs, 0, deprecated_str_setter);
- rb_define_hooked_variable("$-0", &rb_rs, 0, deprecated_str_setter);
- rb_define_hooked_variable("$\\", &rb_output_rs, 0, deprecated_str_setter);
+ rb_define_hooked_variable("$/", &rb_rs, 0, deprecated_rs_setter);
+ rb_gvar_ractor_local("$/"); // not local but ractor safe
+ rb_define_hooked_variable("$-0", &rb_rs, 0, deprecated_rs_setter);
+ rb_gvar_ractor_local("$-0"); // not local but ractor safe
+ rb_define_hooked_variable("$\\", &rb_output_rs, 0, rb_deprecated_str_setter);
rb_define_virtual_variable("$_", get_LAST_READ_LINE, set_LAST_READ_LINE);
rb_gvar_ractor_local("$_");
+ rb_gvar_box_dynamic("$_");
rb_define_method(rb_cIO, "initialize_copy", rb_io_init_copy, 1);
rb_define_method(rb_cIO, "reopen", rb_io_reopen, -1);
@@ -15585,7 +15983,7 @@ Init_IO(void)
rb_define_method(rb_cARGF, "binmode", argf_binmode_m, 0);
rb_define_method(rb_cARGF, "binmode?", argf_binmode_p, 0);
- rb_define_method(rb_cARGF, "write", argf_write, 1);
+ rb_define_method(rb_cARGF, "write", argf_write, -1);
rb_define_method(rb_cARGF, "print", rb_io_print, -1);
rb_define_method(rb_cARGF, "putc", rb_io_putc, 1);
rb_define_method(rb_cARGF, "puts", rb_io_puts, -1);
@@ -15621,7 +16019,7 @@ Init_IO(void)
rb_define_hooked_variable("$.", &argf, argf_lineno_getter, argf_lineno_setter);
rb_define_hooked_variable("$FILENAME", &argf, argf_filename_getter, rb_gvar_readonly_setter);
- ARGF.filename = rb_str_new2("-");
+ ARGF_SET(filename, rb_str_new2("-"));
rb_define_hooked_variable("$-i", &argf, opt_i_get, opt_i_set);
rb_gvar_ractor_local("$-i");