diff options
Diffstat (limited to 'io.c')
-rw-r--r-- | io.c | 998 |
1 files changed, 591 insertions, 407 deletions
@@ -112,6 +112,7 @@ #include "encindex.h" #include "id.h" #include "internal.h" +#include "internal/class.h" #include "internal/encoding.h" #include "internal/error.h" #include "internal/inits.h" @@ -170,6 +171,7 @@ off_t __syscall(quad_t number, ...); #define open rb_w32_uopen #undef rename #define rename(f, t) rb_w32_urename((f), (t)) +#include "win32/file.h" #endif VALUE rb_cIO; @@ -532,10 +534,12 @@ static rb_io_t *flush_before_seek(rb_io_t *fptr); extern ID ruby_static_id_signo; -NORETURN(static void raise_on_write(rb_io_t *fptr, int e, VALUE errinfo)); +NORETURN(static void rb_sys_fail_on_write(rb_io_t *fptr)); static void -raise_on_write(rb_io_t *fptr, int e, VALUE errinfo) +rb_sys_fail_on_write(rb_io_t *fptr) { + int e = errno; + VALUE errinfo = rb_syserr_new_path(e, (fptr)->pathv); #if defined EPIPE if (fptr_signal_on_epipe(fptr) && (e == EPIPE)) { const VALUE sig = @@ -549,12 +553,6 @@ raise_on_write(rb_io_t *fptr, int e, VALUE errinfo) rb_exc_raise(errinfo); } -#define rb_sys_fail_on_write(fptr) \ - do { \ - int e = errno; \ - raise_on_write(fptr, e, rb_syserr_new_path(e, (fptr)->pathv)); \ - } while (0) - #define NEED_NEWLINE_DECORATOR_ON_READ(fptr) ((fptr)->mode & FMODE_TEXTMODE) #define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) ((fptr)->mode & FMODE_TEXTMODE) #if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) @@ -846,7 +844,7 @@ rb_io_timeout(VALUE self) * timeout = duration -> duration * timeout = nil -> nil * - * \Set the internal timeout to the specified duration or nil. The timeout + * Sets the internal timeout to the specified duration or nil. The timeout * applies to all blocking operations where possible. * * When the operation performs longer than the timeout set, IO::TimeoutError @@ -1063,20 +1061,24 @@ rb_gc_for_fd(int err) return 0; } +/* try `expr` up to twice while it returns false and `errno` + * warrants a GC. 
Each `errno` is available as `first_errno` and + * `retried_errno` respectively */ +#define TRY_WITH_GC(expr) \ + for (int first_errno, retried_errno = 0, retried = 0; \ + (!retried && \ + !(expr) && \ + (!rb_gc_for_fd(first_errno = errno) || !(expr)) && \ + (retried_errno = errno, 1)); \ + (void)retried_errno, retried = 1) + static int ruby_dup(int orig) { - int fd; + int fd = -1; - fd = rb_cloexec_dup(orig); - if (fd < 0) { - int e = errno; - if (rb_gc_for_fd(e)) { - fd = rb_cloexec_dup(orig); - } - if (fd < 0) { - rb_syserr_fail(e, 0); - } + TRY_WITH_GC((fd = rb_cloexec_dup(orig)) >= 0) { + rb_syserr_fail(first_errno, 0); } rb_update_max_fd(fd); return fd; @@ -1141,6 +1143,11 @@ static int nogvl_wait_for(VALUE th, rb_io_t *fptr, short events, struct timeval static inline int io_internal_wait(VALUE thread, rb_io_t *fptr, int error, int events, struct timeval *timeout) { + if (!timeout && rb_thread_mn_schedulable(thread)) { + RUBY_ASSERT(errno == EWOULDBLOCK || errno == EAGAIN); + return -1; + } + int ready = nogvl_wait_for(thread, fptr, events, timeout); if (ready > 0) { @@ -1281,7 +1288,7 @@ rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count) iis.timeout = &timeout_storage; } - return (ssize_t)rb_thread_io_blocking_region(internal_read_func, &iis, fptr->fd); + return (ssize_t)rb_thread_io_blocking_call(internal_read_func, &iis, fptr->fd, RB_WAITFD_IN); } static ssize_t @@ -1314,7 +1321,7 @@ rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count) iis.timeout = &timeout_storage; } - return (ssize_t)rb_thread_io_blocking_region(internal_write_func, &iis, fptr->fd); + return (ssize_t)rb_thread_io_blocking_call(internal_write_func, &iis, fptr->fd, RB_WAITFD_OUT); } #ifdef HAVE_WRITEV @@ -1351,7 +1358,7 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt) iis.timeout = &timeout_storage; } - return (ssize_t)rb_thread_io_blocking_region(internal_writev_func, &iis, fptr->fd); + return 
(ssize_t)rb_thread_io_blocking_call(internal_writev_func, &iis, fptr->fd, RB_WAITFD_OUT); } #endif @@ -1381,7 +1388,7 @@ static VALUE io_flush_buffer_async(VALUE arg) { rb_io_t *fptr = (rb_io_t *)arg; - return rb_thread_io_blocking_region(io_flush_buffer_sync, fptr, fptr->fd); + return rb_thread_io_blocking_call(io_flush_buffer_sync, fptr, fptr->fd, RB_WAITFD_OUT); } static inline int @@ -1694,7 +1701,6 @@ make_writeconv(rb_io_t *fptr) /* writing functions */ struct binwrite_arg { rb_io_t *fptr; - VALUE str; const char *ptr; long length; }; @@ -1790,13 +1796,11 @@ io_binwrite_string(VALUE arg) // Write as much as possible: ssize_t result = io_binwrite_string_internal(p->fptr, ptr, remaining); - // If only the internal buffer is written, result will be zero [bytes of given data written]. This means we - // should try again. if (result == 0) { - errno = EWOULDBLOCK; + // If only the internal buffer is written, result will be zero [bytes of given data written]. This means we + // should try again immediately. 
} - - if (result > 0) { + else if (result > 0) { if ((size_t)result == remaining) break; ptr += result; remaining -= result; @@ -1846,7 +1850,7 @@ io_binwrite_requires_flush_write(rb_io_t *fptr, long len, int nosync) } static long -io_binwrite(VALUE str, const char *ptr, long len, rb_io_t *fptr, int nosync) +io_binwrite(const char *ptr, long len, rb_io_t *fptr, int nosync) { if (len <= 0) return len; @@ -1859,7 +1863,6 @@ io_binwrite(VALUE str, const char *ptr, long len, rb_io_t *fptr, int nosync) struct binwrite_arg arg; arg.fptr = fptr; - arg.str = str; arg.ptr = ptr; arg.length = len; @@ -1967,9 +1970,9 @@ io_fwrite(VALUE str, rb_io_t *fptr, int nosync) if (converted) OBJ_FREEZE(str); - tmp = rb_str_tmp_frozen_acquire(str); + tmp = rb_str_tmp_frozen_no_embed_acquire(str); RSTRING_GETMEM(tmp, ptr, len); - n = io_binwrite(tmp, ptr, len, fptr, nosync); + n = io_binwrite(ptr, len, fptr, nosync); rb_str_tmp_frozen_release(str, tmp); return n; @@ -1982,7 +1985,7 @@ rb_io_bufwrite(VALUE io, const void *buf, size_t size) GetOpenFile(io, fptr); rb_io_check_writable(fptr); - return (ssize_t)io_binwrite(0, buf, (long)size, fptr, 0); + return (ssize_t)io_binwrite(buf, (long)size, fptr, 0); } static VALUE @@ -2274,7 +2277,7 @@ rb_io_writev(VALUE io, int argc, const VALUE *argv) if (argc > 1 && rb_obj_method_arity(io, id_write) == 1) { if (io != rb_ractor_stderr() && RTEST(ruby_verbose)) { VALUE klass = CLASS_OF(io); - char sep = FL_TEST(klass, FL_SINGLETON) ? (klass = io, '.') : '#'; + char sep = RCLASS_SINGLETON_P(klass) ? 
(klass = io, '.') : '#'; rb_category_warning( RB_WARN_CATEGORY_DEPRECATED, "%+"PRIsVALUE"%c""write is outdated interface" " which accepts just one argument", @@ -2668,7 +2671,7 @@ rb_io_eof(VALUE io) READ_CHECK(fptr); #if RUBY_CRLF_ENVIRONMENT if (!NEED_READCONV(fptr) && NEED_NEWLINE_DECORATOR_ON_READ(fptr)) { - return RBOOL(eof(fptr->fd));; + return RBOOL(eof(fptr->fd)); } #endif return RBOOL(io_fillbuf(fptr) < 0); @@ -2865,11 +2868,19 @@ rb_io_descriptor(VALUE io) return fptr->fd; } else { - return RB_NUM2INT(rb_funcall(io, id_fileno, 0)); + VALUE fileno = rb_check_funcall(io, id_fileno, 0, NULL); + if (!UNDEF_P(fileno)) { + return RB_NUM2INT(fileno); + } } + + rb_raise(rb_eTypeError, "expected IO or #fileno, %"PRIsVALUE" given", rb_obj_class(io)); + + UNREACHABLE_RETURN(-1); } -int rb_io_mode(VALUE io) +int +rb_io_mode(VALUE io) { rb_io_t *fptr; GetOpenFile(io, fptr); @@ -3263,9 +3274,10 @@ io_setstrbuf(VALUE *str, long len) } else { VALUE s = StringValue(*str); + rb_str_modify(s); + long clen = RSTRING_LEN(s); if (clen >= len) { - rb_str_modify(s); return FALSE; } len -= clen; @@ -3396,7 +3408,12 @@ io_read_memory_call(VALUE arg) } } - return rb_thread_io_blocking_region(internal_read_func, iis, iis->fptr->fd); + if (iis->nonblock) { + return rb_thread_io_blocking_call(internal_read_func, iis, iis->fptr->fd, 0); + } + else { + return rb_thread_io_blocking_call(internal_read_func, iis, iis->fptr->fd, RB_WAITFD_IN); + } } static long @@ -4139,8 +4156,7 @@ rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr) s = RSTRING_PTR(str); e = RSTRING_END(str); p = e - rslen; - pp = rb_enc_left_char_head(s, p, e, enc); - if (pp != p) continue; + if (!at_char_boundary(s, p, e, enc)) continue; if (!rspara) rscheck(rsptr, rslen, rs); if (memcmp(p, rsptr, rslen) == 0) { if (chomp) { @@ -4281,11 +4297,8 @@ rb_io_gets_internal(VALUE io) * File.open('t.txt') {|f| f.gets(12) } # => "First line\n" * * With arguments +sep+ and +limit+ given, - * combines the two behaviors: 
- * - * - Returns the next line as determined by line separator +sep+, - * or +nil+ if none. - * - But returns no more bytes than are allowed by the limit. + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword argument +chomp+ specifies whether line separators * are to be omitted: @@ -4352,22 +4365,28 @@ rb_io_set_lineno(VALUE io, VALUE lineno) return lineno; } -/* - * call-seq: - * readline(sep = $/, chomp: false) -> string - * readline(limit, chomp: false) -> string - * readline(sep, limit, chomp: false) -> string - * - * Reads a line as with IO#gets, but raises EOFError if already at end-of-stream. - * - * Optional keyword argument +chomp+ specifies whether line separators - * are to be omitted. - */ - +/* :nodoc: */ static VALUE -rb_io_readline(int argc, VALUE *argv, VALUE io) +io_readline(rb_execution_context_t *ec, VALUE io, VALUE sep, VALUE lim, VALUE chomp) { - VALUE line = rb_io_gets_m(argc, argv, io); + if (NIL_P(lim)) { + // If sep is specified, but it's not a string and not nil, then assume + // it's the limit (it should be an integer) + if (!NIL_P(sep) && NIL_P(rb_check_string_type(sep))) { + // If the user has specified a non-nil / non-string value + // for the separator, we assume it's the limit and set the + // separator to default: rb_rs. + lim = sep; + sep = rb_rs; + } + } + + if (!NIL_P(sep)) { + StringValue(sep); + } + + VALUE line = rb_io_getline_1(sep, NIL_P(lim) ? -1L : NUM2LONG(lim), RTEST(chomp), io); + rb_lastline_set_up(line, 1); if (NIL_P(line)) { rb_eof_error(); @@ -4428,10 +4447,8 @@ static VALUE io_readlines(const struct getline_arg *arg, VALUE io); * f.close * * With arguments +sep+ and +limit+ given, - * combines the two behaviors: - * - * - Returns lines as determined by line separator +sep+. - * - But returns no more bytes in a line than are allowed by the limit. 
+ * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword argument +chomp+ specifies whether line separators * are to be omitted: @@ -4551,10 +4568,8 @@ io_readlines(const struct getline_arg *arg, VALUE io) * "ne\n" * * With arguments +sep+ and +limit+ given, - * combines the two behaviors: - * - * - Calls with the next line as determined by line separator +sep+. - * - But returns no more bytes than are allowed by the limit. + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword argument +chomp+ specifies whether line separators * are to be omitted: @@ -5092,7 +5107,7 @@ rb_io_ungetbyte(VALUE io, VALUE b) b = rb_str_new((const char *)&c, 1); break; default: - SafeStringValue(b); + StringValue(b); } io_ungetbyte(b, fptr); return Qnil; @@ -5154,7 +5169,7 @@ rb_io_ungetc(VALUE io, VALUE c) c = rb_enc_uint_chr(NUM2UINT(c), io_read_encoding(fptr)); } else { - SafeStringValue(c); + StringValue(c); } if (NEED_READCONV(fptr)) { SET_BINARY_MODE(fptr); @@ -5255,7 +5270,7 @@ rb_io_close_on_exec_p(VALUE io) * * Sets a close-on-exec flag. * - * f = open("/dev/null") + * f = File.open(File::NULL) * f.close_on_exec = true * system("cat", "/proc/self/fd/#{f.fileno}") # cat: /proc/self/fd/3: No such file or directory * f.closed? #=> false @@ -5400,7 +5415,7 @@ maygvl_close(int fd, int keepgvl) * close() may block for certain file types (NFS, SO_LINGER sockets, * inotify), so let other threads run. 
*/ - return (int)(intptr_t)rb_thread_call_without_gvl(nogvl_close, &fd, RUBY_UBF_IO, 0); + return IO_WITHOUT_GVL_INT(nogvl_close, &fd); } static void* @@ -5417,7 +5432,7 @@ maygvl_fclose(FILE *file, int keepgvl) if (keepgvl) return fclose(file); - return (int)(intptr_t)rb_thread_call_without_gvl(nogvl_fclose, file, RUBY_UBF_IO, 0); + return IO_WITHOUT_GVL_INT(nogvl_fclose, file); } static void free_io_buffer(rb_io_buffer_t *buf); @@ -5570,12 +5585,9 @@ clear_codeconv(rb_io_t *fptr) clear_writeconv(fptr); } -void -rb_io_fptr_finalize_internal(void *ptr) +static void +rb_io_fptr_cleanup_all(rb_io_t *fptr) { - rb_io_t *fptr = ptr; - - if (!ptr) return; fptr->pathv = Qnil; if (0 <= fptr->fd) rb_io_fptr_cleanup(fptr, TRUE); @@ -5583,7 +5595,14 @@ rb_io_fptr_finalize_internal(void *ptr) free_io_buffer(&fptr->rbuf); free_io_buffer(&fptr->wbuf); clear_codeconv(fptr); - free(fptr); +} + +void +rb_io_fptr_finalize_internal(void *ptr) +{ + if (!ptr) return; + rb_io_fptr_cleanup_all(ptr); + free(ptr); } #undef rb_io_fptr_finalize @@ -5600,7 +5619,7 @@ rb_io_fptr_finalize(rb_io_t *fptr) } #define rb_io_fptr_finalize(fptr) rb_io_fptr_finalize_internal(fptr) -RUBY_FUNC_EXPORTED size_t +size_t rb_io_memsize(const rb_io_t *fptr) { size_t size = sizeof(rb_io_t); @@ -6092,7 +6111,7 @@ pread_internal_call(VALUE _arg) } } - return rb_thread_io_blocking_region(internal_pread_func, arg, arg->fd); + return rb_thread_io_blocking_call(internal_pread_func, arg, arg->fd, RB_WAITFD_IN); } /* @@ -6225,7 +6244,7 @@ rb_io_pwrite(VALUE io, VALUE str, VALUE offset) arg.buf = RSTRING_PTR(tmp); arg.count = (size_t)RSTRING_LEN(tmp); - n = (ssize_t)rb_thread_io_blocking_region(internal_pwrite_func, &arg, fptr->fd); + n = (ssize_t)rb_thread_io_blocking_call(internal_pwrite_func, &arg, fptr->fd, RB_WAITFD_OUT); if (n < 0) rb_sys_fail_path(fptr->pathv); rb_str_tmp_frozen_release(str, tmp); @@ -6801,7 +6820,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, else { const char *p; - 
SafeStringValue(vmode); + StringValue(vmode); p = StringValueCStr(vmode); fmode = rb_io_modestr_fmode(p); oflags = rb_io_fmode_oflags(fmode); @@ -6934,8 +6953,7 @@ sysopen_func(void *ptr) static inline int rb_sysopen_internal(struct sysopen_struct *data) { - int fd; - fd = (int)(VALUE)rb_thread_call_without_gvl(sysopen_func, data, RUBY_UBF_IO, 0); + int fd = IO_WITHOUT_GVL_INT(sysopen_func, data); if (0 <= fd) rb_update_max_fd(fd); return fd; @@ -6944,7 +6962,7 @@ rb_sysopen_internal(struct sysopen_struct *data) static int rb_sysopen(VALUE fname, int oflags, mode_t perm) { - int fd; + int fd = -1; struct sysopen_struct data; data.fname = rb_str_encode_ospath(fname); @@ -6952,21 +6970,14 @@ rb_sysopen(VALUE fname, int oflags, mode_t perm) data.oflags = oflags; data.perm = perm; - fd = rb_sysopen_internal(&data); - if (fd < 0) { - int e = errno; - if (rb_gc_for_fd(e)) { - fd = rb_sysopen_internal(&data); - } - if (fd < 0) { - rb_syserr_fail_path(e, fname); - } + TRY_WITH_GC((fd = rb_sysopen_internal(&data)) >= 0) { + rb_syserr_fail_path(first_errno, fname); } return fd; } -FILE * -rb_fdopen(int fd, const char *modestr) +static inline FILE * +fdopen_internal(int fd, const char *modestr) { FILE *file; @@ -6975,26 +6986,22 @@ rb_fdopen(int fd, const char *modestr) #endif file = fdopen(fd, modestr); if (!file) { - int e = errno; -#if defined(__sun) - if (e == 0) { - rb_gc(); - errno = 0; - file = fdopen(fd, modestr); - } - else -#endif - if (rb_gc_for_fd(e)) { - file = fdopen(fd, modestr); - } - if (!file) { #ifdef _WIN32 - if (e == 0) e = EINVAL; + if (errno == 0) errno = EINVAL; #elif defined(__sun) - if (e == 0) e = EMFILE; + if (errno == 0) errno = EMFILE; #endif - rb_syserr_fail(e, 0); - } + } + return file; +} + +FILE * +rb_fdopen(int fd, const char *modestr) +{ + FILE *file = 0; + + TRY_WITH_GC((file = fdopen_internal(fd, modestr)) != 0) { + rb_syserr_fail(first_errno, 0); } /* xxx: should be _IONBF? A buffer in FILE may have trouble. 
*/ @@ -7144,8 +7151,6 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) if (p) { parse_mode_enc(p+1, rb_usascii_encoding(), &convconfig.enc, &convconfig.enc2, &fmode); - convconfig.ecflags = 0; - convconfig.ecopts = Qnil; } else { rb_encoding *e; @@ -7153,10 +7158,19 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); - convconfig.ecflags = 0; - convconfig.ecopts = Qnil; } + convconfig.ecflags = (fmode & FMODE_READABLE) ? + MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + convconfig.ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags); + convconfig.ecopts = Qnil; + return rb_file_open_generic(io, filename, rb_io_fmode_oflags(fmode), fmode, @@ -7285,12 +7299,7 @@ int rb_pipe(int *pipes) { int ret; - ret = rb_cloexec_pipe(pipes); - if (ret < 0) { - if (rb_gc_for_fd(errno)) { - ret = rb_cloexec_pipe(pipes); - } - } + TRY_WITH_GC((ret = rb_cloexec_pipe(pipes)) >= 0); if (ret == 0) { rb_update_max_fd(pipes[0]); rb_update_max_fd(pipes[1]); @@ -7926,7 +7935,7 @@ rb_io_popen(VALUE pname, VALUE pmode, VALUE env, VALUE opt) RB_GC_GUARD(tmp); } else { - SafeStringValue(pname); + StringValue(pname); execarg_obj = Qnil; if (!is_popen_fork(pname)) execarg_obj = rb_execarg_new(1, &pname, TRUE, FALSE); @@ -7963,6 +7972,60 @@ popen_finish(VALUE port, VALUE klass) return port; } +#if defined(HAVE_WORKING_FORK) && !defined(__EMSCRIPTEN__) +struct popen_writer_arg { + char *const *argv; + struct popen_arg popen; +}; + +static int +exec_popen_writer(void *arg, char *errmsg, size_t buflen) +{ + struct popen_writer_arg *pw = arg; + pw->popen.modef = 
FMODE_WRITABLE; + popen_redirect(&pw->popen); + execv(pw->argv[0], pw->argv); + strlcpy(errmsg, strerror(errno), buflen); + return -1; +} +#endif + +FILE * +ruby_popen_writer(char *const *argv, rb_pid_t *pid) +{ +#if (defined(HAVE_WORKING_FORK) && !defined(__EMSCRIPTEN__)) || defined(_WIN32) +# ifdef HAVE_WORKING_FORK + struct popen_writer_arg pw; + int *const write_pair = pw.popen.pair; +# else + int write_pair[2]; +# endif + + int result = rb_cloexec_pipe(write_pair); + *pid = -1; + if (result == 0) { +# ifdef HAVE_WORKING_FORK + pw.argv = argv; + int status; + char errmsg[80] = {'\0'}; + *pid = rb_fork_async_signal_safe(&status, exec_popen_writer, &pw, Qnil, errmsg, sizeof(errmsg)); +# else + *pid = rb_w32_uspawn_process(P_NOWAIT, argv[0], argv, write_pair[0], -1, -1, 0); + const char *errmsg = (*pid < 0) ? strerror(errno) : NULL; +# endif + close(write_pair[0]); + if (*pid < 0) { + close(write_pair[1]); + fprintf(stderr, "ruby_popen_writer(%s): %s\n", argv[0], errmsg); + } + else { + return fdopen(write_pair[1], "w"); + } + } +#endif + return NULL; +} + static void rb_scan_open_args(int argc, const VALUE *argv, VALUE *fname_p, int *oflags_p, int *fmode_p, @@ -8006,11 +8069,11 @@ rb_open_file(int argc, const VALUE *argv, VALUE io) * File.open(path, mode = 'r', perm = 0666, **opts) -> file * File.open(path, mode = 'r', perm = 0666, **opts) {|f| ... } -> object * - * Creates a new \File object, via File.new with the given arguments. + * Creates a new File object, via File.new with the given arguments. * - * With no block given, returns the \File object. + * With no block given, returns the File object. * - * With a block given, calls the block with the \File object + * With a block given, calls the block with the File object * and returns the block's value. 
* */ @@ -8077,7 +8140,7 @@ rb_io_s_sysopen(int argc, VALUE *argv, VALUE _) else if (!NIL_P(intmode = rb_check_to_integer(vmode, "to_int"))) oflags = NUM2INT(intmode); else { - SafeStringValue(vmode); + StringValue(vmode); oflags = rb_io_modestr_oflags(StringValueCStr(vmode)); } if (NIL_P(vperm)) perm = 0666; @@ -8108,20 +8171,10 @@ check_pipe_command(VALUE filename_or_command) * open(path, mode = 'r', perm = 0666, **opts) -> io or nil * open(path, mode = 'r', perm = 0666, **opts) {|io| ... } -> obj * - * Creates an IO object connected to the given stream, file, or subprocess. - * - * Required string argument +path+ determines which of the following occurs: + * Creates an IO object connected to the given file. * - * - The file at the specified +path+ is opened. - * - The process forks. - * - A subprocess is created. - * - * Each of these is detailed below. - * - * <b>File Opened</b> - - * If +path+ does _not_ start with a pipe character (<tt>'|'</tt>), - * a file stream is opened with <tt>File.open(path, mode, perm, **opts)</tt>. + * This method has potential security vulnerabilities if called with untrusted input; + * see {Command Injection}[rdoc-ref:command_injection.rdoc]. * * With no block given, file stream is returned: * @@ -8138,67 +8191,6 @@ check_pipe_command(VALUE filename_or_command) * * See File.open for details. * - * <b>Process Forked</b> - * - * If +path+ is the 2-character string <tt>'|-'</tt>, the process forks - * and the child process is connected to the parent. - * - * With no block given: - * - * io = open('|-') - * if io - * $stderr.puts "In parent, child pid is #{io.pid}." - * else - * $stderr.puts "In child, pid is #{$$}." - * end - * - * Output: - * - * In parent, child pid is 27903. - * In child, pid is 27903. - * - * With a block given: - * - * open('|-') do |io| - * if io - * $stderr.puts "In parent, child pid is #{io.pid}." - * else - * $stderr.puts "In child, pid is #{$$}." 
- * end - * end - * - * Output: - * - * In parent, child pid is 28427. - * In child, pid is 28427. - * - * <b>Subprocess Created</b> - * - * If +path+ is <tt>'|command'</tt> (<tt>'command' != '-'</tt>), - * a new subprocess runs the command; its open stream is returned. - * Note that the command may be processed by shell if it contains - * shell metacharacters. - * - * With no block given: - * - * io = open('|echo "Hi!"') # => #<IO:fd 12> - * print io.gets - * io.close - * - * Output: - * - * "Hi!" - * - * With a block given, calls the block with the stream, then closes the stream: - * - * open('|echo "Hi!"') do |io| - * print io.gets - * end - * - * Output: - * - * "Hi!" - * */ static VALUE @@ -8221,6 +8213,8 @@ rb_f_open(int argc, VALUE *argv, VALUE _) else { VALUE cmd = check_pipe_command(tmp); if (!NIL_P(cmd)) { + // TODO: when removed in 4.0, update command_injection.rdoc + rb_warn_deprecated_to_remove_at(4.0, "Calling Kernel#open with a leading '|'", "IO.popen"); argv[0] = cmd; return rb_io_s_popen(argc, argv, rb_cIO); } @@ -8258,6 +8252,8 @@ rb_io_open_generic(VALUE klass, VALUE filename, int oflags, int fmode, { VALUE cmd; if (klass == rb_cIO && !NIL_P(cmd = check_pipe_command(filename))) { + // TODO: when removed in 4.0, update command_injection.rdoc + rb_warn_deprecated_to_remove_at(4.0, "IO process creation with a leading '|'", "IO.popen"); return pipe_open_s(cmd, rb_io_oflags_modestr(oflags), fmode, convconfig); } else { @@ -8604,7 +8600,7 @@ deprecated_str_setter(VALUE val, ID id, VALUE *var) { rb_str_setter(val, id, &val); if (!NIL_P(val)) { - rb_warn_deprecated("`%s'", NULL, rb_id2name(id)); + rb_warn_deprecated("'%s'", NULL, rb_id2name(id)); } *var = val; } @@ -8865,9 +8861,9 @@ io_puts_ary(VALUE ary, VALUE out, int recur) * * Treatment for each object: * - * - \String: writes the string. + * - String: writes the string. * - Neither string nor array: writes <tt>object.to_s</tt>. - * - \Array: writes each element of the array; arrays may be nested. 
+ * - Array: writes each element of the array; arrays may be nested. * * To keep these examples brief, we define this helper method: * @@ -9023,6 +9019,10 @@ rb_p_result(int argc, const VALUE *argv) * 0..4 * [0..4, 0..4, 0..4] * + * Kernel#p is designed for debugging purposes. + * Ruby implementations may define Kernel#p to be uninterruptible + * in whole or in part. + * On CRuby, Kernel#p's writing of data is uninterruptible. */ static VALUE @@ -9233,11 +9233,27 @@ static VALUE prep_io(int fd, int fmode, VALUE klass, const char *path) { VALUE path_value = Qnil; + rb_encoding *e; + struct rb_io_encoding convconfig; + if (path) { path_value = rb_obj_freeze(rb_str_new_cstr(path)); } - VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, NULL); + e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; + rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); + convconfig.ecflags = (fmode & FMODE_READABLE) ? + MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + convconfig.ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags); + convconfig.ecopts = Qnil; + + VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, &convconfig); rb_io_t*io = RFILE(self)->fptr; if (!io_check_tty(io)) { @@ -9526,9 +9542,9 @@ rb_io_set_encoding_by_bom(VALUE io) * File.new(path, mode = 'r', perm = 0666, **opts) -> file * * Opens the file at the given +path+ according to the given +mode+; - * creates and returns a new \File object for that file. + * creates and returns a new File object for that file. * - * The new \File object is buffered mode (or non-sync mode), unless + * The new File object is buffered mode (or non-sync mode), unless * +filename+ is a tty. 
* See IO#flush, IO#fsync, IO#fdatasync, and IO#sync=. * @@ -9633,11 +9649,11 @@ rb_io_autoclose_p(VALUE io) * * Sets auto-close flag. * - * f = open("/dev/null") + * f = File.open(File::NULL) * IO.for_fd(f.fileno).close * f.gets # raises Errno::EBADF * - * f = open("/dev/null") + * f = File.open(File::NULL) * g = IO.for_fd(f.fileno) * g.autoclose = false * g.close @@ -10445,8 +10461,9 @@ static VALUE argf_readlines(int, VALUE *, VALUE); * $cat t.txt | ruby -e "p readlines 12" * ["First line\n", "Second line\n", "\n", "Fourth line\n", "Fifth line\n"] * - * With arguments +sep+ and +limit+ given, combines the two behaviors; - * see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]. + * With arguments +sep+ and +limit+ given, + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword argument +chomp+ specifies whether line separators * are to be omitted: @@ -10470,19 +10487,21 @@ rb_f_readlines(int argc, VALUE *argv, VALUE recv) /* * call-seq: - * ARGF.readlines(sep = $/) -> array - * ARGF.readlines(limit) -> array - * ARGF.readlines(sep, limit) -> array + * ARGF.readlines(sep = $/, chomp: false) -> array + * ARGF.readlines(limit, chomp: false) -> array + * ARGF.readlines(sep, limit, chomp: false) -> array * - * ARGF.to_a(sep = $/) -> array - * ARGF.to_a(limit) -> array - * ARGF.to_a(sep, limit) -> array + * ARGF.to_a(sep = $/, chomp: false) -> array + * ARGF.to_a(limit, chomp: false) -> array + * ARGF.to_a(sep, limit, chomp: false) -> array * * Reads each file in ARGF in its entirety, returning an Array containing * lines from the files. Lines are assumed to be separated by _sep_. * * lines = ARGF.readlines * lines[0] #=> "This is line one\n" + * + * See +IO.readlines+ for a full description of all options. 
*/ static VALUE argf_readlines(int argc, VALUE *argv, VALUE argf) @@ -10536,17 +10555,15 @@ rb_f_backquote(VALUE obj, VALUE str) VALUE result; rb_io_t *fptr; - SafeStringValue(str); + StringValue(str); rb_last_status_clear(); port = pipe_open_s(str, "r", FMODE_READABLE|DEFAULT_TEXTMODE, NULL); if (NIL_P(port)) return rb_str_new(0,0); GetOpenFile(port, fptr); - rb_obj_hide(port); result = read_all(fptr, remain_size(fptr), Qnil); rb_io_close(port); - RFILE(port)->fptr = NULL; - rb_io_fptr_finalize(fptr); + rb_io_fptr_cleanup_all(fptr); RB_GC_GUARD(port); return result; @@ -11447,7 +11464,7 @@ rb_fcntl(VALUE io, VALUE req, VALUE arg) * a file-oriented I/O stream. Arguments and results are platform * dependent. * - * If +argument is a number, its value is passed directly; + * If +argument+ is a number, its value is passed directly; * if it is a string, it is interpreted as a binary sequence of bytes. * (Array#pack might be a useful way to build this string.) * @@ -11544,7 +11561,7 @@ rb_f_syscall(int argc, VALUE *argv, VALUE _) VALUE v = rb_check_string_type(argv[i]); if (!NIL_P(v)) { - SafeStringValue(v); + StringValue(v); rb_str_modify(v); arg[i] = (VALUE)StringValueCStr(v); } @@ -11913,9 +11930,6 @@ io_s_foreach(VALUE v) * IO.foreach(path, sep = $/, **opts) {|line| block } -> nil * IO.foreach(path, limit, **opts) {|line| block } -> nil * IO.foreach(path, sep, limit, **opts) {|line| block } -> nil - * IO.foreach(command, sep = $/, **opts) {|line| block } -> nil - * IO.foreach(command, limit, **opts) {|line| block } -> nil - * IO.foreach(command, sep, limit, **opts) {|line| block } -> nil * IO.foreach(...) -> an_enumerator * * Calls the block with each successive line read from the stream. @@ -11924,16 +11938,7 @@ io_s_foreach(VALUE v) * this method has potential security vulnerabilities if called with untrusted input; * see {Command Injection}[rdoc-ref:command_injection.rdoc]. 
* - * The first argument must be a string that is one of the following: - * - * - Path: if +self+ is a subclass of \IO (\File, for example), - * or if the string _does_ _not_ start with the pipe character (<tt>'|'</tt>), - * the string is the path to a file. - * - Command: if +self+ is the class \IO, - * and if the string starts with the pipe character, - * the rest of the string is a command to be executed as a subprocess. - * This usage has potential security vulnerabilities if called with untrusted input; - * see {Command Injection}[rdoc-ref:command_injection.rdoc]. + * The first argument must be a string that is the path to a file. * * With only argument +path+ given, parses lines from the file at the given +path+, * as determined by the default line separator, @@ -11969,7 +11974,7 @@ io_s_foreach(VALUE v) * * With argument +limit+ given, parses lines as determined by the default * line separator and the given line-length limit - * (see {Line Limit}[rdoc-ref:IO@Line+Limit]): + * (see {Line Separator}[rdoc-ref:IO@Line+Separator] and {Line Limit}[rdoc-ref:IO@Line+Limit]): * * File.foreach('t.txt', 7) {|line| p line } * @@ -11985,16 +11990,15 @@ io_s_foreach(VALUE v) * "Fourth l" * "line\n" * - * With arguments +sep+ and +limit+ given, - * parses lines as determined by the given - * line separator and the given line-length limit - * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]): + * With arguments +sep+ and +limit+ given, + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword arguments +opts+ specify: * * - {Open Options}[rdoc-ref:IO@Open+Options]. * - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. - * - {Line Options}[rdoc-ref:IO@Line+Options]. + * - {Line Options}[rdoc-ref:IO@Line+IO]. * * Returns an Enumerator if no block is given. 
* @@ -12027,9 +12031,6 @@ io_s_readlines(VALUE v) /* * call-seq: - * IO.readlines(command, sep = $/, **opts) -> array - * IO.readlines(command, limit, **opts) -> array - * IO.readlines(command, sep, limit, **opts) -> array * IO.readlines(path, sep = $/, **opts) -> array * IO.readlines(path, limit, **opts) -> array * IO.readlines(path, sep, limit, **opts) -> array @@ -12040,19 +12041,7 @@ io_s_readlines(VALUE v) * this method has potential security vulnerabilities if called with untrusted input; * see {Command Injection}[rdoc-ref:command_injection.rdoc]. * - * The first argument must be a string; - * its meaning depends on whether it starts with the pipe character (<tt>'|'</tt>): - * - * - If so (and if +self+ is \IO), - * the rest of the string is a command to be executed as a subprocess. - * - Otherwise, the string is the path to a file. - * - * With only argument +command+ given, executes the command in a shell, - * parses its $stdout into lines, as determined by the default line separator, - * and returns those lines in an array: - * - * IO.readlines('| cat t.txt') - * # => ["First line\n", "Second line\n", "\n", "Third line\n", "Fourth line\n"] + * The first argument must be a string that is the path to a file. * * With only argument +path+ given, parses lines from the file at the given +path+, * as determined by the default line separator, @@ -12061,8 +12050,6 @@ io_s_readlines(VALUE v) * IO.readlines('t.txt') * # => ["First line\n", "Second line\n", "\n", "Third line\n", "Fourth line\n"] * - * For both forms, command and path, the remaining arguments are the same. 
- * * With argument +sep+ given, parses lines as determined by that line separator * (see {Line Separator}[rdoc-ref:IO@Line+Separator]): * @@ -12078,21 +12065,20 @@ io_s_readlines(VALUE v) * * With argument +limit+ given, parses lines as determined by the default * line separator and the given line-length limit - * (see {Line Limit}[rdoc-ref:IO@Line+Limit]): + * (see {Line Separator}[rdoc-ref:IO@Line+Separator] and {Line Limit}[rdoc-ref:IO@Line+Limit]): * * IO.readlines('t.txt', 7) * # => ["First l", "ine\n", "Second ", "line\n", "\n", "Third l", "ine\n", "Fourth ", "line\n"] * - * With arguments +sep+ and +limit+ given, - * parses lines as determined by the given - * line separator and the given line-length limit - * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]): + * With arguments +sep+ and +limit+ given, + * combines the two behaviors + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]). * * Optional keyword arguments +opts+ specify: * * - {Open Options}[rdoc-ref:IO@Open+Options]. * - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. - * - {Line Options}[rdoc-ref:IO@Line+Options]. + * - {Line Options}[rdoc-ref:IO@Line+IO]. * */ @@ -12135,7 +12121,6 @@ seek_before_access(VALUE argp) /* * call-seq: - * IO.read(command, length = nil, offset = 0, **opts) -> string or nil * IO.read(path, length = nil, offset = 0, **opts) -> string or nil * * Opens the stream, reads and returns some or all of its content, @@ -12145,18 +12130,7 @@ seek_before_access(VALUE argp) * this method has potential security vulnerabilities if called with untrusted input; * see {Command Injection}[rdoc-ref:command_injection.rdoc]. * - * The first argument must be a string; - * its meaning depends on whether it starts with the pipe character (<tt>'|'</tt>): - * - * - If so (and if +self+ is \IO), - * the rest of the string is a command to be executed as a subprocess. - * - Otherwise, the string is the path to a file. 
- * - * With only argument +command+ given, executes the command in a shell, - * returns its entire $stdout: - * - * IO.read('| cat t.txt') - * # => "First line\nSecond line\n\nThird line\nFourth line\n" + * The first argument must be a string that is the path to a file. * * With only argument +path+ given, reads in text mode and returns the entire content * of the file at the given path: @@ -12168,8 +12142,6 @@ seek_before_access(VALUE argp) * unread when encountering certain special bytes. Consider using * IO.binread if all bytes in the file should be read. * - * For both forms, command and path, the remaining arguments are the same. - * * With argument +length+, returns +length+ bytes if available: * * IO.read('t.txt', 7) # => "First l" @@ -12220,7 +12192,6 @@ rb_io_s_read(int argc, VALUE *argv, VALUE io) /* * call-seq: - * IO.binread(command, length = nil, offset = 0) -> string or nil * IO.binread(path, length = nil, offset = 0) -> string or nil * * Behaves like IO.read, except that the stream is opened in binary mode @@ -12325,7 +12296,6 @@ io_s_write(int argc, VALUE *argv, VALUE klass, int binary) /* * call-seq: - * IO.write(command, data, **opts) -> integer * IO.write(path, data, offset = 0, **opts) -> integer * * Opens the stream, writes the given +data+ to it, @@ -12335,25 +12305,9 @@ io_s_write(int argc, VALUE *argv, VALUE klass, int binary) * this method has potential security vulnerabilities if called with untrusted input; * see {Command Injection}[rdoc-ref:command_injection.rdoc]. * - * The first argument must be a string; - * its meaning depends on whether it starts with the pipe character (<tt>'|'</tt>): - * - * - If so (and if +self+ is \IO), - * the rest of the string is a command to be executed as a subprocess. - * - Otherwise, the string is the path to a file. 
- * - * With argument +command+ given, executes the command in a shell, - * passes +data+ through standard input, writes its output to $stdout, - * and returns the length of the given +data+: + * The first argument must be a string that is the path to a file. * - * IO.write('| cat', 'Hello World!') # => 12 - * - * Output: - * - * Hello World! - * - * With argument +path+ given, writes the given +data+ to the file - * at that path: + * With only argument +path+ given, writes the given +data+ to the file at that path: * * IO.write('t.tmp', 'abc') # => 3 * File.read('t.tmp') # => "abc" @@ -12392,7 +12346,6 @@ rb_io_s_write(int argc, VALUE *argv, VALUE io) /* * call-seq: - * IO.binwrite(command, string, offset = 0) -> integer * IO.binwrite(path, string, offset = 0) -> integer * * Behaves like IO.write, except that the stream is opened in binary mode @@ -13269,7 +13222,7 @@ copy_stream_body(VALUE arg) rb_str_resize(str,len); read_buffered_data(RSTRING_PTR(str), len, stp->src_fptr); if (stp->dst_fptr) { /* IO or filename */ - if (io_binwrite(str, RSTRING_PTR(str), RSTRING_LEN(str), stp->dst_fptr, 0) < 0) + if (io_binwrite(RSTRING_PTR(str), RSTRING_LEN(str), stp->dst_fptr, 0) < 0) rb_sys_fail_on_write(stp->dst_fptr); } else /* others such as StringIO */ @@ -13291,7 +13244,7 @@ copy_stream_body(VALUE arg) return copy_stream_fallback(stp); } - rb_thread_call_without_gvl(nogvl_copy_stream_func, (void*)stp, RUBY_UBF_IO, 0); + IO_WITHOUT_GVL(nogvl_copy_stream_func, stp); return Qnil; } @@ -14618,14 +14571,14 @@ argf_write_io(VALUE argf) /* * call-seq: - * ARGF.write(string) -> integer + * ARGF.write(*objects) -> integer * - * Writes _string_ if inplace mode. + * Writes each of the given +objects+ if inplace mode. 
*/ static VALUE -argf_write(VALUE argf, VALUE str) +argf_write(int argc, VALUE *argv, VALUE argf) { - return rb_io_write(argf_write_io(argf), str); + return rb_io_writev(argf_write_io(argf), argc, argv); } void @@ -14731,42 +14684,253 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) /* * Document-class: ARGF * - * ARGF is a stream designed for use in scripts that process files given as - * command-line arguments or passed in via STDIN. + * == \ARGF and +ARGV+ + * + * The \ARGF object works with the array at global variable +ARGV+ + * to make <tt>$stdin</tt> and file streams available in the Ruby program: + * + * - **ARGV** may be thought of as the <b>argument vector</b> array. + * + * Initially, it contains the command-line arguments and options + * that are passed to the Ruby program; + * the program can modify that array as it likes. + * + * - **ARGF** may be thought of as the <b>argument files</b> object. + * + * It can access file streams and/or the <tt>$stdin</tt> stream, + * based on what it finds in +ARGV+. + * This provides a convenient way for the command line + * to specify streams for a Ruby program to read. + * + * == Reading + * + * \ARGF may read from _source_ streams, + * which at any particular time are determined by the content of +ARGV+. + * + * === Simplest Case + * + * When the <i>very first</i> \ARGF read occurs with an empty +ARGV+ (<tt>[]</tt>), + * the source is <tt>$stdin</tt>: + * + * - \File +t.rb+: + * + * p ['ARGV', ARGV] + * p ['ARGF.read', ARGF.read] + * + * - Commands and outputs + * (see below for the content of files +foo.txt+ and +bar.txt+): + * + * $ echo "Open the pod bay doors, Hal." 
| ruby t.rb + * ["ARGV", []] + * ["ARGF.read", "Open the pod bay doors, Hal.\n"] + * + * $ cat foo.txt bar.txt | ruby t.rb + * ["ARGV", []] + * ["ARGF.read", "Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n"] + * + * === About the Examples + * + * Many examples here assume the existence of files +foo.txt+ and +bar.txt+: + * + * $ cat foo.txt + * Foo 0 + * Foo 1 + * $ cat bar.txt + * Bar 0 + * Bar 1 + * Bar 2 + * Bar 3 + * + * === Sources in +ARGV+ + * + * For any \ARGF read _except_ the {simplest case}[rdoc-ref:ARGF@Simplest+Case] + * (that is, _except_ for the <i>very first</i> \ARGF read with an empty +ARGV+), + * the sources are found in +ARGV+. + * + * \ARGF assumes that each element in array +ARGV+ is a potential source, + * and is one of: + * + * - The string path to a file that may be opened as a stream. + * - The character <tt>'-'</tt>, meaning stream <tt>$stdin</tt>. + * + * Each element that is _not_ one of these + * should be removed from +ARGV+ before \ARGF accesses that source. + * + * In the following example: + * + * - Filepaths +foo.txt+ and +bar.txt+ may be retained as potential sources. + * - Options <tt>--xyzzy</tt> and <tt>--mojo</tt> should be removed. + * + * Example: + * + * - \File +t.rb+: + * + * # Print arguments (and options, if any) found on command line. + * p ['ARGV', ARGV] + * + * - Command and output: + * + * $ ruby t.rb --xyzzy --mojo foo.txt bar.txt + * ["ARGV", ["--xyzzy", "--mojo", "foo.txt", "bar.txt"]] + * + * \ARGF's stream access considers the elements of +ARGV+, left to right: + * + * - \File +t.rb+: + * + * p "ARGV: #{ARGV}" + * p "Read: #{ARGF.read}" # Read everything from all specified streams. 
+ * + * - Command and output: + * + * $ ruby t.rb foo.txt bar.txt + * "ARGV: [\"foo.txt\", \"bar.txt\"]" + * "Read: Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n" + * + * Because the value at +ARGV+ is an ordinary array, + * you can manipulate it to control which sources \ARGF considers: + * + * - If you remove an element from +ARGV+, \ARGF will not consider the corresponding source. + * - If you add an element to +ARGV+, \ARGF will consider the corresponding source. + * + * Each element in +ARGV+ is removed when its corresponding source is accessed; + * when all sources have been accessed, the array is empty: + * + * - \File +t.rb+: * - * The arguments passed to your script are stored in the +ARGV+ Array, one - * argument per element. ARGF assumes that any arguments that aren't - * filenames have been removed from +ARGV+. For example: + * until ARGV.empty? && ARGF.eof? + * p "ARGV: #{ARGV}" + * p "Line: #{ARGF.readline}" # Read each line from each specified stream. + * end + * + * - Command and output: + * + * $ ruby t.rb foo.txt bar.txt + * "ARGV: [\"foo.txt\", \"bar.txt\"]" + * "Line: Foo 0\n" + * "ARGV: [\"bar.txt\"]" + * "Line: Foo 1\n" + * "ARGV: [\"bar.txt\"]" + * "Line: Bar 0\n" + * "ARGV: []" + * "Line: Bar 1\n" + * "ARGV: []" + * "Line: Bar 2\n" + * "ARGV: []" + * "Line: Bar 3\n" + * + * ==== Filepaths in +ARGV+ + * + * The +ARGV+ array may contain filepaths that specify sources for \ARGF reading. + * + * This program prints what it reads from files at the paths specified + * on the command line: + * + * - \File +t.rb+: + * + * p ['ARGV', ARGV] + * # Read and print all content from the specified sources. 
+ * p ['ARGF.read', ARGF.read] + * + * - Command and output: + * + * $ ruby t.rb foo.txt bar.txt + * ["ARGV", ["foo.txt", "bar.txt"]] + * ["ARGF.read", "Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n"] + * + * ==== Specifying <tt>$stdin</tt> in +ARGV+ + * + * To specify stream <tt>$stdin</tt> in +ARGV+, use the character <tt>'-'</tt>: + * + * - \File +t.rb+: + * + * p ['ARGV', ARGV] + * p ['ARGF.read', ARGF.read] + * + * - Command and output: + * + * $ echo "Open the pod bay doors, Hal." | ruby t.rb - + * ["ARGV", ["-"]] + * ["ARGF.read", "Open the pod bay doors, Hal.\n"] + * + * When no character <tt>'-'</tt> is given, stream <tt>$stdin</tt> is ignored + * (exception: + * see {Specifying $stdin in ARGV}[rdoc-ref:ARGF@Specifying+-24stdin+in+ARGV]): + * + * - Command and output: * - * $ ruby argf.rb --verbose file1 file2 + * $ echo "Open the pod bay doors, Hal." | ruby t.rb foo.txt bar.txt + * "ARGV: [\"foo.txt\", \"bar.txt\"]" + * "Read: Foo 0\nFoo 1\nBar 0\nBar 1\nBar 2\nBar 3\n" * - * ARGV #=> ["--verbose", "file1", "file2"] - * option = ARGV.shift #=> "--verbose" - * ARGV #=> ["file1", "file2"] + * ==== Mixtures and Repetitions in +ARGV+ * - * You can now use ARGF to work with a concatenation of each of these named - * files. For instance, ARGF.read will return the contents of _file1_ - * followed by the contents of _file2_. + * For an \ARGF reader, +ARGV+ may contain any mixture of filepaths + * and character <tt>'-'</tt>, including repetitions. * - * After a file in +ARGV+ has been read ARGF removes it from the Array. - * Thus, after all files have been read +ARGV+ will be empty. + * ==== Modifications to +ARGV+ * - * You can manipulate +ARGV+ yourself to control what ARGF operates on. If - * you remove a file from +ARGV+, it is ignored by ARGF; if you add files to - * +ARGV+, they are treated as if they were named on the command line. 
For - * example: + * The running Ruby program may make any modifications to the +ARGV+ array; + * the current value of +ARGV+ affects \ARGF reading. * - * ARGV.replace ["file1"] - * ARGF.readlines # Returns the contents of file1 as an Array - * ARGV #=> [] - * ARGV.replace ["file2", "file3"] - * ARGF.read # Returns the contents of file2 and file3 + * ==== Empty +ARGV+ * - * If +ARGV+ is empty, ARGF acts as if it contained STDIN, i.e. the data - * piped to your script. For example: + * For an empty +ARGV+, an \ARGF read method either returns +nil+ + * or raises an exception, depending on the specific method. + * + * === More Read Methods + * + * As seen above, method ARGF#read reads the content of all sources + * into a single string. + * Other \ARGF methods provide other ways to access that content; + * these include: + * + * - Byte access: #each_byte, #getbyte, #readbyte. + * - Character access: #each_char, #getc, #readchar. + * - Codepoint access: #each_codepoint. + * - Line access: #each_line, #gets, #readline, #readlines. + * - Source access: #read, #read_nonblock, #readpartial. + * + * === About \Enumerable + * + * \ARGF includes module Enumerable. + * Virtually all methods in \Enumerable call method <tt>#each</tt> in the including class. + * + * <b>Note well</b>: In \ARGF, method #each returns data from the _sources_, + * _not_ from +ARGV+; + * therefore, for example, <tt>ARGF#entries</tt> returns an array of lines from the sources, + * not an array of the strings from +ARGV+: + * + * - \File +t.rb+: + * + * p ['ARGV', ARGV] + * p ['ARGF.entries', ARGF.entries] + * + * - Command and output: + * + * $ ruby t.rb foo.txt bar.txt + * ["ARGV", ["foo.txt", "bar.txt"]] + * ["ARGF.entries", ["Foo 0\n", "Foo 1\n", "Bar 0\n", "Bar 1\n", "Bar 2\n", "Bar 3\n"]] + * + * == Writing + * + * If <i>inplace mode</i> is in effect, + * \ARGF may write to target streams, + * which at any particular time are determined by the content of ARGV. 
+ * + * Methods about inplace mode: + * + * - #inplace_mode + * - #inplace_mode= + * - #to_write_io + * + * Methods for writing: + * + * - #print + * - #printf + * - #putc + * - #puts + * - #write * - * $ echo "glark" | ruby -e 'p ARGF.read' - * "glark\n" */ /* @@ -14784,7 +14948,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * ARGF is not itself a subclass of \IO. * * \Class StringIO provides an IO-like stream that handles a String. - * \StringIO is not itself a subclass of \IO. + * StringIO is not itself a subclass of \IO. * * Important objects based on \IO include: * @@ -14802,7 +14966,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * - Kernel#open: Returns a new \IO object connected to a given source: * stream, file, or subprocess. * - * Like a \File stream, an \IO stream has: + * Like a File stream, an \IO stream has: * * - A read/write mode, which may be read-only, write-only, or read/write; * see {Read/Write Mode}[rdoc-ref:File@Read-2FWrite+Mode]. @@ -14838,7 +15002,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * that determine how a new stream is to be opened: * * - +:mode+: Stream mode. - * - +:flags+: \Integer file open flags; + * - +:flags+: Integer file open flags; * If +mode+ is also given, the two are bitwise-ORed. * - +:external_encoding+: External encoding for the stream. * - +:internal_encoding+: Internal encoding for the stream. @@ -14853,7 +15017,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * #path method. * * Also available are the options offered in String#encode, - * which may control conversion between external internal encoding. + * which may control conversion between external and internal encoding. 
* * == Basic \IO * @@ -14918,56 +15082,64 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * * == Line \IO * - * You can read an \IO stream line-by-line using these methods: + * \Class \IO supports line-oriented + * {input}[rdoc-ref:IO@Line+Input] and {output}[rdoc-ref:IO@Line+Output] * - * - IO#each_line: Reads each remaining line, passing it to the given block. - * - IO#gets: Returns the next line. - * - IO#readline: Like #gets, but raises an exception at end-of-stream. - * - IO#readlines: Returns all remaining lines in an array. + * === Line Input + * + * \Class \IO supports line-oriented input for + * {files}[rdoc-ref:IO@File+Line+Input] and {IO streams}[rdoc-ref:IO@Stream+Line+Input] + * + * ==== \File Line Input + * + * You can read lines from a file using these methods: * - * Each of these reader methods accepts: + * - IO.foreach: Reads each line and passes it to the given block. + * - IO.readlines: Reads and returns all lines in an array. * - * - An optional line separator, +sep+; + * For each of these methods: + * + * - You can specify {open options}[rdoc-ref:IO@Open+Options]. + * - Line parsing depends on the effective <i>line separator</i>; * see {Line Separator}[rdoc-ref:IO@Line+Separator]. - * - An optional line-size limit, +limit+; + * - The length of each returned line depends on the effective <i>line limit</i>; * see {Line Limit}[rdoc-ref:IO@Line+Limit]. * - * For each of these reader methods, reading may begin mid-line, - * depending on the stream's position; - * see {Position}[rdoc-ref:IO@Position]: - * - * f = File.new('t.txt') - * f.pos = 27 - * f.each_line {|line| p line } - * f.close + * ==== Stream Line Input * - * Output: + * You can read lines from an \IO stream using these methods: * - * "rth line\n" - * "Fifth line\n" + * - IO#each_line: Reads each remaining line, passing it to the given block. + * - IO#gets: Returns the next line. + * - IO#readline: Like #gets, but raises an exception at end-of-stream. 
+ * - IO#readlines: Returns all remaining lines in an array. * - * You can write to an \IO stream line-by-line using this method: + * For each of these methods: * - * - IO#puts: Writes objects to the stream. + * - Reading may begin mid-line, + * depending on the stream's _position_; + * see {Position}[rdoc-ref:IO@Position]. + * - Line parsing depends on the effective <i>line separator</i>; + * see {Line Separator}[rdoc-ref:IO@Line+Separator]. + * - The length of each returned line depends on the effective <i>line limit</i>; + * see {Line Limit}[rdoc-ref:IO@Line+Limit]. * - * === Line Separator + * ===== Line Separator * - * Each of these methods uses a <i>line separator</i>, - * which is the string that delimits lines: + * Each of the {line input methods}[rdoc-ref:IO@Line+Input] uses a <i>line separator</i>: + * the string that determines what is considered a line; + * it is sometimes called the <i>input record separator</i>. * - * - IO.foreach. - * - IO.readlines. - * - IO#each_line. - * - IO#gets. - * - IO#readline. - * - IO#readlines. + * The default line separator is taken from global variable <tt>$/</tt>, + * whose initial value is <tt>"\n"</tt>. * - * The default line separator is the given by the global variable <tt>$/</tt>, - * whose value is by default <tt>"\n"</tt>. - * The line to be read next is all data from the current position - * to the next line separator: + * Generally, the line to be read next is all data + * from the current {position}[rdoc-ref:IO@Position] + * to the next line separator + * (but see {Special Line Separator Values}[rdoc-ref:IO@Special+Line+Separator+Values]): * * f = File.new('t.txt') + * # Method gets with no sep argument returns the next line, according to $/. 
* f.gets # => "First line\n" * f.gets # => "Second line\n" * f.gets # => "\n" @@ -14975,7 +15147,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * f.gets # => "Fifth line\n" * f.close * - * You can specify a different line separator: + * You can use a different line separator by passing argument +sep+: * * f = File.new('t.txt') * f.gets('l') # => "First l" @@ -14984,15 +15156,27 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * f.gets # => "e\n" * f.close * - * There are two special line separators: + * Or by setting global variable <tt>$/</tt>: + * + * f = File.new('t.txt') + * $/ = 'l' + * f.gets # => "First l" + * f.gets # => "ine\nSecond l" + * f.gets # => "ine\n\nFourth l" + * f.close + * + * ===== Special Line Separator Values + * + * Each of the {line input methods}[rdoc-ref:IO@Line+Input] + * accepts two special values for parameter +sep+: * - * - +nil+: The entire stream is read into a single string: + * - +nil+: The entire stream is to be read ("slurped") into a single string: * * f = File.new('t.txt') * f.gets(nil) # => "First line\nSecond line\n\nFourth line\nFifth line\n" * f.close * - * - <tt>''</tt> (the empty string): The next "paragraph" is read + * - <tt>''</tt> (the empty string): The next "paragraph" is to be read * (paragraphs being separated by two consecutive line separators): * * f = File.new('t.txt') @@ -15000,23 +15184,18 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * f.gets('') # => "Fourth line\nFifth line\n" * f.close * - * === Line Limit + * ===== Line Limit * - * Each of these methods uses a <i>line limit</i>, - * which specifies that the number of bytes returned may not be (much) longer - * than the given +limit+; + * Each of the {line input methods}[rdoc-ref:IO@Line+Input] + * uses an integer <i>line limit</i>, + * which restricts the number of bytes that may be returned. + * (A multi-byte character will not be split, and so a returned line may be slightly longer + * than the limit). * - * - IO.foreach. 
- * - IO.readlines. - * - IO#each_line. - * - IO#gets. - * - IO#readline. - * - IO#readlines. + * The default limit value is <tt>-1</tt>; + * any negative limit value means that there is no limit. * - * A multi-byte character will not be split, and so a line may be slightly longer - * than the given limit. - * - * If +limit+ is not given, the line is determined only by +sep+. + * If there is no limit, the line is determined only by +sep+. * * # Text with 1-byte characters. * File.open('t.txt') {|f| f.gets(1) } # => "F" @@ -15034,24 +15213,21 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * File.open('t.rus') {|f| f.gets(3).size } # => 2 * File.open('t.rus') {|f| f.gets(4).size } # => 2 * - * === Line Separator and Line Limit + * ===== Line Separator and Line Limit * - * With arguments +sep+ and +limit+ given, - * combines the two behaviors: + * With arguments +sep+ and +limit+ given, combines the two behaviors: * * - Returns the next line as determined by line separator +sep+. - * - But returns no more bytes than are allowed by the limit. + * - But returns no more bytes than are allowed by the limit +limit+. * * Example: * * File.open('t.txt') {|f| f.gets('li', 20) } # => "First li" * File.open('t.txt') {|f| f.gets('li', 2) } # => "Fi" * - * === Line Number + * ===== Line Number * - * A readable \IO stream has a non-negative integer <i>line number</i>. - * - * The relevant methods: + * A readable \IO stream has a non-negative integer <i>line number</i>: * * - IO#lineno: Returns the line number. * - IO#lineno=: Resets and returns the line number. @@ -15059,7 +15235,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * Unless modified by a call to method IO#lineno=, * the line number is the number of lines read * by certain line-oriented methods, - * according to the given line separator +sep+: + * according to the effective {line separator}[rdoc-ref:IO@Line+Separator]: * * - IO.foreach: Increments the line number on each call to the block. 
* - IO#each_line: Increments the line number on each call to the block. @@ -15128,7 +15304,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * # => 41 * f.close * - * - When a stream is read, <tt>#.</tt> is set to the line number for that stream: + * - When a stream is read, <tt>$.</tt> is set to the line number for that stream: * * f0 = File.new('t.txt') * f1 = File.new('t.dat') @@ -15149,6 +15325,12 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * $. # => 5 * f.close * + * === Line Output + * + * You can write to an \IO stream line-by-line using this method: + * + * - IO#puts: Writes objects to the stream. + * * == Character \IO * * You can process an \IO stream character-by-character using these methods: @@ -15380,8 +15562,11 @@ Init_IO(void) /* Can be raised by IO operations when IO#timeout= is set. */ rb_eIOTimeoutError = rb_define_class_under(rb_cIO, "TimeoutError", rb_eIOError); + /* Readable event mask for IO#wait. */ rb_define_const(rb_cIO, "READABLE", INT2NUM(RUBY_IO_READABLE)); + /* Writable event mask for IO#wait. */ rb_define_const(rb_cIO, "WRITABLE", INT2NUM(RUBY_IO_WRITABLE)); + /* Priority event mask for IO#wait. */ rb_define_const(rb_cIO, "PRIORITY", INT2NUM(RUBY_IO_PRIORITY)); /* exception to wait for reading. see IO.select. 
*/ @@ -15444,7 +15629,7 @@ Init_IO(void) rb_define_hooked_variable("$,", &rb_output_fs, 0, deprecated_str_setter); rb_default_rs = rb_fstring_lit("\n"); /* avoid modifying RS_default */ - rb_gc_register_mark_object(rb_default_rs); + rb_vm_register_global_object(rb_default_rs); rb_rs = rb_default_rs; rb_output_rs = Qnil; rb_define_hooked_variable("$/", &rb_rs, 0, deprecated_str_setter); @@ -15495,7 +15680,6 @@ Init_IO(void) rb_define_method(rb_cIO, "read", io_read, -1); rb_define_method(rb_cIO, "write", io_write_m, -1); rb_define_method(rb_cIO, "gets", rb_io_gets_m, -1); - rb_define_method(rb_cIO, "readline", rb_io_readline, -1); rb_define_method(rb_cIO, "getc", rb_io_getc, 0); rb_define_method(rb_cIO, "getbyte", rb_io_getbyte, 0); rb_define_method(rb_cIO, "readchar", rb_io_readchar, 0); @@ -15639,7 +15823,7 @@ Init_IO(void) rb_define_method(rb_cARGF, "binmode", argf_binmode_m, 0); rb_define_method(rb_cARGF, "binmode?", argf_binmode_p, 0); - rb_define_method(rb_cARGF, "write", argf_write, 1); + rb_define_method(rb_cARGF, "write", argf_write, -1); rb_define_method(rb_cARGF, "print", rb_io_print, -1); rb_define_method(rb_cARGF, "putc", rb_io_putc, 1); rb_define_method(rb_cARGF, "puts", rb_io_puts, -1); |