diff options
Diffstat (limited to 'ext/stringio/stringio.c')
| -rw-r--r-- | ext/stringio/stringio.c | 1557 |
1 files changed, 1202 insertions, 355 deletions
diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index 34e0a1dd19..cc2294a795 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -1,9 +1,9 @@ +/* -*- mode: c; indent-tabs-mode: t -*- */ /********************************************************************** stringio.c - $Author$ - $Date$ $RoughId: stringio.c,v 1.13 2002/03/14 03:24:18 nobu Exp $ created at: Tue Feb 19 04:10:38 JST 2002 @@ -12,31 +12,72 @@ **********************************************************************/ +static const char *const +STRINGIO_VERSION = "3.2.1.dev"; + +#include <stdbool.h> + #include "ruby.h" #include "ruby/io.h" +#include "ruby/encoding.h" +#include "ruby/version.h" #if defined(HAVE_FCNTL_H) || defined(_WIN32) #include <fcntl.h> #elif defined(HAVE_SYS_FCNTL_H) #include <sys/fcntl.h> #endif +#ifndef RB_INTEGER_TYPE_P +# define RB_INTEGER_TYPE_P(c) (FIXNUM_P(c) || RB_TYPE_P(c, T_BIGNUM)) +#endif + +#ifndef RB_PASS_CALLED_KEYWORDS +# define rb_funcallv_kw(recv, mid, arg, argv, kw_splat) rb_funcallv(recv, mid, arg, argv) +# define rb_class_new_instance_kw(argc, argv, klass, kw_splat) rb_class_new_instance(argc, argv, klass) +#endif + +static inline bool +str_chilled_p(VALUE str) +{ +#if (RUBY_API_VERSION_MAJOR == 3 && RUBY_API_VERSION_MINOR >= 4) || RUBY_API_VERSION_MAJOR >= 4 + // Do not attempt to modify chilled strings on Ruby 3.4+ + // RUBY_FL_USER2 == STR_CHILLED_LITERAL + // RUBY_FL_USER3 == STR_CHILLED_SYMBOL_TO_S + return FL_TEST_RAW(str, RUBY_FL_USER2 | RUBY_FL_USER3); +#else + return false; +#endif +} + +#ifndef HAVE_TYPE_RB_IO_MODE_T +typedef int rb_io_mode_t; +#endif + struct StringIO { VALUE string; + rb_encoding *enc; long pos; long lineno; - int flags; + rb_io_mode_t flags; int count; }; -static void strio_mark _((struct StringIO *)); -static void strio_free _((struct StringIO *)); -static void strio_init(int, VALUE *, struct StringIO *); +static VALUE strio_init(int, VALUE *, struct StringIO *, VALUE); +static VALUE strio_unget_bytes(struct StringIO *, const char *, long); +static long strio_write(VALUE self, VALUE str); + +#define IS_STRIO(obj) (rb_typeddata_is_kind_of((obj), &strio_data_type)) +#define error_inval(msg) (rb_syserr_fail(EINVAL, msg)) +#define get_enc(ptr) ((ptr)->enc ? (ptr)->enc : !NIL_P((ptr)->string) ? rb_enc_get((ptr)->string) : NULL) -#define IS_STRIO(obj) (RDATA(obj)->dmark == (RUBY_DATA_FUNC)strio_mark) -#define error_inval(msg) (errno = EINVAL, rb_sys_fail(msg)) +static bool +readonly_string_p(VALUE string) +{ + return OBJ_FROZEN_RAW(string); +} static struct StringIO * -strio_alloc() +strio_alloc(void) { struct StringIO *ptr = ALLOC(struct StringIO); ptr->string = Qnil; @@ -48,36 +89,44 @@ strio_alloc() } static void -strio_mark(struct StringIO *ptr) +strio_mark(void *p) { - if (ptr) { - rb_gc_mark(ptr->string); - } + struct StringIO *ptr = p; + + rb_gc_mark(ptr->string); } static void -strio_free(struct StringIO *ptr) +strio_free(void *p) { + struct StringIO *ptr = p; if (--ptr->count <= 0) { xfree(ptr); } } -static struct StringIO* -check_strio(VALUE self) +static size_t +strio_memsize(const void *p) { - Check_Type(self, T_DATA); - if (!IS_STRIO(self)) { - rb_raise(rb_eTypeError, "wrong argument type %s (expected StringIO)", - rb_class2name(CLASS_OF(self))); - } - return DATA_PTR(self); + return sizeof(struct StringIO); } +static const rb_data_type_t strio_data_type = { + "strio", + { + strio_mark, + strio_free, + strio_memsize, + }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED +}; + +#define check_strio(self) ((struct StringIO*)rb_check_typeddata((self), &strio_data_type)) + static struct StringIO* get_strio(VALUE self) { - struct StringIO *ptr = check_strio(self); + struct StringIO *ptr = check_strio(rb_io_taint_check(self)); if (!ptr) { rb_raise(rb_eIOError, "uninitialized stream"); @@ -86,62 +135,122 @@ get_strio(VALUE self) } static VALUE -strio_substr(struct StringIO *ptr, int pos, int len) +enc_subseq(VALUE str, long pos, long len, rb_encoding *enc) +{ + str = rb_str_subseq(str, pos, len); + rb_enc_associate(str, enc); + return str; +} + +static VALUE +strio_substr(struct StringIO *ptr, long pos, long len, rb_encoding *enc) { VALUE str = ptr->string; - rb_encoding *enc = rb_enc_get(str); - int rlen = RSTRING_LEN(str) - pos; + long rlen = RSTRING_LEN(str) - pos; if (len > rlen) len = rlen; if (len < 0) len = 0; - return rb_enc_str_new(RSTRING_PTR(str)+pos, len, enc); + if (len == 0) return rb_enc_str_new(0, 0, enc); + return enc_subseq(str, pos, len, enc); } #define StringIO(obj) get_strio(obj) -#define CLOSED(ptr) (!((ptr)->flags & FMODE_READWRITE)) -#define READABLE(ptr) ((ptr)->flags & FMODE_READABLE) -#define WRITABLE(ptr) ((ptr)->flags & FMODE_WRITABLE) +#define STRIO_READABLE FL_USER4 +#define STRIO_WRITABLE FL_USER5 +#define STRIO_READWRITE (STRIO_READABLE|STRIO_WRITABLE) +typedef char strio_flags_check[(STRIO_READABLE/FMODE_READABLE == STRIO_WRITABLE/FMODE_WRITABLE) * 2 - 1]; +#define STRIO_MODE_SET_P(strio, mode) \ + ((RBASIC(strio)->flags & STRIO_##mode) && \ + ((struct StringIO*)DATA_PTR(strio))->flags & FMODE_##mode) +#define CLOSED(strio) (!STRIO_MODE_SET_P(strio, READWRITE)) +#define READABLE(strio) STRIO_MODE_SET_P(strio, READABLE) +#define WRITABLE(strio) STRIO_MODE_SET_P(strio, WRITABLE) + +static VALUE sym_exception; static struct StringIO* -readable(struct StringIO *ptr) +readable(VALUE strio) { - if (!READABLE(ptr)) { + struct StringIO *ptr = StringIO(strio); + if (!READABLE(strio)) { rb_raise(rb_eIOError, "not opened for reading"); } return ptr; } static struct StringIO* -writable(struct StringIO *ptr) +writable(VALUE strio) { - if (!WRITABLE(ptr)) { + struct StringIO *ptr = StringIO(strio); + if (!WRITABLE(strio)) { rb_raise(rb_eIOError, "not opened for writing"); } - if (!OBJ_TAINTED(ptr->string)) { - rb_secure(4); - } return ptr; } static void check_modifiable(struct StringIO *ptr) { - if (OBJ_FROZEN(ptr->string)) { + if (NIL_P(ptr->string)) { + /* Null device StringIO */ + } + else if (OBJ_FROZEN_RAW(ptr->string)) { rb_raise(rb_eIOError, "not modifiable string"); } + else { + rb_str_modify(ptr->string); + } +} + +static inline bool +outside_p(struct StringIO *ptr, long pos) +{ + return NIL_P(ptr->string) || pos >= RSTRING_LEN(ptr->string); +} + +static inline bool +eos_p(struct StringIO *ptr) +{ + return outside_p(ptr, ptr->pos); } static VALUE strio_s_allocate(VALUE klass) { - return Data_Wrap_Struct(klass, strio_mark, strio_free, 0); + return TypedData_Wrap_Struct(klass, &strio_data_type, 0); } /* - * call-seq: StringIO.new(string=""[, mode]) + * call-seq: + * StringIO.new(string = '', mode = 'r+') -> new_stringio * - * Creates new StringIO instance from with _string_ and _mode_. + * Returns a new \StringIO instance formed from +string+ and +mode+; + * the instance should be closed when no longer needed: + * + * strio = StringIO.new + * strio.string # => "" + * strio.closed_read? # => false + * strio.closed_write? # => false + * strio.close + * + * If +string+ is frozen, the default +mode+ is <tt>'r'</tt>: + * + * strio = StringIO.new('foo'.freeze) + * strio.string # => "foo" + * strio.closed_read? # => false + * strio.closed_write? # => true + * strio.close + * + * Argument +mode+ must be a valid + * {Access Mode}[rdoc-ref:File@Access+Modes], + * which may be a string or an integer constant: + * + * StringIO.new('foo', 'w+') + * StringIO.new('foo', File::RDONLY) + * + * Related: StringIO.open + * (passes the \StringIO object to the block; closes the object automatically on block exit). */ static VALUE strio_initialize(int argc, VALUE *argv, VALUE self) @@ -152,75 +261,170 @@ strio_initialize(int argc, VALUE *argv, VALUE self) DATA_PTR(self) = ptr = strio_alloc(); } rb_call_super(0, 0); - strio_init(argc, argv, ptr); - return self; + return strio_init(argc, argv, ptr, self); } -static void -strio_init(int argc, VALUE *argv, struct StringIO *ptr) +static int +detect_bom(VALUE str, int *bomlen) { - VALUE string, mode; - int trunc = Qfalse; - - switch (rb_scan_args(argc, argv, "02", &string, &mode)) { - case 2: - if (FIXNUM_P(mode)) { - int flags = FIX2INT(mode); - ptr->flags = rb_io_modenum_flags(flags); - trunc = flags & O_TRUNC; - } - else { - const char *m = StringValueCStr(mode); - ptr->flags = rb_io_mode_flags(m); - trunc = *m == 'w'; - } - StringValue(string); - if ((ptr->flags & FMODE_WRITABLE) && OBJ_FROZEN(string)) { - errno = EACCES; - rb_sys_fail(0); + const char *p; + long len; + + RSTRING_GETMEM(str, p, len); + if (len < 1) return 0; + switch ((unsigned char)p[0]) { + case 0xEF: + if (len < 2) break; + if ((unsigned char)p[1] == 0xBB && len > 2) { + if ((unsigned char)p[2] == 0xBF) { + *bomlen = 3; + return rb_utf8_encindex(); + } } - if (trunc) { - rb_str_resize(string, 0); + break; + + case 0xFE: + if (len < 2) break; + if ((unsigned char)p[1] == 0xFF) { + *bomlen = 2; + return rb_enc_find_index("UTF-16BE"); } break; - case 1: - StringValue(string); - ptr->flags = OBJ_FROZEN(string) ? FMODE_READABLE : FMODE_READWRITE; + + case 0xFF: + if (len < 2) break; + if ((unsigned char)p[1] == 0xFE) { + if (len >= 4 && (unsigned char)p[2] == 0 && (unsigned char)p[3] == 0) { + *bomlen = 4; + return rb_enc_find_index("UTF-32LE"); + } + *bomlen = 2; + return rb_enc_find_index("UTF-16LE"); + } break; + case 0: - string = rb_str_new("", 0); - ptr->flags = FMODE_READWRITE; + if (len < 4) break; + if ((unsigned char)p[1] == 0 && (unsigned char)p[2] == 0xFE && (unsigned char)p[3] == 0xFF) { + *bomlen = 4; + return rb_enc_find_index("UTF-32BE"); + } break; } - ptr->string = string; + return 0; +} + +static rb_encoding * +set_encoding_by_bom(struct StringIO *ptr) +{ + int bomlen, idx = detect_bom(ptr->string, &bomlen); + rb_encoding *extenc = NULL; + + if (idx) { + extenc = rb_enc_from_index(idx); + ptr->pos = bomlen; + if (ptr->flags & FMODE_WRITABLE) { + rb_enc_associate_index(ptr->string, idx); + } + } + ptr->enc = extenc; + return extenc; +} + +static VALUE +strio_init(int argc, VALUE *argv, struct StringIO *ptr, VALUE self) +{ + VALUE string, vmode, opt; + int oflags; + rb_io_enc_t convconfig; + + argc = rb_scan_args(argc, argv, "02:", &string, &vmode, &opt); + rb_io_extract_modeenc(&vmode, 0, opt, &oflags, &ptr->flags, &convconfig); + if (!NIL_P(string)) { + StringValue(string); + } + else if (!argc) { + string = rb_enc_str_new("", 0, rb_default_external_encoding()); + } + + if (!NIL_P(string) && readonly_string_p(string)) { + if (ptr->flags & FMODE_WRITABLE) { + rb_syserr_fail(EACCES, 0); + } + } + else { + if (NIL_P(vmode)) { + ptr->flags |= FMODE_WRITABLE; + } + } + if (!NIL_P(string) && (ptr->flags & FMODE_TRUNC)) { + rb_str_resize(string, 0); + } + RB_OBJ_WRITE(self, &ptr->string, string); + if (argc == 1 && !NIL_P(string)) { + ptr->enc = rb_enc_get(string); + } + else { + ptr->enc = convconfig.enc; + } + ptr->pos = 0; + ptr->lineno = 0; + if (ptr->flags & FMODE_SETENC_BY_BOM) set_encoding_by_bom(ptr); + RBASIC(self)->flags |= (ptr->flags & FMODE_READWRITE) * (STRIO_READABLE / FMODE_READABLE); + return self; } static VALUE strio_finalize(VALUE self) { struct StringIO *ptr = StringIO(self); - ptr->string = Qnil; + RB_OBJ_WRITE(self, &ptr->string, Qnil); ptr->flags &= ~FMODE_READWRITE; return self; } /* - * call-seq: StringIO.open(string=""[, mode]) {|strio| ...} + * call-seq: + * StringIO.open(string = '', mode = 'r+') -> new_stringio + * StringIO.open(string = '', mode = 'r+') {|strio| ... } -> object + * + * Creates new \StringIO instance by calling <tt>StringIO.new(string, mode)</tt>. + * + * With no block given, returns the new instance: + * + * strio = StringIO.open # => #<StringIO> + * + * With a block given, calls the block with the new instance + * and returns the block's value; + * closes the instance on block exit: * - * Equivalent to StringIO.new except that when it is called with a block, it - * yields with the new instance and closes it, and returns the result which - * returned from the block. + * StringIO.open('foo') {|strio| strio.string.upcase } # => "FOO" + * + * Related: StringIO.new. */ static VALUE strio_s_open(int argc, VALUE *argv, VALUE klass) { - VALUE obj = rb_class_new_instance(argc, argv, klass); + VALUE obj = rb_class_new_instance_kw(argc, argv, klass, RB_PASS_CALLED_KEYWORDS); if (!rb_block_given_p()) return obj; return rb_ensure(rb_yield, obj, strio_finalize, obj); } +/* :nodoc: */ +static VALUE +strio_s_new(int argc, VALUE *argv, VALUE klass) +{ + if (rb_block_given_p()) { + VALUE cname = rb_obj_as_string(klass); + + rb_warn("%"PRIsVALUE"::new() does not take block; use %"PRIsVALUE"::open() instead", + cname, cname); + } + return rb_class_new_instance_kw(argc, argv, klass, RB_PASS_CALLED_KEYWORDS); +} + /* - * Returns +false+. Just for compatibility to IO. + * Returns +false+; for compatibility with IO. */ static VALUE strio_false(VALUE self) @@ -230,7 +434,7 @@ strio_false(VALUE self) } /* - * Returns +nil+. Just for compatibility to IO. + * Returns +nil+; for compatibility with IO. */ static VALUE strio_nil(VALUE self) @@ -240,7 +444,7 @@ strio_nil(VALUE self) } /* - * Returns *strio* itself. Just for compatibility to IO. + * Returns +self+; for compatibility with IO. */ static VALUE strio_self(VALUE self) @@ -250,7 +454,7 @@ strio_self(VALUE self) } /* - * Returns 0. Just for compatibility to IO. + * Returns 0; for compatibility with IO. */ static VALUE strio_0(VALUE self) @@ -277,13 +481,28 @@ strio_unimpl(int argc, VALUE *argv, VALUE self) { StringIO(self); rb_notimplement(); - return Qnil; /* not reached */ + + UNREACHABLE; } /* - * call-seq: strio.string -> string + * call-seq: + * string -> string + * + * Returns underlying string: + * + * StringIO.open('foo') do |strio| + * p strio.string + * strio.string = 'bar' + * p strio.string + * end + * + * Output: * - * Returns underlying String object, the subject of IO. + * "foo" + * "bar" + * + * Related: StringIO#string= (assigns the underlying string). */ static VALUE strio_get_string(VALUE self) @@ -293,133 +512,210 @@ strio_get_string(VALUE self) /* * call-seq: - * strio.string = string -> string + * string = other_string -> other_string + * + * Replaces the stored string with +other_string+, and sets the position to zero; + * returns +other_string+: + * + * StringIO.open('foo') do |strio| + * p strio.string + * strio.string = 'bar' + * p strio.string + * end + * + * Output: * - * Changes underlying String object, the subject of IO. + * "foo" + * "bar" + * + * Related: StringIO#string (returns the stored string). */ static VALUE strio_set_string(VALUE self, VALUE string) { struct StringIO *ptr = StringIO(self); - if (!OBJ_TAINTED(self)) rb_secure(4); + rb_io_taint_check(self); ptr->flags &= ~FMODE_READWRITE; StringValue(string); - ptr->flags = OBJ_FROZEN(string) ? FMODE_READABLE : FMODE_READWRITE; + ptr->flags = readonly_string_p(string) ? FMODE_READABLE : FMODE_READWRITE; ptr->pos = 0; ptr->lineno = 0; - return ptr->string = string; + RB_OBJ_WRITE(self, &ptr->string, string); + return string; } /* * call-seq: - * strio.close -> nil + * close -> nil + * + * Closes +self+ for both reading and writing; returns +nil+: + * + * strio = StringIO.new + * strio.closed? # => false + * strio.close # => nil + * strio.closed? # => true + * strio.read # Raises IOError: not opened for reading + * strio.write # Raises IOError: not opened for writing * - * Closes strio. The *strio* is unavailable for any further data - * operations; an +IOError+ is raised if such an attempt is made. + * Related: StringIO#close_read, StringIO#close_write, StringIO.closed?. */ static VALUE strio_close(VALUE self) { - struct StringIO *ptr = StringIO(self); - if (CLOSED(ptr)) { - rb_raise(rb_eIOError, "closed stream"); - } - ptr->flags &= ~FMODE_READWRITE; + StringIO(self); + RBASIC(self)->flags &= ~STRIO_READWRITE; return Qnil; } /* * call-seq: - * strio.close_read -> nil + * close_read -> nil + * + * Closes +self+ for reading; + * closed-write setting remains unchanged; + * returns +nil+: + * + * strio = StringIO.new + * strio.closed_read? # => false + * strio.close_read # => nil + * strio.closed_read? # => true + * strio.closed_write? # => false + * strio.read # Raises IOError: not opened for reading * - * Closes the read end of a StringIO. Will raise an +IOError+ if the - * *strio* is not readable. + * Related: StringIO#close, StringIO#close_write. */ static VALUE strio_close_read(VALUE self) { struct StringIO *ptr = StringIO(self); - if (!READABLE(ptr)) { + if (!(ptr->flags & FMODE_READABLE)) { rb_raise(rb_eIOError, "closing non-duplex IO for reading"); } - ptr->flags &= ~FMODE_READABLE; + RBASIC(self)->flags &= ~STRIO_READABLE; return Qnil; } /* * call-seq: - * strio.close_write -> nil + * close_write -> nil * - * Closes the write end of a StringIO. Will raise an +IOError+ if the - * *strio* is not writeable. + * Closes +self+ for writing; closed-read setting remains unchanged; returns +nil+: + * + * strio = StringIO.new + * strio.closed_write? # => false + * strio.close_write # => nil + * strio.closed_write? # => true + * strio.closed_read? # => false + * strio.write('foo') # Raises IOError: not opened for writing + * + * Related: StringIO#close, StringIO#close_read, StringIO#closed_write?. */ static VALUE strio_close_write(VALUE self) { struct StringIO *ptr = StringIO(self); - if (!WRITABLE(ptr)) { + if (!(ptr->flags & FMODE_WRITABLE)) { rb_raise(rb_eIOError, "closing non-duplex IO for writing"); } - ptr->flags &= ~FMODE_WRITABLE; + RBASIC(self)->flags &= ~STRIO_WRITABLE; return Qnil; } /* * call-seq: - * strio.closed? -> true or false + * closed? -> true or false + * + * Returns whether +self+ is closed for both reading and writing: + * + * strio = StringIO.new + * strio.closed? # => false # Open for reading and writing. + * strio.close_read + * strio.closed? # => false # Still open for writing. + * strio.close_write + * strio.closed? # => true # Now closed for both. * - * Returns +true+ if *strio* is completely closed, +false+ otherwise. + * Related: StringIO.closed_read?, StringIO.closed_write?. */ static VALUE strio_closed(VALUE self) { - struct StringIO *ptr = StringIO(self); - if (!CLOSED(ptr)) return Qfalse; + StringIO(self); + if (!CLOSED(self)) return Qfalse; return Qtrue; } /* * call-seq: - * strio.closed_read? -> true or false + * closed_read? -> true or false + * + * Returns whether +self+ is closed for reading: * - * Returns +true+ if *strio* is not readable, +false+ otherwise. + * strio = StringIO.new + * strio.closed_read? # => false + * strio.close_read + * strio.closed_read? # => true + * + * Related: StringIO#closed?, StringIO#closed_write?, StringIO#close_read. */ static VALUE strio_closed_read(VALUE self) { - struct StringIO *ptr = StringIO(self); - if (READABLE(ptr)) return Qfalse; + StringIO(self); + if (READABLE(self)) return Qfalse; return Qtrue; } /* * call-seq: - * strio.closed_write? -> true or false + * closed_write? -> true or false + * + * Returns whether +self+ is closed for writing: * - * Returns +true+ if *strio* is not writable, +false+ otherwise. + * strio = StringIO.new + * strio.closed_write? # => false + * strio.close_write + * strio.closed_write? # => true + * + * Related: StringIO#close_write, StringIO#closed?, StringIO#closed_read?. */ static VALUE strio_closed_write(VALUE self) { - struct StringIO *ptr = StringIO(self); - if (WRITABLE(ptr)) return Qfalse; + StringIO(self); + if (WRITABLE(self)) return Qfalse; return Qtrue; } +static struct StringIO * +strio_to_read(VALUE self) +{ + struct StringIO *ptr = readable(self); + if (eos_p(ptr)) return NULL; + return ptr; +} + /* * call-seq: - * strio.eof -> true or false - * strio.eof? -> true or false + * eof? -> true or false + * + * Returns whether +self+ is positioned at end-of-stream: + * + * strio = StringIO.new('foo') + * strio.pos # => 0 + * strio.eof? # => false + * strio.read # => "foo" + * strio.pos # => 3 + * strio.eof? # => true + * strio.close_read + * strio.eof? # Raises IOError: not opened for reading * - * Returns true if *strio* is at end of file. The stringio must be - * opened for reading or an +IOError+ will be raised. + * Related: StringIO#pos. */ static VALUE strio_eof(VALUE self) { - struct StringIO *ptr = readable(StringIO(self)); - if (ptr->pos < RSTRING_LEN(ptr->string)) return Qfalse; + if (strio_to_read(self)) return Qfalse; return Qtrue; } @@ -427,29 +723,31 @@ strio_eof(VALUE self) static VALUE strio_copy(VALUE copy, VALUE orig) { - struct StringIO *ptr; + struct StringIO *ptr, *old_ptr; + VALUE old_string = Qundef; orig = rb_convert_type(orig, T_DATA, "StringIO", "to_strio"); if (copy == orig) return copy; ptr = StringIO(orig); - if (check_strio(copy)) { - strio_free(DATA_PTR(copy)); + old_ptr = check_strio(copy); + if (old_ptr) { + old_string = old_ptr->string; + strio_free(old_ptr); } DATA_PTR(copy) = ptr; - OBJ_INFECT(copy, orig); + RB_OBJ_WRITTEN(copy, old_string, ptr->string); + RBASIC(copy)->flags &= ~STRIO_READWRITE; + RBASIC(copy)->flags |= RBASIC(orig)->flags & STRIO_READWRITE; ++ptr->count; return copy; } /* * call-seq: - * strio.lineno -> integer + * lineno -> current_line_number * - * Returns the current line number in *strio*. The stringio must be - * opened for reading. +lineno+ counts the number of times +gets+ is - * called, rather than the number of newlines encountered. The two - * values will differ if +gets+ is called with a separator other than - * newline. See also the <code>$.</code> variable. + * Returns the current line number in +self+; + * see {Line Number}[rdoc-ref:StringIO@Line+Number]. */ static VALUE strio_get_lineno(VALUE self) @@ -459,10 +757,10 @@ strio_get_lineno(VALUE self) /* * call-seq: - * strio.lineno = integer -> integer + * lineno = new_line_number -> new_line_number * - * Manually sets the current line number to the given value. - * <code>$.</code> is updated only on the next read. + * Sets the current line number in +self+ to the given +new_line_number+; + * see {Line Number}[rdoc-ref:StringIO@Line+Number]. */ static VALUE strio_set_lineno(VALUE self, VALUE lineno) @@ -471,43 +769,73 @@ strio_set_lineno(VALUE self, VALUE lineno) return lineno; } -/* call-seq: strio.binmode -> true */ -#define strio_binmode strio_self +/* + * call-seq: + * binmode -> self + * + * Sets the data mode in +self+ to binary mode; + * see {Data Mode}[rdoc-ref:StringIO@Data+Mode]. + * + */ +static VALUE +strio_binmode(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + rb_encoding *enc = rb_ascii8bit_encoding(); + + ptr->enc = enc; + if (WRITABLE(self)) { + rb_enc_associate(ptr->string, enc); + } + return self; +} -/* call-seq: strio.fcntl */ #define strio_fcntl strio_unimpl -/* call-seq: strio.flush -> strio */ #define strio_flush strio_self -/* call-seq: strio.fsync -> 0 */ #define strio_fsync strio_0 /* * call-seq: - * strio.reopen(other_StrIO) -> strio - * strio.reopen(string, mode) -> strio + * reopen(other, mode = 'r+') -> self + * + * Reinitializes the stream with the given +other+ (string or StringIO) and +mode+; + * see IO.new: + * + * StringIO.open('foo') do |strio| + * p strio.string + * strio.reopen('bar') + * p strio.string + * other_strio = StringIO.new('baz') + * strio.reopen(other_strio) + * p strio.string + * other_strio.close + * end + * + * Output: + * + * "foo" + * "bar" + * "baz" * - * Reinitializes *strio* with the given <i>other_StrIO</i> or _string_ - * and _mode_ (see StringIO#new). */ static VALUE strio_reopen(int argc, VALUE *argv, VALUE self) { - if (!OBJ_TAINTED(self)) rb_secure(4); - if (argc == 1 && TYPE(*argv) != T_STRING) { + rb_io_taint_check(self); + if (argc == 1 && !RB_TYPE_P(*argv, T_STRING)) { return strio_copy(self, *argv); } - strio_init(argc, argv, StringIO(self)); - return self; + return strio_init(argc, argv, StringIO(self), self); } /* * call-seq: - * strio.pos -> integer - * strio.tell -> integer + * pos -> stream_position * - * Returns the current offset (in bytes) of *strio*. + * Returns the current position (in bytes); + * see {Position}[rdoc-ref:StringIO@Position]. */ static VALUE strio_get_pos(VALUE self) @@ -517,9 +845,10 @@ strio_get_pos(VALUE self) /* * call-seq: - * strio.pos = integer -> integer + * pos = new_position -> new_position * - * Seeks to the given position (in bytes) in *strio*. + * Sets the current position (in bytes); + * see {Position}[rdoc-ref:StringIO@Position]. */ static VALUE strio_set_pos(VALUE self, VALUE pos) @@ -535,10 +864,11 @@ strio_set_pos(VALUE self, VALUE pos) /* * call-seq: - * strio.rewind -> 0 + * rewind -> 0 * - * Positions *strio* to the beginning of input, resetting - * +lineno+ to zero. + * Sets the current position and line number to zero; + * see {Position}[rdoc-ref:IO@Position] + * and {Line Number}[rdoc-ref:IO@Line+Number]. */ static VALUE strio_rewind(VALUE self) @@ -551,47 +881,53 @@ strio_rewind(VALUE self) /* * call-seq: - * strio.seek(amount, whence=SEEK_SET) -> 0 + * seek(offset, whence = SEEK_SET) -> 0 * - * Seeks to a given offset _amount_ in the stream according to - * the value of _whence_ (see IO#seek). + * Sets the position to the given integer +offset+ (in bytes), + * with respect to a given constant +whence+; + * see {IO#seek}[rdoc-ref:IO#seek]. */ static VALUE strio_seek(int argc, VALUE *argv, VALUE self) { VALUE whence; struct StringIO *ptr = StringIO(self); - long offset; + long amount, offset; rb_scan_args(argc, argv, "11", NULL, &whence); - offset = NUM2LONG(argv[0]); - if (CLOSED(ptr)) { + amount = NUM2LONG(argv[0]); + if (CLOSED(self)) { rb_raise(rb_eIOError, "closed stream"); } switch (NIL_P(whence) ? 0 : NUM2LONG(whence)) { case 0: + offset = 0; break; case 1: - offset += ptr->pos; + offset = ptr->pos; break; case 2: - offset += RSTRING_LEN(ptr->string); + if (NIL_P(ptr->string)) { + offset = 0; + } else { + offset = RSTRING_LEN(ptr->string); + } break; default: error_inval("invalid whence"); } - if (offset < 0) { + if (amount > LONG_MAX - offset || amount + offset < 0) { error_inval(0); } - ptr->pos = offset; + ptr->pos = amount + offset; return INT2FIX(0); } /* * call-seq: - * strio.sync -> true + * sync -> true * - * Returns +true+ always. + * Returns +true+; implemented only for compatibility with other stream classes. */ static VALUE strio_get_sync(VALUE self) @@ -600,63 +936,71 @@ strio_get_sync(VALUE self) return Qtrue; } -/* call-seq: strio.sync = boolean -> boolean */ #define strio_set_sync strio_first #define strio_tell strio_get_pos /* * call-seq: - * strio.each_byte {|byte| block } -> strio + * each_byte {|byte| ... } -> self * - * See IO#each_byte. + * :include: stringio/each_byte.rdoc + * + * Related: StringIO#each_char, StringIO#each_codepoint, StringIO#each_line. */ static VALUE strio_each_byte(VALUE self) { - struct StringIO *ptr = readable(StringIO(self)); - while (ptr->pos < RSTRING_LEN(ptr->string)) { + struct StringIO *ptr; + + RETURN_ENUMERATOR(self, 0, 0); + + while ((ptr = strio_to_read(self)) != NULL) { char c = RSTRING_PTR(ptr->string)[ptr->pos++]; rb_yield(CHR2FIX(c)); } - return Qnil; + return self; } /* * call-seq: - * strio.getc -> string or nil + * getc -> character, byte, or nil + * + * :include: stringio/getc.rdoc * - * See IO#getc. */ static VALUE strio_getc(VALUE self) { - struct StringIO *ptr = readable(StringIO(self)); - rb_encoding *enc = rb_enc_get(ptr->string); + struct StringIO *ptr = readable(self); + rb_encoding *enc = get_enc(ptr); + VALUE str = ptr->string; + long pos = ptr->pos; int len; char *p; - if (ptr->pos >= RSTRING_LEN(ptr->string)) { + if (eos_p(ptr)) { return Qnil; } - p = RSTRING_PTR(ptr->string)+ptr->pos; - len = rb_enc_mbclen(p, RSTRING_END(ptr->string), enc); + p = RSTRING_PTR(str)+pos; + len = rb_enc_mbclen(p, RSTRING_END(str), enc); ptr->pos += len; - return rb_enc_str_new(p, len, rb_enc_get(ptr->string)); + return enc_subseq(str, pos, len, enc); } /* * call-seq: - * strio.getbyte -> fixnum or nil + * getbyte -> integer or nil + * + * :include: stringio/getbyte.rdoc * - * See IO#getbyte. */ static VALUE strio_getbyte(VALUE self) { - struct StringIO *ptr = readable(StringIO(self)); + struct StringIO *ptr = readable(self); int c; - if (ptr->pos >= RSTRING_LEN(ptr->string)) { + if (eos_p(ptr)) { return Qnil; } c = RSTRING_PTR(ptr->string)[ptr->pos++]; @@ -668,6 +1012,9 @@ strio_extend(struct StringIO *ptr, long pos, long len) { long olen; + if (len > LONG_MAX - pos) + rb_raise(rb_eArgError, "string size too big"); + check_modifiable(ptr); olen = RSTRING_LEN(ptr->string); if (pos + len > olen) { @@ -675,84 +1022,201 @@ strio_extend(struct StringIO *ptr, long pos, long len) if (pos > olen) MEMZERO(RSTRING_PTR(ptr->string) + olen, char, pos - olen); } - else { - rb_str_modify(ptr->string); +} + +static void +strio_unget_string(struct StringIO *ptr, VALUE c) +{ + const char *cp = NULL; + long cl = RSTRING_LEN(c); + if (cl > 0) { + if (c != ptr->string) cp = RSTRING_PTR(c); + strio_unget_bytes(ptr, cp, cl); + RB_GC_GUARD(c); } } /* * call-seq: - * strio.ungetc(string) -> nil + * ungetc(character) -> nil * - * Pushes back one character (passed as a parameter) onto *strio* - * such that a subsequent buffered read will return it. Pushing back - * behind the beginning of the buffer string is not possible. Nothing - * will be done if such an attempt is made. - * In other case, there is no limitation for multiple pushbacks. + * Pushes back ("unshifts") a character or integer onto the stream; + * see {Character IO}[rdoc-ref:IO@Character+IO]. */ static VALUE strio_ungetc(VALUE self, VALUE c) { - struct StringIO *ptr = readable(StringIO(self)); - long lpos, clen; - char *p, *pend; - rb_encoding *enc; + struct StringIO *ptr = readable(self); + rb_encoding *enc, *enc2; + check_modifiable(ptr); + if (NIL_P(ptr->string)) return Qnil; if (NIL_P(c)) return Qnil; - if (FIXNUM_P(c)) { - int cc = FIX2INT(c); + if (RB_INTEGER_TYPE_P(c)) { + int len, cc = NUM2INT(c); char buf[16]; enc = rb_enc_get(ptr->string); + len = rb_enc_codelen(cc, enc); + if (len <= 0) { + rb_enc_uint_chr(cc, enc); /* to raise an exception */ + UNREACHABLE; + } rb_enc_mbcput(cc, buf, enc); - c = rb_enc_str_new(buf, rb_enc_codelen(cc, enc), enc); + return strio_unget_bytes(ptr, buf, len); } else { - SafeStringValue(c); - enc = rb_enc_check(ptr->string, c); + StringValue(c); + if (RSTRING_LEN(c) == 0) return Qnil; + enc = rb_enc_get(ptr->string); + enc2 = rb_enc_get(c); + if (enc != enc2 && enc != rb_ascii8bit_encoding()) { + c = rb_str_conv_enc(c, enc2, enc); + } + strio_unget_string(ptr, c); + return Qnil; } - /* get logical position */ - lpos = 0; p = RSTRING_PTR(ptr->string); pend = p + ptr->pos - 1; - for (;;) { - clen = rb_enc_mbclen(p, pend, enc); - if (p+clen >= pend) break; - p += clen; - lpos++; +} + +/* + * call-seq: + * ungetbyte(byte) -> nil + * + * Pushes back ("unshifts") an 8-bit byte onto the stream; + * see {Byte IO}[rdoc-ref:IO@Byte+IO]. + */ +static VALUE +strio_ungetbyte(VALUE self, VALUE c) +{ + struct StringIO *ptr = readable(self); + + check_modifiable(ptr); + if (NIL_P(ptr->string)) return Qnil; + if (NIL_P(c)) return Qnil; + if (RB_INTEGER_TYPE_P(c)) { + /* rb_int_and() not visible from exts */ + VALUE v = rb_funcall(c, '&', 1, INT2FIX(0xff)); + const char cc = NUM2INT(v) & 0xFF; + strio_unget_bytes(ptr, &cc, 1); + } + else { + StringValue(c); + strio_unget_string(ptr, c); } - rb_str_update(ptr->string, lpos, ptr->pos ? 1 : 0, c); - ptr->pos = p - RSTRING_PTR(ptr->string); + return Qnil; +} +static VALUE +strio_unget_bytes(struct StringIO *ptr, const char *cp, long cl) +{ + long pos = ptr->pos, len, rest; + VALUE str = ptr->string; + char *s; + + len = RSTRING_LEN(str); + rest = pos - len; + if (cl > pos) { + long ex = cl - (rest < 0 ? pos : len); + rb_str_modify_expand(str, ex); + rb_str_set_len(str, len + ex); + s = RSTRING_PTR(str); + if (rest < 0) memmove(s + cl, s + pos, -rest); + pos = 0; + } + else { + if (rest > 0) { + rb_str_modify_expand(str, rest); + rb_str_set_len(str, len + rest); + } + s = RSTRING_PTR(str); + if (rest > cl) memset(s + len, 0, rest - cl); + pos -= cl; + } + memcpy(s + pos, (cp ? cp : s), cl); + ptr->pos = pos; return Qnil; } /* * call-seq: - * strio.readchar -> fixnum + * readchar -> string * - * See IO#readchar. + * Like +getc+, but raises an exception if already at end-of-stream; + * see {Character IO}[rdoc-ref:IO@Character+IO]. */ static VALUE strio_readchar(VALUE self) { - VALUE c = strio_getc(self); + VALUE c = rb_funcallv(self, rb_intern("getc"), 0, 0); if (NIL_P(c)) rb_eof_error(); return c; } /* * call-seq: - * strio.readbyte -> fixnum + * readbyte -> byte * - * See IO#readbyte. + * Like +getbyte+, but raises an exception if already at end-of-stream; + * see {Byte IO}[rdoc-ref:IO@Byte+IO]. */ static VALUE strio_readbyte(VALUE self) { - VALUE c = strio_getbyte(self); + VALUE c = rb_funcallv(self, rb_intern("getbyte"), 0, 0); if (NIL_P(c)) rb_eof_error(); return c; } +/* + * call-seq: + * each_char {|char| ... } -> self + * + * :include: stringio/each_char.rdoc + * + * Related: StringIO#each_byte, StringIO#each_codepoint, StringIO#each_line. + */ +static VALUE +strio_each_char(VALUE self) +{ + VALUE c; + + RETURN_ENUMERATOR(self, 0, 0); + + while (!NIL_P(c = strio_getc(self))) { + rb_yield(c); + } + return self; +} + +/* + * call-seq: + * each_codepoint {|codepoint| ... } -> self + * + * :include: stringio/each_codepoint.rdoc + * + * Related: StringIO#each_byte, StringIO#each_char, StringIO#each_line. + */ +static VALUE +strio_each_codepoint(VALUE self) +{ + struct StringIO *ptr; + rb_encoding *enc; + unsigned int c; + int n; + + RETURN_ENUMERATOR(self, 0, 0); + + ptr = readable(self); + enc = get_enc(ptr); + while ((ptr = strio_to_read(self)) != NULL) { + c = rb_enc_codepoint_len(RSTRING_PTR(ptr->string)+ptr->pos, + RSTRING_END(ptr->string), &n, enc); + ptr->pos += n; + rb_yield(UINT2NUM(c)); + } + return self; +} + /* Boyer-Moore search: copied from regex.c */ static void bm_init_skip(long *skip, const char *pat, long m) @@ -786,77 +1250,158 @@ bm_search(const char *little, long llen, const char *big, long blen, const long return -1; } -static VALUE -strio_getline(int argc, VALUE *argv, struct StringIO *ptr) +struct getline_arg { + VALUE rs; + long limit; + unsigned int chomp: 1; +}; + +static struct getline_arg * +prepare_getline_args(struct StringIO *ptr, struct getline_arg *arg, int argc, VALUE *argv) { - const char *s, *e, *p; - long n, limit = 0; - VALUE str; + VALUE rs, lim, opts; + long limit = -1; + int respect_chomp; - if (argc == 0) { - str = rb_rs; - } - else { - VALUE lim, tmp; + argc = rb_scan_args(argc, argv, "02:", &rs, &lim, &opts); + respect_chomp = argc == 0 || !NIL_P(rs); + switch (argc) { + case 0: + rs = rb_rs; + break; - rb_scan_args(argc, argv, "11", &str, &lim); - if (!NIL_P(lim)) limit = NUM2LONG(lim); - else if (!NIL_P(str) && TYPE(str) != T_STRING) { - tmp = rb_check_string_type(str); + case 1: + if (!NIL_P(rs) && !RB_TYPE_P(rs, T_STRING)) { + VALUE tmp = rb_check_string_type(rs); if (NIL_P(tmp)) { - limit = NUM2LONG(str); - if (limit == 0) return rb_str_new(0,0); - str = rb_rs; + limit = NUM2LONG(rs); + rs = rb_rs; } else { - str = tmp; + rs = tmp; } } - else { - StringValue(str); + break; + + case 2: + if (!NIL_P(rs)) StringValue(rs); + if (!NIL_P(lim)) limit = NUM2LONG(lim); + break; + } + if (!NIL_P(ptr->string) && !NIL_P(rs)) { + rb_encoding *enc_rs, *enc_io; + enc_rs = rb_enc_get(rs); + enc_io = get_enc(ptr); + if (enc_rs != enc_io && + (rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT || + (RSTRING_LEN(rs) > 0 && !rb_enc_asciicompat(enc_io)))) { + if (rs == rb_rs) { + rs = rb_enc_str_new(0, 0, enc_io); + rb_str_buf_cat_ascii(rs, "\n"); + rs = rs; + } + else { + rb_raise(rb_eArgError, "encoding mismatch: %s IO with %s RS", + rb_enc_name(enc_io), + rb_enc_name(enc_rs)); + } + } + } + arg->rs = rs; + arg->limit = limit; + arg->chomp = 0; + if (!NIL_P(opts)) { + static ID keywords[1]; + VALUE vchomp; + if (!keywords[0]) { + keywords[0] = rb_intern_const("chomp"); + } + rb_get_kwargs(opts, keywords, 0, 1, &vchomp); + if (respect_chomp) { + arg->chomp = (vchomp != Qundef) && RTEST(vchomp); } } + return arg; +} + +static inline int +chomp_newline_width(const char *s, const char *e) +{ + if (e > s && *--e == '\n') { + if (e > s && *--e == '\r') return 2; + return 1; + } + return 0; +} - if (ptr->pos >= (n = RSTRING_LEN(ptr->string))) { +static VALUE +strio_getline(struct getline_arg *arg, struct StringIO *ptr) +{ + const char *s, *e, *p; + long n, limit = arg->limit; + VALUE str = arg->rs; + long w = 0; + rb_encoding *enc = get_enc(ptr); + + if (NIL_P(ptr->string) || ptr->pos >= (n = RSTRING_LEN(ptr->string))) { return Qnil; } s = RSTRING_PTR(ptr->string); e = s + RSTRING_LEN(ptr->string); s += ptr->pos; - if (limit > 0 && s + limit < e) { - e = s + limit; + if (limit > 0 && (size_t)limit < (size_t)(e - s)) { + e = rb_enc_right_char_head(s, s + limit, e, get_enc(ptr)); } if (NIL_P(str)) { - str = strio_substr(ptr, ptr->pos, e - s); + if (arg->chomp) { + w = chomp_newline_width(s, e); + } + str = strio_substr(ptr, ptr->pos, e - s - w, enc); } else if ((n = RSTRING_LEN(str)) == 0) { + const char *paragraph_end = NULL; p = s; - while (*p == '\n') { + while (p[(p + 1 < e) && (*p == '\r') && 0] == '\n') { + p += *p == '\r'; if (++p == e) { return Qnil; } } s = p; while ((p = memchr(p, '\n', e - p)) && (p != e)) { - if (*++p == '\n') { - e = p; - break; + p++; + if (!((p < e && *p == '\n') || + (p + 1 < e && *p == '\r' && *(p+1) == '\n'))) { + continue; + } + paragraph_end = p - ((*(p-2) == '\r') ? 2 : 1); + while ((p < e && *p == '\n') || + (p + 1 < e && *p == '\r' && *(p+1) == '\n')) { + p += (*p == '\r') ? 2 : 1; } + e = p; + break; } - str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s); + if (arg->chomp && paragraph_end) { + w = e - paragraph_end; + } + str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s - w, enc); } else if (n == 1) { if ((p = memchr(s, RSTRING_PTR(str)[0], e - s)) != 0) { e = p + 1; + w = (arg->chomp ? (p > s && *(p-1) == '\r') + 1 : 0); } - str = strio_substr(ptr, ptr->pos, e - s); + str = strio_substr(ptr, ptr->pos, e - s - w, enc); } else { - if (n < e - s) { - if (e - s < 1024) { + if (n < e - s + arg->chomp) { + /* unless chomping, RS at the end does not matter */ + if (e - s < 1024 || n == e - s) { for (p = s; p + n <= e; ++p) { if (MEMCMP(p, RSTRING_PTR(str), char, n) == 0) { e = p + n; + w = (arg->chomp ? n : 0); break; } } @@ -866,11 +1411,11 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) p = RSTRING_PTR(str); bm_init_skip(skip, p, n); if ((pos = bm_search(p, n, s, e - s, skip)) >= 0) { - e = s + pos + n; + e = s + pos + (arg->chomp ? 0 : n); } } } - str = strio_substr(ptr, ptr->pos, e - s); + str = strio_substr(ptr, ptr->pos, e - s - w, enc); } ptr->pos = e - RSTRING_PTR(ptr->string); ptr->lineno++; @@ -879,55 +1424,72 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) /* * call-seq: - * strio.gets(sep=$/) -> string or nil - * strio.gets(limit) -> string or nil - * strio.gets(sep, limit) -> string or nil + * gets(sep = $/, chomp: false) -> string or nil + * gets(limit, chomp: false) -> string or nil + * gets(sep, limit, chomp: false) -> string or nil + * + * :include: stringio/gets.rdoc * - * See IO#gets. */ static VALUE strio_gets(int argc, VALUE *argv, VALUE self) { - VALUE str = strio_getline(argc, argv, readable(StringIO(self))); + struct StringIO *ptr = readable(self); + struct getline_arg arg; + VALUE str; + if (prepare_getline_args(ptr, &arg, argc, argv)->limit == 0) { + if (NIL_P(ptr->string)) return Qnil; + return rb_enc_str_new(0, 0, get_enc(ptr)); + } + + str = strio_getline(&arg, ptr); rb_lastline_set(str); return str; } /* * call-seq: - * strio.readline(sep=$/) -> string - * strio.readline(limit) -> string or nil - * strio.readline(sep, limit) -> string or nil + * readline(sep = $/, chomp: false) -> string + * readline(limit, chomp: false) -> string + * readline(sep, limit, chomp: false) -> string * - * See IO#readline. + * Reads a line as with IO#gets, but raises EOFError if already at end-of-file; + * see {Line IO}[rdoc-ref:IO@Line+IO]. */ static VALUE strio_readline(int argc, VALUE *argv, VALUE self) { - VALUE line = strio_getline(argc, argv, readable(StringIO(self))); + VALUE line = rb_funcallv_kw(self, rb_intern("gets"), argc, argv, RB_PASS_CALLED_KEYWORDS); if (NIL_P(line)) rb_eof_error(); return line; } /* + * :markup: markdown + * * call-seq: - * strio.each(sep=$/) {|line| block } -> strio - * strio.each(limit) {|line| block } -> strio - * strio.each(sep, limit) {|line| block } -> strio - * strio.each_line(sep=$/) {|line| block } -> strio - * strio.each_line(limit) {|line| block } -> strio - * strio.each_line(sep,limit) {|line| block } -> strio - * - * See IO#each. + * each_line(sep = $/, chomp: false) {|line| ... } -> self + * each_line(limit, chomp: false) {|line| ... } -> self + * each_line(sep, limit, chomp: false) {|line| ... } -> self + * + * :include: stringio/each_line.md + * */ static VALUE strio_each(int argc, VALUE *argv, VALUE self) { - struct StringIO *ptr = StringIO(self); VALUE line; + struct StringIO *ptr = readable(self); + struct getline_arg arg; - while (!NIL_P(line = strio_getline(argc, argv, readable(ptr)))) { + RETURN_ENUMERATOR(self, argc, argv); + + if (prepare_getline_args(ptr, &arg, argc, argv)->limit == 0) { + rb_raise(rb_eArgError, "invalid limit: 0 for each_line"); + } + + while (!NIL_P(line = strio_getline(&arg, ptr))) { rb_yield(line); } return self; @@ -935,18 +1497,25 @@ strio_each(int argc, VALUE *argv, VALUE self) /* * call-seq: - * strio.readlines(sep=$/) -> array - * strio.readlines(limit) -> array - * strio.readlines(sep,limit) -> array + * strio.readlines(sep=$/, chomp: false) -> array + * strio.readlines(limit, chomp: false) -> array + * strio.readlines(sep, limit, chomp: false) -> array * * See IO#readlines. */ static VALUE strio_readlines(int argc, VALUE *argv, VALUE self) { - struct StringIO *ptr = StringIO(self); - VALUE ary = rb_ary_new(), line; - while (!NIL_P(line = strio_getline(argc, argv, readable(ptr)))) { + VALUE ary, line; + struct StringIO *ptr = readable(self); + struct getline_arg arg; + + if (prepare_getline_args(ptr, &arg, argc, argv)->limit == 0) { + rb_raise(rb_eArgError, "invalid limit: 0 for readlines"); + } + + ary = rb_ary_new(); + while (!NIL_P(line = strio_getline(&arg, ptr))) { rb_ary_push(ary, line); } return ary; @@ -954,40 +1523,69 @@ strio_readlines(int argc, VALUE *argv, VALUE self) /* * call-seq: - * strio.write(string) -> integer - * strio.syswrite(string) -> integer + * strio.write(string, ...) -> integer + * strio.syswrite(string) -> integer * - * Appends the given string to the underlying buffer string of *strio*. + * Appends the given string to the underlying buffer string. * The stream must be opened for writing. If the argument is not a * string, it will be converted to a string using <code>to_s</code>. * Returns the number of bytes written. See IO#write. */ static VALUE +strio_write_m(int argc, VALUE *argv, VALUE self) +{ + long len = 0; + while (argc-- > 0) { + /* StringIO can't exceed long limit */ + len += strio_write(self, *argv++); + } + return LONG2NUM(len); +} + +static long strio_write(VALUE self, VALUE str) { - struct StringIO *ptr = writable(StringIO(self)); + struct StringIO *ptr = writable(self); long len, olen; + rb_encoding *enc, *enc2; + rb_encoding *const ascii8bit = rb_ascii8bit_encoding(); + rb_encoding *usascii = 0; - if (TYPE(str) != T_STRING) + if (!RB_TYPE_P(str, T_STRING)) str = rb_obj_as_string(str); + enc = get_enc(ptr); + if (!enc) return 0; + enc2 = rb_enc_get(str); + if (enc != enc2 && enc != ascii8bit && enc != (usascii = rb_usascii_encoding())) { + VALUE converted = rb_str_conv_enc(str, enc2, enc); + if (converted == str && enc2 != ascii8bit && enc2 != usascii) { /* conversion failed */ + rb_enc_check(rb_enc_from_encoding(enc), str); + } + str = converted; + } len = RSTRING_LEN(str); - if (len == 0) return INT2FIX(0); + if (len == 0) return 0; check_modifiable(ptr); olen = RSTRING_LEN(ptr->string); if (ptr->flags & FMODE_APPEND) { ptr->pos = olen; } if (ptr->pos == olen) { - rb_str_cat(ptr->string, RSTRING_PTR(str), len); + if (enc == ascii8bit || enc2 == ascii8bit) { + rb_enc_str_buf_cat(ptr->string, RSTRING_PTR(str), len, enc); + } + else { + rb_str_buf_append(ptr->string, str); + } } else { strio_extend(ptr, ptr->pos, len); + rb_str_modify(ptr->string); memmove(RSTRING_PTR(ptr->string)+ptr->pos, RSTRING_PTR(str), len); - OBJ_INFECT(ptr->string, str); } - OBJ_INFECT(ptr->string, self); + RB_GC_GUARD(str); ptr->pos += len; - return LONG2NUM(len); + return len; } /* @@ -1017,25 +1615,28 @@ strio_write(VALUE self, VALUE str) /* * call-seq: - * strio.putc(obj) -> obj + * putc(object) -> object + * + * :include: stringio/putc.rdoc * - * See IO#putc. */ static VALUE strio_putc(VALUE self, VALUE ch) { - struct StringIO *ptr = writable(StringIO(self)); - int c = NUM2CHR(ch); - long olen; + struct StringIO *ptr = writable(self); + VALUE str; check_modifiable(ptr); - olen = RSTRING_LEN(ptr->string); - if (ptr->flags & FMODE_APPEND) { - ptr->pos = olen; + if (RB_TYPE_P(ch, T_STRING)) { + if (NIL_P(ptr->string)) return ch; + str = rb_str_substr(ch, 0, 1); + } + else { + char c = NUM2CHR(ch); + if (NIL_P(ptr->string)) return ch; + str = rb_str_new(&c, 1); } - strio_extend(ptr, ptr->pos, 1); - RSTRING_PTR(ptr->string)[ptr->pos++] = c; - OBJ_INFECT(ptr->string, self); + strio_write(self, str); return ch; } @@ -1049,45 +1650,53 @@ strio_putc(VALUE self, VALUE ch) /* * call-seq: - * strio.read([length [, buffer]]) -> string, buffer, or nil + * read(maxlen = nil, out_string = nil) → new_string, out_string, or nil + * + * :include: stringio/read.rdoc * - * See IO#read. */ static VALUE strio_read(int argc, VALUE *argv, VALUE self) { - struct StringIO *ptr = readable(StringIO(self)); + struct StringIO *ptr = readable(self); VALUE str = Qnil; - long len, olen; + long len; + int binary = 0; switch (argc) { case 2: str = argv[1]; - StringValue(str); - rb_str_modify(str); + if (!NIL_P(str)) { + StringValue(str); + rb_str_modify(str); + } + /* fall through */ case 1: if (!NIL_P(argv[0])) { - len = olen = NUM2LONG(argv[0]); + len = NUM2LONG(argv[0]); if (len < 0) { rb_raise(rb_eArgError, "negative length %ld given", len); } - if (len > 0 && ptr->pos >= RSTRING_LEN(ptr->string)) { + if (eos_p(ptr)) { if (!NIL_P(str)) rb_str_resize(str, 0); - return Qnil; + return len > 0 ? Qnil : rb_str_new(0, 0); } + binary = 1; break; } /* fall through */ case 0: - olen = -1; + if (NIL_P(ptr->string)) return Qnil; len = RSTRING_LEN(ptr->string); if (len <= ptr->pos) { + rb_encoding *enc = get_enc(ptr); if (NIL_P(str)) { str = rb_str_new(0, 0); } else { rb_str_resize(str, 0); } + rb_enc_associate(str, enc); return str; } else { @@ -1095,30 +1704,78 @@ strio_read(int argc, VALUE *argv, VALUE self) } break; default: - rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + rb_error_arity(argc, 0, 2); } if (NIL_P(str)) { - str = strio_substr(ptr, ptr->pos, len); + rb_encoding *enc = binary ? rb_ascii8bit_encoding() : get_enc(ptr); + str = strio_substr(ptr, ptr->pos, len, enc); } else { long rest = RSTRING_LEN(ptr->string) - ptr->pos; if (len > rest) len = rest; rb_str_resize(str, len); MEMCPY(RSTRING_PTR(str), RSTRING_PTR(ptr->string) + ptr->pos, char, len); + if (!binary) { + rb_enc_copy(str, ptr->string); + } } - if (NIL_P(str)) { - str = rb_str_new(0, 0); - len = 0; + ptr->pos += RSTRING_LEN(str); + return str; +} + +/* + * call-seq: + * pread(maxlen, offset, out_string = nil) -> new_string or out_string + * + * :include: stringio/pread.rdoc + * + */ +static VALUE +strio_pread(int argc, VALUE *argv, VALUE self) +{ + VALUE rb_len, rb_offset, rb_buf; + rb_scan_args(argc, argv, "21", &rb_len, &rb_offset, &rb_buf); + long len = NUM2LONG(rb_len); + long offset = NUM2LONG(rb_offset); + + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big): %" PRIsVALUE, rb_len); } - else { - ptr->pos += len = RSTRING_LEN(str); + + if (len == 0) { + if (NIL_P(rb_buf)) { + return rb_str_new("", 0); + } + return rb_buf; } - return str; + + if (offset < 0) { + rb_syserr_fail_str(EINVAL, rb_sprintf("pread: Invalid offset argument: %" PRIsVALUE, rb_offset)); + } + + struct StringIO *ptr = readable(self); + + if (outside_p(ptr, offset)) { + rb_eof_error(); + } + + if (NIL_P(rb_buf)) { + return strio_substr(ptr, offset, len, rb_ascii8bit_encoding()); + } + + long rest = RSTRING_LEN(ptr->string) - offset; + if (len > rest) len = rest; + rb_str_resize(rb_buf, len); + rb_enc_associate(rb_buf, rb_ascii8bit_encoding()); + MEMCPY(RSTRING_PTR(rb_buf), RSTRING_PTR(ptr->string) + offset, char, len); + return rb_buf; } + /* * call-seq: * strio.sysread(integer[, outbuf]) -> string + * strio.readpartial(integer[, outbuf]) -> string * * Similar to #read, but raises +EOFError+ at end of string instead of * returning +nil+, as well as IO#sysread does. @@ -1126,44 +1783,79 @@ strio_read(int argc, VALUE *argv, VALUE self) static VALUE strio_sysread(int argc, VALUE *argv, VALUE self) { - VALUE val = strio_read(argc, argv, self); - if (NIL_P(val) || RSTRING_LEN(val) == 0) { + VALUE val = rb_funcallv_kw(self, rb_intern("read"), argc, argv, RB_PASS_CALLED_KEYWORDS); + if (NIL_P(val)) { rb_eof_error(); } return val; } -#define strio_syswrite strio_write - -/* call-seq: strio.path -> nil */ -#define strio_path strio_nil - /* * call-seq: - * strio.isatty -> nil - * strio.tty? -> nil + * strio.read_nonblock(integer[, outbuf [, opts]]) -> string * + * Similar to #read, but raises +EOFError+ at end of string unless the + * +exception: false+ option is passed in. */ +static VALUE +strio_read_nonblock(int argc, VALUE *argv, VALUE self) +{ + VALUE opts = Qnil, val; + + rb_scan_args(argc, argv, "11:", NULL, NULL, &opts); + + if (!NIL_P(opts)) { + argc--; + } + + val = strio_read(argc, argv, self); + if (NIL_P(val)) { + if (!NIL_P(opts) && + rb_hash_lookup2(opts, sym_exception, Qundef) == Qfalse) + return Qnil; + else + rb_eof_error(); + } + + return val; +} + +/* + * See IO#write + */ +#define strio_syswrite rb_io_write + +/* + * See IO#write_nonblock + */ +static VALUE +strio_syswrite_nonblock(int argc, VALUE *argv, VALUE self) +{ + VALUE str; + + rb_scan_args(argc, argv, "10:", &str, NULL); + return strio_syswrite(self, str); +} + #define strio_isatty strio_false -/* call-seq: strio.pid -> nil */ #define strio_pid strio_nil -/* call-seq: strio.fileno -> nil */ #define strio_fileno strio_nil /* * call-seq: - * strio.size -> integer + * size -> integer + * + * :include: stringio/size.rdoc * - * Returns the size of the buffer string. */ static VALUE strio_size(VALUE self) { VALUE string = StringIO(self)->string; if (NIL_P(string)) { - rb_raise(rb_eIOError, "not opened"); + return INT2FIX(0); } return ULONG2NUM(RSTRING_LEN(string)); } @@ -1172,35 +1864,149 @@ strio_size(VALUE self) * call-seq: * strio.truncate(integer) -> 0 * - * Truncates the buffer string to at most _integer_ bytes. The *strio* + * Truncates the buffer string to at most _integer_ bytes. The stream * must be opened for writing. */ static VALUE strio_truncate(VALUE self, VALUE len) { - VALUE string = writable(StringIO(self))->string; + VALUE string = writable(self)->string; long l = NUM2LONG(len); - long plen = RSTRING_LEN(string); + long plen; if (l < 0) { - error_inval("negative legnth"); + error_inval("negative length"); } + if (NIL_P(string)) return 0; + plen = RSTRING_LEN(string); rb_str_resize(string, l); if (plen < l) { MEMZERO(RSTRING_PTR(string) + plen, char, l - plen); } - return len; + return INT2FIX(0); +} + +/* + * call-seq: + * external_encoding -> encoding or nil + * + * Returns an Encoding object that represents the encoding of the string; + * see {Encodings}[rdoc-ref:StringIO@Encodings]: + * + * strio = StringIO.new('foo') + * strio.external_encoding # => #<Encoding:UTF-8> + * + * Returns +nil+ if +self+ has no string and is in write mode: + * + * strio = StringIO.new(nil, 'w+') + * strio.external_encoding # => nil + * + */ + +static VALUE +strio_external_encoding(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + return rb_enc_from_encoding(get_enc(ptr)); +} + +/* + * call-seq: + * internal_encoding -> nil + * + * Returns +nil+; for compatibility with IO. + */ + +static VALUE +strio_internal_encoding(VALUE self) +{ + return Qnil; } /* - * Pseudo I/O on String object. + * call-seq: + * strio.set_encoding(ext_enc, [int_enc[, opt]]) => strio + * + * Specify the encoding of the StringIO as <i>ext_enc</i>. + * Use the default external encoding if <i>ext_enc</i> is nil. + * 2nd argument <i>int_enc</i> and optional hash <i>opt</i> argument + * are ignored; they are for API compatibility to IO. + */ + +static VALUE +strio_set_encoding(int argc, VALUE *argv, VALUE self) +{ + rb_encoding* enc; + struct StringIO *ptr = StringIO(self); + VALUE ext_enc, int_enc, opt; + + argc = rb_scan_args(argc, argv, "11:", &ext_enc, &int_enc, &opt); + + if (NIL_P(ext_enc)) { + enc = rb_default_external_encoding(); + } + else { + enc = rb_find_encoding(ext_enc); + if (!enc) { + rb_io_enc_t convconfig; + int oflags; + rb_io_mode_t fmode; + VALUE vmode = rb_str_append(rb_str_new_cstr("r:"), ext_enc); + rb_io_extract_modeenc(&vmode, 0, Qnil, &oflags, &fmode, &convconfig); + enc = convconfig.enc2; + } + } + ptr->enc = enc; + if (!NIL_P(ptr->string) && WRITABLE(self) && !str_chilled_p(ptr->string)) { + rb_enc_associate(ptr->string, enc); + } + + return self; +} + +/* + * call-seq: + * strio.set_encoding_by_bom => strio or nil + * + * Sets the encoding according to the BOM (Byte Order Mark) in the + * string. + * + * Returns +self+ if the BOM is found, otherwise +nil. + */ +static VALUE +strio_set_encoding_by_bom(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + + if (!set_encoding_by_bom(ptr)) return Qnil; + return rb_enc_from_encoding(ptr->enc); +} + +/* + * :markup: markdown + * + * :include: stringio/stringio.md */ void -Init_stringio() +Init_stringio(void) { - VALUE StringIO = rb_define_class("StringIO", rb_cData); +#undef rb_intern + +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + + VALUE StringIO = rb_define_class("StringIO", rb_cObject); + + /* The version string */ + rb_define_const(StringIO, "VERSION", rb_str_new_cstr(STRINGIO_VERSION)); rb_include_module(StringIO, rb_mEnumerable); rb_define_alloc_func(StringIO, strio_s_allocate); + + /* Maximum length that a StringIO instance can hold */ + rb_define_const(StringIO, "MAX_LENGTH", LONG2NUM(LONG_MAX)); + + rb_define_singleton_method(StringIO, "new", strio_s_new, -1); rb_define_singleton_method(StringIO, "open", strio_s_open, -1); rb_define_method(StringIO, "initialize", strio_initialize, -1); rb_define_method(StringIO, "initialize_copy", strio_copy, 1); @@ -1211,6 +2017,8 @@ Init_stringio() rb_define_method(StringIO, "lineno", strio_get_lineno, 0); rb_define_method(StringIO, "lineno=", strio_set_lineno, 1); + + /* call-seq: strio.binmode -> true */ rb_define_method(StringIO, "binmode", strio_binmode, 0); rb_define_method(StringIO, "close", strio_close, 0); rb_define_method(StringIO, "close_read", strio_close_read, 0); @@ -1220,46 +2028,85 @@ Init_stringio() rb_define_method(StringIO, "closed_write?", strio_closed_write, 0); rb_define_method(StringIO, "eof", strio_eof, 0); rb_define_method(StringIO, "eof?", strio_eof, 0); + /* call-seq: strio.fcntl */ rb_define_method(StringIO, "fcntl", strio_fcntl, -1); + /* call-seq: strio.flush -> strio */ rb_define_method(StringIO, "flush", strio_flush, 0); + /* call-seq: strio.fsync -> 0 */ rb_define_method(StringIO, "fsync", strio_fsync, 0); rb_define_method(StringIO, "pos", strio_get_pos, 0); rb_define_method(StringIO, "pos=", strio_set_pos, 1); rb_define_method(StringIO, "rewind", strio_rewind, 0); rb_define_method(StringIO, "seek", strio_seek, -1); rb_define_method(StringIO, "sync", strio_get_sync, 0); + /* call-seq: strio.sync = boolean -> boolean */ rb_define_method(StringIO, "sync=", strio_set_sync, 1); rb_define_method(StringIO, "tell", strio_tell, 0); - rb_define_method(StringIO, "path", strio_path, 0); rb_define_method(StringIO, "each", strio_each, -1); - rb_define_method(StringIO, "each_byte", strio_each_byte, 0); rb_define_method(StringIO, "each_line", strio_each, -1); + rb_define_method(StringIO, "each_byte", strio_each_byte, 0); + rb_define_method(StringIO, "each_char", strio_each_char, 0); + rb_define_method(StringIO, "each_codepoint", strio_each_codepoint, 0); rb_define_method(StringIO, "getc", strio_getc, 0); rb_define_method(StringIO, "ungetc", strio_ungetc, 1); - rb_define_method(StringIO, "readchar", strio_readchar, 0); + rb_define_method(StringIO, "ungetbyte", strio_ungetbyte, 1); rb_define_method(StringIO, "getbyte", strio_getbyte, 0); - rb_define_method(StringIO, "readbyte", strio_readbyte, 0); rb_define_method(StringIO, "gets", strio_gets, -1); - rb_define_method(StringIO, "readline", strio_readline, -1); rb_define_method(StringIO, "readlines", strio_readlines, -1); rb_define_method(StringIO, "read", strio_read, -1); - rb_define_method(StringIO, "sysread", strio_sysread, -1); - rb_define_method(StringIO, "readpartial", strio_sysread, -1); + rb_define_method(StringIO, "pread", strio_pread, -1); - rb_define_method(StringIO, "write", strio_write, 1); - rb_define_method(StringIO, "<<", strio_addstr, 1); - rb_define_method(StringIO, "print", strio_print, -1); - rb_define_method(StringIO, "printf", strio_printf, -1); + rb_define_method(StringIO, "write", strio_write_m, -1); rb_define_method(StringIO, "putc", strio_putc, 1); - rb_define_method(StringIO, "puts", strio_puts, -1); - rb_define_method(StringIO, "syswrite", strio_syswrite, 1); + /* + * call-seq: + * strio.isatty -> nil + * strio.tty? -> nil + * + */ rb_define_method(StringIO, "isatty", strio_isatty, 0); rb_define_method(StringIO, "tty?", strio_isatty, 0); + + /* call-seq: strio.pid -> nil */ rb_define_method(StringIO, "pid", strio_pid, 0); + + /* call-seq: strio.fileno -> nil */ rb_define_method(StringIO, "fileno", strio_fileno, 0); rb_define_method(StringIO, "size", strio_size, 0); rb_define_method(StringIO, "length", strio_size, 0); rb_define_method(StringIO, "truncate", strio_truncate, 1); + + rb_define_method(StringIO, "external_encoding", strio_external_encoding, 0); + rb_define_method(StringIO, "internal_encoding", strio_internal_encoding, 0); + rb_define_method(StringIO, "set_encoding", strio_set_encoding, -1); + rb_define_method(StringIO, "set_encoding_by_bom", strio_set_encoding_by_bom, 0); + + { + /* :stopdoc: */ + VALUE mReadable = rb_define_module_under(rb_cIO, "generic_readable"); + /* :startdoc: */ + rb_define_method(mReadable, "readchar", strio_readchar, 0); + rb_define_method(mReadable, "readbyte", strio_readbyte, 0); + rb_define_method(mReadable, "readline", strio_readline, -1); + rb_define_method(mReadable, "sysread", strio_sysread, -1); + rb_define_method(mReadable, "readpartial", strio_sysread, -1); + rb_define_method(mReadable, "read_nonblock", strio_read_nonblock, -1); + rb_include_module(StringIO, mReadable); + } + { + /* :stopdoc: */ + VALUE mWritable = rb_define_module_under(rb_cIO, "generic_writable"); + /* :startdoc: */ + rb_define_method(mWritable, "<<", strio_addstr, 1); + rb_define_method(mWritable, "print", strio_print, -1); + rb_define_method(mWritable, "printf", strio_printf, -1); + rb_define_method(mWritable, "puts", strio_puts, -1); + rb_define_method(mWritable, "syswrite", strio_syswrite, 1); + rb_define_method(mWritable, "write_nonblock", strio_syswrite_nonblock, -1); + rb_include_module(StringIO, mWritable); + } + + sym_exception = ID2SYM(rb_intern("exception")); } |
