diff options
Diffstat (limited to 'ext/stringio/stringio.c')
| -rw-r--r-- | ext/stringio/stringio.c | 758 |
1 files changed, 497 insertions, 261 deletions
diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index ecd7b07203..a2aef6b11c 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -1,3 +1,4 @@ +/* -*- mode: c; indent-tabs-mode: t -*- */ /********************************************************************** stringio.c - @@ -11,6 +12,8 @@ **********************************************************************/ +#define STRINGIO_VERSION "3.0.1.2" + #include "ruby.h" #include "ruby/io.h" #include "ruby/encoding.h" @@ -20,18 +23,106 @@ #include <sys/fcntl.h> #endif +#ifndef RB_INTEGER_TYPE_P +# define RB_INTEGER_TYPE_P(c) (FIXNUM_P(c) || RB_TYPE_P(c, T_BIGNUM)) +#endif + +#ifndef RB_PASS_CALLED_KEYWORDS +# define rb_funcallv_kw(recv, mid, arg, argv, kw_splat) rb_funcallv(recv, mid, arg, argv) +# define rb_class_new_instance_kw(argc, argv, klass, kw_splat) rb_class_new_instance(argc, argv, klass) +#endif + +#ifndef HAVE_RB_IO_EXTRACT_MODEENC +#define rb_io_extract_modeenc strio_extract_modeenc +static void +strio_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, + int *oflags_p, int *fmode_p, struct rb_io_enc_t *convconfig_p) +{ + VALUE mode = *vmode_p; + VALUE intmode; + int fmode; + int has_enc = 0, has_vmode = 0; + + convconfig_p->enc = convconfig_p->enc2 = 0; + + vmode_handle: + if (NIL_P(mode)) { + fmode = FMODE_READABLE; + } + else if (!NIL_P(intmode = rb_check_to_integer(mode, "to_int"))) { + int flags = NUM2INT(intmode); + fmode = rb_io_oflags_fmode(flags); + } + else { + const char *m = StringValueCStr(mode), *n, *e; + fmode = rb_io_modestr_fmode(m); + n = strchr(m, ':'); + if (n) { + long len; + char encname[ENCODING_MAXNAMELEN+1]; + has_enc = 1; + if (fmode & FMODE_SETENC_BY_BOM) { + n = strchr(n, '|'); + } + e = strchr(++n, ':'); + len = e ? e - n : (long)strlen(n); + if (len > 0 && len <= ENCODING_MAXNAMELEN) { + if (e) { + memcpy(encname, n, len); + encname[len] = '\0'; + n = encname; + } + convconfig_p->enc = rb_enc_find(n); + } + if (e && (len = strlen(++e)) > 0 && len <= ENCODING_MAXNAMELEN) { + convconfig_p->enc2 = rb_enc_find(e); + } + } + } + + if (!NIL_P(opthash)) { + rb_encoding *extenc = 0, *intenc = 0; + VALUE v; + if (!has_vmode) { + ID id_mode; + CONST_ID(id_mode, "mode"); + v = rb_hash_aref(opthash, ID2SYM(id_mode)); + if (!NIL_P(v)) { + if (!NIL_P(mode)) { + rb_raise(rb_eArgError, "mode specified twice"); + } + has_vmode = 1; + mode = v; + goto vmode_handle; + } + } + + if (rb_io_extract_encoding_option(opthash, &extenc, &intenc, &fmode)) { + if (has_enc) { + rb_raise(rb_eArgError, "encoding specified twice"); + } + } + } + *fmode_p = fmode; +} +#endif + struct StringIO { VALUE string; + rb_encoding *enc; long pos; long lineno; int flags; int count; }; -static void strio_init(int, VALUE *, struct StringIO *, VALUE); +static VALUE strio_init(int, VALUE *, struct StringIO *, VALUE); +static VALUE strio_unget_bytes(struct StringIO *, const char *, long); +static long strio_write(VALUE self, VALUE str); #define IS_STRIO(obj) (rb_typeddata_is_kind_of((obj), &strio_data_type)) -#define error_inval(msg) (errno = EINVAL, rb_sys_fail(msg)) +#define error_inval(msg) (rb_syserr_fail(EINVAL, msg)) +#define get_enc(ptr) ((ptr)->enc ? (ptr)->enc : rb_enc_get((ptr)->string)) static struct StringIO * strio_alloc(void) @@ -49,9 +140,8 @@ static void strio_mark(void *p) { struct StringIO *ptr = p; - if (ptr) { - rb_gc_mark(ptr->string); - } + + rb_gc_mark(ptr->string); } static void @@ -66,8 +156,6 @@ strio_free(void *p) static size_t strio_memsize(const void *p) { - const struct StringIO *ptr = p; - if (!ptr) return 0; return sizeof(struct StringIO); } @@ -95,16 +183,23 @@ get_strio(VALUE self) } static VALUE -strio_substr(struct StringIO *ptr, long pos, long len) +enc_subseq(VALUE str, long pos, long len, rb_encoding *enc) +{ + str = rb_str_subseq(str, pos, len); + rb_enc_associate(str, enc); + return str; +} + +static VALUE +strio_substr(struct StringIO *ptr, long pos, long len, rb_encoding *enc) { VALUE str = ptr->string; - rb_encoding *enc = rb_enc_get(str); long rlen = RSTRING_LEN(str) - pos; if (len > rlen) len = rlen; if (len < 0) len = 0; - if (len == 0) return rb_str_new(0,0); - return rb_enc_str_new(RSTRING_PTR(str)+pos, len, enc); + if (len == 0) return rb_enc_str_new(0, 0, enc); + return enc_subseq(str, pos, len, enc); } #define StringIO(obj) get_strio(obj) @@ -139,8 +234,6 @@ writable(VALUE strio) if (!WRITABLE(strio)) { rb_raise(rb_eIOError, "not opened for writing"); } - if (!OBJ_TAINTED(ptr->string)) { - } return ptr; } @@ -172,50 +265,116 @@ strio_initialize(int argc, VALUE *argv, VALUE self) DATA_PTR(self) = ptr = strio_alloc(); } rb_call_super(0, 0); - strio_init(argc, argv, ptr, self); - return self; + return strio_init(argc, argv, ptr, self); } -static void -strio_init(int argc, VALUE *argv, struct StringIO *ptr, VALUE self) +static int +detect_bom(VALUE str, int *bomlen) { - VALUE string, mode; - int trunc = 0; + const char *p; + long len; - switch (rb_scan_args(argc, argv, "02", &string, &mode)) { - case 2: - if (FIXNUM_P(mode)) { - int flags = FIX2INT(mode); - ptr->flags = rb_io_oflags_fmode(flags); - trunc = flags & O_TRUNC; + RSTRING_GETMEM(str, p, len); + if (len < 1) return 0; + switch ((unsigned char)p[0]) { + case 0xEF: + if (len < 2) break; + if ((unsigned char)p[1] == 0xBB && len > 2) { + if ((unsigned char)p[2] == 0xBF) { + *bomlen = 3; + return rb_utf8_encindex(); + } } - else { - const char *m = StringValueCStr(mode); - ptr->flags = rb_io_modestr_fmode(m); - trunc = *m == 'w'; + break; + + case 0xFE: + if (len < 2) break; + if ((unsigned char)p[1] == 0xFF) { + *bomlen = 2; + return rb_enc_find_index("UTF-16BE"); } - StringValue(string); - if ((ptr->flags & FMODE_WRITABLE) && OBJ_FROZEN(string)) { - errno = EACCES; - rb_sys_fail(0); + break; + + case 0xFF: + if (len < 2) break; + if ((unsigned char)p[1] == 0xFE) { + if (len >= 4 && (unsigned char)p[2] == 0 && (unsigned char)p[3] == 0) { + *bomlen = 4; + return rb_enc_find_index("UTF-32LE"); + } + *bomlen = 2; + return rb_enc_find_index("UTF-16LE"); } - if (trunc) { - rb_str_resize(string, 0); + break; + + case 0: + if (len < 4) break; + if ((unsigned char)p[1] == 0 && (unsigned char)p[2] == 0xFE && (unsigned char)p[3] == 0xFF) { + *bomlen = 4; + return rb_enc_find_index("UTF-32BE"); } break; - case 1: + } + return 0; +} + +static rb_encoding * +set_encoding_by_bom(struct StringIO *ptr) +{ + int bomlen, idx = detect_bom(ptr->string, &bomlen); + rb_encoding *extenc = NULL; + + if (idx) { + extenc = rb_enc_from_index(idx); + ptr->pos = bomlen; + if (ptr->flags & FMODE_WRITABLE) { + rb_enc_associate_index(ptr->string, idx); + } + } + ptr->enc = extenc; + return extenc; +} + +static VALUE +strio_init(int argc, VALUE *argv, struct StringIO *ptr, VALUE self) +{ + VALUE string, vmode, opt; + int oflags; + struct rb_io_enc_t convconfig; + + argc = rb_scan_args(argc, argv, "02:", &string, &vmode, &opt); + rb_io_extract_modeenc(&vmode, 0, opt, &oflags, &ptr->flags, &convconfig); + if (argc) { StringValue(string); - ptr->flags = OBJ_FROZEN(string) ? FMODE_READABLE : FMODE_READWRITE; - break; - case 0: + } + else { string = rb_enc_str_new("", 0, rb_default_external_encoding()); - ptr->flags = FMODE_READWRITE; - break; + } + if (OBJ_FROZEN_RAW(string)) { + if (ptr->flags & FMODE_WRITABLE) { + rb_syserr_fail(EACCES, 0); + } + } + else { + if (NIL_P(vmode)) { + ptr->flags |= FMODE_WRITABLE; + } + } + if (ptr->flags & FMODE_TRUNC) { + rb_str_resize(string, 0); } ptr->string = string; + if (argc == 1) { + ptr->enc = rb_enc_get(string); + } + else { + ptr->enc = convconfig.enc; + } ptr->pos = 0; ptr->lineno = 0; + if (ptr->flags & FMODE_SETENC_BY_BOM) set_encoding_by_bom(ptr); RBASIC(self)->flags |= (ptr->flags & FMODE_READWRITE) * (STRIO_READABLE / FMODE_READABLE); + return self; } static VALUE @@ -237,11 +396,24 @@ strio_finalize(VALUE self) static VALUE strio_s_open(int argc, VALUE *argv, VALUE klass) { - VALUE obj = rb_class_new_instance(argc, argv, klass); + VALUE obj = rb_class_new_instance_kw(argc, argv, klass, RB_PASS_CALLED_KEYWORDS); if (!rb_block_given_p()) return obj; return rb_ensure(rb_yield, obj, strio_finalize, obj); } +/* :nodoc: */ +static VALUE +strio_s_new(int argc, VALUE *argv, VALUE klass) +{ + if (rb_block_given_p()) { + VALUE cname = rb_obj_as_string(klass); + + rb_warn("%"PRIsVALUE"::new() does not take block; use %"PRIsVALUE"::open() instead", + cname, cname); + } + return rb_class_new_instance_kw(argc, argv, klass, RB_PASS_CALLED_KEYWORDS); +} + /* * Returns +false+. Just for compatibility to IO. */ @@ -263,7 +435,7 @@ strio_nil(VALUE self) } /* - * Returns *strio* itself. Just for compatibility to IO. + * Returns an object itself. Just for compatibility to IO. */ static VALUE strio_self(VALUE self) @@ -339,16 +511,13 @@ strio_set_string(VALUE self, VALUE string) * call-seq: * strio.close -> nil * - * Closes strio. The *strio* is unavailable for any further data + * Closes a StringIO. The stream is unavailable for any further data * operations; an +IOError+ is raised if such an attempt is made. */ static VALUE strio_close(VALUE self) { StringIO(self); - if (CLOSED(self)) { - rb_raise(rb_eIOError, "closed stream"); - } RBASIC(self)->flags &= ~STRIO_READWRITE; return Qnil; } @@ -358,13 +527,13 @@ strio_close(VALUE self) * strio.close_read -> nil * * Closes the read end of a StringIO. Will raise an +IOError+ if the - * *strio* is not readable. + * receiver is not readable. */ static VALUE strio_close_read(VALUE self) { - StringIO(self); - if (!READABLE(self)) { + struct StringIO *ptr = StringIO(self); + if (!(ptr->flags & FMODE_READABLE)) { rb_raise(rb_eIOError, "closing non-duplex IO for reading"); } RBASIC(self)->flags &= ~STRIO_READABLE; @@ -376,13 +545,13 @@ strio_close_read(VALUE self) * strio.close_write -> nil * * Closes the write end of a StringIO. Will raise an +IOError+ if the - * *strio* is not writeable. + * receiver is not writeable. */ static VALUE strio_close_write(VALUE self) { - StringIO(self); - if (!WRITABLE(self)) { + struct StringIO *ptr = StringIO(self); + if (!(ptr->flags & FMODE_WRITABLE)) { rb_raise(rb_eIOError, "closing non-duplex IO for writing"); } RBASIC(self)->flags &= ~STRIO_WRITABLE; @@ -393,7 +562,7 @@ strio_close_write(VALUE self) * call-seq: * strio.closed? -> true or false * - * Returns +true+ if *strio* is completely closed, +false+ otherwise. + * Returns +true+ if the stream is completely closed, +false+ otherwise. */ static VALUE strio_closed(VALUE self) @@ -407,7 +576,7 @@ strio_closed(VALUE self) * call-seq: * strio.closed_read? -> true or false * - * Returns +true+ if *strio* is not readable, +false+ otherwise. + * Returns +true+ if the stream is not readable, +false+ otherwise. */ static VALUE strio_closed_read(VALUE self) @@ -421,7 +590,7 @@ strio_closed_read(VALUE self) * call-seq: * strio.closed_write? -> true or false * - * Returns +true+ if *strio* is not writable, +false+ otherwise. + * Returns +true+ if the stream is not writable, +false+ otherwise. */ static VALUE strio_closed_write(VALUE self) @@ -431,19 +600,26 @@ strio_closed_write(VALUE self) return Qtrue; } +static struct StringIO * +strio_to_read(VALUE self) +{ + struct StringIO *ptr = readable(self); + if (ptr->pos < RSTRING_LEN(ptr->string)) return ptr; + return NULL; +} + /* * call-seq: * strio.eof -> true or false * strio.eof? -> true or false * - * Returns true if *strio* is at end of file. The stringio must be - * opened for reading or an +IOError+ will be raised. + * Returns true if the stream is at the end of the data (underlying string). + * The stream must be opened for reading or an +IOError+ will be raised. */ static VALUE strio_eof(VALUE self) { - struct StringIO *ptr = readable(self); - if (ptr->pos < RSTRING_LEN(ptr->string)) return Qfalse; + if (strio_to_read(self)) return Qfalse; return Qtrue; } @@ -460,7 +636,6 @@ strio_copy(VALUE copy, VALUE orig) strio_free(DATA_PTR(copy)); } DATA_PTR(copy) = ptr; - OBJ_INFECT(copy, orig); RBASIC(copy)->flags &= ~STRIO_READWRITE; RBASIC(copy)->flags |= RBASIC(orig)->flags & STRIO_READWRITE; ++ptr->count; @@ -471,7 +646,7 @@ strio_copy(VALUE copy, VALUE orig) * call-seq: * strio.lineno -> integer * - * Returns the current line number in *strio*. The stringio must be + * Returns the current line number. The stream must be * opened for reading. +lineno+ counts the number of times +gets+ is * called, rather than the number of newlines encountered. The two * values will differ if +gets+ is called with a separator other than @@ -497,12 +672,20 @@ strio_set_lineno(VALUE self, VALUE lineno) return lineno; } +/* + * call-seq: + * strio.binmode -> stringio + * + * Puts stream into binary mode. See IO#binmode. + * + */ static VALUE strio_binmode(VALUE self) { struct StringIO *ptr = StringIO(self); rb_encoding *enc = rb_ascii8bit_encoding(); + ptr->enc = enc; if (WRITABLE(self)) { rb_enc_associate(ptr->string, enc); } @@ -520,7 +703,7 @@ strio_binmode(VALUE self) * strio.reopen(other_StrIO) -> strio * strio.reopen(string, mode) -> strio * - * Reinitializes *strio* with the given <i>other_StrIO</i> or _string_ + * Reinitializes the stream with the given <i>other_StrIO</i> or _string_ * and _mode_ (see StringIO#new). */ static VALUE @@ -530,8 +713,7 @@ strio_reopen(int argc, VALUE *argv, VALUE self) if (argc == 1 && !RB_TYPE_P(*argv, T_STRING)) { return strio_copy(self, *argv); } - strio_init(argc, argv, StringIO(self), self); - return self; + return strio_init(argc, argv, StringIO(self), self); } /* @@ -539,7 +721,7 @@ strio_reopen(int argc, VALUE *argv, VALUE self) * strio.pos -> integer * strio.tell -> integer * - * Returns the current offset (in bytes) of *strio*. + * Returns the current offset (in bytes). */ static VALUE strio_get_pos(VALUE self) @@ -551,7 +733,7 @@ strio_get_pos(VALUE self) * call-seq: * strio.pos = integer -> integer * - * Seeks to the given position (in bytes) in *strio*. + * Seeks to the given position (in bytes). */ static VALUE strio_set_pos(VALUE self, VALUE pos) @@ -569,7 +751,7 @@ strio_set_pos(VALUE self, VALUE pos) * call-seq: * strio.rewind -> 0 * - * Positions *strio* to the beginning of input, resetting + * Positions the stream to the beginning of input, resetting * +lineno+ to zero. */ static VALUE @@ -593,29 +775,30 @@ strio_seek(int argc, VALUE *argv, VALUE self) { VALUE whence; struct StringIO *ptr = StringIO(self); - long offset; + long amount, offset; rb_scan_args(argc, argv, "11", NULL, &whence); - offset = NUM2LONG(argv[0]); + amount = NUM2LONG(argv[0]); if (CLOSED(self)) { rb_raise(rb_eIOError, "closed stream"); } switch (NIL_P(whence) ? 0 : NUM2LONG(whence)) { case 0: + offset = 0; break; case 1: - offset += ptr->pos; + offset = ptr->pos; break; case 2: - offset += RSTRING_LEN(ptr->string); + offset = RSTRING_LEN(ptr->string); break; default: error_inval("invalid whence"); } - if (offset < 0) { + if (amount > LONG_MAX - offset || amount + offset < 0) { error_inval(0); } - ptr->pos = offset; + ptr->pos = amount + offset; return INT2FIX(0); } @@ -646,11 +829,11 @@ strio_get_sync(VALUE self) static VALUE strio_each_byte(VALUE self) { - struct StringIO *ptr = readable(self); + struct StringIO *ptr; RETURN_ENUMERATOR(self, 0, 0); - while (ptr->pos < RSTRING_LEN(ptr->string)) { + while ((ptr = strio_to_read(self)) != NULL) { char c = RSTRING_PTR(ptr->string)[ptr->pos++]; rb_yield(CHR2FIX(c)); } @@ -658,18 +841,6 @@ strio_each_byte(VALUE self) } /* - * This is a deprecated alias for #each_byte. - */ -static VALUE -strio_bytes(VALUE self) -{ - rb_warn("StringIO#bytes is deprecated; use #each_byte instead"); - if (!rb_block_given_p()) - return rb_enumeratorize(self, ID2SYM(rb_intern("each_byte")), 0, 0); - return strio_each_byte(self); -} - -/* * call-seq: * strio.getc -> string or nil * @@ -679,17 +850,19 @@ static VALUE strio_getc(VALUE self) { struct StringIO *ptr = readable(self); - rb_encoding *enc = rb_enc_get(ptr->string); + rb_encoding *enc = get_enc(ptr); + VALUE str = ptr->string; + long pos = ptr->pos; int len; char *p; - if (ptr->pos >= RSTRING_LEN(ptr->string)) { + if (pos >= RSTRING_LEN(str)) { return Qnil; } - p = RSTRING_PTR(ptr->string)+ptr->pos; - len = rb_enc_mbclen(p, RSTRING_END(ptr->string), enc); + p = RSTRING_PTR(str)+pos; + len = rb_enc_mbclen(p, RSTRING_END(str), enc); ptr->pos += len; - return rb_enc_str_new(p, len, rb_enc_get(ptr->string)); + return enc_subseq(str, pos, len, enc); } /* @@ -715,6 +888,9 @@ strio_extend(struct StringIO *ptr, long pos, long len) { long olen; + if (len > LONG_MAX - pos) + rb_raise(rb_eArgError, "string size too big"); + check_modifiable(ptr); olen = RSTRING_LEN(ptr->string); if (pos + len > olen) { @@ -731,7 +907,7 @@ strio_extend(struct StringIO *ptr, long pos, long len) * call-seq: * strio.ungetc(string) -> nil * - * Pushes back one character (passed as a parameter) onto *strio* + * Pushes back one character (passed as a parameter) * such that a subsequent buffered read will return it. There is no * limitation for multiple pushbacks including pushing back behind the * beginning of the buffer string. @@ -740,19 +916,19 @@ static VALUE strio_ungetc(VALUE self, VALUE c) { struct StringIO *ptr = readable(self); - long lpos, clen; - char *p, *pend; rb_encoding *enc, *enc2; - if (NIL_P(c)) return Qnil; check_modifiable(ptr); - if (FIXNUM_P(c)) { - int cc = FIX2INT(c); + if (NIL_P(c)) return Qnil; + if (RB_INTEGER_TYPE_P(c)) { + int len, cc = NUM2INT(c); char buf[16]; enc = rb_enc_get(ptr->string); + len = rb_enc_codelen(cc, enc); + if (len <= 0) rb_enc_uint_chr(cc, enc); rb_enc_mbcput(cc, buf, enc); - c = rb_enc_str_new(buf, rb_enc_codelen(cc, enc), enc); + return strio_unget_bytes(ptr, buf, len); } else { SafeStringValue(c); @@ -761,29 +937,10 @@ strio_ungetc(VALUE self, VALUE c) if (enc != enc2 && enc != rb_ascii8bit_encoding()) { c = rb_str_conv_enc(c, enc2, enc); } + strio_unget_bytes(ptr, RSTRING_PTR(c), RSTRING_LEN(c)); + RB_GC_GUARD(c); + return Qnil; } - if (RSTRING_LEN(ptr->string) < ptr->pos) { - long len = RSTRING_LEN(ptr->string); - rb_str_resize(ptr->string, ptr->pos - 1); - memset(RSTRING_PTR(ptr->string) + len, 0, ptr->pos - len - 1); - rb_str_concat(ptr->string, c); - ptr->pos--; - } - else { - /* get logical position */ - lpos = 0; p = RSTRING_PTR(ptr->string); pend = p + ptr->pos; - for (;;) { - clen = rb_enc_mbclen(p, pend, enc); - if (p+clen >= pend) break; - p += clen; - lpos++; - } - clen = p - RSTRING_PTR(ptr->string); - rb_str_update(ptr->string, lpos, ptr->pos ? 1 : 0, c); - ptr->pos = clen; - } - - return Qnil; } /* @@ -796,36 +953,55 @@ static VALUE strio_ungetbyte(VALUE self, VALUE c) { struct StringIO *ptr = readable(self); - char buf[1], *cp = buf; - long pos = ptr->pos, cl = 1; - VALUE str = ptr->string; + check_modifiable(ptr); if (NIL_P(c)) return Qnil; - if (FIXNUM_P(c)) { - buf[0] = (char)FIX2INT(c); + if (RB_INTEGER_TYPE_P(c)) { + /* rb_int_and() not visible from exts */ + VALUE v = rb_funcall(c, '&', 1, INT2FIX(0xff)); + const char cc = NUM2INT(v) & 0xFF; + strio_unget_bytes(ptr, &cc, 1); } else { + long cl; SafeStringValue(c); - cp = RSTRING_PTR(c); cl = RSTRING_LEN(c); - if (cl == 0) return Qnil; + if (cl > 0) { + strio_unget_bytes(ptr, RSTRING_PTR(c), cl); + RB_GC_GUARD(c); + } } - check_modifiable(ptr); - rb_str_modify(str); + return Qnil; +} + +static VALUE +strio_unget_bytes(struct StringIO *ptr, const char *cp, long cl) +{ + long pos = ptr->pos, len, rest; + VALUE str = ptr->string; + char *s; + + len = RSTRING_LEN(str); + rest = pos - len; if (cl > pos) { - char *s; - long rest = RSTRING_LEN(str) - pos; - rb_str_resize(str, rest + cl); + long ex = cl - (rest < 0 ? pos : len); + rb_str_modify_expand(str, ex); + rb_str_set_len(str, len + ex); s = RSTRING_PTR(str); - memmove(s + cl, s + pos, rest); + if (rest < 0) memmove(s + cl, s + pos, -rest); pos = 0; } else { + if (rest > 0) { + rb_str_modify_expand(str, rest); + rb_str_set_len(str, len + rest); + } + s = RSTRING_PTR(str); + if (rest > cl) memset(s + len, 0, rest - cl); pos -= cl; } - memcpy(RSTRING_PTR(str) + pos, cp, cl); + memcpy(s + pos, cp, cl); ptr->pos = pos; - RB_GC_GUARD(c); return Qnil; } @@ -838,7 +1014,7 @@ strio_ungetbyte(VALUE self, VALUE c) static VALUE strio_readchar(VALUE self) { - VALUE c = rb_funcall2(self, rb_intern("getc"), 0, 0); + VALUE c = rb_funcallv(self, rb_intern("getc"), 0, 0); if (NIL_P(c)) rb_eof_error(); return c; } @@ -852,7 +1028,7 @@ strio_readchar(VALUE self) static VALUE strio_readbyte(VALUE self) { - VALUE c = rb_funcall2(self, rb_intern("getbyte"), 0, 0); + VALUE c = rb_funcallv(self, rb_intern("getbyte"), 0, 0); if (NIL_P(c)) rb_eof_error(); return c; } @@ -878,18 +1054,6 @@ strio_each_char(VALUE self) } /* - * This is a deprecated alias for <code>each_char</code>. - */ -static VALUE -strio_chars(VALUE self) -{ - rb_warn("StringIO#chars is deprecated; use #each_char instead"); - if (!rb_block_given_p()) - return rb_enumeratorize(self, ID2SYM(rb_intern("each_char")), 0, 0); - return strio_each_char(self); -} - -/* * call-seq: * strio.each_codepoint {|c| block } -> strio * strio.each_codepoint -> anEnumerator @@ -907,32 +1071,16 @@ strio_each_codepoint(VALUE self) RETURN_ENUMERATOR(self, 0, 0); ptr = readable(self); - enc = rb_enc_get(ptr->string); - for (;;) { - if (ptr->pos >= RSTRING_LEN(ptr->string)) { - return self; - } - + enc = get_enc(ptr); + while ((ptr = strio_to_read(self)) != NULL) { c = rb_enc_codepoint_len(RSTRING_PTR(ptr->string)+ptr->pos, RSTRING_END(ptr->string), &n, enc); - rb_yield(UINT2NUM(c)); ptr->pos += n; + rb_yield(UINT2NUM(c)); } return self; } -/* - * This is a deprecated alias for <code>each_codepoint</code>. - */ -static VALUE -strio_codepoints(VALUE self) -{ - rb_warn("StringIO#codepoints is deprecated; use #each_codepoint instead"); - if (!rb_block_given_p()) - return rb_enumeratorize(self, ID2SYM(rb_intern("each_codepoint")), 0, 0); - return strio_each_codepoint(self); -} - /* Boyer-Moore search: copied from regex.c */ static void bm_init_skip(long *skip, const char *pat, long m) @@ -966,14 +1114,19 @@ bm_search(const char *little, long llen, const char *big, long blen, const long return -1; } -static VALUE -strio_getline(int argc, VALUE *argv, struct StringIO *ptr) +struct getline_arg { + VALUE rs; + long limit; + unsigned int chomp: 1; +}; + +static struct getline_arg * +prepare_getline_args(struct getline_arg *arg, int argc, VALUE *argv) { - const char *s, *e, *p; - long n, limit = 0; - VALUE str, lim; + VALUE str, lim, opts; + long limit = -1; - rb_scan_args(argc, argv, "02", &str, &lim); + argc = rb_scan_args(argc, argv, "02:", &str, &lim, &opts); switch (argc) { case 0: str = rb_rs; @@ -984,7 +1137,6 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) VALUE tmp = rb_check_string_type(str); if (NIL_P(tmp)) { limit = NUM2LONG(str); - if (limit == 0) return rb_str_new(0,0); str = rb_rs; } else { @@ -998,6 +1150,39 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) if (!NIL_P(lim)) limit = NUM2LONG(lim); break; } + arg->rs = str; + arg->limit = limit; + arg->chomp = 0; + if (!NIL_P(opts)) { + static ID keywords[1]; + VALUE vchomp; + if (!keywords[0]) { + keywords[0] = rb_intern_const("chomp"); + } + rb_get_kwargs(opts, keywords, 0, 1, &vchomp); + arg->chomp = (vchomp != Qundef) && RTEST(vchomp); + } + return arg; +} + +static inline int +chomp_newline_width(const char *s, const char *e) +{ + if (e > s && *--e == '\n') { + if (e > s && *--e == '\r') return 2; + return 1; + } + return 0; +} + +static VALUE +strio_getline(struct getline_arg *arg, struct StringIO *ptr) +{ + const char *s, *e, *p; + long n, limit = arg->limit; + VALUE str = arg->rs; + int w = 0; + rb_encoding *enc = get_enc(ptr); if (ptr->pos >= (n = RSTRING_LEN(ptr->string))) { return Qnil; @@ -1005,15 +1190,19 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) s = RSTRING_PTR(ptr->string); e = s + RSTRING_LEN(ptr->string); s += ptr->pos; - if (limit > 0 && s + limit < e) { - e = rb_enc_right_char_head(s, s + limit, e, rb_enc_get(ptr->string)); + if (limit > 0 && (size_t)limit < (size_t)(e - s)) { + e = rb_enc_right_char_head(s, s + limit, e, get_enc(ptr)); } if (NIL_P(str)) { - str = strio_substr(ptr, ptr->pos, e - s); + if (arg->chomp) { + w = chomp_newline_width(s, e); + } + str = strio_substr(ptr, ptr->pos, e - s - w, enc); } else if ((n = RSTRING_LEN(str)) == 0) { p = s; - while (*p == '\n') { + while (p[(p + 1 < e) && (*p == '\r') && 0] == '\n') { + p += *p == '\r'; if (++p == e) { return Qnil; } @@ -1022,23 +1211,34 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) while ((p = memchr(p, '\n', e - p)) && (p != e)) { if (*++p == '\n') { e = p + 1; + w = (arg->chomp ? 1 : 0); + break; + } + else if (*p == '\r' && p < e && p[1] == '\n') { + e = p + 2; + w = (arg->chomp ? 2 : 0); break; } } - str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s); + if (!w && arg->chomp) { + w = chomp_newline_width(s, e); + } + str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s - w, enc); } else if (n == 1) { if ((p = memchr(s, RSTRING_PTR(str)[0], e - s)) != 0) { e = p + 1; + w = (arg->chomp ? (p > s && *(p-1) == '\r') + 1 : 0); } - str = strio_substr(ptr, ptr->pos, e - s); + str = strio_substr(ptr, ptr->pos, e - s - w, enc); } else { - if (n < e - s) { - if (e - s < 1024) { + if (n < e - s + arg->chomp) { + /* unless chomping, RS at the end does not matter */ + if (e - s < 1024 || n == e - s) { for (p = s; p + n <= e; ++p) { if (MEMCMP(p, RSTRING_PTR(str), char, n) == 0) { - e = p + n; + e = p + (arg->chomp ? 0 : n); break; } } @@ -1048,11 +1248,11 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) p = RSTRING_PTR(str); bm_init_skip(skip, p, n); if ((pos = bm_search(p, n, s, e - s, skip)) >= 0) { - e = s + pos + n; + e = s + pos + (arg->chomp ? 0 : n); } } } - str = strio_substr(ptr, ptr->pos, e - s); + str = strio_substr(ptr, ptr->pos, e - s - w, enc); } ptr->pos = e - RSTRING_PTR(ptr->string); ptr->lineno++; @@ -1061,48 +1261,55 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) /* * call-seq: - * strio.gets(sep=$/) -> string or nil - * strio.gets(limit) -> string or nil - * strio.gets(sep, limit) -> string or nil + * strio.gets(sep=$/, chomp: false) -> string or nil + * strio.gets(limit, chomp: false) -> string or nil + * strio.gets(sep, limit, chomp: false) -> string or nil * * See IO#gets. */ static VALUE strio_gets(int argc, VALUE *argv, VALUE self) { - VALUE str = strio_getline(argc, argv, readable(self)); + struct getline_arg arg; + VALUE str; + if (prepare_getline_args(&arg, argc, argv)->limit == 0) { + struct StringIO *ptr = readable(self); + return rb_enc_str_new(0, 0, get_enc(ptr)); + } + + str = strio_getline(&arg, readable(self)); rb_lastline_set(str); return str; } /* * call-seq: - * strio.readline(sep=$/) -> string - * strio.readline(limit) -> string or nil - * strio.readline(sep, limit) -> string or nil + * strio.readline(sep=$/, chomp: false) -> string + * strio.readline(limit, chomp: false) -> string or nil + * strio.readline(sep, limit, chomp: false) -> string or nil * * See IO#readline. */ static VALUE strio_readline(int argc, VALUE *argv, VALUE self) { - VALUE line = rb_funcall2(self, rb_intern("gets"), argc, argv); + VALUE line = rb_funcallv_kw(self, rb_intern("gets"), argc, argv, RB_PASS_CALLED_KEYWORDS); if (NIL_P(line)) rb_eof_error(); return line; } /* * call-seq: - * strio.each(sep=$/) {|line| block } -> strio - * strio.each(limit) {|line| block } -> strio - * strio.each(sep, limit) {|line| block } -> strio - * strio.each(...) -> anEnumerator + * strio.each(sep=$/, chomp: false) {|line| block } -> strio + * strio.each(limit, chomp: false) {|line| block } -> strio + * strio.each(sep, limit, chomp: false) {|line| block } -> strio + * strio.each(...) -> anEnumerator * - * strio.each_line(sep=$/) {|line| block } -> strio - * strio.each_line(limit) {|line| block } -> strio - * strio.each_line(sep,limit) {|line| block } -> strio - * strio.each_line(...) -> anEnumerator + * strio.each_line(sep=$/, chomp: false) {|line| block } -> strio + * strio.each_line(limit, chomp: false) {|line| block } -> strio + * strio.each_line(sep, limit, chomp: false) {|line| block } -> strio + * strio.each_line(...) -> anEnumerator * * See IO#each. */ @@ -1110,38 +1317,26 @@ static VALUE strio_each(int argc, VALUE *argv, VALUE self) { VALUE line; + struct getline_arg arg; StringIO(self); RETURN_ENUMERATOR(self, argc, argv); - if (argc > 0 && !NIL_P(argv[argc-1]) && NIL_P(rb_check_string_type(argv[argc-1])) && - NUM2LONG(argv[argc-1]) == 0) { + if (prepare_getline_args(&arg, argc, argv)->limit == 0) { rb_raise(rb_eArgError, "invalid limit: 0 for each_line"); } - while (!NIL_P(line = strio_getline(argc, argv, readable(self)))) { + while (!NIL_P(line = strio_getline(&arg, readable(self)))) { rb_yield(line); } return self; } /* - * This is a deprecated alias for <code>each_line</code>. - */ -static VALUE -strio_lines(int argc, VALUE *argv, VALUE self) -{ - rb_warn("StringIO#lines is deprecated; use #each_line instead"); - if (!rb_block_given_p()) - return rb_enumeratorize(self, ID2SYM(rb_intern("each_line")), argc, argv); - return strio_each(argc, argv, self); -} - -/* * call-seq: - * strio.readlines(sep=$/) -> array - * strio.readlines(limit) -> array - * strio.readlines(sep,limit) -> array + * strio.readlines(sep=$/, chomp: false) -> array + * strio.readlines(limit, chomp: false) -> array + * strio.readlines(sep, limit, chomp: false) -> array * * See IO#readlines. */ @@ -1149,15 +1344,15 @@ static VALUE strio_readlines(int argc, VALUE *argv, VALUE self) { VALUE ary, line; + struct getline_arg arg; StringIO(self); ary = rb_ary_new(); - if (argc > 0 && !NIL_P(argv[argc-1]) && NIL_P(rb_check_string_type(argv[argc-1])) && - NUM2LONG(argv[argc-1]) == 0) { + if (prepare_getline_args(&arg, argc, argv)->limit == 0) { rb_raise(rb_eArgError, "invalid limit: 0 for readlines"); } - while (!NIL_P(line = strio_getline(argc, argv, readable(self)))) { + while (!NIL_P(line = strio_getline(&arg, readable(self)))) { rb_ary_push(ary, line); } return ary; @@ -1165,31 +1360,47 @@ strio_readlines(int argc, VALUE *argv, VALUE self) /* * call-seq: - * strio.write(string) -> integer - * strio.syswrite(string) -> integer + * strio.write(string, ...) -> integer + * strio.syswrite(string) -> integer * - * Appends the given string to the underlying buffer string of *strio*. + * Appends the given string to the underlying buffer string. * The stream must be opened for writing. If the argument is not a * string, it will be converted to a string using <code>to_s</code>. * Returns the number of bytes written. See IO#write. */ static VALUE +strio_write_m(int argc, VALUE *argv, VALUE self) +{ + long len = 0; + while (argc-- > 0) { + /* StringIO can't exceed long limit */ + len += strio_write(self, *argv++); + } + return LONG2NUM(len); +} + +static long strio_write(VALUE self, VALUE str) { struct StringIO *ptr = writable(self); long len, olen; rb_encoding *enc, *enc2; rb_encoding *const ascii8bit = rb_ascii8bit_encoding(); + rb_encoding *usascii = 0; if (!RB_TYPE_P(str, T_STRING)) str = rb_obj_as_string(str); - enc = rb_enc_get(ptr->string); + enc = get_enc(ptr); enc2 = rb_enc_get(str); - if (enc != enc2 && enc != ascii8bit) { - str = rb_str_conv_enc(str, enc2, enc); + if (enc != enc2 && enc != ascii8bit && enc != (usascii = rb_usascii_encoding())) { + VALUE converted = rb_str_conv_enc(str, enc2, enc); + if (converted == str && enc2 != ascii8bit && enc2 != usascii) { /* conversion failed */ + rb_enc_check(rb_enc_from_encoding(enc), str); + } + str = converted; } len = RSTRING_LEN(str); - if (len == 0) return INT2FIX(0); + if (len == 0) return 0; check_modifiable(ptr); olen = RSTRING_LEN(ptr->string); if (ptr->flags & FMODE_APPEND) { @@ -1198,7 +1409,6 @@ strio_write(VALUE self, VALUE str) if (ptr->pos == olen) { if (enc == ascii8bit || enc2 == ascii8bit) { rb_enc_str_buf_cat(ptr->string, RSTRING_PTR(str), len, enc); - OBJ_INFECT(ptr->string, str); } else { rb_str_buf_append(ptr->string, str); @@ -1207,12 +1417,10 @@ strio_write(VALUE self, VALUE str) else { strio_extend(ptr, ptr->pos, len); memmove(RSTRING_PTR(ptr->string)+ptr->pos, RSTRING_PTR(str), len); - OBJ_INFECT(ptr->string, str); } - OBJ_INFECT(ptr->string, self); RB_GC_GUARD(str); ptr->pos += len; - return LONG2NUM(len); + return len; } /* @@ -1293,6 +1501,7 @@ strio_read(int argc, VALUE *argv, VALUE self) StringValue(str); rb_str_modify(str); } + /* fall through */ case 1: if (!NIL_P(argv[0])) { len = NUM2LONG(argv[0]); @@ -1310,12 +1519,14 @@ strio_read(int argc, VALUE *argv, VALUE self) case 0: len = RSTRING_LEN(ptr->string); if (len <= ptr->pos) { + rb_encoding *enc = get_enc(ptr); if (NIL_P(str)) { str = rb_str_new(0, 0); } else { rb_str_resize(str, 0); } + rb_enc_associate(str, enc); return str; } else { @@ -1323,11 +1534,11 @@ strio_read(int argc, VALUE *argv, VALUE self) } break; default: - rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + rb_error_arity(argc, 0, 2); } if (NIL_P(str)) { - str = strio_substr(ptr, ptr->pos, len); - if (binary) rb_enc_associate(str, rb_ascii8bit_encoding()); + rb_encoding *enc = binary ? rb_ascii8bit_encoding() : get_enc(ptr); + str = strio_substr(ptr, ptr->pos, len, enc); } else { long rest = RSTRING_LEN(ptr->string) - ptr->pos; @@ -1354,7 +1565,7 @@ strio_read(int argc, VALUE *argv, VALUE self) static VALUE strio_sysread(int argc, VALUE *argv, VALUE self) { - VALUE val = rb_funcall2(self, rb_intern("read"), argc, argv); + VALUE val = rb_funcallv_kw(self, rb_intern("read"), argc, argv, RB_PASS_CALLED_KEYWORDS); if (NIL_P(val)) { rb_eof_error(); } @@ -1372,20 +1583,17 @@ static VALUE strio_read_nonblock(int argc, VALUE *argv, VALUE self) { VALUE opts = Qnil, val; - int no_exception = 0; rb_scan_args(argc, argv, "11:", NULL, NULL, &opts); if (!NIL_P(opts)) { argc--; - - if (Qfalse == rb_hash_aref(opts, sym_exception)) - no_exception = 1; } val = strio_read(argc, argv, self); if (NIL_P(val)) { - if (no_exception) + if (!NIL_P(opts) && + rb_hash_lookup2(opts, sym_exception, Qundef) == Qfalse) return Qnil; else rb_eof_error(); @@ -1432,7 +1640,7 @@ strio_size(VALUE self) * call-seq: * strio.truncate(integer) -> 0 * - * Truncates the buffer string to at most _integer_ bytes. The *strio* + * Truncates the buffer string to at most _integer_ bytes. The stream * must be opened for writing. */ static VALUE @@ -1456,13 +1664,15 @@ strio_truncate(VALUE self, VALUE len) * strio.external_encoding => encoding * * Returns the Encoding object that represents the encoding of the file. - * If strio is write mode and no encoding is specified, returns <code>nil</code>. + * If the stream is write mode and no encoding is specified, returns + * +nil+. */ static VALUE strio_external_encoding(VALUE self) { - return rb_enc_from_encoding(rb_enc_get(StringIO(self)->string)); + struct StringIO *ptr = StringIO(self); + return rb_enc_from_encoding(get_enc(ptr)); } /* @@ -1470,13 +1680,13 @@ strio_external_encoding(VALUE self) * strio.internal_encoding => encoding * * Returns the Encoding of the internal string if conversion is - * specified. Otherwise returns nil. + * specified. Otherwise returns +nil+. */ static VALUE strio_internal_encoding(VALUE self) { - return Qnil; + return Qnil; } /* @@ -1493,7 +1703,7 @@ static VALUE strio_set_encoding(int argc, VALUE *argv, VALUE self) { rb_encoding* enc; - VALUE str = StringIO(self)->string; + struct StringIO *ptr = StringIO(self); VALUE ext_enc, int_enc, opt; argc = rb_scan_args(argc, argv, "11:", &ext_enc, &int_enc, &opt); @@ -1504,30 +1714,59 @@ strio_set_encoding(int argc, VALUE *argv, VALUE self) else { enc = rb_to_encoding(ext_enc); } - rb_enc_associate(str, enc); + ptr->enc = enc; + if (WRITABLE(self)) { + rb_enc_associate(ptr->string, enc); + } + return self; } +static VALUE +strio_set_encoding_by_bom(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + + if (!set_encoding_by_bom(ptr)) return Qnil; + return rb_enc_from_encoding(ptr->enc); +} + /* - * Pseudo I/O on String object. + * Pseudo I/O on String object, with interface corresponding to IO. * - * Commonly used to simulate `$stdio` or `$stderr` + * Commonly used to simulate <code>$stdio</code> or <code>$stderr</code> * * === Examples * * require 'stringio' * + * # Writing stream emulation * io = StringIO.new * io.puts "Hello World" - * io.string #=> "Hello World" + * io.string #=> "Hello World\n" + * + * # Reading stream emulation + * io = StringIO.new "first\nsecond\nlast\n" + * io.getc #=> "f" + * io.gets #=> "irst\n" + * io.read #=> "second\nlast\n" */ void Init_stringio(void) { - VALUE StringIO = rb_define_class("StringIO", rb_cData); +#undef rb_intern + +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + + VALUE StringIO = rb_define_class("StringIO", rb_cObject); + + rb_define_const(StringIO, "VERSION", rb_str_new_cstr(STRINGIO_VERSION)); rb_include_module(StringIO, rb_mEnumerable); rb_define_alloc_func(StringIO, strio_s_allocate); + rb_define_singleton_method(StringIO, "new", strio_s_new, -1); rb_define_singleton_method(StringIO, "open", strio_s_open, -1); rb_define_method(StringIO, "initialize", strio_initialize, -1); rb_define_method(StringIO, "initialize_copy", strio_copy, 1); @@ -1566,13 +1805,9 @@ Init_stringio(void) rb_define_method(StringIO, "each", strio_each, -1); rb_define_method(StringIO, "each_line", strio_each, -1); - rb_define_method(StringIO, "lines", strio_lines, -1); rb_define_method(StringIO, "each_byte", strio_each_byte, 0); - rb_define_method(StringIO, "bytes", strio_bytes, 0); rb_define_method(StringIO, "each_char", strio_each_char, 0); - rb_define_method(StringIO, "chars", strio_chars, 0); rb_define_method(StringIO, "each_codepoint", strio_each_codepoint, 0); - rb_define_method(StringIO, "codepoints", strio_codepoints, 0); rb_define_method(StringIO, "getc", strio_getc, 0); rb_define_method(StringIO, "ungetc", strio_ungetc, 1); rb_define_method(StringIO, "ungetbyte", strio_ungetbyte, 1); @@ -1581,7 +1816,7 @@ Init_stringio(void) rb_define_method(StringIO, "readlines", strio_readlines, -1); rb_define_method(StringIO, "read", strio_read, -1); - rb_define_method(StringIO, "write", strio_write, 1); + rb_define_method(StringIO, "write", strio_write_m, -1); rb_define_method(StringIO, "putc", strio_putc, 1); /* @@ -1605,6 +1840,7 @@ Init_stringio(void) rb_define_method(StringIO, "external_encoding", strio_external_encoding, 0); rb_define_method(StringIO, "internal_encoding", strio_internal_encoding, 0); rb_define_method(StringIO, "set_encoding", strio_set_encoding, -1); + rb_define_method(StringIO, "set_encoding_by_bom", strio_set_encoding_by_bom, 0); { VALUE mReadable = rb_define_module_under(rb_cIO, "generic_readable"); |
