summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-11-05 07:28:09 +0000
committer nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-11-05 07:28:09 +0000
commit a2144bd72aad7c25e160cf283f957d59fe7c90b2 (patch)
tree 06a002592baeb95dc5db6eebf2e697f504b26b20
parent cc7b3feb097fce18b207da8366f247e8b487fe34 (diff)
chomp option
* io.c (extract_getline_opts): extract chomp option. [Feature #12553]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56581 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- NEWS 5
-rw-r--r-- internal.h 1
-rw-r--r-- io.c 114
-rw-r--r-- string.c 32
-rw-r--r-- test/ruby/test_io.rb 49
6 files changed, 152 insertions, 54 deletions
diff --git a/ChangeLog b/ChangeLog
index 54d03aa5ea..a0c58e74f9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Sat Nov 5 16:28:07 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * io.c (extract_getline_opts): extract chomp option.
+ [Feature #12553]
+
Sat Nov 5 15:58:24 2016 Sho Hashimoto <sho-h@netlab.jp>
* tool/mkconfig.rb: [DOC] add rbconfig documentation.
diff --git a/NEWS b/NEWS
index 3666aa4c34..439057a6e1 100644
--- a/NEWS
+++ b/NEWS
@@ -83,6 +83,11 @@ with all sufficient information, see the ChangeLog file or Redmine
* Integer#digits for extracting columns of place-value notation [Feature #12447]
+* IO
+
+ * IO#gets, IO#readline, IO#each_line, IO#readlines, IO.foreach now take
+ an optional keyword argument, chomp flag.
+
* Kernel
* Kernel#clone now takes an optional keyword argument, freeze flag.
diff --git a/internal.h b/internal.h
index c7e80aa978..847de98eee 100644
--- a/internal.h
+++ b/internal.h
@@ -1402,6 +1402,7 @@ VALUE rb_id_quote_unprintable(ID);
char *rb_str_fill_terminator(VALUE str, const int termlen);
void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen);
VALUE rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg);
+VALUE rb_str_chomp_string(VALUE str, VALUE chomp);
#ifdef RUBY_ENCODING_H
VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc);
VALUE rb_str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long len,
diff --git a/io.c b/io.c
index 44440d95ae..1f00f6c4fa 100644
--- a/io.c
+++ b/io.c
@@ -2977,7 +2977,7 @@ swallow(rb_io_t *fptr, int term)
}
static VALUE
-rb_io_getline_fast(rb_io_t *fptr, rb_encoding *enc)
+rb_io_getline_fast(rb_io_t *fptr, rb_encoding *enc, int chomp)
{
VALUE str = Qnil;
int len = 0;
@@ -2990,21 +2990,27 @@ rb_io_getline_fast(rb_io_t *fptr, rb_encoding *enc)
if (pending > 0) {
const char *p = READ_DATA_PENDING_PTR(fptr);
const char *e;
+ int chomplen = 0;
e = memchr(p, '\n', pending);
if (e) {
pending = (int)(e - p + 1);
+ if (chomp) {
+ chomplen = (pending > 1 && *(e-1) == '\r') + 1;
+ }
}
if (NIL_P(str)) {
- str = rb_str_new(p, pending);
+ str = rb_str_new(p, pending - chomplen);
fptr->rbuf.off += pending;
fptr->rbuf.len -= pending;
}
else {
- rb_str_resize(str, len + pending);
- read_buffered_data(RSTRING_PTR(str)+len, pending, fptr);
+ rb_str_resize(str, len + pending - chomplen);
+ read_buffered_data(RSTRING_PTR(str)+len, pending - chomplen, fptr);
+ fptr->rbuf.off += chomplen;
+ fptr->rbuf.len -= chomplen;
}
- len += pending;
+ len += pending - chomplen;
if (cr != ENC_CODERANGE_BROKEN)
pos += rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + len, enc, &cr);
if (e) break;
@@ -3024,14 +3030,30 @@ struct getline_arg {
VALUE io;
VALUE rs;
long limit;
+ unsigned int chomp: 1;
};
static void
-extract_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit)
+extract_getline_opts(VALUE opts, struct getline_arg *args)
+{
+ int chomp = FALSE;
+ if (!NIL_P(opts)) {
+ static ID kwds[1];
+ VALUE vchomp;
+ if (!kwds[0]) {
+ kwds[0] = rb_intern_const("chomp");
+ }
+ rb_get_kwargs(opts, kwds, 0, -2, &vchomp);
+ chomp = (vchomp != Qundef) && RTEST(vchomp);
+ }
+ args->chomp = chomp;
+}
+
+static void
+extract_getline_args(int argc, VALUE *argv, struct getline_arg *args)
{
VALUE rs = rb_rs, lim = Qnil;
- rb_check_arity(argc, 0, 2);
if (argc == 1) {
VALUE tmp = Qnil;
@@ -3047,8 +3069,8 @@ extract_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit)
if (!NIL_P(rs))
StringValue(rs);
}
- *rsp = rs;
- *limit = NIL_P(lim) ? -1L : NUM2LONG(lim);
+ args->rs = rs;
+ args->limit = NIL_P(lim) ? -1L : NUM2LONG(lim);
}
static void
@@ -3081,14 +3103,17 @@ check_getline_args(VALUE *rsp, long *limit, VALUE io)
}
static void
-prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io)
+prepare_getline_args(int argc, VALUE *argv, struct getline_arg *args, VALUE io)
{
- extract_getline_args(argc, argv, rsp, limit);
- check_getline_args(rsp, limit, io);
+ VALUE opts;
+ argc = rb_scan_args(argc, argv, "02:", NULL, NULL, &opts);
+ extract_getline_args(argc, argv, args);
+ extract_getline_opts(opts, args);
+ check_getline_args(&args->rs, &args->limit, io);
}
static VALUE
-rb_io_getline_0(VALUE rs, long limit, rb_io_t *fptr)
+rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr)
{
VALUE str = Qnil;
int nolimit = 0;
@@ -3098,6 +3123,7 @@ rb_io_getline_0(VALUE rs, long limit, rb_io_t *fptr)
if (NIL_P(rs) && limit < 0) {
str = read_all(fptr, 0, Qnil);
if (RSTRING_LEN(str) == 0) return Qnil;
+ if (chomp) rb_str_chomp_string(str, rb_default_rs);
}
else if (limit == 0) {
return rb_enc_str_new(0, 0, io_read_encoding(fptr));
@@ -3105,7 +3131,7 @@ rb_io_getline_0(VALUE rs, long limit, rb_io_t *fptr)
else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) &&
rb_enc_asciicompat(enc = io_read_encoding(fptr))) {
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
- return rb_io_getline_fast(fptr, enc);
+ return rb_io_getline_fast(fptr, enc, chomp);
}
else {
int c, newline = -1;
@@ -3113,6 +3139,7 @@ rb_io_getline_0(VALUE rs, long limit, rb_io_t *fptr)
long rslen = 0;
int rspara = 0;
int extra_limit = 16;
+ int chomp_cr = chomp;
SET_BINARY_MODE(fptr);
enc = io_read_encoding(fptr);
@@ -3137,6 +3164,7 @@ rb_io_getline_0(VALUE rs, long limit, rb_io_t *fptr)
rsptr = RSTRING_PTR(rs);
}
newline = (unsigned char)rsptr[rslen - 1];
+ chomp_cr = chomp && rslen == 1 && newline == '\n';
}
/* MS - Optimization */
@@ -3151,7 +3179,13 @@ rb_io_getline_0(VALUE rs, long limit, rb_io_t *fptr)
pp = rb_enc_left_char_head(s, p, e, enc);
if (pp != p) continue;
if (!rspara) rscheck(rsptr, rslen, rs);
- if (memcmp(p, rsptr, rslen) == 0) break;
+ if (memcmp(p, rsptr, rslen) == 0) {
+ if (chomp) {
+ if (chomp_cr && p > s && *(p-1) == '\r') --p;
+ rb_str_set_len(str, p - s);
+ }
+ break;
+ }
}
if (limit == 0) {
s = RSTRING_PTR(str);
@@ -3185,7 +3219,7 @@ rb_io_getline_0(VALUE rs, long limit, rb_io_t *fptr)
}
static VALUE
-rb_io_getline_1(VALUE rs, long limit, VALUE io)
+rb_io_getline_1(VALUE rs, long limit, int chomp, VALUE io)
{
rb_io_t *fptr;
int old_lineno, new_lineno;
@@ -3193,7 +3227,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
GetOpenFile(io, fptr);
old_lineno = fptr->lineno;
- str = rb_io_getline_0(rs, limit, fptr);
+ str = rb_io_getline_0(rs, limit, chomp, fptr);
if (!NIL_P(str) && (new_lineno = fptr->lineno) != old_lineno) {
if (io == ARGF.current_file) {
ARGF.lineno += new_lineno - old_lineno;
@@ -3210,17 +3244,16 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
static VALUE
rb_io_getline(int argc, VALUE *argv, VALUE io)
{
- VALUE rs;
- long limit;
+ struct getline_arg args;
- prepare_getline_args(argc, argv, &rs, &limit, io);
- return rb_io_getline_1(rs, limit, io);
+ prepare_getline_args(argc, argv, &args, io);
+ return rb_io_getline_1(args.rs, args.limit, args.chomp, io);
}
VALUE
rb_io_gets(VALUE io)
{
- return rb_io_getline_1(rb_default_rs, -1, io);
+ return rb_io_getline_1(rb_default_rs, -1, FALSE, io);
}
VALUE
@@ -3228,7 +3261,7 @@ rb_io_gets_internal(VALUE io)
{
rb_io_t *fptr;
GetOpenFile(io, fptr);
- return rb_io_getline_0(rb_default_rs, -1, fptr);
+ return rb_io_getline_0(rb_default_rs, -1, FALSE, fptr);
}
/*
@@ -3356,7 +3389,7 @@ rb_io_readline(int argc, VALUE *argv, VALUE io)
return line;
}
-static VALUE io_readlines(VALUE rs, long limit, VALUE io);
+static VALUE io_readlines(const struct getline_arg *arg, VALUE io);
/*
* call-seq:
@@ -3379,22 +3412,21 @@ static VALUE io_readlines(VALUE rs, long limit, VALUE io);
static VALUE
rb_io_readlines(int argc, VALUE *argv, VALUE io)
{
- VALUE rs;
- long limit;
+ struct getline_arg args;
- prepare_getline_args(argc, argv, &rs, &limit, io);
- return io_readlines(rs, limit, io);
+ prepare_getline_args(argc, argv, &args, io);
+ return io_readlines(&args, io);
}
static VALUE
-io_readlines(VALUE rs, long limit, VALUE io)
+io_readlines(const struct getline_arg *arg, VALUE io)
{
VALUE line, ary;
- if (limit == 0)
+ if (arg->limit == 0)
rb_raise(rb_eArgError, "invalid limit: 0 for readlines");
ary = rb_ary_new();
- while (!NIL_P(line = rb_io_getline_1(rs, limit, io))) {
+ while (!NIL_P(line = rb_io_getline_1(arg->rs, arg->limit, arg->chomp, io))) {
rb_ary_push(ary, line);
}
return ary;
@@ -3432,14 +3464,14 @@ io_readlines(VALUE rs, long limit, VALUE io)
static VALUE
rb_io_each_line(int argc, VALUE *argv, VALUE io)
{
- VALUE str, rs;
- long limit;
+ VALUE str;
+ struct getline_arg args;
RETURN_ENUMERATOR(io, argc, argv);
- prepare_getline_args(argc, argv, &rs, &limit, io);
- if (limit == 0)
+ prepare_getline_args(argc, argv, &args, io);
+ if (args.limit == 0)
rb_raise(rb_eArgError, "invalid limit: 0 for each_line");
- while (!NIL_P(str = rb_io_getline_1(rs, limit, io))) {
+ while (!NIL_P(str = rb_io_getline_1(args.rs, args.limit, args.chomp, io))) {
rb_yield(str);
}
return io;
@@ -9738,7 +9770,7 @@ io_s_foreach(struct getline_arg *arg)
{
VALUE str;
- while (!NIL_P(str = rb_io_getline_1(arg->rs, arg->limit, arg->io))) {
+ while (!NIL_P(str = rb_io_getline_1(arg->rs, arg->limit, arg->chomp, arg->io))) {
rb_lastline_set(str);
rb_yield(str);
}
@@ -9782,9 +9814,10 @@ rb_io_s_foreach(int argc, VALUE *argv, VALUE self)
argc = rb_scan_args(argc, argv, "13:", NULL, NULL, NULL, NULL, &opt);
RETURN_ENUMERATOR(self, orig_argc, argv);
- extract_getline_args(argc-1, argv+1, &garg.rs, &garg.limit);
+ extract_getline_args(argc-1, argv+1, &garg);
open_key_args(argc, argv, opt, &arg);
if (NIL_P(arg.io)) return Qnil;
+ extract_getline_opts(opt, &garg);
check_getline_args(&garg.rs, &garg.limit, garg.io = arg.io);
return rb_ensure(io_s_foreach, (VALUE)&garg, rb_io_close, arg.io);
}
@@ -9792,7 +9825,7 @@ rb_io_s_foreach(int argc, VALUE *argv, VALUE self)
static VALUE
io_s_readlines(struct getline_arg *arg)
{
- return io_readlines(arg->rs, arg->limit, arg->io);
+ return io_readlines(arg, arg->io);
}
/*
@@ -9821,9 +9854,10 @@ rb_io_s_readlines(int argc, VALUE *argv, VALUE io)
struct getline_arg garg;
argc = rb_scan_args(argc, argv, "13:", NULL, NULL, NULL, NULL, &opt);
- extract_getline_args(argc-1, argv+1, &garg.rs, &garg.limit);
+ extract_getline_args(argc-1, argv+1, &garg);
open_key_args(argc, argv, opt, &arg);
if (NIL_P(arg.io)) return Qnil;
+ extract_getline_opts(opt, &garg);
check_getline_args(&garg.rs, &garg.limit, garg.io = arg.io);
return rb_ensure(io_s_readlines, (VALUE)&garg, rb_io_close, arg.io);
}
diff --git a/string.c b/string.c
index 68cc400167..2ca835c74a 100644
--- a/string.c
+++ b/string.c
@@ -7975,6 +7975,20 @@ chomp_rs(int argc, const VALUE *argv)
}
}
+VALUE
+rb_str_chomp_string(VALUE str, VALUE rs)
+{
+ long olen = RSTRING_LEN(str);
+ long len = chompped_length(str, rs);
+ if (len >= olen) return Qnil;
+ STR_SET_LEN(str, len);
+ TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
+ if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
+ ENC_CODERANGE_CLEAR(str);
+ }
+ return str;
+}
+
/*
* call-seq:
* str.chomp!(separator=$/) -> str or nil
@@ -7987,21 +8001,11 @@ static VALUE
rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
{
VALUE rs;
- long olen;
str_modify_keep_cr(str);
- if ((olen = RSTRING_LEN(str)) > 0 && !NIL_P(rs = chomp_rs(argc, argv))) {
- long len;
- len = chompped_length(str, rs);
- if (len < olen) {
- STR_SET_LEN(str, len);
- TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
- if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
- ENC_CODERANGE_CLEAR(str);
- }
- return str;
- }
- }
- return Qnil;
+ if (RSTRING_LEN(str) == 0) return Qnil;
+ rs = chomp_rs(argc, argv);
+ if (NIL_P(rs)) return Qnil;
+ return rb_str_chomp_string(str, rs);
}
diff --git a/test/ruby/test_io.rb b/test/ruby/test_io.rb
index 5c713451df..a5cdc7b19f 100644
--- a/test/ruby/test_io.rb
+++ b/test/ruby/test_io.rb
@@ -204,6 +204,55 @@ class TestIO < Test::Unit::TestCase
end)
end
+ def test_gets_chomp_rs
+ rs = ":"
+ pipe(proc do |w|
+ w.print "aaa:bbb"
+ w.close
+ end, proc do |r|
+ assert_equal "aaa", r.gets(rs, chomp: true)
+ assert_equal "bbb", r.gets(rs, chomp: true)
+ assert_nil r.gets(rs, chomp: true)
+ r.close
+ end)
+ end
+
+ def test_gets_chomp_default_rs
+ pipe(proc do |w|
+ w.print "aaa\r\nbbb\nccc"
+ w.close
+ end, proc do |r|
+ assert_equal "aaa", r.gets(chomp: true)
+ assert_equal "bbb", r.gets(chomp: true)
+ assert_equal "ccc", r.gets(chomp: true)
+ assert_nil r.gets
+ r.close
+ end)
+ end
+
+ def test_gets_chomp_rs_nil
+ pipe(proc do |w|
+ w.print "a\n\nb\n\n"
+ w.close
+ end, proc do |r|
+ assert_equal "a\n\nb\n", r.gets(nil, chomp: true)
+ assert_nil r.gets("")
+ r.close
+ end)
+ end
+
+ def test_gets_chomp_paragraph
+ pipe(proc do |w|
+ w.print "a\n\nb\n\n"
+ w.close
+ end, proc do |r|
+ assert_equal "a", r.gets("", chomp: true)
+ assert_equal "b", r.gets("", chomp: true)
+ assert_nil r.gets("", chomp: true)
+ r.close
+ end)
+ end
+
def test_gets_limit_extra_arg
pipe(proc do |w|
w << "0123456789\n0123456789"