summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-12-10 07:48:17 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-12-10 07:48:17 +0000
commit: 516b9026fd36801deb95f4cf5d0a2fb19527301a (patch)
tree: 163402f4d2af886740304bd992de583252d95d77
parent: d5b5338421c1a9c4128fad839590fcbb6fcdea70 (diff)
string.c: chompped_length chomp_rs
* string.c (chompped_length, chomp_rs): extract from rb_str_chomp_bang to share with rb_str_chomp.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48756 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- string.c 194
1 files changed, 113 insertions, 81 deletions
diff --git a/string.c b/string.c
index 4d82d9ba87..6a3755ea49 100644
--- a/string.c
+++ b/string.c
@@ -7044,110 +7044,142 @@ rb_str_chop(VALUE str)
}
-/*
- * call-seq:
- * str.chomp!(separator=$/) -> str or nil
- *
- * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
- * returning <i>str</i>, or <code>nil</code> if no modifications were made.
- */
-
-static VALUE
-rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
+static long
+chompped_length(VALUE str, VALUE rs)
{
rb_encoding *enc;
- VALUE rs;
int newline;
- char *p, *pp, *e;
- long len, rslen;
+ char *pp, *e, *rsptr;
+ long rslen;
+ char *const p = RSTRING_PTR(str);
+ long len = RSTRING_LEN(str);
- str_modify_keep_cr(str);
- len = RSTRING_LEN(str);
- if (len == 0) return Qnil;
- p = RSTRING_PTR(str);
+ if (len == 0) return 0;
e = p + len;
- if (argc == 0) {
- rs = rb_rs;
- if (rs == rb_default_rs) {
- smart_chomp:
- enc = rb_enc_get(str);
- if (rb_enc_mbminlen(enc) > 1) {
- pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
- if (rb_enc_is_newline(pp, e, enc)) {
+ enc = rb_enc_get(str);
+ if (rs == rb_default_rs) {
+ smart_chomp:
+ if (rb_enc_mbminlen(enc) > 1) {
+ pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
+ if (rb_enc_is_newline(pp, e, enc)) {
+ e = pp;
+ }
+ pp = e - rb_enc_mbminlen(enc);
+ if (pp >= p) {
+ pp = rb_enc_left_char_head(p, pp, e, enc);
+ if (rb_enc_ascget(pp, e, 0, enc) == '\r') {
e = pp;
}
- pp = e - rb_enc_mbminlen(enc);
+ }
+ }
+ else {
+ switch (*(e-1)) { /* not e[-1] to get rid of VC bug */
+ case '\n':
+ if (--e > p && *(e-1) == '\r') {
+ --e;
+ }
+ break;
+ case '\r':
+ --e;
+ break;
+ }
+ }
+ return e - p;
+ }
+
+ RSTRING_GETMEM(rs, rsptr, rslen);
+ if (rslen == 0) {
+ if (rb_enc_mbminlen(enc) > 1) {
+ while (e > p) {
+ pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
+ if (!rb_enc_is_newline(pp, e, enc)) break;
+ e = pp;
+ pp -= rb_enc_mbminlen(enc);
if (pp >= p) {
pp = rb_enc_left_char_head(p, pp, e, enc);
if (rb_enc_ascget(pp, e, 0, enc) == '\r') {
e = pp;
}
}
- if (e == RSTRING_END(str)) {
- return Qnil;
- }
- len = e - RSTRING_PTR(str);
- STR_SET_LEN(str, len);
}
- else {
- if (RSTRING_PTR(str)[len-1] == '\n') {
- STR_DEC_LEN(str);
- if (RSTRING_LEN(str) > 0 &&
- RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
- STR_DEC_LEN(str);
- }
- }
- else if (RSTRING_PTR(str)[len-1] == '\r') {
- STR_DEC_LEN(str);
- }
- else {
- return Qnil;
- }
+ }
+ else {
+ while (e > p && *(e-1) == '\n') {
+ --e;
+ if (e > p && *(e-1) == '\r')
+ --e;
}
- RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
- return str;
}
+ return e - p;
}
- else {
- rb_scan_args(argc, argv, "01", &rs);
+ if (rslen > len) return len;
+
+ enc = rb_enc_check(str, rs);
+ if (is_broken_string(rs)) {
+ return len;
}
- if (NIL_P(rs)) return Qnil;
- StringValue(rs);
- rslen = RSTRING_LEN(rs);
- if (rslen == 0) {
- while (len>0 && p[len-1] == '\n') {
- len--;
- if (len>0 && p[len-1] == '\r')
- len--;
+ newline = rsptr[rslen-1];
+ if (rslen == rb_enc_mbminlen(enc)) {
+ if (rslen == 1) {
+ if (newline == '\n')
+ goto smart_chomp;
}
- if (len < RSTRING_LEN(str)) {
- STR_SET_LEN(str, len);
- RSTRING_PTR(str)[len] = '\0';
- return str;
+ else {
+ if (rb_enc_is_newline(rsptr, rsptr+rslen, enc))
+ goto smart_chomp;
}
- return Qnil;
}
- if (rslen > len) return Qnil;
- newline = RSTRING_PTR(rs)[rslen-1];
- if (rslen == 1 && newline == '\n')
- goto smart_chomp;
- enc = rb_enc_check(str, rs);
- if (is_broken_string(rs)) {
- return Qnil;
- }
pp = e - rslen;
if (p[len-1] == newline &&
(rslen <= 1 ||
- memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
- if (rb_enc_left_char_head(p, pp, e, enc) != pp)
- return Qnil;
- if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
- ENC_CODERANGE_CLEAR(str);
+ memcmp(rsptr, pp, rslen) == 0)) {
+ if (rb_enc_left_char_head(p, pp, e, enc) == pp)
+ return len - rslen;
+ RB_GC_GUARD(rs);
+ }
+ return len;
+}
+
+static VALUE
+chomp_rs(int argc, const VALUE *argv)
+{
+ rb_check_arity(argc, 0, 1);
+ if (argc > 0) {
+ VALUE rs = argv[0];
+ if (!NIL_P(rs)) StringValue(rs);
+ return rs;
+ }
+ else {
+ return rb_rs;
+ }
+}
+
+/*
+ * call-seq:
+ * str.chomp!(separator=$/) -> str or nil
+ *
+ * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
+ * returning <i>str</i>, or <code>nil</code> if no modifications were made.
+ */
+
+static VALUE
+rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
+{
+ VALUE rs;
+ long olen;
+ str_modify_keep_cr(str);
+ if ((olen = RSTRING_LEN(str)) > 0 && !NIL_P(rs = chomp_rs(argc, argv))) {
+ long len;
+ len = chompped_length(str, rs);
+ if (len < olen) {
+ STR_SET_LEN(str, len);
+ RSTRING_PTR(str)[len] = '\0';
+ if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
+ ENC_CODERANGE_CLEAR(str);
+ }
+ return str;
}
- STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
- RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
- return str;
}
return Qnil;
}
@@ -7178,9 +7210,9 @@ rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
static VALUE
rb_str_chomp(int argc, VALUE *argv, VALUE str)
{
- str = rb_str_dup(str);
- rb_str_chomp_bang(argc, argv, str);
- return str;
+ VALUE rs = chomp_rs(argc, argv);
+ if (NIL_P(rs)) return rb_str_dup(str);
+ return rb_str_subseq(str, 0, chompped_length(str, rs));
}
static long