summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--string.c216
-rw-r--r--test/ruby/test_string.rb111
2 files changed, 301 insertions, 26 deletions
diff --git a/string.c b/string.c
index 52d1f28cc1..51f721c41e 100644
--- a/string.c
+++ b/string.c
@@ -10184,6 +10184,22 @@ rb_str_chomp(int argc, VALUE *argv, VALUE str)
return rb_str_subseq(str, 0, chompped_length(str, rs));
}
+/*
+ * Feeds every selector in selectors[0..num_selectors) to tr_setup_table(),
+ * accumulating the result in table / *tablep / *ctablep.
+ *
+ * Each selector is converted with StringValue() and checked against str
+ * with rb_enc_check(); the encoding returned by that check is the one
+ * handed to tr_setup_table().  The third argument of tr_setup_table() is
+ * true only for the first selector.
+ */
+static void
+tr_setup_table_multi(char table[TR_TABLE_SIZE], VALUE *tablep, VALUE *ctablep,
+                     VALUE str, int num_selectors, VALUE *selectors)
+{
+    int idx = 0;
+
+    while (idx < num_selectors) {
+        VALUE sel = selectors[idx];
+        rb_encoding *sel_enc;
+        int is_first = (idx == 0);
+
+        StringValue(sel);
+        sel_enc = rb_enc_check(str, sel);
+        tr_setup_table(sel, table, is_first, tablep, ctablep, sel_enc);
+        idx++;
+    }
+}
+
static long
lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
{
@@ -10207,9 +10223,28 @@ lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
return s - start;
}
+/*
+ * Returns the byte length of the leading run of characters in [s, e) for
+ * which tr_find() succeeds against table / del / nodel.
+ * Returns 0 when s is NULL or the range is empty.
+ */
+static long
+lstrip_offset_table(VALUE str, const char *s, const char *e, rb_encoding *enc,
+                    char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
+{
+    const char *cur = s;
+
+    if (!s || s >= e) return 0;
+
+    /* step over leading characters that are in the table */
+    do {
+        int clen;
+        unsigned int c = rb_enc_codepoint_len(cur, e, &clen, enc);
+
+        if (!tr_find(c, table, del, nodel)) break;
+        cur += clen;
+    } while (cur < e);
+
+    return cur - s;
+}
+
/*
* call-seq:
- * lstrip! -> self or nil
+ * lstrip!(*selectors) -> self or nil
*
* Like String#lstrip, except that:
*
@@ -10220,16 +10255,28 @@ lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
*/
static VALUE
-rb_str_lstrip_bang(VALUE str)
+rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
char *start, *s;
long olen, loffset;
+    char table[TR_TABLE_SIZE];
+    VALUE del = 0, nodel = 0;
+
+    rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS);
+
+    /*
+     * Build the selector table before touching str's buffer:
+     * tr_setup_table_multi() runs StringValue() on every selector, and
+     * that may call a user-defined #to_str which modifies str.  Doing
+     * the conversion first means str_modify_keep_cr() and the start/olen
+     * snapshot below are taken after any such side effects.
+     */
+    if (argc > 0) {
+        tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
+    }
+
str_modify_keep_cr(str);
enc = STR_ENC_GET(str);
RSTRING_GETMEM(str, start, olen);
- loffset = lstrip_offset(str, start, start+olen, enc);
+    if (argc > 0) {
+        /* strip the characters chosen by the selectors */
+        loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
+    }
+    else {
+        /* no selectors: default lstrip_offset() behavior */
+        loffset = lstrip_offset(str, start, start+olen, enc);
+    }
+
if (loffset > 0) {
long len = olen-loffset;
s = start + loffset;
@@ -10244,7 +10291,7 @@ rb_str_lstrip_bang(VALUE str)
/*
* call-seq:
- * lstrip -> new_string
+ * lstrip(*selectors) -> new_string
*
* Returns a copy of +self+ with leading whitespace removed;
* see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]:
@@ -10255,16 +10302,39 @@ rb_str_lstrip_bang(VALUE str)
* s.lstrip
* # => "abc\u0000\t\n\v\f\r "
*
+ * If +selectors+ are given, removes characters of +selectors+ from the beginning of +self+:
+ *
+ * s = "---abc+++"
+ * s.lstrip("-") # => "abc+++"
+ *
+ * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]),
+ * and may use any of their valid forms, including negation, ranges, and escapes:
+ *
+ * "01234abc56789".lstrip("0-9") # => "abc56789"
+ * "01234abc56789".lstrip("0-9", "^4-6") # => "4abc56789"
+ *
* Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
*/
static VALUE
-rb_str_lstrip(VALUE str)
+rb_str_lstrip(int argc, VALUE *argv, VALUE str)
{
char *start;
long len, loffset;
+    char table[TR_TABLE_SIZE];
+    VALUE del = 0, nodel = 0;
+
+    rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS);
+
+    /*
+     * Convert the selectors before reading str's pointer and length:
+     * tr_setup_table_multi() runs StringValue() on every selector, and
+     * that may call a user-defined #to_str which modifies str.  A
+     * start/len pair captured earlier could then be stale.
+     */
+    if (argc > 0) {
+        tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
+    }
+
RSTRING_GETMEM(str, start, len);
- loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
+    if (argc > 0) {
+        /* strip the characters chosen by the selectors */
+        loffset = lstrip_offset_table(str, start, start+len, STR_ENC_GET(str), table, del, nodel);
+    }
+    else {
+        /* no selectors: default lstrip_offset() behavior */
+        loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
+    }
if (loffset <= 0) return str_duplicate(rb_cString, str);
return rb_str_subseq(str, loffset, len - loffset);
}
@@ -10298,9 +10368,33 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
return e - t;
}
+/*
+ * Returns the byte length of the trailing run of characters in [s, e) for
+ * which tr_find() succeeds against table / del / nodel.
+ *
+ * Calls rb_str_check_dummy_enc() on enc and raises rb_eEncCompatError
+ * when str's code range is ENC_CODERANGE_BROKEN; both checks run before
+ * the empty-range shortcut.  Returns 0 when s is NULL or the range is
+ * empty.
+ */
+static long
+rstrip_offset_table(VALUE str, const char *s, const char *e, rb_encoding *enc,
+                    char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
+{
+    const char *tail = e;
+
+    rb_str_check_dummy_enc(enc);
+    if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
+        rb_raise(rb_eEncCompatError, "invalid byte sequence in %s", rb_enc_name(enc));
+    }
+    if (!s || s >= e) return 0;
+
+    /* walk backwards over trailing characters that are in the table */
+    for (;;) {
+        char *prev = rb_enc_prev_char(s, tail, e, enc);
+
+        if (prev == NULL) break;
+        if (!tr_find(rb_enc_codepoint(prev, e, enc), table, del, nodel)) break;
+        tail = prev;
+    }
+
+    return e - tail;
+}
+
/*
* call-seq:
- * rstrip! -> self or nil
+ * rstrip!(*selectors) -> self or nil
*
* Like String#rstrip, except that:
*
@@ -10311,16 +10405,27 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
*/
static VALUE
-rb_str_rstrip_bang(VALUE str)
+rb_str_rstrip_bang(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
char *start;
long olen, roffset;
+    char table[TR_TABLE_SIZE];
+    VALUE del = 0, nodel = 0;
+
+    rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS);
+
+    /*
+     * Build the selector table before touching str's buffer:
+     * tr_setup_table_multi() runs StringValue() on every selector, and
+     * that may call a user-defined #to_str which modifies str.  Doing
+     * the conversion first means str_modify_keep_cr() and the start/olen
+     * snapshot below are taken after any such side effects.
+     */
+    if (argc > 0) {
+        tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
+    }
+
str_modify_keep_cr(str);
enc = STR_ENC_GET(str);
RSTRING_GETMEM(str, start, olen);
- roffset = rstrip_offset(str, start, start+olen, enc);
+    if (argc > 0) {
+        /* strip the characters chosen by the selectors */
+        roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
+    }
+    else {
+        /* no selectors: default rstrip_offset() behavior */
+        roffset = rstrip_offset(str, start, start+olen, enc);
+    }
if (roffset > 0) {
long len = olen - roffset;
@@ -10334,7 +10439,7 @@ rb_str_rstrip_bang(VALUE str)
/*
* call-seq:
- * rstrip -> new_string
+ * rstrip(*selectors) -> new_string
*
* Returns a copy of +self+ with trailing whitespace removed;
* see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]:
@@ -10344,20 +10449,41 @@ rb_str_rstrip_bang(VALUE str)
* s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
* s.rstrip # => "\u0000\t\n\v\f\r abc"
*
+ * If +selectors+ are given, removes characters of +selectors+ from the end of +self+:
+ *
+ * s = "---abc+++"
+ * s.rstrip("+") # => "---abc"
+ *
+ * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]),
+ * and may use any of their valid forms, including negation, ranges, and escapes:
+ *
+ * "01234abc56789".rstrip("0-9") # => "01234abc"
+ * "01234abc56789".rstrip("0-9", "^4-6") # => "01234abc56"
+ *
* Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
*/
static VALUE
-rb_str_rstrip(VALUE str)
+rb_str_rstrip(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
char *start;
long olen, roffset;
+    char table[TR_TABLE_SIZE];
+    VALUE del = 0, nodel = 0;
+
+    rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS);
+
+    /*
+     * Convert the selectors before reading str's encoding, pointer and
+     * length: tr_setup_table_multi() runs StringValue() on every
+     * selector, and that may call a user-defined #to_str which modifies
+     * str.  Values captured earlier could then be stale.
+     */
+    if (argc > 0) {
+        tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
+    }
+
enc = STR_ENC_GET(str);
RSTRING_GETMEM(str, start, olen);
- roffset = rstrip_offset(str, start, start+olen, enc);
+    if (argc > 0) {
+        /* strip the characters chosen by the selectors */
+        roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
+    }
+    else {
+        /* no selectors: default rstrip_offset() behavior */
+        roffset = rstrip_offset(str, start, start+olen, enc);
+    }
if (roffset <= 0) return str_duplicate(rb_cString, str);
return rb_str_subseq(str, 0, olen-roffset);
}
@@ -10365,7 +10491,7 @@ rb_str_rstrip(VALUE str)
/*
* call-seq:
- * strip! -> self or nil
+ * strip!(*selectors) -> self or nil
*
* Like String#strip, except that:
*
@@ -10376,17 +10502,30 @@ rb_str_rstrip(VALUE str)
*/
static VALUE
-rb_str_strip_bang(VALUE str)
+rb_str_strip_bang(int argc, VALUE *argv, VALUE str)
{
char *start;
long olen, loffset, roffset;
rb_encoding *enc;
+    char table[TR_TABLE_SIZE];
+    VALUE del = 0, nodel = 0;
+
+    rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS);
+
+    /*
+     * Build the selector table before touching str's buffer:
+     * tr_setup_table_multi() runs StringValue() on every selector, and
+     * that may call a user-defined #to_str which modifies str.  Doing
+     * the conversion first means str_modify_keep_cr() and the start/olen
+     * snapshot below are taken after any such side effects.
+     */
+    if (argc > 0) {
+        tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
+    }
+
str_modify_keep_cr(str);
enc = STR_ENC_GET(str);
RSTRING_GETMEM(str, start, olen);
- loffset = lstrip_offset(str, start, start+olen, enc);
- roffset = rstrip_offset(str, start+loffset, start+olen, enc);
+
+    if (argc > 0) {
+        /* strip the selected characters from the head, then from the rest */
+        loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
+        roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
+    }
+    else {
+        /* no selectors: default lstrip_offset()/rstrip_offset() behavior */
+        loffset = lstrip_offset(str, start, start+olen, enc);
+        roffset = rstrip_offset(str, start+loffset, start+olen, enc);
+    }
if (loffset > 0 || roffset > 0) {
long len = olen-roffset;
@@ -10404,7 +10543,7 @@ rb_str_strip_bang(VALUE str)
/*
* call-seq:
- * strip -> new_string
+ * strip(*selectors) -> new_string
*
* Returns a copy of +self+ with leading and trailing whitespace removed;
* see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]:
@@ -10414,19 +10553,44 @@ rb_str_strip_bang(VALUE str)
* # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r "
* s.strip # => "abc"
*
+ * If +selectors+ are given, removes characters of +selectors+ from both ends of +self+:
+ *
+ * s = "---abc+++"
+ * s.strip("-+") # => "abc"
+ * s.strip("+-") # => "abc"
+ *
+ * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]),
+ * and may use any of their valid forms, including negation, ranges, and escapes:
+ *
+ * "01234abc56789".strip("0-9") # => "abc"
+ * "01234abc56789".strip("0-9", "^4-6") # => "4abc56"
+ *
* Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
*/
static VALUE
-rb_str_strip(VALUE str)
+rb_str_strip(int argc, VALUE *argv, VALUE str)
{
char *start;
long olen, loffset, roffset;
rb_encoding *enc = STR_ENC_GET(str);
+    char table[TR_TABLE_SIZE];
+    VALUE del = 0, nodel = 0;
+
+    rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS);
+
+    /*
+     * Convert the selectors before reading str's pointer and length:
+     * tr_setup_table_multi() runs StringValue() on every selector, and
+     * that may call a user-defined #to_str which modifies str.  A
+     * start/olen pair captured earlier could then be stale.
+     */
+    if (argc > 0) {
+        tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
+        /* re-read the encoding in case the conversion changed it */
+        enc = STR_ENC_GET(str);
+    }
+
RSTRING_GETMEM(str, start, olen);
- loffset = lstrip_offset(str, start, start+olen, enc);
- roffset = rstrip_offset(str, start+loffset, start+olen, enc);
+
+    if (argc > 0) {
+        /* strip the selected characters from the head, then from the rest */
+        loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
+        roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
+    }
+    else {
+        /* no selectors: default lstrip_offset()/rstrip_offset() behavior */
+        loffset = lstrip_offset(str, start, start+olen, enc);
+        roffset = rstrip_offset(str, start+loffset, start+olen, enc);
+    }
if (loffset <= 0 && roffset <= 0) return str_duplicate(rb_cString, str);
return rb_str_subseq(str, loffset, olen-loffset-roffset);
@@ -12714,9 +12878,9 @@ Init_String(void)
rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
rb_define_method(rb_cString, "chop", rb_str_chop, 0);
rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
- rb_define_method(rb_cString, "strip", rb_str_strip, 0);
- rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
- rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
+ rb_define_method(rb_cString, "strip", rb_str_strip, -1);
+ rb_define_method(rb_cString, "lstrip", rb_str_lstrip, -1);
+ rb_define_method(rb_cString, "rstrip", rb_str_rstrip, -1);
rb_define_method(rb_cString, "delete_prefix", rb_str_delete_prefix, 1);
rb_define_method(rb_cString, "delete_suffix", rb_str_delete_suffix, 1);
@@ -12724,9 +12888,9 @@ Init_String(void)
rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1);
rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0);
rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1);
- rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0);
- rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0);
- rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0);
+ rb_define_method(rb_cString, "strip!", rb_str_strip_bang, -1);
+ rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, -1);
+ rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, -1);
rb_define_method(rb_cString, "delete_prefix!", rb_str_delete_prefix_bang, 1);
rb_define_method(rb_cString, "delete_suffix!", rb_str_delete_suffix_bang, 1);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 1fe0629331..7227f903c7 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2049,6 +2049,117 @@ CODE
assert_equal(S("x") ,a)
end
+  # String#strip with selector arguments: each row is
+  # [expected result, receiver, selectors].
+  def test_strip_with_selectors
+    cases = [
+      ["abc", "---abc+++", ["-+"]],
+      ["abc", "+++abc---", ["-+"]],
+      ["abc", "+-+abc-+-", ["-+"]],
+      ["", "---+++", ["-+"]],
+      ["abc ", "---abc ", ["-"]],
+      [" abc", " abc+++", ["+"]],
+      # multibyte characters
+      ["abc", "あああabcいいい", ["あい"]],
+      ["abc", "いいいabcあああ", ["あい"]],
+      # NUL characters
+      ["abc\0", "---abc\0--", ["-"]],
+      ["\0abc", "--\0abc---", ["-"]],
+      # nothing to strip
+      ["abc", "abc", ["-+"]],
+      ["abc", "abc", [""]],
+      # ranges and negation
+      ["abc", "012abc345", ["0-9"]],
+      ["abc", "012abc345", ["^a-z"]],
+      # multiple selectors
+      ["4abc56", "01234abc56789", ["0-9", "^4-6"]],
+    ]
+
+    cases.each do |expected, source, selectors|
+      assert_equal(S(expected), S(source).strip(*selectors))
+    end
+  end
+
+  # String#strip! with a selector returns the stripped receiver, or nil
+  # when nothing was removed.
+  def test_strip_bang_with_chars
+    minus_first = S("---abc+++")
+    assert_equal(S("abc"), minus_first.strip!("-+"))
+    assert_equal(S("abc"), minus_first)
+
+    plus_first = S("+++abc---")
+    assert_equal(S("abc"), plus_first.strip!("-+"))
+    assert_equal(S("abc"), plus_first)
+
+    untouched = S("abc")
+    assert_nil(untouched.strip!("-+"))
+    assert_equal(S("abc"), untouched)
+
+    # multibyte characters
+    multibyte = S("あああabcいいい")
+    assert_equal(S("abc"), multibyte.strip!("あい"))
+    assert_equal(S("abc"), multibyte)
+  end
+
+  # String#lstrip with selector arguments: each row is
+  # [expected result, receiver, selectors].
+  def test_lstrip_with_selectors
+    cases = [
+      ["abc+++", "---abc+++", ["-"]],
+      ["abc---", "+++abc---", ["+"]],
+      ["abc", "---abc", ["-"]],
+      ["", "---", ["-"]],
+      # multibyte characters
+      ["abcいいい", "あああabcいいい", ["あ"]],
+      # NUL characters
+      ["\0abc+++", "--\0abc+++", ["-"]],
+      # nothing to strip
+      ["abc", "abc", ["-"]],
+      # range
+      ["abc345", "012abc345", ["0-9"]],
+      # multiple selectors
+      ["4abc56789", "01234abc56789", ["0-9", "^4-6"]],
+    ]
+
+    cases.each do |expected, source, selectors|
+      assert_equal(S(expected), S(source).lstrip(*selectors))
+    end
+  end
+
+  # String#lstrip! with a selector returns the stripped receiver, or nil
+  # when nothing was removed.
+  def test_lstrip_bang_with_chars
+    padded = S("---abc+++")
+    assert_equal(S("abc+++"), padded.lstrip!("-"))
+    assert_equal(S("abc+++"), padded)
+
+    untouched = S("abc")
+    assert_nil(untouched.lstrip!("-"))
+    assert_equal(S("abc"), untouched)
+  end
+
+  # String#rstrip with selector arguments: each row is
+  # [expected result, receiver, selectors].
+  def test_rstrip_with_selectors
+    cases = [
+      ["---abc", "---abc+++", ["+"]],
+      ["+++abc", "+++abc---", ["-"]],
+      ["abc", "abc+++", ["+"]],
+      ["", "+++", ["+"]],
+      # multibyte characters
+      ["あああabc", "あああabcいいい", ["い"]],
+      # NUL characters
+      ["---abc\0", "---abc\0++", ["+"]],
+      # nothing to strip
+      ["abc", "abc", ["-"]],
+      # range
+      ["012abc", "012abc345", ["0-9"]],
+      # multiple selectors
+      ["01234abc56", "01234abc56789", ["0-9", "^4-6"]],
+    ]
+
+    cases.each do |expected, source, selectors|
+      assert_equal(S(expected), S(source).rstrip(*selectors))
+    end
+  end
+
+  # String#rstrip! with a selector returns the stripped receiver, or nil
+  # when nothing was removed.
+  def test_rstrip_bang_with_chars
+    padded = S("---abc+++")
+    assert_equal(S("---abc"), padded.rstrip!("+"))
+    assert_equal(S("---abc"), padded)
+
+    untouched = S("abc")
+    assert_nil(untouched.rstrip!("+"))
+    assert_equal(S("abc"), untouched)
+  end
+
def test_sub
assert_equal(S("h*llo"), S("hello").sub(/[aeiou]/, S('*')))
assert_equal(S("h<e>llo"), S("hello").sub(/([aeiou])/, S('<\1>')))