diff options
| -rw-r--r-- | string.c | 216 | ||||
| -rw-r--r-- | test/ruby/test_string.rb | 111 |
2 files changed, 301 insertions, 26 deletions
@@ -10184,6 +10184,22 @@ rb_str_chomp(int argc, VALUE *argv, VALUE str) return rb_str_subseq(str, 0, chompped_length(str, rs)); } +static void +tr_setup_table_multi(char table[TR_TABLE_SIZE], VALUE *tablep, VALUE *ctablep, + VALUE str, int num_selectors, VALUE *selectors) +{ + int i; + + for (i=0; i<num_selectors; i++) { + VALUE selector = selectors[i]; + rb_encoding *enc; + + StringValue(selector); + enc = rb_enc_check(str, selector); + tr_setup_table(selector, table, i==0, tablep, ctablep, enc); + } +} + static long lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) { @@ -10207,9 +10223,28 @@ lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) return s - start; } +static long +lstrip_offset_table(VALUE str, const char *s, const char *e, rb_encoding *enc, + char table[TR_TABLE_SIZE], VALUE del, VALUE nodel) +{ + const char *const start = s; + + if (!s || s >= e) return 0; + + /* remove leading characters in the table */ + while (s < e) { + int n; + unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc); + + if (!tr_find(cc, table, del, nodel)) break; + s += n; + } + return s - start; +} + /* * call-seq: - * lstrip! -> self or nil + * lstrip!(*selectors) -> self or nil * * Like String#lstrip, except that: * @@ -10220,16 +10255,28 @@ lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) */ static VALUE -rb_str_lstrip_bang(VALUE str) +rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; char *start, *s; long olen, loffset; + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + str_modify_keep_cr(str); enc = STR_ENC_GET(str); RSTRING_GETMEM(str, start, olen); - loffset = lstrip_offset(str, start, start+olen, enc); + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + } + else { + loffset = lstrip_offset(str, start, start+olen, enc); + } + if (loffset > 0) { long len = olen-loffset; s = start + loffset; @@ -10244,7 +10291,7 @@ rb_str_lstrip_bang(VALUE str) /* * call-seq: - * lstrip -> new_string + * lstrip(*selectors) -> new_string * * Returns a copy of +self+ with leading whitespace removed; * see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]: @@ -10255,16 +10302,39 @@ rb_str_lstrip_bang(VALUE str) * s.lstrip * # => "abc\u0000\t\n\v\f\r " * + * If +selectors+ are given, removes characters of +selectors+ from the beginning of +self+: + * + * s = "---abc+++" + * s.lstrip("-") # => "abc+++" + * + * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]), + * and may use any of its valid forms, including negation, ranges, and escapes: + * + * "01234abc56789".lstrip("0-9") # "abc56789" + * "01234abc56789".lstrip("0-9", "^4-6") # "4abc56789" + * * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. */ static VALUE -rb_str_lstrip(VALUE str) +rb_str_lstrip(int argc, VALUE *argv, VALUE str) { char *start; long len, loffset; + + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + RSTRING_GETMEM(str, start, len); - loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str)); + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + loffset = lstrip_offset_table(str, start, start+len, STR_ENC_GET(str), table, del, nodel); + } + else { + loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str)); + } if (loffset <= 0) return str_duplicate(rb_cString, str); return rb_str_subseq(str, loffset, len - loffset); } @@ -10298,9 +10368,33 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) return e - t; } +static long +rstrip_offset_table(VALUE str, const char *s, const char *e, rb_encoding *enc, + char table[TR_TABLE_SIZE], VALUE del, VALUE nodel) +{ + const char *t; + char *tp; + + rb_str_check_dummy_enc(enc); + if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eEncCompatError, "invalid byte sequence in %s", rb_enc_name(enc)); + } + if (!s || s >= e) return 0; + t = e; + + /* remove trailing characters in the table */ + while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) { + unsigned int c = rb_enc_codepoint(tp, e, enc); + if (!tr_find(c, table, del, nodel)) break; + t = tp; + } + + return e - t; +} + /* * call-seq: - * rstrip! -> self or nil + * rstrip!(*selectors) -> self or nil * * Like String#rstrip, except that: * @@ -10311,16 +10405,27 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) */ static VALUE -rb_str_rstrip_bang(VALUE str) +rb_str_rstrip_bang(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; char *start; long olen, roffset; + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + str_modify_keep_cr(str); enc = STR_ENC_GET(str); RSTRING_GETMEM(str, start, olen); - roffset = rstrip_offset(str, start, start+olen, enc); + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + } + else { + roffset = rstrip_offset(str, start, start+olen, enc); + } if (roffset > 0) { long len = olen - roffset; @@ -10334,7 +10439,7 @@ rb_str_rstrip_bang(VALUE str) /* * call-seq: - * rstrip -> new_string + * rstrip(*selectors) -> new_string * * Returns a copy of +self+ with trailing whitespace removed; * see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]: @@ -10344,20 +10449,41 @@ rb_str_rstrip_bang(VALUE str) * s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r " * s.rstrip # => "\u0000\t\n\v\f\r abc" * + * If +selectors+ are given, removes characters of +selectors+ from the end of +self+: + * + * s = "---abc+++" + * s.rstrip("+") # => "---abc" + * + * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]), + * and may use any of its valid forms, including negation, ranges, and escapes: + * + * "01234abc56789".rstrip("0-9") # "01234abc" + * "01234abc56789".rstrip("0-9", "^4-6") # "01234abc56" + * * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. */ static VALUE -rb_str_rstrip(VALUE str) +rb_str_rstrip(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; char *start; long olen, roffset; + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + enc = STR_ENC_GET(str); RSTRING_GETMEM(str, start, olen); - roffset = rstrip_offset(str, start, start+olen, enc); + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + } + else { + roffset = rstrip_offset(str, start, start+olen, enc); + } if (roffset <= 0) return str_duplicate(rb_cString, str); return rb_str_subseq(str, 0, olen-roffset); } @@ -10365,7 +10491,7 @@ rb_str_rstrip(VALUE str) /* * call-seq: - * strip! -> self or nil + * strip!(*selectors) -> self or nil * * Like String#strip, except that: * @@ -10376,17 +10502,30 @@ rb_str_rstrip(VALUE str) */ static VALUE -rb_str_strip_bang(VALUE str) +rb_str_strip_bang(int argc, VALUE *argv, VALUE str) { char *start; long olen, loffset, roffset; rb_encoding *enc; + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + str_modify_keep_cr(str); enc = STR_ENC_GET(str); RSTRING_GETMEM(str, start, olen); - loffset = lstrip_offset(str, start, start+olen, enc); - roffset = rstrip_offset(str, start+loffset, start+olen, enc); + + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel); + } + else { + loffset = lstrip_offset(str, start, start+olen, enc); + roffset = rstrip_offset(str, start+loffset, start+olen, enc); + } if (loffset > 0 || roffset > 0) { long len = olen-roffset; @@ -10404,7 +10543,7 @@ rb_str_strip_bang(VALUE str) /* * call-seq: - * strip -> new_string + * strip(*selectors) -> new_string * * Returns a copy of +self+ with leading and trailing whitespace removed; * see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]: @@ -10414,19 +10553,44 @@ rb_str_strip_bang(VALUE str) * # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r " * s.strip # => "abc" * + * If +selectors+ are given, removes characters of +selectors+ from both ends of +self+: + * + * s = "---abc+++" + * s.strip("-+") # => "abc" + * s.strip("+-") # => "abc" + * + * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]), + * and may use any of its valid forms, including negation, ranges, and escapes: + * + * "01234abc56789".strip("0-9") # "abc" + * "01234abc56789".strip("0-9", "^4-6") # "4abc56" + * * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. */ static VALUE -rb_str_strip(VALUE str) +rb_str_strip(int argc, VALUE *argv, VALUE str) { char *start; long olen, loffset, roffset; rb_encoding *enc = STR_ENC_GET(str); + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + RSTRING_GETMEM(str, start, olen); - loffset = lstrip_offset(str, start, start+olen, enc); - roffset = rstrip_offset(str, start+loffset, start+olen, enc); + + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel); + } + else { + loffset = lstrip_offset(str, start, start+olen, enc); + roffset = rstrip_offset(str, start+loffset, start+olen, enc); + } if (loffset <= 0 && roffset <= 0) return str_duplicate(rb_cString, str); return rb_str_subseq(str, loffset, olen-loffset-roffset); @@ -12714,9 +12878,9 @@ Init_String(void) rb_define_method(rb_cString, "gsub", rb_str_gsub, -1); rb_define_method(rb_cString, "chop", rb_str_chop, 0); rb_define_method(rb_cString, "chomp", rb_str_chomp, -1); - rb_define_method(rb_cString, "strip", rb_str_strip, 0); - rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0); - rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0); + rb_define_method(rb_cString, "strip", rb_str_strip, -1); + rb_define_method(rb_cString, "lstrip", rb_str_lstrip, -1); + rb_define_method(rb_cString, "rstrip", rb_str_rstrip, -1); rb_define_method(rb_cString, "delete_prefix", rb_str_delete_prefix, 1); rb_define_method(rb_cString, "delete_suffix", rb_str_delete_suffix, 1); @@ -12724,9 +12888,9 @@ Init_String(void) rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1); rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0); rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1); - rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0); - rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0); - rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0); + rb_define_method(rb_cString, "strip!", rb_str_strip_bang, -1); + rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, -1); + rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, -1); rb_define_method(rb_cString, "delete_prefix!", rb_str_delete_prefix_bang, 1); rb_define_method(rb_cString, "delete_suffix!", rb_str_delete_suffix_bang, 1); diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 1fe0629331..7227f903c7 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -2049,6 +2049,117 @@ CODE assert_equal(S("x") ,a) end + def test_strip_with_selectors + assert_equal(S("abc"), S("---abc+++").strip("-+")) + assert_equal(S("abc"), S("+++abc---").strip("-+")) + assert_equal(S("abc"), S("+-+abc-+-").strip("-+")) + assert_equal(S(""), S("---+++").strip("-+")) + assert_equal(S("abc "), S("---abc ").strip("-")) + assert_equal(S(" abc"), S(" abc+++").strip("+")) + + # Test with multibyte characters + assert_equal(S("abc"), S("あああabcいいい").strip("あい")) + assert_equal(S("abc"), S("いいいabcあああ").strip("あい")) + + # Test with NUL characters + assert_equal(S("abc\0"), S("---abc\0--").strip("-")) + assert_equal(S("\0abc"), S("--\0abc---").strip("-")) + + # Test without modification + assert_equal(S("abc"), S("abc").strip("-+")) + assert_equal(S("abc"), S("abc").strip("")) + + # Test with range + assert_equal(S("abc"), S("012abc345").strip("0-9")) + assert_equal(S("abc"), S("012abc345").strip("^a-z")) + + # Test with multiple selectors + assert_equal(S("4abc56"), S("01234abc56789").strip("0-9", "^4-6")) + end + + def test_strip_bang_with_chars + a = S("---abc+++") + assert_equal(S("abc"), a.strip!("-+")) + assert_equal(S("abc"), a) + + a = S("+++abc---") + assert_equal(S("abc"), a.strip!("-+")) + assert_equal(S("abc"), a) + + a = S("abc") + assert_nil(a.strip!("-+")) + assert_equal(S("abc"), a) + + # Test with multibyte characters + a = S("あああabcいいい") + assert_equal(S("abc"), a.strip!("あい")) + assert_equal(S("abc"), a) + end + + def test_lstrip_with_selectors + assert_equal(S("abc+++"), S("---abc+++").lstrip("-")) + assert_equal(S("abc---"), S("+++abc---").lstrip("+")) + assert_equal(S("abc"), S("---abc").lstrip("-")) + assert_equal(S(""), S("---").lstrip("-")) + + # Test with multibyte characters + assert_equal(S("abcいいい"), S("あああabcいいい").lstrip("あ")) + + # Test with NUL characters + assert_equal(S("\0abc+++"), S("--\0abc+++").lstrip("-")) + + # Test without modification + assert_equal(S("abc"), S("abc").lstrip("-")) + + # Test with range + assert_equal(S("abc345"), S("012abc345").lstrip("0-9")) + + # Test with multiple selectors + assert_equal(S("4abc56789"), S("01234abc56789").lstrip("0-9", "^4-6")) + end + + def test_lstrip_bang_with_chars + a = S("---abc+++") + assert_equal(S("abc+++"), a.lstrip!("-")) + assert_equal(S("abc+++"), a) + + a = S("abc") + assert_nil(a.lstrip!("-")) + assert_equal(S("abc"), a) + end + + def test_rstrip_with_selectors + assert_equal(S("---abc"), S("---abc+++").rstrip("+")) + assert_equal(S("+++abc"), S("+++abc---").rstrip("-")) + assert_equal(S("abc"), S("abc+++").rstrip("+")) + assert_equal(S(""), S("+++").rstrip("+")) + + # Test with multibyte characters + assert_equal(S("あああabc"), S("あああabcいいい").rstrip("い")) + + # Test with NUL characters + assert_equal(S("---abc\0"), S("---abc\0++").rstrip("+")) + + # Test without modification + assert_equal(S("abc"), S("abc").rstrip("-")) + + # Test with range + assert_equal(S("012abc"), S("012abc345").rstrip("0-9")) + + # Test with multiple selectors + assert_equal(S("01234abc56"), S("01234abc56789").rstrip("0-9", "^4-6")) + end + + def test_rstrip_bang_with_chars + a = S("---abc+++") + assert_equal(S("---abc"), a.rstrip!("+")) + assert_equal(S("---abc"), a) + + a = S("abc") + assert_nil(a.rstrip!("+")) + assert_equal(S("abc"), a) + end + def test_sub assert_equal(S("h*llo"), S("hello").sub(/[aeiou]/, S('*'))) assert_equal(S("h<e>llo"), S("hello").sub(/([aeiou])/, S('<\1>'))) |
