summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-17 03:21:35 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-17 03:21:35 +0000
commit d46e2aea718920f535c1dc701cbd6b85a8e1fcac (patch)
tree 3921a08d4b5ab5f3334abf34b3e4670f7486107f
parent a8f43986020ff906a99ba453521ab7c8b3f7e0d2 (diff)
* string.c (rb_str_init): introduce String.new(capacity: size)
  [Feature #12024]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53850 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- NEWS 4
-rw-r--r-- string.c 51
-rw-r--r-- test/-ext-/string/test_capacity.rb 19
-rw-r--r-- test/ruby/test_string.rb 12
5 files changed, 82 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index 518723e..b97adf7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Wed Feb 17 12:14:59 2016 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * string.c (rb_str_init): introduce String.new(capacity: size)
+ [Feature #12024]
+
Tue Feb 16 19:10:08 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
* enc/unicode/case-folding.rb, casefold.h: Used only first element
diff --git a/NEWS b/NEWS
index fe1a4dd..35d5bce 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,10 @@ with all sufficient information, see the ChangeLog file or Redmine
* Dir.empty?. [Feature #10121]
+* String
+
+ * String.new(capacity: size) [Feature #12024]
+
=== Stdlib updates (outstanding ones only)
* CSV
diff --git a/string.c b/string.c
index 51751f8..3518cf5 100644
--- a/string.c
+++ b/string.c
@@ -1350,33 +1350,72 @@ rb_str_resurrect(VALUE str)
* call-seq:
* String.new(str="") -> new_str
* String.new(str="", encoding: enc) -> new_str
+ * String.new(str="", capacity: size) -> new_str
*
* Returns a new string object containing a copy of <i>str</i>.
+ *
* The optional <i>enc</i> argument specifies the encoding of the new string.
* If not specified, the encoding of <i>str</i> (or ASCII-8BIT, if <i>str</i>
* is not specified) is used.
+ *
+ * The optional <i>size</i> argument specifies the size of the internal buffer.
+ * This may improve performance when the string will be concatenated many
+ * times (which would otherwise cause many realloc calls).
*/
/*
 * Initializer behind the String.new forms listed in the call-seq comment
 * that precedes this function.
 *
 * argc/argv: the call arguments; at most one positional value (orig) plus an
 *            optional keyword hash (opt) are extracted from them.
 * str:       the string being initialized; it is also the return value.
 *
 * Recognized keywords are "encoding" and "capacity". When a non-nil capacity
 * is given, a buffer of capa+1 chars is allocated directly and str is
 * flagged STR_NOEMBED; otherwise a supplied orig is copied via str_replace.
 */
static VALUE
rb_str_init(int argc, VALUE *argv, VALUE str)
{
- static ID keyword_ids[1];
- VALUE orig, opt, enc;
+ static ID keyword_ids[2];
+ VALUE orig, opt, enc, vcapa;
+ VALUE kwargs[2];
int n;
/* The keyword IDs are looked up once and cached in the static array. */
- if (!keyword_ids[0])
+ if (!keyword_ids[0]) {
keyword_ids[0] = rb_id_encoding();
+ CONST_ID(keyword_ids[1], "capacity");
+ }
/* n is compared with 1 below to detect that the positional argument was supplied. */
n = rb_scan_args(argc, argv, "01:", &orig, &opt);
- if (argc > 0 && n == 1)
- rb_str_replace(str, orig);
if (!NIL_P(opt)) {
- rb_get_kwargs(opt, keyword_ids, 0, 1, &enc);
+ rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
/* kwargs[] is filled in keyword_ids order: [0] = encoding, [1] = capacity. */
+ enc = kwargs[0];
+ vcapa = kwargs[1];
/* A keyword is treated as absent when its slot is Qundef or nil. */
+ if (vcapa != Qundef && !NIL_P(vcapa)) {
+ long capa = NUM2LONG(vcapa);
/* Requests below STR_BUF_MIN_SIZE (including negative values) are raised to it. */
+ if (capa < STR_BUF_MIN_SIZE) {
+ capa = STR_BUF_MIN_SIZE;
+ }
+ if (n == 1) {
/* NOTE(review): orig is read here via RSTRING_LEN/RSTRING_PTR without the
 * StringValue(orig) conversion that the non-capacity branches perform --
 * confirm how a non-String positional argument is meant to be handled. */
+ long len = RSTRING_LEN(orig);
/* The buffer is never made smaller than the content being copied. */
+ if (capa < len) {
+ capa = len;
+ }
/* NOTE(review): as.heap.ptr is assigned without looking at its previous
 * value; if str can already own a heap buffer when this runs, that buffer
 * would not be released -- TODO confirm. */
+ RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
/* NOTE(review): len bytes are copied but nothing is stored at ptr[len],
 * whereas the empty branch below writes an explicit '\0' -- confirm that
 * termination is guaranteed some other way. */
+ memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), RSTRING_LEN(orig));
+ RSTRING(str)->as.heap.len = len;
+ rb_enc_cr_str_exact_copy(str, orig);
+ }
+ else {
/* No source string: allocate the buffer and make it an empty C string.
 * NOTE(review): as.heap.len is not assigned on this path -- presumably it
 * is already 0 for a freshly allocated str; verify. */
+ RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
+ RSTRING(str)->as.heap.ptr[0] = '\0';
+ }
/* Both capacity paths end with str marked non-embedded and its capacity recorded. */
+ FL_SET(str, STR_NOEMBED);
+ RSTRING(str)->as.heap.aux.capa = capa;
+ }
/* Keyword hash present but no usable capacity: plain copy of orig. */
+ else if (n == 1) {
+ StringValue(orig);
+ str_replace(str, orig);
+ }
/* An explicit encoding is applied after the content has been set up, and the
 * code range is cleared afterwards. */
if (enc != Qundef && !NIL_P(enc)) {
rb_enc_associate(str, rb_to_encoding(enc));
ENC_CODERANGE_CLEAR(str);
}
}
/* No keyword hash at all: plain copy of orig when it was supplied. */
+ else if (n == 1) {
+ StringValue(orig);
+ str_replace(str, orig);
+ }
return str;
}
diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb
index 54f3caf..48d2dea 100644
--- a/test/-ext-/string/test_capacity.rb
+++ b/test/-ext-/string/test_capacity.rb
@@ -4,16 +4,29 @@ require '-test-/string'
require 'rbconfig/sizeof'
# Tests for the buffer capacity reported by Bug::String.capacity, including
# strings created with String.new(capacity: ...).
class Test_StringCapacity < Test::Unit::TestCase
# Shorthand for Bug::String.capacity(str), used by every assertion below.
+ def capa(str)
+ Bug::String.capacity(str)
+ end
+
# A short literal is expected to report a capacity of three pointer sizes
# minus one.
def test_capacity_embeded
size = RbConfig::SIZEOF['void*'] * 3 - 1
- assert_equal size, Bug::String.capacity('foo')
+ assert_equal size, capa('foo')
end
# A string obtained from Symbol#to_s is expected to report capacity 0.
def test_capacity_shared
- assert_equal 0, Bug::String.capacity(:abcdefghijklmnopqrstuvwxyz.to_s)
+ assert_equal 0, capa(:abcdefghijklmnopqrstuvwxyz.to_s)
end
# A 128-character string is expected to report capacity 128.
def test_capacity_normal
- assert_equal 128, Bug::String.capacity('1'*128)
+ assert_equal 128, capa('1'*128)
+ end
+
# String.new(capacity: n) must yield an empty String whose reported capacity
# equals n for a large n.
+ def test_s_new_capacity
+ assert_equal("", String.new(capacity: 1000))
+ assert_equal(String, String.new(capacity: 1000).class)
+ assert_equal(10000, capa(String.new(capacity: 10000)))
+
# Negative capacities still produce an empty string, and two different
# negative values must report the same capacity (presumably the minimum
# buffer size applied by rb_str_init -- confirm against string.c).
+ assert_equal("", String.new(capacity: -1000))
+ assert_equal(capa(String.new(capacity: -10000)), capa(String.new(capacity: -1000)))
end
end
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index f1633e3..339e872 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -45,6 +45,18 @@ class TestString < Test::Unit::TestCase
src.force_encoding("euc-jp")
assert_equal(src, S(src, encoding: "utf-8"))
assert_equal(Encoding::UTF_8, S(src, encoding: "utf-8").encoding)
+
+ assert_equal("", S(capacity: 1000))
+ assert_equal(Encoding::ASCII_8BIT, S(capacity: 1000).encoding)
+
+ assert_equal("", S(capacity: 1000, encoding: "euc-jp"))
+ assert_equal(Encoding::EUC_JP, S(capacity: 1000, encoding: "euc-jp").encoding)
+
+ assert_equal("", S("", capacity: 1000))
+ assert_equal(__ENCODING__, S("", capacity: 1000).encoding)
+
+ assert_equal("", S("", capacity: 1000, encoding: "euc-jp"))
+ assert_equal(Encoding::EUC_JP, S("", capacity: 1000, encoding: "euc-jp").encoding)
end
def test_AREF # '[]'