From d46e2aea718920f535c1dc701cbd6b85a8e1fcac Mon Sep 17 00:00:00 2001 From: naruse Date: Wed, 17 Feb 2016 03:21:35 +0000 Subject: * string.c (rb_str_init): introduce String.new(capacity: size) [Feature #12024] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53850 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 ++++ NEWS | 4 +++ string.c | 51 +++++++++++++++++++++++++++++++++----- test/-ext-/string/test_capacity.rb | 19 +++++++++++--- test/ruby/test_string.rb | 12 +++++++++ 5 files changed, 82 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 518723e4cc..b97adf723b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Wed Feb 17 12:14:59 2016 NARUSE, Yui + + * string.c (rb_str_init): introduce String.new(capacity: size) + [Feature #12024] + Tue Feb 16 19:10:08 2016 Martin Duerst * enc/unicode/case-folding.rb, casefold.h: Used only first element diff --git a/NEWS b/NEWS index fe1a4dde65..35d5bce51f 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,10 @@ with all sufficient information, see the ChangeLog file or Redmine * Dir.empty?. [Feature #10121] +* String + + * String.new(capacity: size) [Feature #12024] + === Stdlib updates (outstanding ones only) * CSV diff --git a/string.c b/string.c index 51751f89b7..3518cf5601 100644 --- a/string.c +++ b/string.c @@ -1350,33 +1350,72 @@ rb_str_resurrect(VALUE str) * call-seq: * String.new(str="") -> new_str * String.new(str="", encoding: enc) -> new_str + * String.new(str="", capacity: size) -> new_str * * Returns a new string object containing a copy of str. + * * The optional enc argument specifies the encoding of the new string. * If not specified, the encoding of str (or ASCII-8BIT, if str * is not specified) is used. + * + * The optional size argument specifies the size of internal buffer. + * This may improve performance, when the string will be concatenated many + * times (and call many realloc). */ static VALUE rb_str_init(int argc, VALUE *argv, VALUE str) { - static ID keyword_ids[1]; - VALUE orig, opt, enc; + static ID keyword_ids[2]; + VALUE orig, opt, enc, vcapa; + VALUE kwargs[2]; int n; - if (!keyword_ids[0]) + if (!keyword_ids[0]) { keyword_ids[0] = rb_id_encoding(); + CONST_ID(keyword_ids[1], "capacity"); + } n = rb_scan_args(argc, argv, "01:", &orig, &opt); - if (argc > 0 && n == 1) - rb_str_replace(str, orig); if (!NIL_P(opt)) { - rb_get_kwargs(opt, keyword_ids, 0, 1, &enc); + rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs); + enc = kwargs[0]; + vcapa = kwargs[1]; + if (vcapa != Qundef && !NIL_P(vcapa)) { + long capa = NUM2LONG(vcapa); + if (capa < STR_BUF_MIN_SIZE) { + capa = STR_BUF_MIN_SIZE; + } + if (n == 1) { + long len = RSTRING_LEN(orig); + if (capa < len) { + capa = len; + } + RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1); + memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), RSTRING_LEN(orig)); + RSTRING(str)->as.heap.len = len; + rb_enc_cr_str_exact_copy(str, orig); + } + else { + RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1); + RSTRING(str)->as.heap.ptr[0] = '\0'; + } + FL_SET(str, STR_NOEMBED); + RSTRING(str)->as.heap.aux.capa = capa; + } + else if (n == 1) { + StringValue(orig); + str_replace(str, orig); + } if (enc != Qundef && !NIL_P(enc)) { rb_enc_associate(str, rb_to_encoding(enc)); ENC_CODERANGE_CLEAR(str); } } + else if (n == 1) { + StringValue(orig); + str_replace(str, orig); + } return str; } diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb index 54f3caf32e..48d2deadee 100644 --- a/test/-ext-/string/test_capacity.rb +++ b/test/-ext-/string/test_capacity.rb @@ -4,16 +4,29 @@ require '-test-/string' require 'rbconfig/sizeof' class Test_StringCapacity < Test::Unit::TestCase + def capa(str) + Bug::String.capacity(str) + end + def test_capacity_embeded size = RbConfig::SIZEOF['void*'] * 3 - 1 - assert_equal size, Bug::String.capacity('foo') + assert_equal size, capa('foo') end def test_capacity_shared - assert_equal 0, Bug::String.capacity(:abcdefghijklmnopqrstuvwxyz.to_s) + assert_equal 0, capa(:abcdefghijklmnopqrstuvwxyz.to_s) end def test_capacity_normal - assert_equal 128, Bug::String.capacity('1'*128) + assert_equal 128, capa('1'*128) + end + + def test_s_new_capacity + assert_equal("", String.new(capacity: 1000)) + assert_equal(String, String.new(capacity: 1000).class) + assert_equal(10000, capa(String.new(capacity: 10000))) + + assert_equal("", String.new(capacity: -1000)) + assert_equal(capa(String.new(capacity: -10000)), capa(String.new(capacity: -1000))) end end diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index f1633e3d7b..339e8728ab 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -45,6 +45,18 @@ class TestString < Test::Unit::TestCase src.force_encoding("euc-jp") assert_equal(src, S(src, encoding: "utf-8")) assert_equal(Encoding::UTF_8, S(src, encoding: "utf-8").encoding) + + assert_equal("", S(capacity: 1000)) + assert_equal(Encoding::ASCII_8BIT, S(capacity: 1000).encoding) + + assert_equal("", S(capacity: 1000, encoding: "euc-jp")) + assert_equal(Encoding::EUC_JP, S(capacity: 1000, encoding: "euc-jp").encoding) + + assert_equal("", S("", capacity: 1000)) + assert_equal(__ENCODING__, S("", capacity: 1000).encoding) + + assert_equal("", S("", capacity: 1000, encoding: "euc-jp")) + assert_equal(Encoding::EUC_JP, S("", capacity: 1000, encoding: "euc-jp").encoding) end def test_AREF # '[]' -- cgit v1.2.3