1 files changed, 244 insertions, 103 deletions
diff --git a/spec/ruby/core/string/split_spec.rb b/spec/ruby/core/string/split_spec.rb
index 1a4128f828..3c6d1864d1 100644
--- a/spec/ruby/core/string/split_spec.rb
+++ b/spec/ruby/core/string/split_spec.rb
@@ -1,21 +1,24 @@
 # -*- encoding: utf-8 -*-
-require File.expand_path('../../../spec_helper', __FILE__)
-require File.expand_path('../fixtures/classes.rb', __FILE__)
+require_relative '../../spec_helper'
+require_relative 'fixtures/classes'
 
 describe "String#split with String" do
-  with_feature :encoding do
-    it "throws an ArgumentError if the pattern is not a valid string" do
-      str = 'проверка'
-      broken_str = 'проверка'
-      broken_str.force_encoding('binary')
-      broken_str.chop!
-      broken_str.force_encoding('utf-8')
-      lambda { str.split(broken_str) }.should raise_error(ArgumentError)
-    end
+  it "throws an ArgumentError if the string is not valid" do
+    s = "\xDF".dup.force_encoding(Encoding::UTF_8)
 
-    it "splits on multibyte characters" do
-      "ありがりがとう".split("が").should == ["あり", "り", "とう"]
-    end
+    -> { s.split }.should raise_error(ArgumentError)
+    -> { s.split(':') }.should raise_error(ArgumentError)
+  end
+
+  it "throws an ArgumentError if the pattern is not a valid string" do
+    str = 'проверка'
+    broken_str = "\xDF".dup.force_encoding(Encoding::UTF_8)
+
+    -> { str.split(broken_str) }.should raise_error(ArgumentError)
+  end
+
+  it "splits on multibyte characters" do
+    "ありがりがとう".split("が").should == ["あり", "り", "とう"]
   end
 
   it "returns an array of substrings based on splitting on the given string" do
@@ -26,9 +29,35 @@ describe "String#split with String" do
     "1,2,,3,4,,".split(',').should == ["1", "2", "", "3", "4"]
     "1,2,,3,4,,".split(',', 0).should == ["1", "2", "", "3", "4"]
     "  a  b  c\nd  ".split("  ").should == ["", "a", "b", "c\nd"]
+    "  a  あ  c\nd  ".split("  ").should == ["", "a", "あ", "c\nd"]
     "hai".split("hai").should == []
     ",".split(",").should == []
     ",".split(",", 0).should == []
+    "あ".split("あ").should == []
+    "あ".split("あ", 0).should == []
+  end
+
+  it "does not suppress trailing empty fields when a positive limit is given" do
+    " 1 2 ".split(" ", 2).should == ["1", "2 "]
+    " 1 2 ".split(" ", 3).should == ["1", "2", ""]
+    " 1 2 ".split(" ", 4).should == ["1", "2", ""]
+    " 1 あ ".split(" ", 2).should == ["1", "あ "]
+    " 1 あ ".split(" ", 3).should == ["1", "あ", ""]
+    " 1 あ ".split(" ", 4).should == ["1", "あ", ""]
+
+    "1,2,".split(',', 2).should == ["1", "2,"]
+    "1,2,".split(',', 3).should == ["1", "2", ""]
+    "1,2,".split(',', 4).should == ["1", "2", ""]
+    "1,あ,".split(',', 2).should == ["1", "あ,"]
+    "1,あ,".split(',', 3).should == ["1", "あ", ""]
+    "1,あ,".split(',', 4).should == ["1", "あ", ""]
+
+    "1 2 ".split(/ /, 2).should == ["1", "2 "]
+    "1 2 ".split(/ /, 3).should == ["1", "2", ""]
+    "1 2 ".split(/ /, 4).should == ["1", "2", ""]
+    "1 あ ".split(/ /, 2).should == ["1", "あ "]
+    "1 あ ".split(/ /, 3).should == ["1", "あ", ""]
+    "1 あ ".split(/ /, 4).should == ["1", "あ", ""]
   end
 
   it "returns an array with one entry if limit is 1: the original string" do
@@ -64,26 +93,47 @@ describe "String#split with String" do
     ",".split(",", -1).should == ["", ""]
   end
 
+  it "raises a RangeError when the limit is larger than int" do
+    -> { "a,b".split(" ", 2147483649) }.should raise_error(RangeError)
+  end
+
   it "defaults to $; when string isn't given or nil" do
-    begin
+    suppress_warning do
       old_fs = $;
+      begin
+        [",", ":", "", "XY", nil].each do |fs|
+          $; = fs
 
-      [",", ":", "", "XY", nil].each do |fs|
-        $; = fs
+          ["x,y,z,,,", "1:2:", "aXYbXYcXY", ""].each do |str|
+            expected = str.split(fs || " ")
 
-        ["x,y,z,,,", "1:2:", "aXYbXYcXY", ""].each do |str|
-          expected = str.split(fs || " ")
+            str.split(nil).should == expected
+            str.split.should == expected
 
-          str.split(nil).should == expected
-          str.split.should == expected
+            str.split(nil, -1).should == str.split(fs || " ", -1)
+            str.split(nil, 0).should == str.split(fs || " ", 0)
+            str.split(nil, 2).should == str.split(fs || " ", 2)
+          end
+        end
+      ensure
+        $; = old_fs
+      end
+    end
 
-          str.split(nil, -1).should == str.split(fs || " ", -1)
-          str.split(nil, 0).should == str.split(fs || " ", 0)
-          str.split(nil, 2).should == str.split(fs || " ", 2)
+    context "when $; is not nil" do
+      before do
+        suppress_warning do
+          @old_value, $; = $;, 'foobar'
         end
       end
-    ensure
-      $; = old_fs
+
+      after do
+        $; = @old_value
+      end
+
+      it "warns" do
+        -> { "".split }.should complain(/warning: \$; is set to non-nil value/)
+      end
     end
   end
 
@@ -142,12 +192,12 @@ describe "String#split with String" do
     "foo".split("bar", 3).should == ["foo"]
   end
 
-  it "returns subclass instances based on self" do
+  it "returns String instances based on self" do
     ["", "x.y.z.", "  x  y  "].each do |str|
       ["", ".", " "].each do |pat|
         [-1, 0, 1, 2].each do |limit|
           StringSpecs::MyString.new(str).split(pat, limit).each do |x|
-            x.should be_an_instance_of(StringSpecs::MyString)
+            x.should be_an_instance_of(String)
           end
 
           str.split(StringSpecs::MyString.new(pat), limit).each do |x|
@@ -158,32 +208,32 @@ describe "String#split with String" do
     end
   end
 
-  it "does not call constructor on created subclass instances" do
-    # can't call should_not_receive on an object that doesn't yet exist
-    # so failure here is signalled by exception, not expectation failure
+  it "returns an empty array when whitespace is split on whitespace" do
+    " ".split(" ").should == []
+    " \n ".split(" ").should == []
+    "  ".split(" ").should == []
+    " \t ".split(" ").should == []
+  end
 
-    s = StringSpecs::StringWithRaisingConstructor.new('silly:string')
-    s.split(':').first.should == 'silly'
+  it "doesn't split on non-ascii whitespace" do
+    "a\u{2008}b".split(" ").should == ["a\u{2008}b"]
   end
 
-  it "taints the resulting strings if self is tainted" do
-    ["", "x.y.z.", "  x  y  "].each do |str|
-      ["", ".", " "].each do |pat|
-        [-1, 0, 1, 2].each do |limit|
-          str.dup.taint.split(pat).each do |x|
-            x.tainted?.should == true
-          end
+  it "returns Strings in the same encoding as self" do
+    strings = "hello world".encode("US-ASCII").split(" ")
 
-          str.split(pat.dup.taint).each do |x|
-            x.tainted?.should == false
-          end
-        end
-      end
-    end
+    strings[0].encoding.should == Encoding::US_ASCII
+    strings[1].encoding.should == Encoding::US_ASCII
   end
 end
 
 describe "String#split with Regexp" do
+  it "throws an ArgumentError if the string is not valid" do
+    s = "\xDF".dup.force_encoding(Encoding::UTF_8)
+
+    -> { s.split(/./) }.should raise_error(ArgumentError)
+  end
+
   it "divides self on regexp matches" do
     " now's  the time".split(/ /).should == ["", "now's", "", "the", "time"]
     " x\ny ".split(/ /).should == ["", "x\ny"]
@@ -238,25 +288,26 @@ describe "String#split with Regexp" do
   end
 
   it "defaults to $; when regexp isn't given or nil" do
-    begin
+    suppress_warning do
       old_fs = $;
+      begin
+        [/,/, /:/, //, /XY/, /./].each do |fs|
+          $; = fs
 
-      [/,/, /:/, //, /XY/, /./].each do |fs|
-        $; = fs
+          ["x,y,z,,,", "1:2:", "aXYbXYcXY", ""].each do |str|
+            expected = str.split(fs)
 
-        ["x,y,z,,,", "1:2:", "aXYbXYcXY", ""].each do |str|
-          expected = str.split(fs)
+            str.split(nil).should == expected
+            str.split.should == expected
 
-          str.split(nil).should == expected
-          str.split.should == expected
-
-          str.split(nil, -1).should == str.split(fs, -1)
-          str.split(nil, 0).should == str.split(fs, 0)
-          str.split(nil, 2).should == str.split(fs, 2)
+            str.split(nil, -1).should == str.split(fs, -1)
+            str.split(nil, 0).should == str.split(fs, 0)
+            str.split(nil, 2).should == str.split(fs, 2)
+          end
         end
+      ensure
+        $; = old_fs
       end
-    ensure
-      $; = old_fs
     end
   end
 
@@ -316,8 +367,8 @@ describe "String#split with Regexp" do
   end
 
   it "returns a type error if limit can't be converted to an integer" do
-    lambda {"1.2.3.4".split(".", "three")}.should raise_error(TypeError)
-    lambda {"1.2.3.4".split(".", nil)    }.should raise_error(TypeError)
+    -> {"1.2.3.4".split(".", "three")}.should raise_error(TypeError)
+    -> {"1.2.3.4".split(".", nil)    }.should raise_error(TypeError)
   end
 
   it "doesn't set $~" do
@@ -335,71 +386,161 @@ describe "String#split with Regexp" do
     "foo".split(/bar/, 3).should == ["foo"]
   end
 
-  it "returns subclass instances based on self" do
+  it "returns String instances based on self" do
     ["", "x:y:z:", "  x  y  "].each do |str|
       [//, /:/, /\s+/].each do |pat|
         [-1, 0, 1, 2].each do |limit|
           StringSpecs::MyString.new(str).split(pat, limit).each do |x|
-            x.should be_an_instance_of(StringSpecs::MyString)
+            x.should be_an_instance_of(String)
           end
         end
       end
     end
   end
 
-  it "does not call constructor on created subclass instances" do
-    # can't call should_not_receive on an object that doesn't yet exist
-    # so failure here is signalled by exception, not expectation failure
+  it "returns Strings in the same encoding as self" do
+    ary = "а б в".split
+    encodings = ary.map { |s| s.encoding }
+    encodings.should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8]
+  end
 
-    s = StringSpecs::StringWithRaisingConstructor.new('silly:string')
-    s.split(/:/).first.should == 'silly'
+  it "splits a string on each character for a multibyte encoding and empty split" do
+    "That's why eﬃciency could not be helped".split("").size.should == 39
   end
 
-  it "taints the resulting strings if self is tainted" do
-    ["", "x:y:z:", "  x  y  "].each do |str|
-      [//, /:/, /\s+/].each do |pat|
-        [-1, 0, 1, 2].each do |limit|
-          str.dup.taint.split(pat, limit).each do |x|
-            # See the spec below for why the conditional is here
-            x.tainted?.should be_true unless x.empty?
-          end
-        end
-      end
-    end
+  it "returns an ArgumentError if an invalid UTF-8 string is supplied" do
+    broken_str = +'проверка' # Russian for "test"
+    broken_str.force_encoding('binary')
+    broken_str.chop!
+    broken_str.force_encoding('utf-8')
+    ->{ broken_str.split(/\r\n|\r|\n/) }.should raise_error(ArgumentError)
   end
 
-  it "taints an empty string if self is tainted" do
-    ":".taint.split(//, -1).last.tainted?.should be_true
+  # See https://bugs.ruby-lang.org/issues/12689 and https://github.com/jruby/jruby/issues/4868
+  it "allows concurrent Regexp calls in a shared context" do
+    str = 'a,b,c,d,e'
+
+    p = proc { str.split(/,/) }
+    results = 10.times.map { Thread.new { x = nil; 100.times { x = p.call }; x } }.map(&:value)
+
+    results.should == [%w[a b c d e]] * 10
   end
 
-  it "doesn't taints the resulting strings if the Regexp is tainted" do
-    ["", "x:y:z:", "  x  y  "].each do |str|
-      [//, /:/, /\s+/].each do |pat|
-        [-1, 0, 1, 2].each do |limit|
-          str.split(pat.dup.taint, limit).each do |x|
-            x.tainted?.should be_false
-          end
-        end
-      end
+  context "when a block is given" do
+    it "yields each split substring with default pattern" do
+      a = []
+      returned_object = "chunky bacon".split { |str| a << str.capitalize }
+
+      returned_object.should == "chunky bacon"
+      a.should == ["Chunky", "Bacon"]
     end
-  end
 
-  it "retains the encoding of the source string" do
-    ary = "а б в".split
-    encodings = ary.map { |s| s.encoding }
-    encodings.should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8]
+    it "yields each split substring with default pattern for a lazy substring" do
+      a = []
+      returned_object = "chunky bacon"[1...-1].split { |str| a << str.capitalize }
+
+      returned_object.should == "hunky baco"
+      a.should == ["Hunky", "Baco"]
+    end
+
+    it "yields each split substring with default pattern for a non-ASCII string" do
+      a = []
+      returned_object = "l'été arrive bientôt".split { |str| a << str }
+
+      returned_object.should == "l'été arrive bientôt"
+      a.should == ["l'été", "arrive", "bientôt"]
+    end
+
+    it "yields each split substring with default pattern for a non-ASCII lazy substring" do
+      a = []
+      returned_object = "l'été arrive bientôt"[1...-1].split { |str| a << str }
+
+      returned_object.should == "'été arrive bientô"
+      a.should == ["'été", "arrive", "bientô"]
+    end
+
+    it "yields the string when limit is 1" do
+      a = []
+      returned_object = "chunky bacon".split("", 1) { |str| a << str.capitalize }
+
+      returned_object.should == "chunky bacon"
+      a.should == ["Chunky bacon"]
+    end
+
+    it "yields each split letter" do
+      a = []
+      returned_object = "chunky".split("", 0) { |str| a << str.capitalize }
+
+      returned_object.should == "chunky"
+      a.should == %w(C H U N K Y)
+    end
+
+    it "yields each split substring with a pattern" do
+      a = []
+      returned_object = "chunky-bacon".split("-", 0) { |str| a << str.capitalize }
+
+      returned_object.should == "chunky-bacon"
+      a.should == ["Chunky", "Bacon"]
+    end
+
+    it "yields each split substring with empty regexp pattern" do
+      a = []
+      returned_object = "chunky".split(//) { |str| a << str.capitalize }
+
+      returned_object.should == "chunky"
+      a.should == %w(C H U N K Y)
+    end
+
+    it "yields each split substring with empty regexp pattern and limit" do
+      a = []
+      returned_object = "chunky".split(//, 3) { |str| a << str.capitalize }
+
+      returned_object.should == "chunky"
+      a.should == %w(C H Unky)
+    end
+
+    it "yields each split substring with a regexp pattern" do
+      a = []
+      returned_object = "chunky:bacon".split(/:/) { |str| a << str.capitalize }
+
+      returned_object.should == "chunky:bacon"
+      a.should == ["Chunky", "Bacon"]
+    end
+
+    it "returns a string as is (and doesn't call block) if it is empty" do
+      a = []
+      returned_object = "".split { |str| a << str.capitalize }
+
+      returned_object.should == ""
+      a.should == []
+    end
   end
 
+  describe "for a String subclass" do
+    it "yields instances of String" do
+      a = []
+      StringSpecs::MyString.new("a|b").split("|") { |str| a << str }
+      first, last = a
 
-  it "splits a string on each character for a multibyte encoding and empty split" do
-    "That's why eﬃciency could not be helped".split("").size.should == 39
+      first.should be_an_instance_of(String)
+      first.should == "a"
+
+      last.should be_an_instance_of(String)
+      last.should == "b"
+    end
   end
 
-  it "returns an ArgumentError if an invalid UTF-8 string is supplied" do
-    broken_str = 'проверка' # in russian, means "test"
-    broken_str.force_encoding('binary')
-    broken_str.chop!
-    broken_str.force_encoding('utf-8')
-    lambda{ broken_str.split(/\r\n|\r|\n/) }.should raise_error(ArgumentError)
+  it "raises a TypeError when not called with nil, String, or Regexp" do
+    -> { "hello".split(42) }.should raise_error(TypeError)
+    -> { "hello".split(:ll) }.should raise_error(TypeError)
+    -> { "hello".split(false) }.should raise_error(TypeError)
+    -> { "hello".split(Object.new) }.should raise_error(TypeError)
+  end
+
+  it "returns Strings in the same encoding as self" do
+    strings = "hello world".encode("US-ASCII").split(/ /)
+
+    strings[0].encoding.should == Encoding::US_ASCII
+    strings[1].encoding.should == Encoding::US_ASCII
   end
 end