merge revision(s) cd5e6cc0ea48353c88d921b885b552dc76da255c,bbf54ec334fe2edd7669a944d88d17efde49a412: [Backport #19307]

Update to ruby/mspec@fef9b81 --- spec/mspec/tool/tag_from_output.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Update to ruby/spec@9d69b95 --- spec/ruby/core/array/keep_if_spec.rb | 1 + spec/ruby/core/array/pack/c_spec.rb | 14 +- spec/ruby/core/array/pack/shared/basic.rb | 40 ++++ spec/ruby/core/array/pack/shared/float.rb | 66 ++++++- spec/ruby/core/array/pack/shared/integer.rb | 96 +++++++-- spec/ruby/core/array/pack/shared/numeric_basic.rb | 10 +- spec/ruby/core/array/pack/shared/unicode.rb | 14 +- spec/ruby/core/array/pack/w_spec.rb | 14 +- spec/ruby/core/array/shared/unshift.rb | 18 ++ spec/ruby/core/array/values_at_spec.rb | 1 + spec/ruby/core/array/zip_spec.rb | 6 + spec/ruby/core/class/subclasses_spec.rb | 22 +++ spec/ruby/core/dir/home_spec.rb | 44 +++-- spec/ruby/core/dir/mkdir_spec.rb | 18 +- spec/ruby/core/enumerable/zip_spec.rb | 5 + spec/ruby/core/float/comparison_spec.rb | 35 ++-- spec/ruby/core/float/divmod_spec.rb | 2 +- spec/ruby/core/float/gt_spec.rb | 21 ++ spec/ruby/core/float/gte_spec.rb | 21 ++ spec/ruby/core/float/lt_spec.rb | 21 ++ spec/ruby/core/float/lte_spec.rb | 21 ++ spec/ruby/core/float/shared/equal.rb | 21 ++ spec/ruby/core/io/gets_spec.rb | 4 + spec/ruby/core/io/lineno_spec.rb | 9 +- spec/ruby/core/io/new_spec.rb | 2 + spec/ruby/core/io/readline_spec.rb | 4 + spec/ruby/core/io/readlines_spec.rb | 4 + spec/ruby/core/io/shared/each.rb | 4 + spec/ruby/core/io/shared/new.rb | 2 + spec/ruby/core/io/shared/pos.rb | 8 +- spec/ruby/core/io/shared/readlines.rb | 4 + spec/ruby/core/io/sysseek_spec.rb | 2 +- spec/ruby/core/kernel/shared/load.rb | 31 ++- spec/ruby/core/kernel/singleton_class_spec.rb | 2 + spec/ruby/core/marshal/dump_spec.rb | 45 ++++- spec/ruby/core/marshal/fixtures/classes.rb | 4 + spec/ruby/core/matchdata/values_at_spec.rb | 73 ++++++- spec/ruby/core/module/fixtures/classes.rb | 1 + spec/ruby/core/module/include_spec.rb | 4 +- spec/ruby/core/module/prepend_spec.rb | 12 ++ 
.../ruby/core/objectspace/define_finalizer_spec.rb | 22 +++ spec/ruby/core/process/constants_spec.rb | 1 + spec/ruby/core/process/detach_spec.rb | 29 +++ spec/ruby/core/process/spawn_spec.rb | 10 + spec/ruby/core/process/times_spec.rb | 2 +- spec/ruby/core/queue/initialize_spec.rb | 13 +- spec/ruby/core/refinement/import_methods_spec.rb | 34 ++++ spec/ruby/core/refinement/include_spec.rb | 27 +++ spec/ruby/core/refinement/prepend_spec.rb | 27 +++ spec/ruby/core/regexp/initialize_spec.rb | 2 +- spec/ruby/core/signal/trap_spec.rb | 12 ++ spec/ruby/core/string/byteslice_spec.rb | 6 + spec/ruby/core/string/capitalize_spec.rb | 4 + spec/ruby/core/string/chars_spec.rb | 7 +- spec/ruby/core/string/chomp_spec.rb | 4 + spec/ruby/core/string/chop_spec.rb | 4 + spec/ruby/core/string/clone_spec.rb | 4 + spec/ruby/core/string/delete_prefix_spec.rb | 4 + spec/ruby/core/string/delete_spec.rb | 4 + spec/ruby/core/string/delete_suffix_spec.rb | 4 + spec/ruby/core/string/downcase_spec.rb | 4 + spec/ruby/core/string/dump_spec.rb | 10 +- spec/ruby/core/string/dup_spec.rb | 4 + spec/ruby/core/string/lines_spec.rb | 1 - spec/ruby/core/string/reverse_spec.rb | 4 + spec/ruby/core/string/scan_spec.rb | 6 + spec/ruby/core/string/scrub_spec.rb | 10 + spec/ruby/core/string/shared/each_line.rb | 6 + spec/ruby/core/string/shared/partition.rb | 15 ++ spec/ruby/core/string/shared/slice.rb | 13 +- spec/ruby/core/string/shared/strip.rb | 4 + spec/ruby/core/string/shared/succ.rb | 4 + spec/ruby/core/string/split_spec.rb | 17 +- spec/ruby/core/string/squeeze_spec.rb | 5 + spec/ruby/core/string/swapcase_spec.rb | 4 + spec/ruby/core/string/undump_spec.rb | 2 +- spec/ruby/core/string/unpack/b_spec.rb | 28 ++- spec/ruby/core/string/unpack/c_spec.rb | 14 +- spec/ruby/core/string/unpack/h_spec.rb | 28 ++- spec/ruby/core/string/unpack/shared/basic.rb | 28 --- spec/ruby/core/string/unpack/shared/float.rb | 60 +++++- spec/ruby/core/string/unpack/shared/integer.rb | 88 +++++++-- 
spec/ruby/core/string/unpack/shared/unicode.rb | 14 +- spec/ruby/core/string/unpack/w_spec.rb | 14 +- spec/ruby/core/string/unpack1_spec.rb | 12 +- spec/ruby/core/string/unpack_spec.rb | 34 ++++ spec/ruby/core/string/upcase_spec.rb | 4 + spec/ruby/core/string/valid_encoding/utf_8_spec.rb | 214 +++++++++++++++++++++ spec/ruby/core/struct/values_at_spec.rb | 55 +++++- spec/ruby/core/symbol/shared/id2name.rb | 7 + spec/ruby/core/time/at_spec.rb | 16 ++ spec/ruby/core/time/localtime_spec.rb | 16 +- spec/ruby/core/time/new_spec.rb | 94 +++++++-- spec/ruby/core/time/shared/gmtime.rb | 4 +- spec/ruby/core/time/shared/time_params.rb | 11 +- spec/ruby/core/time/strftime_spec.rb | 40 +++- spec/ruby/core/time/utc_spec.rb | 41 +++- spec/ruby/core/time/zone_spec.rb | 20 +- spec/ruby/core/tracepoint/inspect_spec.rb | 9 + spec/ruby/fixtures/code/load_wrap_fixture.rb | 12 ++ spec/ruby/fixtures/code/wrap_fixture.rb | 9 - spec/ruby/language/case_spec.rb | 4 +- .../ruby/language/regexp/character_classes_spec.rb | 5 + spec/ruby/library/coverage/running_spec.rb | 20 ++ spec/ruby/library/date/civil_spec.rb | 7 +- spec/ruby/library/objectspace/fixtures/trace.rb | 5 + spec/ruby/library/objectspace/trace_spec.rb | 15 ++ spec/ruby/library/openssl/x509/name/verify_spec.rb | 4 +- spec/ruby/library/stringio/initialize_spec.rb | 85 ++++++++ spec/ruby/library/stringio/new_spec.rb | 8 + spec/ruby/library/stringio/shared/write.rb | 22 +++ spec/ruby/optional/capi/ext/io_spec.c | 43 +++++ spec/ruby/optional/capi/io_spec.rb | 15 ++ spec/ruby/shared/rational/Rational.rb | 48 ++--- 114 files changed, 1963 insertions(+), 245 deletions(-) create mode 100644 spec/ruby/core/marshal/fixtures/classes.rb create mode 100644 spec/ruby/core/refinement/import_methods_spec.rb create mode 100644 spec/ruby/core/refinement/include_spec.rb create mode 100644 spec/ruby/core/refinement/prepend_spec.rb create mode 100644 spec/ruby/core/string/unpack_spec.rb create mode 100644 
spec/ruby/core/string/valid_encoding/utf_8_spec.rb create mode 100644 spec/ruby/fixtures/code/load_wrap_fixture.rb delete mode 100644 spec/ruby/fixtures/code/wrap_fixture.rb create mode 100644 spec/ruby/library/coverage/running_spec.rb create mode 100644 spec/ruby/library/objectspace/fixtures/trace.rb create mode 100644 spec/ruby/library/objectspace/trace_spec.rb create mode 100644 spec/ruby/library/stringio/new_spec.rb
author: nagachika <nagachika@ruby-lang.org> 2023-06-24 16:59:30 +0900
committer: nagachika <nagachika@ruby-lang.org> 2023-06-24 16:59:30 +0900
commit: 9fca561980c6d024229d72600180b20f3f77536f (patch)
tree: ce07a2046854dd93d669f91243569c38a5671dbd /spec/ruby/core/string/valid_encoding
parent: f89101fa3610a7b977801d60b85bcc1ea2f6dee4 (diff)
1 files changed, 214 insertions, 0 deletions
diff --git a/spec/ruby/core/string/valid_encoding/utf_8_spec.rb b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb
new file mode 100644
index 0000000000..a14c3af830
--- /dev/null
+++ b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb
@@ -0,0 +1,214 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../../spec_helper'
+
+describe "String#valid_encoding? and UTF-8" do
+  def utf8(bytes) # helper: turns an Array of byte values into a String labelled as UTF-8
+    bytes.pack("C*").force_encoding("UTF-8") # "C*" packs each element as one unsigned byte; force_encoding only relabels, so ill-formed input stays ill-formed
+  end
+
+  describe "1-byte character" do # a lone byte is well-formed only when its high bit is clear
+    it "is valid if is in format 0xxxxxxx" do
+      utf8([0b00000000]).valid_encoding?.should == true # lowest value, 0x00
+      utf8([0b01111111]).valid_encoding?.should == true # highest value, 0x7F
+    end
+
+    it "is not valid if is not in format 0xxxxxxx" do
+      utf8([0b10000000]).valid_encoding?.should == false # 10xxxxxx pattern on its own
+      utf8([0b11111111]).valid_encoding?.should == false # all bits set
+    end
+  end
+
+  describe "2-bytes character" do
+    it "is valid if in format [110xxxxx 10xxxxx]" do
+      utf8([0b11000010, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11000010, 0b10111111]).valid_encoding?.should == true
+
+      utf8([0b11011111, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11011111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 110xxxxx" do
+      utf8([0b00000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11100010, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11000010, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b11000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is smaller than [xxxxxx10 xx000000] (codepoints < U+007F, that are encoded with the 1-byte format)" do
+      utf8([0b11000000, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11000001, 0b10111111]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11000010, 0b10000000]
+      utf8(bytes[1..1]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11000010, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+
+  describe "3-bytes character" do
+    it "is valid if in format [1110xxxx 10xxxxxx 10xxxxxx]" do
+      utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11100000, 0b10100000, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11100000, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11101111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 1110xxxx" do
+      utf8([0b00000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11100000, 0b00100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b01100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b11100000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the third byte is not in format 10xxxxxx" do
+      utf8([0b11100000, 0b10100000, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is smaller than [xxxx0000 xx100000 xx000000] (codepoints < U+07FF that are encoded with the 2-byte format)" do
+      utf8([0b11100000, 0b10010000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10001000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000100, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000001, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if in range [xxxx1101 xx100000 xx000000] - [xxxx1101 xx111111 xx111111] (codepoints U+D800 - U+DFFF)" do
+      utf8([0b11101101, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11101101, 0b10100000, 0b10000001]).valid_encoding?.should == false
+      utf8([0b11101101, 0b10111111, 0b10111111]).valid_encoding?.should == false
+
+      utf8([0b11101101, 0b10011111, 0b10111111]).valid_encoding?.should == true # lower boundary - 1
+      utf8([0b11101110, 0b10000000, 0b10000000]).valid_encoding?.should == true # upper boundary + 1
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8(bytes[2..3]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8([bytes[0], bytes[2]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second and the third bytes are missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+
+  describe "4-bytes character" do
+    it "is valid if in format [11110xxx 10xxxxxx 10xxxxxx 10xxxxxx]" do
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10010000, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10111111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110100, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 11110xxx" do
+      utf8([0b11100000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11010000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b00010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b01010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b11010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the third byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b10010000, 0b00000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b01000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10010000, 0b11000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the forth byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b11000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is smaller than [xxxxx000 xx001000 xx000000 xx000000] (codepoint < U+10000)" do
+      utf8([0b11110000, 0b10000111, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000110, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000101, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000100, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000011, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000010, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000001, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is greater than [xxxxx100 xx001111 xx111111 xx111111] (codepoint > U+10FFFF)" do
+      utf8([0b11110100, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110100, 0b10100000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110100, 0b10110000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+
+      utf8([0b11110101, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11110110, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11110111, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8(bytes[1..3]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8([bytes[0], bytes[2], bytes[3]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second and the third bytes are missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8([bytes[0], bytes[3]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second, the third and the fourth bytes are missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+end
author	nagachika <nagachika@ruby-lang.org>	2023-06-24 16:59:30 +0900
committer	nagachika <nagachika@ruby-lang.org>	2023-06-24 16:59:30 +0900
commit	9fca561980c6d024229d72600180b20f3f77536f (patch)
tree	ce07a2046854dd93d669f91243569c38a5671dbd /spec/ruby/core/string/valid_encoding
parent	f89101fa3610a7b977801d60b85bcc1ea2f6dee4 (diff)