diff options
| author | nagachika <nagachika@ruby-lang.org> | 2023-06-24 16:59:30 +0900 |
|---|---|---|
| committer | nagachika <nagachika@ruby-lang.org> | 2023-06-24 16:59:30 +0900 |
| commit | 9fca561980c6d024229d72600180b20f3f77536f (patch) | |
| tree | ce07a2046854dd93d669f91243569c38a5671dbd /spec/ruby/core/string/valid_encoding | |
| parent | f89101fa3610a7b977801d60b85bcc1ea2f6dee4 (diff) | |
merge revision(s) cd5e6cc0ea48353c88d921b885b552dc76da255c,bbf54ec334fe2edd7669a944d88d17efde49a412: [Backport #19307]
Update to ruby/mspec@fef9b81
---
spec/mspec/tool/tag_from_output.rb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Update to ruby/spec@9d69b95
---
spec/ruby/core/array/keep_if_spec.rb | 1 +
spec/ruby/core/array/pack/c_spec.rb | 14 +-
spec/ruby/core/array/pack/shared/basic.rb | 40 ++++
spec/ruby/core/array/pack/shared/float.rb | 66 ++++++-
spec/ruby/core/array/pack/shared/integer.rb | 96 +++++++--
spec/ruby/core/array/pack/shared/numeric_basic.rb | 10 +-
spec/ruby/core/array/pack/shared/unicode.rb | 14 +-
spec/ruby/core/array/pack/w_spec.rb | 14 +-
spec/ruby/core/array/shared/unshift.rb | 18 ++
spec/ruby/core/array/values_at_spec.rb | 1 +
spec/ruby/core/array/zip_spec.rb | 6 +
spec/ruby/core/class/subclasses_spec.rb | 22 +++
spec/ruby/core/dir/home_spec.rb | 44 +++--
spec/ruby/core/dir/mkdir_spec.rb | 18 +-
spec/ruby/core/enumerable/zip_spec.rb | 5 +
spec/ruby/core/float/comparison_spec.rb | 35 ++--
spec/ruby/core/float/divmod_spec.rb | 2 +-
spec/ruby/core/float/gt_spec.rb | 21 ++
spec/ruby/core/float/gte_spec.rb | 21 ++
spec/ruby/core/float/lt_spec.rb | 21 ++
spec/ruby/core/float/lte_spec.rb | 21 ++
spec/ruby/core/float/shared/equal.rb | 21 ++
spec/ruby/core/io/gets_spec.rb | 4 +
spec/ruby/core/io/lineno_spec.rb | 9 +-
spec/ruby/core/io/new_spec.rb | 2 +
spec/ruby/core/io/readline_spec.rb | 4 +
spec/ruby/core/io/readlines_spec.rb | 4 +
spec/ruby/core/io/shared/each.rb | 4 +
spec/ruby/core/io/shared/new.rb | 2 +
spec/ruby/core/io/shared/pos.rb | 8 +-
spec/ruby/core/io/shared/readlines.rb | 4 +
spec/ruby/core/io/sysseek_spec.rb | 2 +-
spec/ruby/core/kernel/shared/load.rb | 31 ++-
spec/ruby/core/kernel/singleton_class_spec.rb | 2 +
spec/ruby/core/marshal/dump_spec.rb | 45 ++++-
spec/ruby/core/marshal/fixtures/classes.rb | 4 +
spec/ruby/core/matchdata/values_at_spec.rb | 73 ++++++-
spec/ruby/core/module/fixtures/classes.rb | 1 +
spec/ruby/core/module/include_spec.rb | 4 +-
spec/ruby/core/module/prepend_spec.rb | 12 ++
.../ruby/core/objectspace/define_finalizer_spec.rb | 22 +++
spec/ruby/core/process/constants_spec.rb | 1 +
spec/ruby/core/process/detach_spec.rb | 29 +++
spec/ruby/core/process/spawn_spec.rb | 10 +
spec/ruby/core/process/times_spec.rb | 2 +-
spec/ruby/core/queue/initialize_spec.rb | 13 +-
spec/ruby/core/refinement/import_methods_spec.rb | 34 ++++
spec/ruby/core/refinement/include_spec.rb | 27 +++
spec/ruby/core/refinement/prepend_spec.rb | 27 +++
spec/ruby/core/regexp/initialize_spec.rb | 2 +-
spec/ruby/core/signal/trap_spec.rb | 12 ++
spec/ruby/core/string/byteslice_spec.rb | 6 +
spec/ruby/core/string/capitalize_spec.rb | 4 +
spec/ruby/core/string/chars_spec.rb | 7 +-
spec/ruby/core/string/chomp_spec.rb | 4 +
spec/ruby/core/string/chop_spec.rb | 4 +
spec/ruby/core/string/clone_spec.rb | 4 +
spec/ruby/core/string/delete_prefix_spec.rb | 4 +
spec/ruby/core/string/delete_spec.rb | 4 +
spec/ruby/core/string/delete_suffix_spec.rb | 4 +
spec/ruby/core/string/downcase_spec.rb | 4 +
spec/ruby/core/string/dump_spec.rb | 10 +-
spec/ruby/core/string/dup_spec.rb | 4 +
spec/ruby/core/string/lines_spec.rb | 1 -
spec/ruby/core/string/reverse_spec.rb | 4 +
spec/ruby/core/string/scan_spec.rb | 6 +
spec/ruby/core/string/scrub_spec.rb | 10 +
spec/ruby/core/string/shared/each_line.rb | 6 +
spec/ruby/core/string/shared/partition.rb | 15 ++
spec/ruby/core/string/shared/slice.rb | 13 +-
spec/ruby/core/string/shared/strip.rb | 4 +
spec/ruby/core/string/shared/succ.rb | 4 +
spec/ruby/core/string/split_spec.rb | 17 +-
spec/ruby/core/string/squeeze_spec.rb | 5 +
spec/ruby/core/string/swapcase_spec.rb | 4 +
spec/ruby/core/string/undump_spec.rb | 2 +-
spec/ruby/core/string/unpack/b_spec.rb | 28 ++-
spec/ruby/core/string/unpack/c_spec.rb | 14 +-
spec/ruby/core/string/unpack/h_spec.rb | 28 ++-
spec/ruby/core/string/unpack/shared/basic.rb | 28 ---
spec/ruby/core/string/unpack/shared/float.rb | 60 +++++-
spec/ruby/core/string/unpack/shared/integer.rb | 88 +++++++--
spec/ruby/core/string/unpack/shared/unicode.rb | 14 +-
spec/ruby/core/string/unpack/w_spec.rb | 14 +-
spec/ruby/core/string/unpack1_spec.rb | 12 +-
spec/ruby/core/string/unpack_spec.rb | 34 ++++
spec/ruby/core/string/upcase_spec.rb | 4 +
spec/ruby/core/string/valid_encoding/utf_8_spec.rb | 214 +++++++++++++++++++++
spec/ruby/core/struct/values_at_spec.rb | 55 +++++-
spec/ruby/core/symbol/shared/id2name.rb | 7 +
spec/ruby/core/time/at_spec.rb | 16 ++
spec/ruby/core/time/localtime_spec.rb | 16 +-
spec/ruby/core/time/new_spec.rb | 94 +++++++--
spec/ruby/core/time/shared/gmtime.rb | 4 +-
spec/ruby/core/time/shared/time_params.rb | 11 +-
spec/ruby/core/time/strftime_spec.rb | 40 +++-
spec/ruby/core/time/utc_spec.rb | 41 +++-
spec/ruby/core/time/zone_spec.rb | 20 +-
spec/ruby/core/tracepoint/inspect_spec.rb | 9 +
spec/ruby/fixtures/code/load_wrap_fixture.rb | 12 ++
spec/ruby/fixtures/code/wrap_fixture.rb | 9 -
spec/ruby/language/case_spec.rb | 4 +-
.../ruby/language/regexp/character_classes_spec.rb | 5 +
spec/ruby/library/coverage/running_spec.rb | 20 ++
spec/ruby/library/date/civil_spec.rb | 7 +-
spec/ruby/library/objectspace/fixtures/trace.rb | 5 +
spec/ruby/library/objectspace/trace_spec.rb | 15 ++
spec/ruby/library/openssl/x509/name/verify_spec.rb | 4 +-
spec/ruby/library/stringio/initialize_spec.rb | 85 ++++++++
spec/ruby/library/stringio/new_spec.rb | 8 +
spec/ruby/library/stringio/shared/write.rb | 22 +++
spec/ruby/optional/capi/ext/io_spec.c | 43 +++++
spec/ruby/optional/capi/io_spec.rb | 15 ++
spec/ruby/shared/rational/Rational.rb | 48 ++---
114 files changed, 1963 insertions(+), 245 deletions(-)
create mode 100644 spec/ruby/core/marshal/fixtures/classes.rb
create mode 100644 spec/ruby/core/refinement/import_methods_spec.rb
create mode 100644 spec/ruby/core/refinement/include_spec.rb
create mode 100644 spec/ruby/core/refinement/prepend_spec.rb
create mode 100644 spec/ruby/core/string/unpack_spec.rb
create mode 100644 spec/ruby/core/string/valid_encoding/utf_8_spec.rb
create mode 100644 spec/ruby/fixtures/code/load_wrap_fixture.rb
delete mode 100644 spec/ruby/fixtures/code/wrap_fixture.rb
create mode 100644 spec/ruby/library/coverage/running_spec.rb
create mode 100644 spec/ruby/library/objectspace/fixtures/trace.rb
create mode 100644 spec/ruby/library/objectspace/trace_spec.rb
create mode 100644 spec/ruby/library/stringio/new_spec.rb
Diffstat (limited to 'spec/ruby/core/string/valid_encoding')
| -rw-r--r-- | spec/ruby/core/string/valid_encoding/utf_8_spec.rb | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/spec/ruby/core/string/valid_encoding/utf_8_spec.rb b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb
new file mode 100644
index 0000000000..a14c3af830
--- /dev/null
+++ b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb
@@ -0,0 +1,225 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../../spec_helper'
+
+# Specs for String#valid_encoding? on strings tagged as UTF-8.
+# Every example builds a string from explicit byte values and checks the
+# result; examples are grouped by encoded character length (1 to 4 bytes).
+# Covered per length: malformed lead/continuation bytes, overlong forms,
+# the surrogate range, code points above U+10FFFF and truncated sequences.
+describe "String#valid_encoding? and UTF-8" do
+  # Builds a String from an Array of Integer byte values and labels it as
+  # UTF-8. force_encoding only changes the encoding tag; it does not transcode
+  # or validate, so deliberately malformed sequences can be constructed.
+  def utf8(bytes)
+    bytes.pack("C*").force_encoding("UTF-8")
+  end
+
+  describe "1-byte character" do
+    it "is valid if is in format 0xxxxxxx" do
+      utf8([0b00000000]).valid_encoding?.should == true
+      utf8([0b01111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if is not in format 0xxxxxxx" do
+      utf8([0b10000000]).valid_encoding?.should == false
+      utf8([0b11111111]).valid_encoding?.should == false
+    end
+  end
+
+  describe "2-bytes character" do
+    it "is valid if in format [110xxxxx 10xxxxxx]" do
+      utf8([0b11000010, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11000010, 0b10111111]).valid_encoding?.should == true
+
+      utf8([0b11011111, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11011111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 110xxxxx" do
+      utf8([0b00000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11100010, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11000010, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b11000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is smaller than [xxx00010 xx000000] (codepoints <= U+007F, that are encoded with the 1-byte format)" do
+      utf8([0b11000000, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11000001, 0b10111111]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11000010, 0b10000000]
+      utf8(bytes[1..1]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11000010, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+
+  describe "3-bytes character" do
+    it "is valid if in format [1110xxxx 10xxxxxx 10xxxxxx]" do
+      utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11100000, 0b10100000, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11100000, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11101111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 1110xxxx" do
+      utf8([0b00000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11100000, 0b00100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b01100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b11100000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the third byte is not in format 10xxxxxx" do
+      utf8([0b11100000, 0b10100000, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b11000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is smaller than [xxxx0000 xx100000 xx000000] (codepoints <= U+07FF that are encoded with the 2-byte format)" do
+      utf8([0b11100000, 0b10011111, 0b10111111]).valid_encoding?.should == false # upper boundary: overlong U+07FF
+      utf8([0b11100000, 0b10010000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10001000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000100, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000001, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if in range [xxxx1101 xx100000 xx000000] - [xxxx1101 xx111111 xx111111] (codepoints U+D800 - U+DFFF)" do
+      utf8([0b11101101, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11101101, 0b10100000, 0b10000001]).valid_encoding?.should == false
+      utf8([0b11101101, 0b10111111, 0b10111111]).valid_encoding?.should == false
+
+      utf8([0b11101101, 0b10011111, 0b10111111]).valid_encoding?.should == true # lower boundary - 1
+      utf8([0b11101110, 0b10000000, 0b10000000]).valid_encoding?.should == true # upper boundary + 1
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8(bytes[1..2]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8([bytes[0], bytes[2]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second and the third bytes are missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+
+  describe "4-bytes character" do
+    it "is valid if in format [11110xxx 10xxxxxx 10xxxxxx 10xxxxxx]" do
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10010000, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10111111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110100, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 11110xxx" do
+      utf8([0b11100000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11010000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b00010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b01010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b11010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the third byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b10010000, 0b00000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b01000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10010000, 0b11000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the fourth byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b11000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is smaller than [xxxxx000 xx010000 xx000000 xx000000] (codepoint < U+10000)" do
+      utf8([0b11110000, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false # upper boundary: overlong U+FFFF
+      utf8([0b11110000, 0b10001000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000111, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000110, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000101, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000100, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000011, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000010, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000001, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is greater than [xxxxx100 xx001111 xx111111 xx111111] (codepoint > U+10FFFF)" do
+      utf8([0b11110100, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110100, 0b10100000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110100, 0b10110000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+
+      utf8([0b11110101, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11110110, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11110111, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8(bytes[1..3]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8([bytes[0], bytes[2], bytes[3]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second and the third bytes are missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8([bytes[0], bytes[3]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second, the third and the fourth bytes are missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+end
