From f7e266e6d2ccad63e4245a106a80c82ef2b38cbf Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Fri, 17 Dec 2021 06:05:31 -0600 Subject: Enhanced RDoc for case mapping (#5245) Adds file doc/case_mapping.rdoc, which describes case mapping and provides a link target that methods doc can link to. Revises: String#capitalize String#capitalize! String#casecmp String#casecmp? String#downcase String#downcase! String#swapcase String#swapcase! String#upcase String#upcase! Symbol#capitalize Symbol#casecmp Symbol#casecmp? Symbol#downcase Symbol#swapcase Symbol#upcase --- string.c | 313 +++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 173 insertions(+), 140 deletions(-) (limited to 'string.c') diff --git a/string.c b/string.c index 2b461c20fa..eaf7f7f897 100644 --- a/string.c +++ b/string.c @@ -3702,13 +3702,13 @@ static VALUE str_casecmp_p(VALUE str1, VALUE str2); /* * call-seq: - * casecmp(other_str) -> -1, 0, 1, or nil + * casecmp(other_string) -> -1, 0, 1, or nil * - * Compares +self+ and +other_string+, ignoring case, and returning: + * Compares self.downcase and other_string.downcase; returns: * - * - -1 if +other_string+ is larger. + * - -1 if other_string.downcase is larger. * - 0 if the two are equal. - * - 1 if +other_string+ is smaller. + * - 1 if other_string.downcase is smaller. * - +nil+ if the two are incomparable. * * Examples: @@ -3720,6 +3720,10 @@ static VALUE str_casecmp_p(VALUE str1, VALUE str2); * 'foo'.casecmp('FOO') # => 0 * 'foo'.casecmp(1) # => nil * + * See {Case Mapping}[doc/case_mapping_rdoc.html]. + * + * Related: String#casecmp?. + * */ static VALUE @@ -3806,6 +3810,10 @@ str_casecmp(VALUE str1, VALUE str2) * * 'foo'.casecmp?(1) # => nil * + * See {Case Mapping}[doc/case_mapping_rdoc.html]. + * + * Related: String#casecmp. + * */ static VALUE @@ -7151,13 +7159,21 @@ upcase_single(VALUE str) /* * call-seq: - * str.upcase! 
-> str or nil - * str.upcase!([options]) -> str or nil + * upcase!(*options) -> self or nil * - * Upcases the contents of str, returning nil if no changes - * were made. + * Upcases the characters in +self+; + * returns +self+ if any changes were made, +nil+ otherwise: + * + * s = 'Hello World!' # => "Hello World!" + * s.upcase! # => "HELLO WORLD!" + * s # => "HELLO WORLD!" + * s.upcase! # => nil + * + * The casing may be affected by the given +options+; + * see {Case Mapping}[doc/case_mapping_rdoc.html]. + * + * Related: String#upcase, String#downcase, String#downcase!. * - * See String#downcase for meaning of +options+ and use with different encodings. */ static VALUE @@ -7185,15 +7201,18 @@ rb_str_upcase_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * str.upcase -> new_str - * str.upcase([options]) -> new_str + * upcase(*options) -> string + * + * Returns a string containing the upcased characters in +self+: + * + * s = 'Hello World!' # => "Hello World!" + * s.upcase # => "HELLO WORLD!" * - * Returns a copy of str with all lowercase letters replaced with their - * uppercase counterparts. + * The casing may be affected by the given +options+; + * see {Case Mapping}[doc/case_mapping_rdoc.html]. * - * See String#downcase for meaning of +options+ and use with different encodings. + * Related: String#upcase!, String#downcase, String#downcase!. * - * "hEllO".upcase #=> "HELLO" */ static VALUE @@ -7242,13 +7261,21 @@ downcase_single(VALUE str) /* * call-seq: - * str.downcase! -> str or nil - * str.downcase!([options]) -> str or nil + * downcase!(*options) -> self or nil + * + * Downcases the characters in +self+; + * returns +self+ if any changes were made, +nil+ otherwise: + * + * s = 'Hello World!' # => "Hello World!" + * s.downcase! # => "hello world!" + * s # => "hello world!" + * s.downcase! # => nil + * + * The casing may be affected by the given +options+; + * see {Case Mapping}[doc/case_mapping_rdoc.html]. 
* - * Downcases the contents of str, returning nil if no - * changes were made. + * Related: String#downcase, String#upcase, String#upcase!. * - * See String#downcase for meaning of +options+ and use with different encodings. */ static VALUE @@ -7276,52 +7303,18 @@ rb_str_downcase_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * str.downcase -> new_str - * str.downcase([options]) -> new_str - * - * Returns a copy of str with all uppercase letters replaced with their - * lowercase counterparts. Which letters exactly are replaced, and by which - * other letters, depends on the presence or absence of options, and on the - * +encoding+ of the string. - * - * The meaning of the +options+ is as follows: - * - * No option :: - * Full Unicode case mapping, suitable for most languages - * (see :turkic and :lithuanian options below for exceptions). - * Context-dependent case mapping as described in Table 3-14 of the - * Unicode standard is currently not supported. - * :ascii :: - * Only the ASCII region, i.e. the characters ``A'' to ``Z'' and - * ``a'' to ``z'', are affected. - * This option cannot be combined with any other option. - * :turkic :: - * Full Unicode case mapping, adapted for Turkic languages - * (Turkish, Azerbaijani, ...). This means that upper case I is mapped to - * lower case dotless i, and so on. - * :lithuanian :: - * Currently, just full Unicode case mapping. In the future, full Unicode - * case mapping adapted for Lithuanian (keeping the dot on the lower case - * i even if there is an accent on top). - * :fold :: - * Only available on +downcase+ and +downcase!+. Unicode case folding, - * which is more far-reaching than Unicode case mapping. - * This option currently cannot be combined with any other option - * (i.e. there is currently no variant for turkic languages). - * - * Please note that several assumptions that are valid for ASCII-only case - * conversions do not hold for more general case conversions. 
For example, - * the length of the result may not be the same as the length of the input - * (neither in characters nor in bytes), some roundtrip assumptions - * (e.g. str.downcase == str.upcase.downcase) may not apply, and Unicode - * normalization (i.e. String#unicode_normalize) is not necessarily maintained - * by case mapping operations. - * - * Non-ASCII case mapping/folding is currently supported for UTF-8, - * UTF-16BE/LE, UTF-32BE/LE, and ISO-8859-1~16 Strings/Symbols. - * This support will be extended to other encodings. - * - * "hEllO".downcase #=> "hello" + * downcase(*options) -> string + * + * Returns a string containing the downcased characters in +self+: + * + * s = 'Hello World!' # => "Hello World!" + * s.downcase # => "hello world!" + * + * The casing may be affected by the given +options+; + * see {Case Mapping}[doc/case_mapping_rdoc.html]. + * + * Related: String#downcase!, String#upcase, String#upcase!. + * */ static VALUE @@ -7352,20 +7345,22 @@ rb_str_downcase(int argc, VALUE *argv, VALUE str) /* * call-seq: - * str.capitalize! -> str or nil - * str.capitalize!([options]) -> str or nil + * capitalize!(*options) -> self or nil + * + * Upcases the first character in +self+; + * downcases the remaining characters; + * returns +self+ if any changes were made, +nil+ otherwise: + * + * s = 'hello World!' # => "hello World!" + * s.capitalize! # => "Hello world!" + * s # => "Hello world!" + * s.capitalize! # => nil * - * Modifies str by converting the first character to uppercase and the - * remainder to lowercase. Returns nil if no changes are made. - * There is an exception for modern Georgian (mkhedruli/MTAVRULI), where - * the result is the same as for String#downcase, to avoid mixed case. + * The casing may be affected by the given +options+; + * see {Case Mapping}[doc/case_mapping_rdoc.html]. * - * See String#downcase for meaning of +options+ and use with different encodings. + * Related: String#capitalize. * - * a = "hello" - * a.capitalize! 
#=> "Hello" - * a #=> "Hello" - * a.capitalize! #=> nil */ static VALUE @@ -7390,17 +7385,20 @@ rb_str_capitalize_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * str.capitalize -> new_str - * str.capitalize([options]) -> new_str + * capitalize(*options) -> string * - * Returns a copy of str with the first character converted to uppercase - * and the remainder to lowercase. + * Returns a string containing the characters in +self+; + * the first character is upcased; + * the remaining characters are downcased: * - * See String#downcase for meaning of +options+ and use with different encodings. + * s = 'hello World!' # => "hello World!" + * s.capitalize # => "Hello world!" + * + * The casing may be affected by the given +options+; + * see {Case Mapping}[doc/case_mapping_rdoc.html]. + * + * Related: String#capitalize!. * - * "hello".capitalize #=> "Hello" - * "HELLO".capitalize #=> "Hello" - * "123ABC".capitalize #=> "123abc" */ static VALUE @@ -7426,14 +7424,22 @@ rb_str_capitalize(int argc, VALUE *argv, VALUE str) /* * call-seq: - * str.swapcase! -> str or nil - * str.swapcase!([options]) -> str or nil + * swapcase!(*options) -> self or nil * - * Equivalent to String#swapcase, but modifies the receiver in place, - * returning str, or nil if no changes were made. + * Upcases each lowercase character in +self+; + * downcases each uppercase character; + * returns +self+ if any changes were made, +nil+ otherwise: + * + * s = 'Hello World!' # => "Hello World!" + * s.swapcase! # => "hELLO wORLD!" + * s # => "hELLO wORLD!" + * ''.swapcase! # => nil + * + * The casing may be affected by the given +options+; + * see {Case Mapping}[doc/case_mapping_rdoc.html]. + * + * Related: String#swapcase. * - * See String#downcase for meaning of +options+ and use with - * different encodings. 
*/ static VALUE @@ -7457,16 +7463,20 @@ rb_str_swapcase_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * str.swapcase -> new_str - * str.swapcase([options]) -> new_str + * swapcase(*options) -> string + * + * Returns a string containing the characters in +self+, with cases reversed; + * each uppercase character is downcased; + * each lowercase character is upcased: * - * Returns a copy of str with uppercase alphabetic characters converted - * to lowercase and lowercase characters converted to uppercase. + * s = 'Hello World!' # => "Hello World!" + * s.swapcase # => "hELLO wORLD!" * - * See String#downcase for meaning of +options+ and use with different encodings. + * The casing may be affected by the given +options+; + * see {Case Mapping}[doc/case_mapping_rdoc.html]. + * + * Related: String#swapcase!. * - * "Hello".swapcase #=> "hELLO" - * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11" */ static VALUE @@ -11502,23 +11512,29 @@ sym_cmp(VALUE sym, VALUE other) } /* - * call-seq: - * sym.casecmp(other_symbol) -> -1, 0, +1, or nil + * call-seq: + * casecmp(other_symbol) -> -1, 0, 1, or nil + * + * Case-insensitive version of {Symbol#<=>}[#method-i-3C-3D-3E]: * - * Case-insensitive version of Symbol#<=>. - * Currently, case-insensitivity only works on characters A-Z/a-z, - * not all of Unicode. This is different from Symbol#casecmp?. + * :aBcDeF.casecmp(:abcde) # => 1 + * :aBcDeF.casecmp(:abcdef) # => 0 + * :aBcDeF.casecmp(:abcdefg) # => -1 + * :abcdef.casecmp(:ABCDEF) # => 0 * - * :aBcDeF.casecmp(:abcde) #=> 1 - * :aBcDeF.casecmp(:abcdef) #=> 0 - * :aBcDeF.casecmp(:abcdefg) #=> -1 - * :abcdef.casecmp(:ABCDEF) #=> 0 + * Returns +nil+ if the two symbols have incompatible encodings, + * or if +other_symbol+ is not a symbol: * - * +nil+ is returned if the two symbols have incompatible encodings, - * or if +other_symbol+ is not a symbol. 
+ * sym = "\u{e4 f6 fc}".encode("ISO-8859-1").to_sym + * other_sym = :"\u{c4 d6 dc}" + * sym.casecmp(other_sym) # => nil + * :foo.casecmp(2) # => nil + * + * Currently, case-insensitivity only works on characters A-Z/a-z, + * not all of Unicode. This is different from Symbol#casecmp?. + * + * Related: Symbol#casecmp?. * - * :foo.casecmp(2) #=> nil - * "\u{e4 f6 fc}".encode("ISO-8859-1").to_sym.casecmp(:"\u{c4 d6 dc}") #=> nil */ static VALUE @@ -11531,23 +11547,30 @@ sym_casecmp(VALUE sym, VALUE other) } /* - * call-seq: - * sym.casecmp?(other_symbol) -> true, false, or nil + * call-seq: + * casecmp?(other_symbol) -> true, false, or nil + * + * Returns +true+ if +sym+ and +other_symbol+ are equal after + * Unicode case folding, +false+ if they are not equal: + * + * :aBcDeF.casecmp?(:abcde) # => false + * :aBcDeF.casecmp?(:abcdef) # => true + * :aBcDeF.casecmp?(:abcdefg) # => false + * :abcdef.casecmp?(:ABCDEF) # => true + * :"\u{e4 f6 fc}".casecmp?(:"\u{c4 d6 dc}") # => true + * + * Returns +nil+ if the two symbols have incompatible encodings, + * or if +other_symbol+ is not a symbol: * - * Returns +true+ if +sym+ and +other_symbol+ are equal after - * Unicode case folding, +false+ if they are not equal. + * sym = "\u{e4 f6 fc}".encode("ISO-8859-1").to_sym + * other_sym = :"\u{c4 d6 dc}" + * sym.casecmp?(other_sym) # => nil + * :foo.casecmp?(2) # => nil * - * :aBcDeF.casecmp?(:abcde) #=> false - * :aBcDeF.casecmp?(:abcdef) #=> true - * :aBcDeF.casecmp?(:abcdefg) #=> false - * :abcdef.casecmp?(:ABCDEF) #=> true - * :"\u{e4 f6 fc}".casecmp?(:"\u{c4 d6 dc}") #=> true + * See {Case Mapping}[doc/case_mapping_rdoc.html]. * - * +nil+ is returned if the two symbols have incompatible encodings, - * or if +other_symbol+ is not a symbol. + * Related: Symbol#casecmp. 
* - * :foo.casecmp?(2) #=> nil - * "\u{e4 f6 fc}".encode("ISO-8859-1").to_sym.casecmp?(:"\u{c4 d6 dc}") #=> nil */ static VALUE @@ -11644,11 +11667,13 @@ sym_empty(VALUE sym) } /* - * call-seq: - * sym.upcase -> symbol - * sym.upcase([options]) -> symbol + * call-seq: + * upcase(*options) -> symbol + * + * Equivalent to sym.to_s.upcase.to_sym. + * + * See String#upcase. * - * Same as sym.to_s.upcase.intern. */ static VALUE @@ -11658,11 +11683,15 @@ sym_upcase(int argc, VALUE *argv, VALUE sym) } /* - * call-seq: - * sym.downcase -> symbol - * sym.downcase([options]) -> symbol + * call-seq: + * downcase(*options) -> symbol + * + * Equivalent to sym.to_s.downcase.to_sym. + * + * See String#downcase. + * + * Related: Symbol#upcase. * - * Same as sym.to_s.downcase.intern. */ static VALUE @@ -11672,11 +11701,13 @@ sym_downcase(int argc, VALUE *argv, VALUE sym) } /* - * call-seq: - * sym.capitalize -> symbol - * sym.capitalize([options]) -> symbol + * call-seq: + * capitalize(*options) -> symbol + * + * Equivalent to sym.to_s.capitalize.to_sym. + * + * See String#capitalize. * - * Same as sym.to_s.capitalize.intern. */ static VALUE @@ -11686,11 +11717,13 @@ sym_capitalize(int argc, VALUE *argv, VALUE sym) } /* - * call-seq: - * sym.swapcase -> symbol - * sym.swapcase([options]) -> symbol + * call-seq: + * swapcase(*options) -> symbol + * + * Equivalent to sym.to_s.swapcase.to_sym. + * + * See String#swapcase. * - * Same as sym.to_s.swapcase.intern. */ static VALUE -- cgit v1.2.3