MG added RDoc comments for re.c

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5309 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: dave <dave@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-12-26 15:58:28 +0000
committer: dave <dave@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-12-26 15:58:28 +0000
commit: d1fd7bdf484fbda6092c241989a89afb6882650a (patch)
tree: bfae18c92f91a23e4e0407182974a25e3dcc4d42 /re.c
parent: b9a79bc0202072aeda560d13ff71dae4ea49721c (diff)
1 files changed, 419 insertions, 0 deletions
diff --git a/re.c b/re.c
index 29f5a89d16..577a9ad745 100644
--- a/re.c
+++ b/re.c
@@ -364,6 +364,16 @@ rb_reg_desc(s, len, re)
     return str;
 }
 
+
+/*
+ *  call-seq:
+ *     rxp.source   => str
+ *  
+ *  Returns the original string of the pattern.
+ *     
+ *     /ab+c/ix.source   #=> "ab+c"
+ */
+
 static VALUE
 rb_reg_source(re)
     VALUE re;
@@ -384,6 +394,27 @@ rb_reg_inspect(re)
     return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re);
 }
 
+
+/*
+ *  call-seq:
+ *     rxp.to_s   => str
+ *  
+ *  Returns a string containing the regular expression and its options (using the
+ *  <code>(?xxx:yyy)</code> notation). This string can be fed back in to
+ *  <code>Regexp::new</code> to create a regular expression with the same semantics as
+ *  the original. (However, <code>Regexp#==</code> may not return true when
+ *  comparing the two, as the source of the regular expression itself may
+ *  differ, as the example shows).  <code>Regexp#inspect</code> produces a
+ *  generally more readable version of <i>rxp</i>.
+ *     
+ *     r1 = /ab+c/ix         #=> /ab+c/ix
+ *     s1 = r1.to_s          #=> "(?ix-m:ab+c)"
+ *     r2 = Regexp.new(s1)   #=> /(?ix-m:ab+c)/
+ *     r1 == r2              #=> false
+ *     r1.source             #=> "ab+c"
+ *     r2.source             #=> "(?ix-m:ab+c)"
+ */
+
 static VALUE
 rb_reg_to_s(re)
     VALUE re;
@@ -490,6 +521,14 @@ rb_reg_raise(s, len, err, re)
 	rb_raise(rb_eRegexpError, "%s: %s", err, RSTRING(desc)->ptr);
 }
 
+
+/*
+ *  call-seq:
+ *     rxp.casefold?   => true or false
+ *  
+ *  Returns the value of the case-insensitive flag.
+ */
+
 static VALUE
 rb_reg_casefold_p(re)
     VALUE re;
@@ -499,6 +538,30 @@ rb_reg_casefold_p(re)
     return Qfalse;
 }
 
+
+/*
+ *  call-seq:
+ *     rxp.options   => fixnum
+ *  
+ *  Returns the set of bits corresponding to the options used when creating this
+ *  Regexp (see <code>Regexp::new</code> for details). Note that additional bits
+ *  may be set in the returned options: these are used internally by the regular
+ *  expression code. These extra bits are ignored if the options are passed to
+ *  <code>Regexp::new</code>.
+ *     
+ *     Regexp::IGNORECASE                  #=> 1
+ *     Regexp::EXTENDED                    #=> 2
+ *     Regexp::MULTILINE                   #=> 4
+ *     
+ *     /cat/.options                       #=> 128
+ *     /cat/ix.options                     #=> 131
+ *     Regexp.new('cat', true).options     #=> 129
+ *     Regexp.new('cat', 0, 's').options   #=> 384
+ *     
+ *     r = /cat/ix
+ *     Regexp.new(r.source, r.options)     #=> /cat/ix
+ */
+
 static VALUE
 rb_reg_options_m(re)
     VALUE re;
@@ -507,6 +570,14 @@ rb_reg_options_m(re)
     return INT2NUM(options);
 }
 
+
+/*
+ *  call-seq:
+ *     rxp.kcode   => str
+ *  
+ *  Returns the character set code for the regexp.
+ */
+
 static VALUE
 rb_reg_kcode_m(re)
     VALUE re;
@@ -564,6 +635,20 @@ make_regexp(s, len, flags)
     return rp;
 }
 
+
+/*
+ *  Document-class: MatchData
+ *
+ *  <code>MatchData</code> is the type of the special variable <code>$~</code>,
+ *  and is the type of the object returned by <code>Regexp#match</code> and
+ *  <code>Regexp::last_match</code>. It encapsulates all the results of a pattern
+ *  match, results normally accessed through the special variables
+ *  <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
+ *  <code>$2</code>, and so on. <code>MatchData</code> is also known as
+ *  <code>MatchingData</code>.
+ *
+ */
+
 static VALUE rb_cMatch;
 
 static VALUE match_alloc _((VALUE));
@@ -599,6 +684,19 @@ match_init_copy(obj, orig)
     return obj;
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.length   => integer
+ *     mtch.size     => integer
+ *  
+ *  Returns the number of elements in the match array.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.length   #=> 5
+ *     m.size     #=> 5
+ */
+
 static VALUE
 match_size(match)
     VALUE match;
@@ -606,6 +704,19 @@ match_size(match)
     return INT2FIX(RMATCH(match)->regs->num_regs);
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.offset(n)   => array
+ *  
+ *  Returns a two-element array containing the beginning and ending offsets of
+ *  the <em>n</em>th match.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.offset(0)   #=> [1, 7]
+ *     m.offset(4)   #=> [6, 7]
+ */
+
 static VALUE
 match_offset(match, n)
     VALUE match, n;
@@ -622,6 +733,19 @@ match_offset(match, n)
 			INT2FIX(RMATCH(match)->regs->end[i]));
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.begin(n)   => integer
+ *  
+ *  Returns the offset of the start of the <em>n</em>th element of the match
+ *  array in the string.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.begin(0)   #=> 1
+ *     m.begin(2)   #=> 2
+ */
+
 static VALUE
 match_begin(match, n)
     VALUE match, n;
@@ -637,6 +761,19 @@ match_begin(match, n)
     return INT2FIX(RMATCH(match)->regs->beg[i]);
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.end(n)   => integer
+ *  
+ *  Returns the offset of the character immediately following the end of the
+ *  <em>n</em>th element of the match array in the string.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.end(0)   #=> 7
+ *     m.end(2)   #=> 3
+ */
+
 static VALUE
 match_end(match, n)
     VALUE match, n;
@@ -846,6 +983,18 @@ rb_reg_last_match(match)
     return rb_reg_nth_match(0, match);
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.pre_match   => str
+ *  
+ *  Returns the portion of the original string before the current match.
+ *  Equivalent to the special variable <code>$`</code>.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.pre_match   #=> "T"
+ */
+
 VALUE
 rb_reg_match_pre(match)
     VALUE match;
@@ -859,6 +1008,18 @@ rb_reg_match_pre(match)
     return str;
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.post_match   => str
+ *  
+ *  Returns the portion of the original string after the current match.
+ *  Equivalent to the special variable <code>$'</code>.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
+ *     m.post_match   #=> ": The Movie"
+ */
+
 VALUE
 rb_reg_match_post(match)
     VALUE match;
@@ -938,6 +1099,33 @@ match_array(match, start)
     return ary;
 }
 
+
+/* [MG]:FIXME: I put parens around the /.../.match() in the first line of the
+   second example to prevent the '*' followed by a '/' from ending the
+   comment. */
+
+/*
+ *  call-seq:
+ *     mtch.to_a   => anArray
+ *  
+ *  Returns the array of matches.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.to_a   #=> ["HX1138", "H", "X", "113", "8"]
+ *     
+ *  Because <code>to_a</code> is called when expanding
+ *  <code>*</code><em>variable</em>, there's a useful assignment
+ *  shortcut for extracting matched fields. This is slightly slower than
+ *  accessing the fields directly (as an intermediate array is
+ *  generated).
+ *     
+ *     all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
+ *     all   #=> "HX1138"
+ *     f1    #=> "H"
+ *     f2    #=> "X"
+ *     f3    #=> "113"
+ */
+
 static VALUE
 match_to_a(match)
     VALUE match;
@@ -945,6 +1133,19 @@ match_to_a(match)
     return match_array(match, 0);
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.captures   => array
+ *
+ *  Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>.
+ *
+ *     f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
+ *     f1    #=> "H"
+ *     f2    #=> "X"
+ *     f3    #=> "113"
+ *     f4    #=> "8"
+ */
 static VALUE
 match_captures(match)
     VALUE match;
@@ -952,6 +1153,26 @@ match_captures(match)
     return match_array(match, 1);
 }
 
+
+/*
+ *  call-seq:
+ *     mtch[i]               => obj
+ *     mtch[start, length]   => array
+ *     mtch[range]           => array
+ *  
+ *  Match Reference---<code>MatchData</code> acts as an array, and may be
+ *  accessed using the normal array indexing techniques.  <i>mtch</i>[0] is
+ *  equivalent to the special variable <code>$&</code>, and returns the entire
+ *  matched string.  <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
+ *  of the matched backreferences (portions of the pattern between parentheses).
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m[0]       #=> "HX1138"
+ *     m[1, 2]    #=> ["H", "X"]
+ *     m[1..3]    #=> ["H", "X", "113"]
+ *     m[-3, 2]   #=> ["X", "113"]
+ */
+
 static VALUE
 match_aref(argc, argv, match)
     int argc;
@@ -977,6 +1198,19 @@ match_entry(match, n)
     return rb_reg_nth_match(n, match);
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.values_at([index]*)   => array
+ *  
+ *  Uses each <i>index</i> to access the matching values, returning an array of
+ *  the corresponding matches.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
+ *     m.to_a                  #=> ["HX1138", "H", "X", "113", "8"]
+ *     m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]
+ */
+
 static VALUE
 match_values_at(argc, argv, match)
     int argc;
@@ -986,6 +1220,19 @@ match_values_at(argc, argv, match)
     return rb_values_at(match, RMATCH(match)->regs->num_regs, argc, argv, match_entry);
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.select([index]*)   => array
+ *  
+ *  Uses each <i>index</i> to access the matching values, returning an
+ *  array of the corresponding matches.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
+ *     m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
+ *     m.select(0, 2, -2)   #=> ["HX1138", "X", "113"]
+ */
+
 static VALUE
 match_select(argc, argv, match)
     int argc;
@@ -1013,6 +1260,17 @@ match_select(argc, argv, match)
     }
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.to_s   => str
+ *  
+ *  Returns the entire matched string.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.to_s   #=> "HX1138"
+ */
+
 static VALUE
 match_to_s(match)
     VALUE match;
@@ -1025,6 +1283,17 @@ match_to_s(match)
     return str;
 }
 
+
+/*
+ *  call-seq:
+ *     mtch.string   => str
+ *  
+ *  Returns a frozen copy of the string passed in to <code>match</code>.
+ *     
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.string   #=> "THX1138."
+ */
+
 static VALUE
 match_string(match)
     VALUE match;
@@ -1166,6 +1435,21 @@ rb_reg_hash(re)
     return INT2FIX(hashval);
 }
 
+
+/*
+ *  call-seq:
+ *     rxp == other_rxp      => true or false
+ *     rxp.eql?(other_rxp)   => true or false
+ *  
+ *  Equality---Two regexps are equal if their patterns are identical, they have
+ *  the same character set code, and their <code>casefold?</code> values are the
+ *  same.
+ *     
+ *     /abc/  == /abc/x   #=> false
+ *     /abc/  == /abc/i   #=> false
+ *     /abc/u == /abc/n   #=> false
+ */
+
 static VALUE
 rb_reg_equal(re1, re2)
     VALUE re1, re2;
@@ -1182,6 +1466,18 @@ rb_reg_equal(re1, re2)
     return Qfalse;
 }
 
+
+/*
+ *  call-seq:
+ *     rxp =~ str   => integer or nil
+ *  
+ *  Match---Matches <i>rxp</i> against <i>str</i>. Returns the offset of the
+ *  start of the match, or <code>nil</code> if there was no match. The
+ *  resulting <code>MatchData</code> is available in <code>$~</code>.
+ *     
+ *     /at/ =~ "input data"   #=> 7
+ */
+
 VALUE
 rb_reg_match(re, str)
     VALUE re, str;
@@ -1200,6 +1496,25 @@ rb_reg_match(re, str)
     return LONG2FIX(start);
 }
 
+
+/*
+ *  call-seq:
+ *     rxp === str   => true or false
+ *  
+ *  Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
+ *     
+ *     a = "HELLO"
+ *     case a
+ *     when /^[a-z]*$/; print "Lower case\n"
+ *     when /^[A-Z]*$/; print "Upper case\n"
+ *     else;            print "Mixed case\n"
+ *     end
+ *     
+ *  <em>produces:</em>
+ *     
+ *     Upper case
+ */
+
 VALUE
 rb_reg_eqq(re, str)
     VALUE re, str;
@@ -1221,6 +1536,18 @@ rb_reg_eqq(re, str)
     return Qtrue;
 }
 
+
+/*
+ *  call-seq:
+ *     ~ rxp   => integer or nil
+ *  
+ *  Match---Matches <i>rxp</i> against the contents of <code>$_</code>.
+ *  Equivalent to <code><i>rxp</i> =~ $_</code>.
+ *     
+ *     $_ = "input data"
+ *     ~ /at/   #=> 7
+ */
+
 VALUE
 rb_reg_match2(re)
     VALUE re;
@@ -1240,6 +1567,18 @@ rb_reg_match2(re)
     return LONG2FIX(start);
 }
 
+
+/*
+ *  call-seq:
+ *     rxp.match(str)   => matchdata or nil
+ *  
+ *  Returns a <code>MatchData</code> object describing the match, or
+ *  <code>nil</code> if there was no match. This is equivalent to retrieving the
+ *  value of the special variable <code>$~</code> following a normal match.
+ *     
+ *     /(.)(.)(.)/.match("abc")[2]   #=> "b"
+ */
+
 static VALUE
 rb_reg_match_m(re, str)
     VALUE re, str;
@@ -1252,6 +1591,30 @@ rb_reg_match_m(re, str)
     return result;
 }
 
+/*
+ *  call-seq:
+ *     Regexp.new(string [, options [, lang]])       => regexp
+ *     Regexp.new(regexp)                            => regexp
+ *     Regexp.compile(string [, options [, lang]])   => regexp
+ *     Regexp.compile(regexp)                        => regexp
+ *  
+ *  Constructs a new regular expression from <i>pattern</i>, which can be either
+ *  a <code>String</code> or a <code>Regexp</code> (in which case that regexp's
+ *  options are propagated, and new options may not be specified (a change as of
+ *  Ruby 1.8)). If <i>options</i> is a <code>Fixnum</code>, it should be one or
+ *  more of the constants <code>Regexp::EXTENDED</code>,
+ *  <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
+ *  <em>or</em>-ed together. Otherwise, if <i>options</i> is not
+ *  <code>nil</code>, the regexp will be case insensitive. The <i>lang</i>
+ *  parameter enables multibyte support for the regexp: `n', `N' = none, `e',
+ *  `E' = EUC, `s', `S' = SJIS, `u', `U' = UTF-8.
+ *     
+ *     r1 = Regexp.new('^a-z+:\\s+\w+')           #=> /^a-z+:\s+\w+/
+ *     r2 = Regexp.new('cat', true)               #=> /cat/i
+ *     r3 = Regexp.new('dog', Regexp::EXTENDED)   #=> /dog/x
+ *     r4 = Regexp.new(r2)                        #=> /cat/i
+ */
+
 static VALUE
 rb_reg_initialize_m(argc, argv, self)
     int argc;
@@ -1411,6 +1774,19 @@ rb_reg_quote(str)
     return tmp;
 }
 
+
+/*
+ *  call-seq:
+ *     Regexp.escape(str)   => new_str
+ *     Regexp.quote(str)    => new_str
+ *  
+ *  Escapes any characters that would have special meaning in a regular
+ *  expression. For any string,
+ *  <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
+ *     
+ *     Regexp.escape('\\*?{}.')   #=> \\\\\*\?\{\}\.
+ */
+
 static VALUE
 rb_reg_s_quote(argc, argv)
     int argc;
@@ -1480,6 +1856,21 @@ rb_reg_options(re)
     return options;
 }
 
+
+/*
+ *  call-seq:
+ *     Regexp.union([pattern]*)   => new_regexp
+ *  
+ *  Return a <code>Regexp</code> object that is the union of the given
+ *  <em>pattern</em>s, i.e., will match any of its parts. The <em>pattern</em>s
+ *  can be Regexp objects, in which case their options will be preserved, or
+ *  Strings. If no arguments are given, returns <code>/(?!)/</code>.
+ *     
+ *     Regexp.union                         #=> /(?!)/
+ *     Regexp.union("penzance")             #=> /penzance/
+ *     Regexp.union("skiing", "sledding")   #=> /skiing|sledding/
+ *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mix:dogs)|(?i-mx:cats)/
+ */
 static VALUE
 rb_reg_s_union(argc, argv)
     int argc;
@@ -1756,6 +2147,23 @@ match_setter(val)
     rb_backref_set(val);
 }
 
+/*
+ *  call-seq:
+ *     Regexp.last_match           => matchdata
+ *     Regexp.last_match(fixnum)   => str
+ *  
+ *  The first form returns the <code>MatchData</code> object generated by the
+ *  last successful pattern match. Equivalent to reading the global variable
+ *  <code>$~</code>. The second form returns the nth field in this
+ *  <code>MatchData</code> object.
+ *     
+ *     /c(.)t/ =~ 'cat'       #=> 0
+ *     Regexp.last_match      #=> #<MatchData:0x401b3d30>
+ *     Regexp.last_match(0)   #=> "cat"
+ *     Regexp.last_match(1)   #=> "a"
+ *     Regexp.last_match(2)   #=> nil
+ */
+
 static VALUE
 rb_reg_s_last_match(argc, argv)
     int argc;
@@ -1769,6 +2177,17 @@ rb_reg_s_last_match(argc, argv)
     return match_getter();
 }
 
+
+/*
+ *  Document-class: Regexp
+ *
+ *  A <code>Regexp</code> holds a regular expression, used to match a pattern
+ *  against strings. Regexps are created using the <code>/.../</code> and
+ *  <code>%r{...}</code> literals, and by the <code>Regexp::new</code>
+ *  constructor.
+ *
+ */
+
 void
 Init_Regexp()
 {
author	dave <dave@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2003-12-26 15:58:28 +0000
committer	dave <dave@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2003-12-26 15:58:28 +0000
commit	d1fd7bdf484fbda6092c241989a89afb6882650a (patch)
tree	bfae18c92f91a23e4e0407182974a25e3dcc4d42 /re.c
parent	b9a79bc0202072aeda560d13ff71dae4ea49721c (diff)