From 6f77e4b4ba78f0d74d297219cae251161b466a48 Mon Sep 17 00:00:00 2001 From: aamine Date: Wed, 18 Feb 2004 17:38:33 +0000 Subject: * ext/strscan/strscan.c: merge documentation from 1.8 branch. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5774 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/strscan/strscan.c | 456 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 451 insertions(+), 5 deletions(-) (limited to 'ext/strscan/strscan.c') diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index e79df47635..39d3d8bcb6 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -194,6 +194,12 @@ strscan_s_allocate(klass) return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p); } +/* + * call-seq: StringScanner.new(string, dup = false) + * + * Creates a new StringScanner object to scan over the given +string+. + * +dup+ argument is obsolete and not used now. + */ static VALUE strscan_initialize(argc, argv, self) int argc; @@ -216,6 +222,11 @@ strscan_initialize(argc, argv, self) Instance Methods ======================================================================= */ +/* + * call-seq: StringScanner.must_C_version + * + * This method is defined for backward compatibility. + */ static VALUE strscan_s_mustc(self) VALUE self; @@ -223,6 +234,9 @@ strscan_s_mustc(self) return self; } +/* + * Reset the scan pointer (index 0) and clear matching data. + */ static VALUE strscan_reset(self) VALUE self; @@ -235,6 +249,13 @@ strscan_reset(self) return self; } +/* + * call-seq: + * terminate + * clear + * + * Set the scan pointer to the end of the string and clear matching data. + */ static VALUE strscan_terminate(self) VALUE self; @@ -247,6 +268,9 @@ strscan_terminate(self) return self; } +/* + * Returns the string being scanned. + */ static VALUE strscan_clear(self) VALUE self; @@ -265,6 +289,12 @@ strscan_get_string(self) return p->str; } +/* + * call-seq: string=(str) + * + * Changes the string being scanned to +str+ and resets the scanner. 
+ * Returns +str+. + */ static VALUE strscan_set_string(self, str) VALUE self, str; @@ -280,6 +310,20 @@ strscan_set_string(self, str) return str; } +/* + * call-seq: + * concat(str) + * <<(str) + * + * Appends +str+ to the string being scanned. + * This method does not affect the scan pointer. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.scan(/Fri /) + * s << " +1000 GMT" + * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT" + * s.scan(/Dec/) # -> "Dec" + */ static VALUE strscan_concat(self, str) VALUE self, str; @@ -292,6 +336,20 @@ strscan_concat(self, str) return self; } +/* + * Returns the position of the scan pointer. In the 'reset' position, this + * value is zero. In the 'terminated' position (i.e. the string is exhausted), + * this value is the length of the string. + * + * In short, it's a 0-based index into the string. + * + * s = StringScanner.new('test string') + * s.pos # -> 0 + * s.scan_until /str/ # -> "test str" + * s.pos # -> 8 + * s.terminate # -> #<StringScanner fin> + * s.pos # -> 11 + */ static VALUE strscan_get_pos(self) VALUE self; @@ -302,6 +360,15 @@ strscan_get_pos(self) return INT2FIX(p->curr); } +/* + * call-seq: pos=(n) + * + * Modify the scan pointer. + * + * s = StringScanner.new('test string') + * s.pos = 7 # -> 7 + * s.rest # -> "ring" + */ static VALUE strscan_set_pos(self, v) VALUE self, v; @@ -371,6 +438,27 @@ strscan_do_scan(self, regex, succptr, getstr, headonly) } } +/* + * call-seq: + * scanner.scan(pattern) => String + * + */ + +/* + * call-seq: scan(pattern) + * + * Tries to match with +pattern+ at the current position. If there's a match, + * the scanner advances the "scan pointer" and returns the matched string. + * Otherwise, the scanner returns +nil+. 
+ * + * s = StringScanner.new('test string') + * p s.scan(/\w+/) # -> "test" + * p s.scan(/\w+/) # -> nil + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "string" + * p s.scan(/./) # -> nil + * + */ static VALUE strscan_scan(self, re) VALUE self, re; @@ -378,6 +466,17 @@ strscan_scan(self, re) return strscan_do_scan(self, re, 1, 1, 1); } +/* + * call-seq: match?(pattern) + * + * Tests whether the given +pattern+ is matched from the current scan pointer. + * Returns the length of the match, or +nil+. The scan pointer is not advanced. + * + * s = StringScanner.new('test string') + * p s.match?(/\w+/) # -> 4 + * p s.match?(/\w+/) # -> 4 + * p s.match?(/\s+/) # -> nil + */ static VALUE strscan_match_p(self, re) VALUE self, re; @@ -385,6 +484,23 @@ strscan_match_p(self, re) return strscan_do_scan(self, re, 0, 0, 1); } +/* + * call-seq: skip(pattern) + * + * Attempts to skip over the given +pattern+ beginning with the scan pointer. + * If it matches, the scan pointer is advanced to the end of the match, and the + * length of the match is returned. Otherwise, +nil+ is returned. + * + * It's similar to #scan, but without returning the matched string. + * + * s = StringScanner.new('test string') + * p s.skip(/\w+/) # -> 4 + * p s.skip(/\w+/) # -> nil + * p s.skip(/\s+/) # -> 1 + * p s.skip(/\w+/) # -> 6 + * p s.skip(/./) # -> nil + * + */ static VALUE strscan_skip(self, re) VALUE self, re; @@ -392,6 +508,21 @@ strscan_skip(self, re) return strscan_do_scan(self, re, 1, 0, 1); } +/* + * call-seq: check(pattern) + * + * This returns the value that #scan would return, without advancing the scan + * pointer. The match register is affected, though. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.check /Fri/ # -> "Fri" + * s.pos # -> 0 + * s.matched # -> "Fri" + * s.check /12/ # -> nil + * s.matched # -> nil + * + * Mnemonic: it "checks" to see whether a #scan will return a value. 
+ */ static VALUE strscan_check(self, re) VALUE self, re; @@ -399,6 +530,16 @@ strscan_check(self, re) return strscan_do_scan(self, re, 0, 1, 1); } +/* + * call-seq: scan_full(pattern, return_string_p, advance_pointer_p) + * + * Tests whether the given +pattern+ is matched from the current scan pointer. + * Returns the matched string if +return_string_p+ is true. + * Advances the scan pointer if +advance_pointer_p+ is true. + * The match register is affected. + * + * "full" means "#scan with full parameters". + */ static VALUE strscan_scan_full(self, re, s, f) VALUE self, re, s, f; @@ -407,6 +548,18 @@ strscan_scan_full(self, re, s, f) } +/* + * call-seq: scan_until(pattern) + * + * Scans the string _until_ the +pattern+ is matched. Returns the substring up + * to and including the end of the match, advancing the scan pointer to that + * location. If there is no match, +nil+ is returned. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.scan_until(/1/) # -> "Fri Dec 1" + * s.pre_match # -> "Fri Dec " + * s.scan_until(/XYZ/) # -> nil + */ static VALUE strscan_scan_until(self, re) VALUE self, re; @@ -414,6 +567,19 @@ strscan_scan_until(self, re) return strscan_do_scan(self, re, 1, 1, 0); } +/* + * call-seq: exist?(pattern) + * + * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string, + * without advancing the scan pointer. This predicts whether a #scan_until + * will return a value. + * + * s = StringScanner.new('test string') + * s.exist? /s/ # -> 3 + * s.scan /test/ # -> "test" + * s.exist? /s/ # -> 2 + * s.exist? /e/ # -> nil + */ static VALUE strscan_exist_p(self, re) VALUE self, re; @@ -421,6 +587,22 @@ strscan_exist_p(self, re) return strscan_do_scan(self, re, 0, 0, 0); } +/* + * call-seq: skip_until(pattern) + * + * Advances the scan pointer until +pattern+ is matched and consumed. Returns + * the number of bytes advanced, or +nil+ if no match was found. 
+ * + * Look ahead to match +pattern+, and advance the scan pointer to the _end_ + * of the match. Return the number of characters advanced, or +nil+ if the + * match was unsuccessful. + * + * It's similar to #scan_until, but without returning the intervening string. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.skip_until /12/ # -> 10 + * s # + */ static VALUE strscan_skip_until(self, re) VALUE self, re; @@ -428,6 +610,19 @@ strscan_skip_until(self, re) return strscan_do_scan(self, re, 1, 0, 0); } +/* + * call-seq: check_until(pattern) + * + * This returns the value that #scan_until would return, without advancing the + * scan pointer. The match register is affected, though. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.check_until /12/ # -> "Fri Dec 12" + * s.pos # -> 0 + * s.matched # -> "12" + * + * Mnemonic: it "checks" to see whether a #scan_until will return a value. + */ static VALUE strscan_check_until(self, re) VALUE self, re; @@ -435,6 +630,15 @@ strscan_check_until(self, re) return strscan_do_scan(self, re, 0, 1, 0); } +/* + * call-seq: search_full(pattern, return_string_p, advance_pointer_p) + * + * Scans the string _until_ the +pattern+ is matched. + * Returns the matched string if +return_string_p+ is true, otherwise + * returns the number of bytes advanced. + * Advances the scan pointer if +advance_pointer_p+, otherwise not. + * This method does affect the match register. + */ static VALUE strscan_search_full(self, re, s, f) VALUE self, re, s, f; @@ -457,6 +661,14 @@ adjust_registers_to_matched(p) p->regs.end[0] = p->curr - p->prev; } +/* + * Scans one character and returns it. + * + * s = StringScanner.new('ab') + * s.getch # => "a" + * s.getch # => "b" + * s.getch # => nil + */ static VALUE strscan_getch(self) VALUE self; @@ -480,6 +692,14 @@ strscan_getch(self) p->prev + p->regs.end[0]); } +/* + * Scans one byte and returns it. Similar to, but not the same as, #getch. 
+ * + * s = StringScanner.new('ab') + * s.get_byte # => "a" + * s.get_byte # => "b" + * s.get_byte # => nil + */ static VALUE strscan_get_byte(self) VALUE self; @@ -507,7 +727,17 @@ strscan_getbyte(self) return strscan_get_byte(self); } - +/* + * call-seq: peek(len) + * + * Extracts a string corresponding to string[pos,len], without + * advancing the scan pointer. + * + * s = StringScanner.new('test string') + * s.peek(7) # => "test st" + * s.peek(7) # => "test st" + * + */ static VALUE strscan_peek(self, vlen) VALUE self, vlen; @@ -526,6 +756,17 @@ strscan_peek(self, vlen) return extract_beg_len(p, p->curr, len); } +/* + * Set the scan pointer to the previous position. Only one previous position is + * remembered, and it changes with each scanning operation. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # => "test" + * s.unscan + * s.scan(/../) # => "te" + * s.scan(/\d/) # => nil + * s.unscan # ScanError: cannot unscan: prev match had failed + */ static VALUE strscan_peep(self, vlen) VALUE self, vlen; @@ -549,6 +790,18 @@ strscan_unscan(self) return self; } +/* + * Returns +true+ iff the scan pointer is at the beginning of the line. + * + * s = StringScanner.new("test\ntest\n") + * s.bol? # => true + * s.scan(/te/) + * s.bol? # => false + * s.scan(/st\n/) + * s.bol? # => true + * s.terminate + * s.bol? # => true + */ static VALUE strscan_bol_p(self) VALUE self; @@ -561,6 +814,16 @@ strscan_bol_p(self) return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse; } +/* + * Returns +true+ if the scan pointer is at the end of the string. + * + * s = StringScanner.new('test string') + * p s.eos? # => false + * s.scan(/test/) + * p s.eos? # => false + * s.terminate + * p s.eos? # => true + */ static VALUE strscan_eos_p(self) VALUE self; @@ -574,14 +837,26 @@ strscan_eos_p(self) return Qfalse; } +/* + * Equivalent to #eos?. + * This method is obsolete, use #eos? instead. 
+ */ static VALUE strscan_empty_p(self) VALUE self; { - rb_warning("StringScanner#empty_p is obsolete; use #eos? instead"); + rb_warning("StringScanner#empty? is obsolete; use #eos? instead"); return strscan_eos_p(self); } +/* + * Returns true iff there is more data in the string. See #eos?. + * This method is obsolete; use #eos? instead. + * + * s = StringScanner.new('test string') + * s.eos? # These two + * s.rest? # are opposites. + */ static VALUE strscan_rest_p(self) VALUE self; @@ -595,7 +870,15 @@ strscan_rest_p(self) return Qtrue; } - +/* + * Returns +true+ iff the last match was successful. + * + * s = StringScanner.new('test string') + * s.match?(/\w+/) # => 4 + * s.matched? # => true + * s.match?(/\d+/) # => nil + * s.matched? # => false + */ static VALUE strscan_matched_p(self) VALUE self; @@ -609,6 +892,13 @@ strscan_matched_p(self) return Qfalse; } +/* + * Returns the last matched string. + * + * s = StringScanner.new('test string') + * s.match?(/\w+/) # -> 4 + * s.matched # -> "test" + */ static VALUE strscan_matched(self) VALUE self; @@ -622,6 +912,16 @@ strscan_matched(self) p->prev + p->regs.end[0]); } +/* + * Returns the size of the most recent match (see #matched), or +nil+ if there + * was no recent match. + * + * s = StringScanner.new('test string') + * s.check /\w+/ # -> "test" + * s.matched_size # -> 4 + * s.check /\d+/ # -> nil + * s.matched_size # -> nil + */ static VALUE strscan_matched_size(self) VALUE self; @@ -634,6 +934,20 @@ strscan_matched_size(self) return INT2NUM(p->regs.end[0] - p->regs.beg[0]); } +/* + * call-seq: [](n) + * + * Return the n-th subgroup in the most recent match. 
+ * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 " + * s[0] # -> "Fri Dec 12 " + * s[1] # -> "Fri" + * s[2] # -> "Dec" + * s[3] # -> "12" + * s.post_match # -> "1975 14:39" + * s.pre_match # -> "" + */ static VALUE strscan_aref(self, idx) VALUE self, idx; @@ -655,6 +969,15 @@ strscan_aref(self, idx) p->prev + p->regs.end[i]); } +/* + * Return the pre-match (in the regular expression sense) of the last scan. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # -> "test" + * s.scan(/\s+/) # -> " " + * s.pre_match # -> "test" + * s.post_match # -> "string" + */ static VALUE strscan_pre_match(self) VALUE self; @@ -667,6 +990,15 @@ strscan_pre_match(self) return extract_range(p, 0, p->prev + p->regs.beg[0]); } +/* + * Return the post-match (in the regular expression sense) of the last scan. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # -> "test" + * s.scan(/\s+/) # -> " " + * s.pre_match # -> "test" + * s.post_match # -> "string" + */ static VALUE strscan_post_match(self) VALUE self; @@ -679,7 +1011,10 @@ strscan_post_match(self) return extract_range(p, p->prev + p->regs.end[0], S_LEN(p)); } - +/* + * Returns the "rest" of the string (i.e. everything after the scan pointer). + * If there is no more data, it returns "". + */ static VALUE strscan_rest(self) VALUE self; @@ -693,6 +1028,9 @@ strscan_rest(self) return extract_range(p, p->curr, S_LEN(p)); } +/* + * s.rest_size is equivalent to s.rest.size. 
+ */ static VALUE strscan_rest_size(self) VALUE self; @@ -709,10 +1047,20 @@ strscan_rest_size(self) return INT2FIX(i); } - #define INSPECT_LENGTH 5 #define BUFSIZE 256 +/* + * Returns a string that represents the StringScanner object, showing: + * - the current position + * - the size of the string + * - the characters surrounding the scan pointer + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">' + * s.scan_until /12/ # -> "Fri Dec 12" + * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">' + */ static VALUE strscan_inspect(self) VALUE self; @@ -796,6 +1144,104 @@ inspect2(p) Ruby Interface ======================================================================= */ +/* + * StringScanner provides for lexical scanning operations on a String. Here is + * an example of its usage: + * + * s = StringScanner.new('This is an example string') + * s.eos? # -> false + * + * p s.scan(/\w+/) # -> "This" + * p s.scan(/\w+/) # -> nil + * p s.scan(/\s+/) # -> " " + * p s.scan(/\s+/) # -> nil + * p s.scan(/\w+/) # -> "is" + * s.eos? # -> false + * + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "an" + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "example" + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "string" + * s.eos? # -> true + * + * p s.scan(/\s+/) # -> nil + * p s.scan(/\w+/) # -> nil + * + * Scanning a string means remembering the position of a scan pointer, + * which is just an index. The scan pointer effectively points _between_ + * characters. (XXX: get this right - is it between or not?) + * + * Given the string "test string", here are the pertinent scan pointer + * positions: + * + * t e s t s t r i n g + * 0 1 2 ... 1 + * 0 + * + * When you #scan for a pattern (a regular expression), the match must occur + * at the character after the scan pointer. If you use #scan_until, then the + * match can occur anywhere after the scan pointer. 
In both cases, the scan + * pointer moves just beyond the last character of the match, ready to + * scan again from the next character onwards. This is demonstrated by the + * example above. + * + * == Method Categories + * + * There are other methods besides the plain scanners. You can look ahead in + * the string without actually scanning. You can access the most recent match. + * You can modify the string being scanned, reset or terminate the scanner, + * find out or change the position of the scan pointer, skip ahead, and so on. + * + * === Advancing the Scan Pointer + * + * getch + * get_byte + * scan + * scan_until + * skip + * skip_until + * + * === Looking Ahead + * + * check + * check_until + * exist? + * match? + * peek + * + * === Finding Where we Are + * + * bol? + * eos? + * pos + * + * === Setting Where we Are + * + * reset + * terminate + * pos= + * + * === Match Data + * + * matched + * matched? + * matched_size + * [] + * pre_match + * post_match + * + * === Miscellaneous + * + * << + * concat + * string + * string= + * unscan + * + * There are aliases to several of the methods. + */ void Init_strscan() { -- cgit v1.2.3