diff options
author | gsinclair <gsinclair@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-02-18 13:49:43 +0000 |
---|---|---|
committer | gsinclair <gsinclair@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-02-18 13:49:43 +0000 |
commit | f3e86d49989aff1eef4caeea68f9498728bf6b55 (patch) | |
tree | f15a7414b66ffd256108d70cd73cbb7bbb47440a /ext/strscan | |
parent | 688ec3f8ee114b8c9f477788dd7855355ee11494 (diff) |
* ext/strscan/strscan.c: documented
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@5767 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/strscan')
-rw-r--r-- | ext/strscan/strscan.c | 416 |
1 files changed, 416 insertions, 0 deletions
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index 590f729e9f..2ea1251057 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -190,6 +190,13 @@ strscan_s_allocate(klass) return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p); } +/* + * call-seq: StringScanner.new(string, dup) + * + * Creates a new StringScanner object to scan over the given +string+. If + * +dup+ is +true+, a copy of the string is used instead. Either way, the string + * is frozen before scanning commences. + */ static VALUE strscan_initialize(argc, argv, self) int argc; @@ -212,6 +219,9 @@ strscan_initialize(argc, argv, self) Instance Methods ======================================================================= */ +/* + * FIXME: prevent this method from being documented. + */ static VALUE strscan_s_mustc(self) VALUE self; @@ -219,6 +229,9 @@ strscan_s_mustc(self) return self; } +/* + * Reset the scan pointer (index 0) and clear matching data. + */ static VALUE strscan_reset(self) VALUE self; @@ -231,6 +244,13 @@ strscan_reset(self) return self; } +/* + * call-seq: + * terminate + * clear + * + * Set the scan pointer to the end of the string and clear matching data. + */ static VALUE strscan_terminate(self) VALUE self; @@ -243,6 +263,9 @@ strscan_terminate(self) return self; } +/* + * Returns the string being scanned. + */ static VALUE strscan_get_string(self) VALUE self; @@ -253,6 +276,12 @@ strscan_get_string(self) return p->str; } +/* + * call-seq: string=(str) + * + * Changes the string being scanned to +str+ and resets the scanner. Returns + * +str+. + */ static VALUE strscan_set_string(self, str) VALUE self, str; @@ -268,6 +297,17 @@ strscan_set_string(self, str) return str; } +/* + * call-seq: + * concat(str) + * << (str) + * + * Appends +str+ to the string being scanned. 
+ * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s << " +1000 GMT" + * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT" + */ static VALUE strscan_concat(self, str) VALUE self, str; @@ -280,6 +320,20 @@ strscan_concat(self, str) return self; } +/* + * Returns the position of the scan pointer. In the 'reset' position, this + * value is zero. In the 'terminated' position (i.e. the string is exhausted), + * this value is the length of the string. + * + * In short, it's a 0-based index into the string. + * + * s = StringScanner.new('test string') + * s.pos # -> 0 + * s.scan_until /str/ # -> "test str" + * s.pos # -> 8 + * s.terminate # -> #<StringScanner fin> + * s.pos # -> 11 + */ static VALUE strscan_get_pos(self) VALUE self; @@ -290,6 +344,15 @@ strscan_get_pos(self) return INT2FIX(p->curr); } +/* + * call-seq: pos=(n) + * + * Modify the scan pointer. + * + * s = StringScanner.new('test string') + * s.pos = 7 # -> 7 + * s.rest # -> "ring" + */ static VALUE strscan_set_pos(self, v) VALUE self, v; @@ -359,6 +422,28 @@ strscan_do_scan(self, regex, succptr, getstr, headonly) } } +/* + * call-seq: + * scanner.scan(pattern) => String + * + */ + +/* + * call-seq: + * scan(pattern) + * + * Tries to match with +pattern+ at the current position. If there's a match, + * the scanner advances the "scan pointer" and returns the matched string. + * Otherwise, the scanner returns +nil+. + * + * s = StringScanner.new('test string') + * p s.scan(/\w+/) # -> "test" + * p s.scan(/\w+/) # -> nil + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "string" + * p s.scan(/./) # -> nil + * + */ static VALUE strscan_scan(self, re) VALUE self, re; @@ -366,6 +451,17 @@ strscan_scan(self, re) return strscan_do_scan(self, re, 1, 1, 1); } +/* + * call-seq: match?(pattern) + * + * Tests whether the given +pattern+ is matched from the current scan pointer. + * Returns the length of the match, or +nil+. The scan pointer is not advanced. 
+ * + * s = StringScanner.new('test string') + * p s.match?(/\w+/) # -> 4 + * p s.match?(/\w+/) # -> 4 + * p s.match?(/\s+/) # -> nil + */ static VALUE strscan_match_p(self, re) VALUE self, re; @@ -373,6 +469,23 @@ strscan_match_p(self, re) return strscan_do_scan(self, re, 0, 0, 1); } +/* + * call-seq: skip(pattern) + * + * Attempts to skip over the given +pattern+ beginning with the scan pointer. + * If it matches, the scan pointer is advanced to the end of the match, and the + * length of the match is returned. Otherwise, +nil+ is returned. + * + * It's similar to #scan, but without returning the matched string. + * + * s = StringScanner.new('test string') + * p s.skip(/\w+/) # -> 4 + * p s.skip(/\w+/) # -> nil + * p s.skip(/\s+/) # -> 1 + * p s.skip(/\w+/) # -> 6 + * p s.skip(/./) # -> nil + * + */ static VALUE strscan_skip(self, re) VALUE self, re; @@ -380,6 +493,21 @@ strscan_skip(self, re) return strscan_do_scan(self, re, 1, 0, 1); } +/* + * call-seq: check(pattern) + * + * This returns the value that #scan would return, without advancing the scan + * pointer. The match register is affected, though. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.check /Fri/ # -> "Fri" + * s.pos # -> 0 + * s.matched # -> "Fri" + * s.check /12/ # -> nil + * s.matched # -> nil + * + * Mnemonic: it "checks" to see whether a #scan will return a value. + */ static VALUE strscan_check(self, re) VALUE self, re; @@ -387,6 +515,9 @@ strscan_check(self, re) return strscan_do_scan(self, re, 0, 1, 1); } +/* + * DOCUMENTATION + */ static VALUE strscan_scan_full(self, re, s, f) VALUE self, re, s, f; @@ -395,6 +526,18 @@ strscan_scan_full(self, re, s, f) } +/* + * call-seq: scan_until(pattern) + * + * Scans the string _until_ the +pattern+ is matched. Returns the substring up + * to and including the end of the match, advancing the scan pointer to that + * location. If there is no match, +nil+ is returned. 
+ * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.scan_until(/1/) # -> "Fri Dec 1" + * s.pre_match # -> "Fri Dec " + * s.scan_until(/XYZ/) # -> nil + */ static VALUE strscan_scan_until(self, re) VALUE self, re; @@ -402,6 +545,19 @@ strscan_scan_until(self, re) return strscan_do_scan(self, re, 1, 1, 0); } +/* + * call-seq: exist?(pattern) + * + * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string, + * without advancing the scan pointer. This predicts whether a #scan_until + * will return a value. + * + * s = StringScanner.new('test string') + * s.exist? /s/ # -> 3 + * s.scan /test/ # -> "test" + * s.exist? /s/ # -> 2 + * s.exist? /e/ # -> nil + */ static VALUE strscan_exist_p(self, re) VALUE self, re; @@ -409,6 +565,22 @@ strscan_exist_p(self, re) return strscan_do_scan(self, re, 0, 0, 0); } +/* + * call-seq: skip_until(pattern) + * + * Advances the scan pointer until +pattern+ is matched and consumed. Returns + * the number of characters advanced, or +nil+ if no match was found. + * + * Look ahead to match +pattern+, and advance the scan pointer to the _end_ of the + * match. Return the number of characters advanced, or +nil+ if the match was + * unsuccessful. + * + * It's similar to #scan_until, but without returning the intervening string. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.skip_until /12/ # -> 10 + * s # + */ static VALUE strscan_skip_until(self, re) VALUE self, re; @@ -416,6 +588,19 @@ strscan_skip_until(self, re) return strscan_do_scan(self, re, 1, 0, 0); } +/* + * call-seq: check_until(pattern) + * + * This returns the value that #scan_until would return, without advancing the + * scan pointer. The match register is affected, though. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.check_until /12/ # -> "Fri Dec 12" + * s.pos # -> 0 + * s.matched # -> "12" + * + * Mnemonic: it "checks" to see whether a #scan_until will return a value. 
+ */ static VALUE strscan_check_until(self, re) VALUE self, re; @@ -423,6 +608,9 @@ strscan_check_until(self, re) return strscan_do_scan(self, re, 0, 1, 0); } +/* + * DOCUMENTATION + */ static VALUE strscan_search_full(self, re, s, f) VALUE self, re, s, f; @@ -445,6 +633,14 @@ adjust_registers_to_matched(p) p->regs.end[0] = p->curr - p->prev; } +/* + * Scans one character and returns it. + * + * s = StringScanner.new('ab') + * s.getch # => 'a' + * s.getch # => 'b' + * s.getch # => nil + */ static VALUE strscan_getch(self) VALUE self; @@ -468,6 +664,9 @@ strscan_getch(self) p->prev + p->regs.end[0]); } +/* + * Scans one byte and returns it. Similar to, but not the same as, #getch. + */ static VALUE strscan_get_byte(self) VALUE self; @@ -488,6 +687,17 @@ strscan_get_byte(self) } +/* + * call-seq: peek(len) + * + * Extracts a string corresponding to <tt>string[pos,len]</tt>, without + * advancing the scan pointer. + * + * s = StringScanner.new('test string') + * s.peek(7) # -> "test st" + * s.peek(7) # -> "test st" + * + */ static VALUE strscan_peek(self, vlen) VALUE self, vlen; @@ -506,6 +716,17 @@ strscan_peek(self, vlen) return extract_beg_len(p, p->curr, len); } +/* + * Set the scan pointer to the previous position. Only one previous position is + * remembered, and it changes with each scanning operation. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # -> "test" + * s.unscan + * s.scan(/../) # -> "te" + * s.scan(/\d/) # -> nil + * s.unscan # ScanError: cannot unscan: prev match had failed + */ static VALUE strscan_unscan(self) VALUE self; @@ -521,6 +742,13 @@ strscan_unscan(self) return self; } +/* + * Returns +true+ iff the scan pointer is at the beginning of a line, i.e. the + * character just before the scan pointer is a newline. This is not the same + * as <tt>s.pos == 0</tt>: it is also true after any newline inside the string. + * + * s = StringScanner.new("test\nstring") + * s.scan(/test\n/) # -> "test\n" + * s.bol? # -> true + * s.scan(/str/) # -> "str" + * s.bol? # -> false + */ static VALUE strscan_bol_p(self) VALUE self; @@ -533,6 +761,9 @@ strscan_bol_p(self) return (*(CURPTR(p) - 1) == '\n') ? 
Qtrue : Qfalse; } +/* + * Returns +true+ if the scan pointer is at the end of the string. + */ static VALUE strscan_eos_p(self) VALUE self; @@ -546,6 +777,13 @@ strscan_eos_p(self) return Qfalse; } +/* + * Returns true iff there is more data in the string. See #eos?. + * + * s = StringScanner.new('test string') + * s.eos? # These two + * s.rest? # are opposites. + */ static VALUE strscan_rest_p(self) VALUE self; @@ -560,6 +798,15 @@ strscan_rest_p(self) } +/* + * Returns +true+ iff the last match was successful. + * + * s = StringScanner.new('test string') + * s.match?(/\w+/) # -> 4 + * s.matched? # -> true + * s.match?(/\d+/) # -> nil + * s.matched? # -> false + */ static VALUE strscan_matched_p(self) VALUE self; @@ -573,6 +820,13 @@ strscan_matched_p(self) return Qfalse; } +/* + * Returns the last matched string. + * + * s = StringScanner.new('test string') + * s.match?(/\w+/) # -> 4 + * s.matched # -> "test" + */ static VALUE strscan_matched(self) VALUE self; @@ -586,6 +840,16 @@ strscan_matched(self) p->prev + p->regs.end[0]); } +/* + * Returns the size of the most recent match (see #matched), or +nil+ if there + * was no recent match. + * + * s = StringScanner.new('test string') + * s.check /\w+/ # -> "test" + * s.matched_size # -> 4 + * s.check /\d+/ # -> nil + * s.matched_size # -> nil + */ static VALUE strscan_matched_size(self) VALUE self; @@ -598,6 +862,20 @@ strscan_matched_size(self) return INT2NUM(p->regs.end[0] - p->regs.beg[0]); } +/* + * call-seq: [](n) + * + * Return the n-th subgroup in the most recent match. 
+ * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 " + * s[0] # -> "Fri Dec 12 " + * s[1] # -> "Fri" + * s[2] # -> "Dec" + * s[3] # -> "12" + * s.post_match # -> "1975 14:39" + * s.pre_match # -> "" + */ static VALUE strscan_aref(self, idx) VALUE self, idx; @@ -619,6 +897,15 @@ strscan_aref(self, idx) p->prev + p->regs.end[i]); } +/* + * Return the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # -> "test" + * s.scan(/\s+/) # -> " " + * s.pre_match # -> "test" + * s.post_match # -> "string" + */ static VALUE strscan_pre_match(self) VALUE self; @@ -631,6 +918,15 @@ strscan_pre_match(self) return extract_range(p, 0, p->prev + p->regs.beg[0]); } +/* + * Return the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # -> "test" + * s.scan(/\s+/) # -> " " + * s.pre_match # -> "test" + * s.post_match # -> "string" + */ static VALUE strscan_post_match(self) VALUE self; @@ -644,6 +940,10 @@ strscan_post_match(self) } +/* + * Returns the "rest" of the string (i.e. everything after the scan pointer). + * If there is no more data, it returns <tt>""</tt>. + */ static VALUE strscan_rest(self) VALUE self; @@ -657,6 +957,9 @@ strscan_rest(self) return extract_range(p, p->curr, S_LEN(p)); } +/* + * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>. 
+ */ static VALUE strscan_rest_size(self) VALUE self; @@ -677,6 +980,17 @@ strscan_rest_size(self) #define INSPECT_LENGTH 5 #define BUFSIZE 256 +/* + * Returns a string that represents the StringScanner object, showing: + * - the current position + * - the size of the string + * - the characters surrounding the scan pointer + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">' + * s.scan_until /12/ # -> "Fri Dec 12" + * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">' + */ static VALUE strscan_inspect(self) VALUE self; @@ -761,6 +1075,108 @@ inspect2(p) Ruby Interface ======================================================================= */ +/* + * StringScanner provides for lexical scanning operations on a String. Here is + * an example of its usage: + * + * s = StringScanner.new('This is an example string') + * s.eos? # -> false + * + * p s.scan(/\w+/) # -> "This" + * p s.scan(/\w+/) # -> nil + * p s.scan(/\s+/) # -> " " + * p s.scan(/\s+/) # -> nil + * p s.scan(/\w+/) # -> "is" + * s.eos? # -> false + * + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "an" + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "example" + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "string" + * s.eos? # -> true + * + * p s.scan(/\s+/) # -> nil + * p s.scan(/\w+/) # -> nil + * + * Scanning a string means remembering the position of a <i>scan pointer</i>, + * which is just an index. The scan pointer effectively points _between_ + * characters. (XXX: get this right - is it between or not?) + * + * Given the string "test string", here are the pertinent scan pointer + * positions: + * + * t e s t s t r i n g + * 0 1 2 ... 1 + * 0 + * + * When you #scan for a pattern (a regular expression), the match must occur + * at the character after the scan pointer. If you use #scan_until, then the + * match can occur anywhere after the scan pointer. 
In both cases, the scan + * pointer moves <i>just beyond</i> the last character of the match, ready to + * scan again from the next character onwards. This is demonstrated by the + * example above. + * + * == Method Categories + * + * There are other methods besides the plain scanners. You can look ahead in + * the string without actually scanning. You can access the most recent match. + * You can modify the string being scanned, reset or terminate the scanner, + * find out or change the position of the scan pointer, skip ahead, and so on. + * + * === Advancing the Scan Pointer + * + * getch + * getbyte + * scan + * scan_until + * skip + * skip_until + * + * === Looking Ahead + * + * check + * check_until + * exist? + * match? + * peek + * + * === Finding Where we Are + * + * bol? + * eos? + * rest? + * rest_size + * + * === Setting Where we Are + * + * reset + * terminate + * pos= + * + * === Match Data + * + * matched + * matched? + * matched_size + * pre_match + * post_match + * + * === Miscellaneous + * + * << + * string= + * string + * unscan + * + * === Unknown + * + * scan_full + * search_full + * + * There are aliases to several of the methods. + */ void Init_strscan() { |