summaryrefslogtreecommitdiff
path: root/ext/strscan
diff options
context:
space:
mode:
authorgsinclair <gsinclair@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2004-02-18 13:49:43 +0000
committergsinclair <gsinclair@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2004-02-18 13:49:43 +0000
commitf3e86d49989aff1eef4caeea68f9498728bf6b55 (patch)
treef15a7414b66ffd256108d70cd73cbb7bbb47440a /ext/strscan
parent688ec3f8ee114b8c9f477788dd7855355ee11494 (diff)
* ext/strscan/strscan.c: documented
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@5767 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/strscan')
-rw-r--r--ext/strscan/strscan.c416
1 files changed, 416 insertions, 0 deletions
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 590f729e9f..2ea1251057 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -190,6 +190,13 @@ strscan_s_allocate(klass)
return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p);
}
+/*
+ * call-seq: StringScanner.new(string, dup)
+ *
+ * Creates a new StringScanner object to scan over the given +string+. If
+ * +dup+ is +true+, a copy of the string is used instead. Either way, the string
+ * is frozen before scanning commences.
+ */
static VALUE
strscan_initialize(argc, argv, self)
int argc;
@@ -212,6 +219,9 @@ strscan_initialize(argc, argv, self)
Instance Methods
======================================================================= */
+/*
+ * FIXME: prevent this method from being documented.
+ */
static VALUE
strscan_s_mustc(self)
VALUE self;
@@ -219,6 +229,9 @@ strscan_s_mustc(self)
return self;
}
+/*
+ * Reset the scan pointer (index 0) and clear matching data.
+ */
static VALUE
strscan_reset(self)
VALUE self;
@@ -231,6 +244,13 @@ strscan_reset(self)
return self;
}
+/*
+ * call-seq:
+ * terminate
+ * clear
+ *
+ * Set the scan pointer to the end of the string and clear matching data.
+ */
static VALUE
strscan_terminate(self)
VALUE self;
@@ -243,6 +263,9 @@ strscan_terminate(self)
return self;
}
+/*
+ * Returns the string being scanned.
+ */
static VALUE
strscan_get_string(self)
VALUE self;
@@ -253,6 +276,12 @@ strscan_get_string(self)
return p->str;
}
+/*
+ * call-seq: string=(str)
+ *
+ * Changes the string being scanned to +str+ and resets the scanner. Returns
+ * +str+.
+ */
static VALUE
strscan_set_string(self, str)
VALUE self, str;
@@ -268,6 +297,17 @@ strscan_set_string(self, str)
return str;
}
+/*
+ * call-seq:
+ * concat(str)
+ * << (str)
+ *
+ * Appends +str+ to the string being scanned.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s << " +1000 GMT"
+ * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
+ */
static VALUE
strscan_concat(self, str)
VALUE self, str;
@@ -280,6 +320,20 @@ strscan_concat(self, str)
return self;
}
+/*
+ * Returns the position of the scan pointer. In the 'reset' position, this
+ * value is zero. In the 'terminated' position (i.e. the string is exhausted),
+ * this value is the length of the string.
+ *
+ * In short, it's a 0-based index into the string.
+ *
+ * s = StringScanner.new('test string')
+ * s.pos # -> 0
+ * s.scan_until /str/ # -> "test str"
+ * s.pos # -> 8
+ * s.terminate # -> #<StringScanner fin>
+ * s.pos # -> 11
+ */
static VALUE
strscan_get_pos(self)
VALUE self;
@@ -290,6 +344,15 @@ strscan_get_pos(self)
return INT2FIX(p->curr);
}
+/*
+ * call-seq: pos=(n)
+ *
+ * Modify the scan pointer.
+ *
+ * s = StringScanner.new('test string')
+ * s.pos = 7 # -> 7
+ * s.rest # -> "ring"
+ */
static VALUE
strscan_set_pos(self, v)
VALUE self, v;
@@ -359,6 +422,28 @@ strscan_do_scan(self, regex, succptr, getstr, headonly)
}
}
+/*
+ * call-seq:
+ * scanner.scan(pattern) => String
+ *
+ */
+
+/*
+ * call-seq:
+ * scan(pattern)
+ *
+ * Tries to match with +pattern+ at the current position. If there's a match,
+ * the scanner advances the "scan pointer" and returns the matched string.
+ * Otherwise, the scanner returns +nil+.
+ *
+ * s = StringScanner.new('test string')
+ * p s.scan(/\w+/) # -> "test"
+ * p s.scan(/\w+/) # -> nil
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\w+/) # -> "string"
+ * p s.scan(/./) # -> nil
+ *
+ */
static VALUE
strscan_scan(self, re)
VALUE self, re;
@@ -366,6 +451,17 @@ strscan_scan(self, re)
return strscan_do_scan(self, re, 1, 1, 1);
}
+/*
+ * call-seq: match?(pattern)
+ *
+ * Tests whether the given +pattern+ is matched from the current scan pointer.
+ * Returns the length of the match, or +nil+. The scan pointer is not advanced.
+ *
+ * s = StringScanner.new('test string')
+ * p s.match?(/\w+/) # -> 4
+ * p s.match?(/\w+/) # -> 4
+ * p s.match?(/\s+/) # -> nil
+ */
static VALUE
strscan_match_p(self, re)
VALUE self, re;
@@ -373,6 +469,23 @@ strscan_match_p(self, re)
return strscan_do_scan(self, re, 0, 0, 1);
}
+/*
+ * call-seq: skip(pattern)
+ *
+ * Attempts to skip over the given +pattern+ beginning with the scan pointer.
+ * If it matches, the scan pointer is advanced to the end of the match, and the
+ * length of the match is returned. Otherwise, +nil+ is returned.
+ *
+ * It's similar to #scan, but without returning the matched string.
+ *
+ * s = StringScanner.new('test string')
+ * p s.skip(/\w+/) # -> 4
+ * p s.skip(/\w+/) # -> nil
+ * p s.skip(/\s+/) # -> 1
+ * p s.skip(/\w+/) # -> 6
+ * p s.skip(/./) # -> nil
+ *
+ */
static VALUE
strscan_skip(self, re)
VALUE self, re;
@@ -380,6 +493,21 @@ strscan_skip(self, re)
return strscan_do_scan(self, re, 1, 0, 1);
}
+/*
+ * call-seq: check(pattern)
+ *
+ * This returns the value that #scan would return, without advancing the scan
+ * pointer. The match register is affected, though.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.check /Fri/ # -> "Fri"
+ * s.pos # -> 0
+ * s.matched # -> "Fri"
+ * s.check /12/ # -> nil
+ * s.matched # -> nil
+ *
+ * Mnemonic: it "checks" to see whether a #scan will return a value.
+ */
static VALUE
strscan_check(self, re)
VALUE self, re;
@@ -387,6 +515,9 @@ strscan_check(self, re)
return strscan_do_scan(self, re, 0, 1, 1);
}
+/*
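+ * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
+ *
+ * Tests whether the given +pattern+ is matched from the current scan pointer.
+ * Advances the scan pointer if +advance_pointer_p+ is true. Returns the
+ * matched string if +return_string_p+ is true, otherwise the length of the
+ * match. Returns +nil+ if there is no match. The match register is affected.
+ *
+ * "full" means "#scan with full parameters".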
+ */
static VALUE
strscan_scan_full(self, re, s, f)
VALUE self, re, s, f;
@@ -395,6 +526,18 @@ strscan_scan_full(self, re, s, f)
}
+/*
+ * call-seq: scan_until(pattern)
+ *
+ * Scans the string _until_ the +pattern+ is matched. Returns the substring up
+ * to and including the end of the match, advancing the scan pointer to that
+ * location. If there is no match, +nil+ is returned.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.scan_until(/1/) # -> "Fri Dec 1"
+ * s.pre_match # -> "Fri Dec "
+ * s.scan_until(/XYZ/) # -> nil
+ */
static VALUE
strscan_scan_until(self, re)
VALUE self, re;
@@ -402,6 +545,19 @@ strscan_scan_until(self, re)
return strscan_do_scan(self, re, 1, 1, 0);
}
+/*
+ * call-seq: exist?(pattern)
+ *
+ * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the rest of the
+ * string, without advancing the scan pointer. Returns the offset from the scan
+ * pointer to the end of the match, or +nil+ if there is no match. This
+ * predicts whether a #scan_until will return a value.
+ *
+ * s = StringScanner.new('test string')
+ * s.exist? /s/ # -> 3
+ * s.scan /test/ # -> "test"
+ * s.exist? /s/ # -> 2
+ * s.exist? /e/ # -> nil
+ */
static VALUE
strscan_exist_p(self, re)
VALUE self, re;
@@ -409,6 +565,22 @@ strscan_exist_p(self, re)
return strscan_do_scan(self, re, 0, 0, 0);
}
+/*
+ * call-seq: skip_until(pattern)
+ *
+ * Advances the scan pointer until +pattern+ is matched and consumed. Returns
+ * the number of characters advanced, or +nil+ if no match was found.
+ *
+ * Look ahead to match +pattern+, and advance the scan pointer to the _end_ of the
+ * match. Return the number of characters advanced, or +nil+ if the match was
+ * unsuccessful.
+ *
+ * It's similar to #scan_until, but without returning the intervening string.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.skip_until /12/ # -> 10
+ * s.pos # -> 10
+ */
static VALUE
strscan_skip_until(self, re)
VALUE self, re;
@@ -416,6 +588,19 @@ strscan_skip_until(self, re)
return strscan_do_scan(self, re, 1, 0, 0);
}
+/*
+ * call-seq: check_until(pattern)
+ *
+ * This returns the value that #scan_until would return, without advancing the
+ * scan pointer. The match register is affected, though.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.check_until /12/ # -> "Fri Dec 12"
+ * s.pos # -> 0
+ * s.matched # -> "12"
+ *
+ * Mnemonic: it "checks" to see whether a #scan_until will return a value.
+ */
static VALUE
strscan_check_until(self, re)
VALUE self, re;
@@ -423,6 +608,9 @@ strscan_check_until(self, re)
return strscan_do_scan(self, re, 0, 1, 0);
}
+/*
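+ * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
+ *
+ * Scans the string _until_ the +pattern+ is matched. Advances the scan
+ * pointer if +advance_pointer_p+ is true, otherwise not. Returns the substring
+ * up to and including the end of the match if +return_string_p+ is true,
+ * otherwise the number of characters to the end of the match. Returns +nil+
+ * if there is no match. The match register is affected.
+ *
+ * "full" means "#scan_until with full parameters".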
+ */
static VALUE
strscan_search_full(self, re, s, f)
VALUE self, re, s, f;
@@ -445,6 +633,14 @@ adjust_registers_to_matched(p)
p->regs.end[0] = p->curr - p->prev;
}
+/*
+ * Scans one character and returns it.
+ *
+ * s = StringScanner.new('ab')
+ * s.getch # => 'a'
+ * s.getch # => 'b'
+ * s.getch # => nil
+ */
static VALUE
strscan_getch(self)
VALUE self;
@@ -468,6 +664,9 @@ strscan_getch(self)
p->prev + p->regs.end[0]);
}
+/*
+ * Scans one byte and returns it. Similar to, but not the same as, #getch.
+ */
static VALUE
strscan_get_byte(self)
VALUE self;
@@ -488,6 +687,17 @@ strscan_get_byte(self)
}
+/*
+ * call-seq: peek(len)
+ *
+ * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
+ * advancing the scan pointer.
+ *
+ * s = StringScanner.new('test string')
+ * s.peek(7) # -> "test st"
+ * s.peek(7) # -> "test st"
+ *
+ */
static VALUE
strscan_peek(self, vlen)
VALUE self, vlen;
@@ -506,6 +716,17 @@ strscan_peek(self, vlen)
return extract_beg_len(p, p->curr, len);
}
+/*
+ * Set the scan pointer to the previous position. Only one previous position is
+ * remembered, and it changes with each scanning operation.
+ *
+ * s = StringScanner.new('test string')
+ * s.scan(/\w+/) # -> "test"
+ * s.unscan
+ * s.scan(/../) # -> "te"
+ * s.scan(/\d/) # -> nil
+ * s.unscan # ScanError: cannot unscan: prev match had failed
+ */
static VALUE
strscan_unscan(self)
VALUE self;
@@ -521,6 +742,13 @@ strscan_unscan(self)
return self;
}
+/*
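+ * Returns +true+ iff the scan pointer is at the beginning of a line, i.e. at
+ * the start of the string or immediately after a newline character.
+ *
+ * s = StringScanner.new("test\ntest\n")
+ * s.bol? # -> true
+ * s.scan(/te/) # -> "te"
+ * s.bol? # -> false
+ * s.scan(/st\n/) # -> "st\n"
+ * s.bol? # -> true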
+ */
static VALUE
strscan_bol_p(self)
VALUE self;
@@ -533,6 +761,9 @@ strscan_bol_p(self)
return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
}
+/*
+ * Returns +true+ if the scan pointer is at the end of the string.
+ */
static VALUE
strscan_eos_p(self)
VALUE self;
@@ -546,6 +777,13 @@ strscan_eos_p(self)
return Qfalse;
}
+/*
+ * Returns true iff there is more data in the string. See #eos?.
+ *
+ * s = StringScanner.new('test string')
+ * s.eos? # These two
+ * s.rest? # are opposites.
+ */
static VALUE
strscan_rest_p(self)
VALUE self;
@@ -560,6 +798,15 @@ strscan_rest_p(self)
}
+/*
+ * Returns +true+ iff the last match was successful.
+ *
+ * s = StringScanner.new('test string')
+ * s.match?(/\w+/) # -> 4
+ * s.matched? # -> true
+ * s.match?(/\d+/) # -> nil
+ * s.matched? # -> false
+ */
static VALUE
strscan_matched_p(self)
VALUE self;
@@ -573,6 +820,13 @@ strscan_matched_p(self)
return Qfalse;
}
+/*
+ * Returns the last matched string.
+ *
+ * s = StringScanner.new('test string')
+ * s.match?(/\w+/) # -> 4
+ * s.matched # -> "test"
+ */
static VALUE
strscan_matched(self)
VALUE self;
@@ -586,6 +840,16 @@ strscan_matched(self)
p->prev + p->regs.end[0]);
}
+/*
+ * Returns the size of the most recent match (see #matched), or +nil+ if there
+ * was no recent match.
+ *
+ * s = StringScanner.new('test string')
+ * s.check /\w+/ # -> "test"
+ * s.matched_size # -> 4
+ * s.check /\d+/ # -> nil
+ * s.matched_size # -> nil
+ */
static VALUE
strscan_matched_size(self)
VALUE self;
@@ -598,6 +862,20 @@ strscan_matched_size(self)
return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
}
+/*
+ * call-seq: [](n)
+ *
+ * Return the n-th subgroup in the most recent match.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
+ * s[0] # -> "Fri Dec 12 "
+ * s[1] # -> "Fri"
+ * s[2] # -> "Dec"
+ * s[3] # -> "12"
+ * s.post_match # -> "1975 14:39"
+ * s.pre_match # -> ""
+ */
static VALUE
strscan_aref(self, idx)
VALUE self, idx;
@@ -619,6 +897,15 @@ strscan_aref(self, idx)
p->prev + p->regs.end[i]);
}
+/*
+ * Return the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
+ *
+ * s = StringScanner.new('test string')
+ * s.scan(/\w+/) # -> "test"
+ * s.scan(/\s+/) # -> " "
+ * s.pre_match # -> "test"
+ * s.post_match # -> "string"
+ */
static VALUE
strscan_pre_match(self)
VALUE self;
@@ -631,6 +918,15 @@ strscan_pre_match(self)
return extract_range(p, 0, p->prev + p->regs.beg[0]);
}
+/*
+ * Return the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
+ *
+ * s = StringScanner.new('test string')
+ * s.scan(/\w+/) # -> "test"
+ * s.scan(/\s+/) # -> " "
+ * s.pre_match # -> "test"
+ * s.post_match # -> "string"
+ */
static VALUE
strscan_post_match(self)
VALUE self;
@@ -644,6 +940,10 @@ strscan_post_match(self)
}
+/*
+ * Returns the "rest" of the string (i.e. everything after the scan pointer).
+ * If there is no more data, it returns <tt>""</tt>.
+ */
static VALUE
strscan_rest(self)
VALUE self;
@@ -657,6 +957,9 @@ strscan_rest(self)
return extract_range(p, p->curr, S_LEN(p));
}
+/*
+ * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
+ */
static VALUE
strscan_rest_size(self)
VALUE self;
@@ -677,6 +980,17 @@ strscan_rest_size(self)
#define INSPECT_LENGTH 5
#define BUFSIZE 256
+/*
+ * Returns a string that represents the StringScanner object, showing:
+ * - the current position
+ * - the size of the string
+ * - the characters surrounding the scan pointer
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
+ * s.scan_until /12/ # -> "Fri Dec 12"
+ * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
+ */
static VALUE
strscan_inspect(self)
VALUE self;
@@ -761,6 +1075,108 @@ inspect2(p)
Ruby Interface
======================================================================= */
+/*
+ * StringScanner provides for lexical scanning operations on a String. Here is
+ * an example of its usage:
+ *
+ * s = StringScanner.new('This is an example string')
+ * s.eos? # -> false
+ *
+ * p s.scan(/\w+/) # -> "This"
+ * p s.scan(/\w+/) # -> nil
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\s+/) # -> nil
+ * p s.scan(/\w+/) # -> "is"
+ * s.eos? # -> false
+ *
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\w+/) # -> "an"
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\w+/) # -> "example"
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\w+/) # -> "string"
+ * s.eos? # -> true
+ *
+ * p s.scan(/\s+/) # -> nil
+ * p s.scan(/\w+/) # -> nil
+ *
+ * Scanning a string means remembering the position of a <i>scan pointer</i>,
+ * which is just an index. The scan pointer effectively points _between_
+ * characters: position 0 is just before the first character, and the final
+ * position (equal to the length of the string) is just after the last one.
+ *
+ * Given the string "test string", here are the pertinent scan pointer
+ * positions:
+ *
+ *   t e s t   s t r i n g
+ *   0 1 2 ...             1
+ *                         0
+ *
+ * When you #scan for a pattern (a regular expression), the match must occur
+ * at the character after the scan pointer. If you use #scan_until, then the
+ * match can occur anywhere after the scan pointer. In both cases, the scan
+ * pointer moves <i>just beyond</i> the last character of the match, ready to
+ * scan again from the next character onwards. This is demonstrated by the
+ * example above.
+ *
+ * == Method Categories
+ *
+ * There are other methods besides the plain scanners. You can look ahead in
+ * the string without actually scanning. You can access the most recent match.
+ * You can modify the string being scanned, reset or terminate the scanner,
+ * find out or change the position of the scan pointer, skip ahead, and so on.
+ *
+ * === Advancing the Scan Pointer
+ *
+ * getch
+ * get_byte
+ * scan
+ * scan_until
+ * skip
+ * skip_until
+ *
+ * === Looking Ahead
+ *
+ * check
+ * check_until
+ * exist?
+ * match?
+ * peek
+ *
+ * === Finding Where we Are
+ *
+ * bol?
+ * eos?
+ * rest?
+ * rest_size
+ *
+ * === Setting Where we Are
+ *
+ * reset
+ * terminate
+ * pos=
+ *
+ * === Match Data
+ *
+ * matched
+ * matched?
+ * matched_size
+ * pre_match
+ * post_match
+ *
+ * === Miscellaneous
+ *
+ * <<
+ * string=
+ * string
+ * unscan
+ *
+ * === Unknown
+ *
+ * scan_full
+ * search_full
+ *
+ * There are aliases to several of the methods.
+ */
void
Init_strscan()
{