summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r--ext/strscan/strscan.c456
1 files changed, 451 insertions, 5 deletions
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index e79df47635..39d3d8bcb6 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -194,6 +194,12 @@ strscan_s_allocate(klass)
return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p);
}
+/*
+ * call-seq: StringScanner.new(string, dup = false)
+ *
+ * Creates a new StringScanner object to scan over the given +string+.
+ * +dup+ argument is obsolete and not used now.
+ */
static VALUE
strscan_initialize(argc, argv, self)
int argc;
@@ -216,6 +222,11 @@ strscan_initialize(argc, argv, self)
Instance Methods
======================================================================= */
+/*
+ * call-seq: StringScanner.must_C_version
+ *
+ * This method is defined for backward compatibility.
+ */
static VALUE
strscan_s_mustc(self)
VALUE self;
@@ -223,6 +234,9 @@ strscan_s_mustc(self)
return self;
}
+/*
+ * Resets the scan pointer to index 0 and clears matching data.
+ */
static VALUE
strscan_reset(self)
VALUE self;
@@ -235,6 +249,13 @@ strscan_reset(self)
return self;
}
+/*
+ * call-seq:
+ * terminate
+ * clear
+ *
+ * Set the scan pointer to the end of the string and clear matching data.
+ */
static VALUE
strscan_terminate(self)
VALUE self;
@@ -247,6 +268,9 @@ strscan_terminate(self)
return self;
}
+/*
+ * Equivalent to #terminate.
+ */
static VALUE
strscan_clear(self)
VALUE self;
@@ -265,6 +289,12 @@ strscan_get_string(self)
return p->str;
}
+/*
+ * call-seq: string=(str)
+ *
+ * Changes the string being scanned to +str+ and resets the scanner.
+ * Returns +str+.
+ */
static VALUE
strscan_set_string(self, str)
VALUE self, str;
@@ -280,6 +310,20 @@ strscan_set_string(self, str)
return str;
}
+/*
+ * call-seq:
+ * concat(str)
+ * <<(str)
+ *
+ * Appends +str+ to the string being scanned.
+ * This method does not affect the scan pointer.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.scan(/Fri /)
+ * s << " +1000 GMT"
+ * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
+ * s.scan(/Dec/) # -> "Dec"
+ */
static VALUE
strscan_concat(self, str)
VALUE self, str;
@@ -292,6 +336,20 @@ strscan_concat(self, str)
return self;
}
+/*
+ * Returns the position of the scan pointer. In the 'reset' position, this
+ * value is zero. In the 'terminated' position (i.e. the string is exhausted),
+ * this value is the length of the string.
+ *
+ * In short, it's a 0-based index into the string.
+ *
+ * s = StringScanner.new('test string')
+ * s.pos # -> 0
+ * s.scan_until /str/ # -> "test str"
+ * s.pos # -> 8
+ * s.terminate # -> #<StringScanner fin>
+ * s.pos # -> 11
+ */
static VALUE
strscan_get_pos(self)
VALUE self;
@@ -302,6 +360,15 @@ strscan_get_pos(self)
return INT2FIX(p->curr);
}
+/*
+ * call-seq: pos=(n)
+ *
+ * Modify the scan pointer.
+ *
+ * s = StringScanner.new('test string')
+ * s.pos = 7 # -> 7
+ * s.rest # -> "ring"
+ */
static VALUE
strscan_set_pos(self, v)
VALUE self, v;
@@ -371,6 +438,27 @@ strscan_do_scan(self, regex, succptr, getstr, headonly)
}
}
+/*
+ * call-seq:
+ * scanner.scan(pattern) => String
+ *
+ */
+
+/*
+ * call-seq: scan(pattern)
+ *
+ * Tries to match with +pattern+ at the current position. If there's a match,
+ * the scanner advances the "scan pointer" and returns the matched string.
+ * Otherwise, the scanner returns +nil+.
+ *
+ * s = StringScanner.new('test string')
+ * p s.scan(/\w+/) # -> "test"
+ * p s.scan(/\w+/) # -> nil
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\w+/) # -> "string"
+ * p s.scan(/./) # -> nil
+ *
+ */
static VALUE
strscan_scan(self, re)
VALUE self, re;
@@ -378,6 +466,17 @@ strscan_scan(self, re)
return strscan_do_scan(self, re, 1, 1, 1);
}
+/*
+ * call-seq: match?(pattern)
+ *
+ * Tests whether the given +pattern+ is matched from the current scan pointer.
+ * Returns the length of the match, or +nil+. The scan pointer is not advanced.
+ *
+ * s = StringScanner.new('test string')
+ * p s.match?(/\w+/) # -> 4
+ * p s.match?(/\w+/) # -> 4
+ * p s.match?(/\s+/) # -> nil
+ */
static VALUE
strscan_match_p(self, re)
VALUE self, re;
@@ -385,6 +484,23 @@ strscan_match_p(self, re)
return strscan_do_scan(self, re, 0, 0, 1);
}
+/*
+ * call-seq: skip(pattern)
+ *
+ * Attempts to skip over the given +pattern+ beginning with the scan pointer.
+ * If it matches, the scan pointer is advanced to the end of the match, and the
+ * length of the match is returned. Otherwise, +nil+ is returned.
+ *
+ * It's similar to #scan, but without returning the matched string.
+ *
+ * s = StringScanner.new('test string')
+ * p s.skip(/\w+/) # -> 4
+ * p s.skip(/\w+/) # -> nil
+ * p s.skip(/\s+/) # -> 1
+ * p s.skip(/\w+/) # -> 6
+ * p s.skip(/./) # -> nil
+ *
+ */
static VALUE
strscan_skip(self, re)
VALUE self, re;
@@ -392,6 +508,21 @@ strscan_skip(self, re)
return strscan_do_scan(self, re, 1, 0, 1);
}
+/*
+ * call-seq: check(pattern)
+ *
+ * This returns the value that #scan would return, without advancing the scan
+ * pointer. The match register is affected, though.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.check /Fri/ # -> "Fri"
+ * s.pos # -> 0
+ * s.matched # -> "Fri"
+ * s.check /12/ # -> nil
+ * s.matched # -> nil
+ *
+ * Mnemonic: it "checks" to see whether a #scan will return a value.
+ */
static VALUE
strscan_check(self, re)
VALUE self, re;
@@ -399,6 +530,16 @@ strscan_check(self, re)
return strscan_do_scan(self, re, 0, 1, 1);
}
+/*
+ * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
+ *
+ * Tests whether the given +pattern+ is matched from the current scan pointer.
+ * Returns the matched string if +return_string_p+ is true.
+ * Advances the scan pointer if +advance_pointer_p+ is true.
+ * The match register is affected.
+ *
+ * "full" means "#scan with full parameters".
+ */
static VALUE
strscan_scan_full(self, re, s, f)
VALUE self, re, s, f;
@@ -407,6 +548,18 @@ strscan_scan_full(self, re, s, f)
}
+/*
+ * call-seq: scan_until(pattern)
+ *
+ * Scans the string _until_ the +pattern+ is matched. Returns the substring up
+ * to and including the end of the match, advancing the scan pointer to that
+ * location. If there is no match, +nil+ is returned.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.scan_until(/1/) # -> "Fri Dec 1"
+ * s.pre_match # -> "Fri Dec "
+ * s.scan_until(/XYZ/) # -> nil
+ */
static VALUE
strscan_scan_until(self, re)
VALUE self, re;
@@ -414,6 +567,19 @@ strscan_scan_until(self, re)
return strscan_do_scan(self, re, 1, 1, 0);
}
+/*
+ * call-seq: exist?(pattern)
+ *
+ * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
+ * without advancing the scan pointer. This predicts whether a #scan_until
+ * will return a value.
+ *
+ * s = StringScanner.new('test string')
+ * s.exist? /s/ # -> 3
+ * s.scan /test/ # -> "test"
+ * s.exist? /s/ # -> 2
+ * s.exist? /e/ # -> nil
+ */
static VALUE
strscan_exist_p(self, re)
VALUE self, re;
@@ -421,6 +587,22 @@ strscan_exist_p(self, re)
return strscan_do_scan(self, re, 0, 0, 0);
}
+/*
+ * call-seq: skip_until(pattern)
+ *
+ * Advances the scan pointer until +pattern+ is matched and consumed. Returns
+ * the number of bytes advanced, or +nil+ if no match was found.
+ *
+ * Look ahead to match +pattern+, and advance the scan pointer to the _end_
+ * of the match. Return the number of characters advanced, or +nil+ if the
+ * match was unsuccessful.
+ *
+ * It's similar to #scan_until, but without returning the intervening string.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.skip_until /12/ # -> 10
+ * s.pos # -> 10
+ */
static VALUE
strscan_skip_until(self, re)
VALUE self, re;
@@ -428,6 +610,19 @@ strscan_skip_until(self, re)
return strscan_do_scan(self, re, 1, 0, 0);
}
+/*
+ * call-seq: check_until(pattern)
+ *
+ * This returns the value that #scan_until would return, without advancing the
+ * scan pointer. The match register is affected, though.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.check_until /12/ # -> "Fri Dec 12"
+ * s.pos # -> 0
+ * s.matched # -> "12"
+ *
+ * Mnemonic: it "checks" to see whether a #scan_until will return a value.
+ */
static VALUE
strscan_check_until(self, re)
VALUE self, re;
@@ -435,6 +630,15 @@ strscan_check_until(self, re)
return strscan_do_scan(self, re, 0, 1, 0);
}
+/*
+ * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
+ *
+ * Scans the string _until_ the +pattern+ is matched.
+ * Returns the matched string if +return_string_p+ is true, otherwise
+ * returns the number of bytes advanced.
+ * Advances the scan pointer if +advance_pointer_p+, otherwise not.
+ * This method does affect the match register.
+ */
static VALUE
strscan_search_full(self, re, s, f)
VALUE self, re, s, f;
@@ -457,6 +661,14 @@ adjust_registers_to_matched(p)
p->regs.end[0] = p->curr - p->prev;
}
+/*
+ * Scans one character and returns it.
+ *
+ * s = StringScanner.new('ab')
+ * s.getch # => "a"
+ * s.getch # => "b"
+ * s.getch # => nil
+ */
static VALUE
strscan_getch(self)
VALUE self;
@@ -480,6 +692,14 @@ strscan_getch(self)
p->prev + p->regs.end[0]);
}
+/*
+ * Scans one byte and returns it. Similar to, but not the same as, #getch.
+ *
+ * s = StringScanner.new('ab')
+ * s.get_byte # => "a"
+ * s.get_byte # => "b"
+ * s.get_byte # => nil
+ */
static VALUE
strscan_get_byte(self)
VALUE self;
@@ -507,7 +727,17 @@ strscan_getbyte(self)
return strscan_get_byte(self);
}
-
+/*
+ * call-seq: peek(len)
+ *
+ * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
+ * advancing the scan pointer.
+ *
+ * s = StringScanner.new('test string')
+ * s.peek(7) # => "test st"
+ * s.peek(7) # => "test st"
+ *
+ */
static VALUE
strscan_peek(self, vlen)
VALUE self, vlen;
@@ -526,6 +756,17 @@ strscan_peek(self, vlen)
return extract_beg_len(p, p->curr, len);
}
+/*
+ * Set the scan pointer to the previous position. Only one previous position is
+ * remembered, and it changes with each scanning operation.
+ *
+ * s = StringScanner.new('test string')
+ * s.scan(/\w+/) # => "test"
+ * s.unscan
+ * s.scan(/../) # => "te"
+ * s.scan(/\d/) # => nil
+ * s.unscan # ScanError: cannot unscan: prev match had failed
+ */
static VALUE
strscan_peep(self, vlen)
VALUE self, vlen;
@@ -549,6 +790,18 @@ strscan_unscan(self)
return self;
}
+/*
+ * Returns +true+ iff the scan pointer is at the beginning of the line.
+ *
+ * s = StringScanner.new("test\ntest\n")
+ * s.bol? # => true
+ * s.scan(/te/)
+ * s.bol? # => false
+ * s.scan(/st\n/)
+ * s.bol? # => true
+ * s.terminate
+ * s.bol? # => true
+ */
static VALUE
strscan_bol_p(self)
VALUE self;
@@ -561,6 +814,16 @@ strscan_bol_p(self)
return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
}
+/*
+ * Returns +true+ if the scan pointer is at the end of the string.
+ *
+ * s = StringScanner.new('test string')
+ * p s.eos? # => false
+ * s.scan(/test/)
+ * p s.eos? # => false
+ * s.terminate
+ * p s.eos? # => true
+ */
static VALUE
strscan_eos_p(self)
VALUE self;
@@ -574,14 +837,26 @@ strscan_eos_p(self)
return Qfalse;
}
+/*
+ * Equivalent to #eos?.
+ * This method is obsolete, use #eos? instead.
+ */
static VALUE
strscan_empty_p(self)
VALUE self;
{
- rb_warning("StringScanner#empty_p is obsolete; use #eos? instead");
+ rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
return strscan_eos_p(self);
}
+/*
+ * Returns true iff there is more data in the string. See #eos?.
+ * This method is obsolete; use #eos? instead.
+ *
+ * s = StringScanner.new('test string')
+ * s.eos? # These two
+ * s.rest? # are opposites.
+ */
static VALUE
strscan_rest_p(self)
VALUE self;
@@ -595,7 +870,15 @@ strscan_rest_p(self)
return Qtrue;
}
-
+/*
+ * Returns +true+ iff the last match was successful.
+ *
+ * s = StringScanner.new('test string')
+ * s.match?(/\w+/) # => 4
+ * s.matched? # => true
+ * s.match?(/\d+/) # => nil
+ * s.matched? # => false
+ */
static VALUE
strscan_matched_p(self)
VALUE self;
@@ -609,6 +892,13 @@ strscan_matched_p(self)
return Qfalse;
}
+/*
+ * Returns the last matched string.
+ *
+ * s = StringScanner.new('test string')
+ * s.match?(/\w+/) # -> 4
+ * s.matched # -> "test"
+ */
static VALUE
strscan_matched(self)
VALUE self;
@@ -622,6 +912,16 @@ strscan_matched(self)
p->prev + p->regs.end[0]);
}
+/*
+ * Returns the size of the most recent match (see #matched), or +nil+ if there
+ * was no recent match.
+ *
+ * s = StringScanner.new('test string')
+ * s.check /\w+/ # -> "test"
+ * s.matched_size # -> 4
+ * s.check /\d+/ # -> nil
+ * s.matched_size # -> nil
+ */
static VALUE
strscan_matched_size(self)
VALUE self;
@@ -634,6 +934,20 @@ strscan_matched_size(self)
return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
}
+/*
+ * call-seq: [](n)
+ *
+ * Return the n-th subgroup in the most recent match.
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
+ * s[0] # -> "Fri Dec 12 "
+ * s[1] # -> "Fri"
+ * s[2] # -> "Dec"
+ * s[3] # -> "12"
+ * s.post_match # -> "1975 14:39"
+ * s.pre_match # -> ""
+ */
static VALUE
strscan_aref(self, idx)
VALUE self, idx;
@@ -655,6 +969,15 @@ strscan_aref(self, idx)
p->prev + p->regs.end[i]);
}
+/*
+ * Return the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
+ *
+ * s = StringScanner.new('test string')
+ * s.scan(/\w+/) # -> "test"
+ * s.scan(/\s+/) # -> " "
+ * s.pre_match # -> "test"
+ * s.post_match # -> "string"
+ */
static VALUE
strscan_pre_match(self)
VALUE self;
@@ -667,6 +990,15 @@ strscan_pre_match(self)
return extract_range(p, 0, p->prev + p->regs.beg[0]);
}
+/*
+ * Return the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
+ *
+ * s = StringScanner.new('test string')
+ * s.scan(/\w+/) # -> "test"
+ * s.scan(/\s+/) # -> " "
+ * s.pre_match # -> "test"
+ * s.post_match # -> "string"
+ */
static VALUE
strscan_post_match(self)
VALUE self;
@@ -679,7 +1011,10 @@ strscan_post_match(self)
return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
}
-
+/*
+ * Returns the "rest" of the string (i.e. everything after the scan pointer).
+ * If there is no more data, it returns <tt>""</tt>.
+ */
static VALUE
strscan_rest(self)
VALUE self;
@@ -693,6 +1028,9 @@ strscan_rest(self)
return extract_range(p, p->curr, S_LEN(p));
}
+/*
+ * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
+ */
static VALUE
strscan_rest_size(self)
VALUE self;
@@ -709,10 +1047,20 @@ strscan_rest_size(self)
return INT2FIX(i);
}
-
#define INSPECT_LENGTH 5
#define BUFSIZE 256
+/*
+ * Returns a string that represents the StringScanner object, showing:
+ * - the current position
+ * - the size of the string
+ * - the characters surrounding the scan pointer
+ *
+ * s = StringScanner.new("Fri Dec 12 1975 14:39")
+ * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
+ * s.scan_until /12/ # -> "Fri Dec 12"
+ * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
+ */
static VALUE
strscan_inspect(self)
VALUE self;
@@ -796,6 +1144,104 @@ inspect2(p)
Ruby Interface
======================================================================= */
+/*
+ * StringScanner provides for lexical scanning operations on a String. Here is
+ * an example of its usage:
+ *
+ * s = StringScanner.new('This is an example string')
+ * s.eos? # -> false
+ *
+ * p s.scan(/\w+/) # -> "This"
+ * p s.scan(/\w+/) # -> nil
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\s+/) # -> nil
+ * p s.scan(/\w+/) # -> "is"
+ * s.eos? # -> false
+ *
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\w+/) # -> "an"
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\w+/) # -> "example"
+ * p s.scan(/\s+/) # -> " "
+ * p s.scan(/\w+/) # -> "string"
+ * s.eos? # -> true
+ *
+ * p s.scan(/\s+/) # -> nil
+ * p s.scan(/\w+/) # -> nil
+ *
+ * Scanning a string means remembering the position of a <i>scan pointer</i>,
+ * which is just an index. The scan pointer effectively points _between_
+ * characters: position 0 is just before the first character, and a position
+ * equal to the length of the string is just after the last one.
+ *
+ * Given the string "test string", here are the pertinent scan pointer
+ * positions:
+ *
+ *     t   e   s   t       s   t   r   i   n   g
+ *   0   1   2   3   4   5   6   7   8   9   10  11
+ *
+ * When you #scan for a pattern (a regular expression), the match must occur
+ * at the character after the scan pointer. If you use #scan_until, then the
+ * match can occur anywhere after the scan pointer. In both cases, the scan
+ * pointer moves <i>just beyond</i> the last character of the match, ready to
+ * scan again from the next character onwards. This is demonstrated by the
+ * example above.
+ *
+ * == Method Categories
+ *
+ * There are other methods besides the plain scanners. You can look ahead in
+ * the string without actually scanning. You can access the most recent match.
+ * You can modify the string being scanned, reset or terminate the scanner,
+ * find out or change the position of the scan pointer, skip ahead, and so on.
+ *
+ * === Advancing the Scan Pointer
+ *
+ * getch
+ * get_byte
+ * scan
+ * scan_until
+ * skip
+ * skip_until
+ *
+ * === Looking Ahead
+ *
+ * check
+ * check_until
+ * exist?
+ * match?
+ * peek
+ *
+ * === Finding Where we Are
+ *
+ * bol?
+ * eos?
+ * pos
+ *
+ * === Setting Where we Are
+ *
+ * reset
+ * terminate
+ * pos=
+ *
+ * === Match Data
+ *
+ * matched
+ * matched?
+ * matched_size
+ * []
+ * pre_match
+ * post_match
+ *
+ * === Miscellaneous
+ *
+ * <<
+ * concat
+ * string
+ * string=
+ * unscan
+ *
+ * There are aliases to several of the methods.
+ */
void
Init_strscan()
{