summary refs log tree commit diff
diff options
context:
space:
mode:
author aamine <aamine@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-12-16 15:18:11 +0000
committer aamine <aamine@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-12-16 15:18:11 +0000
commit 6ef31af2d178fb4916150d317ceafe156fccdf65 (patch)
tree 5e253ca9cb7a3116e5f501d0cd55bffa367980ee
parent ff9f067f8977c64f1a09e676a28ae1043b932a51 (diff)
introduce some new methods
* ext/strscan/strscan.c: new method StringScanner#beginning_of_line? (alias #bol?)
* ext/strscan/strscan.c: new method StringScanner#concat and #<<.
* ext/strscan/strscan.c: StringScanner#new(str) does not duplicate nor freeze STR (allow destructive modification).
* test/strscan/test_stringscanner.rb: test new methods above.
* test/strscan/test_stringscanner.rb: test destructive string modification.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5201 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 15
-rw-r--r-- ext/strscan/strscan.c 106
-rw-r--r-- test/strscan/test_stringscanner.rb 93
3 files changed, 173 insertions, 41 deletions
diff --git a/ChangeLog b/ChangeLog
index 332f15a947..f75b27aaa3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+Wed Dec 17 00:16:14 2003 Minero Aoki <aamine@loveruby.net>
+
+ * ext/strscan/strscan.c: new method
+ StringScanner#beginning_of_line? (alias #bol?)
+
+ * ext/strscan/strscan.c: new method StringScanner#concat and #<<.
+
+ * ext/strscan/strscan.c: StringScanner#new(str) does not duplicate
+ nor freeze STR (allow destructive modification).
+
+ * test/strscan/test_stringscanner.rb: test new methods above.
+
+ * test/strscan/test_stringscanner.rb: test destructive string
+ modification.
+
Tue Dec 16 21:20:47 2003 Tanaka Akira <akr@m17n.org>
* lib/pp.rb: don't use local variable `pp'.
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 7169c2e163..590f729e9f 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -52,10 +52,10 @@ struct strscanner
#define MATCHED(s) (s)->flags |= FLAG_MATCHED
#define CLEAR_MATCH_STATUS(s) (s)->flags &= ~FLAG_MATCHED
-#define S_PTR(s) (RSTRING((s)->str)->ptr)
+#define S_PBEG(s) (RSTRING((s)->str)->ptr)
#define S_LEN(s) (RSTRING((s)->str)->len)
-#define S_END(s) (S_PTR(s) + S_LEN(s))
-#define CURPTR(s) (S_PTR(s) + (s)->curr)
+#define S_PEND(s) (S_PBEG(s) + S_LEN(s))
+#define CURPTR(s) (S_PBEG(s) + (s)->curr)
#define S_RESTLEN(s) (S_LEN(s) - (s)->curr)
#define EOS_P(s) ((s)->curr >= RSTRING(p->str)->len)
@@ -82,6 +82,7 @@ static VALUE strscan_s_mustc _((VALUE self));
static VALUE strscan_terminate _((VALUE self));
static VALUE strscan_get_string _((VALUE self));
static VALUE strscan_set_string _((VALUE self, VALUE str));
+static VALUE strscan_concat _((VALUE self, VALUE str));
static VALUE strscan_get_pos _((VALUE self));
static VALUE strscan_set_pos _((VALUE self, VALUE pos));
static VALUE strscan_do_scan _((VALUE self, VALUE regex,
@@ -102,6 +103,7 @@ static VALUE strscan_getch _((VALUE self));
static VALUE strscan_get_byte _((VALUE self));
static VALUE strscan_peek _((VALUE self, VALUE len));
static VALUE strscan_unscan _((VALUE self));
+static VALUE strscan_bol_p _((VALUE self));
static VALUE strscan_eos_p _((VALUE self));
static VALUE strscan_rest_p _((VALUE self));
static VALUE strscan_matched_p _((VALUE self));
@@ -114,8 +116,8 @@ static VALUE strscan_rest _((VALUE self));
static VALUE strscan_rest_size _((VALUE self));
static VALUE strscan_inspect _((VALUE self));
-static char* inspect_before _((struct strscanner *p, char *buf));
-static char* inspect_after _((struct strscanner *p, char *buf));
+static VALUE inspect1 _((struct strscanner *p));
+static VALUE inspect2 _((struct strscanner *p));
/* =======================================================================
Utils
@@ -135,7 +137,10 @@ extract_range(p, beg_i, end_i)
struct strscanner *p;
long beg_i, end_i;
{
- return infect(rb_str_new(S_PTR(p) + beg_i, end_i - beg_i), p);
+ if (beg_i > S_LEN(p)) return Qnil;
+ if (end_i > S_LEN(p))
+ end_i = S_LEN(p);
+ return infect(rb_str_new(S_PBEG(p) + beg_i, end_i - beg_i), p);
}
static VALUE
@@ -143,7 +148,10 @@ extract_beg_len(p, beg_i, len)
struct strscanner *p;
long beg_i, len;
{
- return infect(rb_str_new(S_PTR(p) + beg_i, len), p);
+ if (beg_i > S_LEN(p)) return Qnil;
+ if (beg_i + len > S_LEN(p))
+ len = S_LEN(p) - beg_i;
+ return infect(rb_str_new(S_PBEG(p) + beg_i, len), p);
}
@@ -192,11 +200,9 @@ strscan_initialize(argc, argv, self)
VALUE str, need_dup;
Data_Get_Struct(self, struct strscanner, p);
- if (rb_scan_args(argc, argv, "11", &str, &need_dup) == 1)
- need_dup = Qtrue;
+ rb_scan_args(argc, argv, "11", &str, &need_dup);
StringValue(str);
- p->str = RTEST(need_dup) ? rb_str_dup(str) : str;
- rb_obj_freeze(p->str);
+ p->str = str;
return self;
}
@@ -263,6 +269,18 @@ strscan_set_string(self, str)
}
static VALUE
+strscan_concat(self, str)
+ VALUE self, str;
+{
+ struct strscanner *p;
+
+ GET_SCANNER(self, p);
+ StringValue(str);
+ rb_str_append(p->str, str);
+ return self;
+}
+
+static VALUE
strscan_get_pos(self)
VALUE self;
{
@@ -304,6 +322,9 @@ strscan_do_scan(self, regex, succptr, getstr, headonly)
GET_SCANNER(self, p);
CLEAR_MATCH_STATUS(p);
+ if (EOS_P(p)) {
+ return Qnil;
+ }
strscan_prepare_re(regex);
if (headonly) {
ret = re_match(RREGEXP(regex)->ptr,
@@ -485,7 +506,6 @@ strscan_peek(self, vlen)
return extract_beg_len(p, p->curr, len);
}
-
static VALUE
strscan_unscan(self)
VALUE self;
@@ -501,6 +521,17 @@ strscan_unscan(self)
return self;
}
+static VALUE
+strscan_bol_p(self)
+ VALUE self;
+{
+ struct strscanner *p;
+
+ GET_SCANNER(self, p);
+ if (CURPTR(p) > S_PEND(p)) return Qnil;
+ if (p->curr == 0) return Qtrue;
+ return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
+}
static VALUE
strscan_eos_p(self)
@@ -652,9 +683,9 @@ strscan_inspect(self)
{
struct strscanner *p;
char buf[BUFSIZE];
- char buf_before[16];
- char buf_after[16];
long len;
+ VALUE result;
+ VALUE a, b;
Data_Get_Struct(self, struct strscanner, p);
if (NIL_P(p->str)) {
@@ -667,24 +698,33 @@ strscan_inspect(self)
rb_class2name(CLASS_OF(self)));
return infect(rb_str_new(buf, len), p);
}
- len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld %s@%s>",
+ if (p->curr == 0) {
+ b = inspect2(p);
+ len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld @ %s>",
+ rb_class2name(CLASS_OF(self)),
+ p->curr, S_LEN(p),
+ RSTRING(b)->ptr);
+ return infect(rb_str_new(buf, len), p);
+ }
+ a = inspect1(p);
+ b = inspect2(p);
+ len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld %s @ %s>",
rb_class2name(CLASS_OF(self)),
p->curr, S_LEN(p),
- inspect_before(p, buf_before),
- inspect_after(p, buf_after));
+ RSTRING(a)->ptr,
+ RSTRING(b)->ptr);
return infect(rb_str_new(buf, len), p);
}
-static char*
-inspect_before(p, buf)
+static VALUE
+inspect1(p)
struct strscanner *p;
- char *buf;
{
+ char buf[BUFSIZE];
char *bp = buf;
long len;
- if (p->curr == 0) return "";
- *bp++ = '"';
+ if (p->curr == 0) return rb_str_new2("");
if (p->curr > INSPECT_LENGTH) {
strcpy(bp, "..."); bp += 3;
len = INSPECT_LENGTH;
@@ -693,22 +733,18 @@ inspect_before(p, buf)
len = p->curr;
}
memcpy(bp, CURPTR(p) - len, len); bp += len;
- *bp++ = '"';
- *bp++ = ' ';
- *bp++ = '\0';
- return buf;
+ return rb_str_dump(rb_str_new(buf, bp - buf));
}
-static char*
-inspect_after(p, buf)
+static VALUE
+inspect2(p)
struct strscanner *p;
- char *buf;
{
+ char buf[BUFSIZE];
char *bp = buf;
long len;
- *bp++ = ' ';
- *bp++ = '"';
+ if (EOS_P(p)) return rb_str_new2("");
len = S_LEN(p) - p->curr;
if (len > INSPECT_LENGTH) {
len = INSPECT_LENGTH;
@@ -718,9 +754,7 @@ inspect_after(p, buf)
else {
memcpy(bp, CURPTR(p), len); bp += len;
}
- *bp++ = '"';
- *bp++ = '\0';
- return buf;
+ return rb_str_dump(rb_str_new(buf, bp - buf));
}
/* =======================================================================
@@ -756,6 +790,8 @@ Init_strscan()
rb_define_method(StringScanner, "clear", strscan_terminate, 0);
rb_define_method(StringScanner, "string", strscan_get_string, 0);
rb_define_method(StringScanner, "string=", strscan_set_string, 1);
+ rb_define_method(StringScanner, "concat", strscan_concat, 1);
+ rb_define_method(StringScanner, "<<", strscan_concat, 1);
rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
@@ -781,6 +817,8 @@ Init_strscan()
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
+ rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
+ rb_define_method(StringScanner, "bol?", strscan_bol_p, 0);
rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
rb_define_method(StringScanner, "empty?", strscan_eos_p, 0);
rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);
diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb
index 2bc37bc544..b9f7f7ddae 100644
--- a/test/strscan/test_stringscanner.rb
+++ b/test/strscan/test_stringscanner.rb
@@ -5,14 +5,11 @@
require 'strscan'
require 'test/unit'
-
class TestStringScanner < Test::Unit::TestCase
-
def test_s_new
s = StringScanner.new('test string')
assert_instance_of StringScanner, s
assert_equal false, s.eos?
- assert_equal true, s.string.frozen?
assert_equal false, s.tainted?
str = 'test string'
@@ -21,7 +18,6 @@ class TestStringScanner < Test::Unit::TestCase
assert_instance_of StringScanner, s
assert_equal false, s.eos?
assert_same str, s.string
- assert_equal true, s.string.frozen?
assert_equal true, s.string.tainted?
str = 'test string'
@@ -30,8 +26,8 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal true, s.string.tainted?
end
-if VERSION >= '1.7.0'
UNINIT_ERROR = ArgumentError
+
def test_s_allocate
s = StringScanner.allocate
assert_equal '#<StringScanner (uninitialized)>', s.inspect.sub(/StringScanner_C/, 'StringScanner')
@@ -42,7 +38,6 @@ if VERSION >= '1.7.0'
assert_nothing_raised(UNINIT_ERROR) { s.eos? }
assert_equal false, s.eos?
end
-end
def test_s_mustc
assert_nothing_raised(NotImplementedError) {
@@ -70,6 +65,9 @@ end
s.get_byte
assert_equal '#<StringScanner 1/11 "t" @ "est s...">', s.inspect.sub(/StringScanner_C/, 'StringScanner')
assert_equal true, s.inspect.tainted?
+
+ s = StringScanner.new("\n")
+ assert_equal '#<StringScanner 0/1 @ "\n">', s.inspect
end
def test_eos?
@@ -85,6 +83,51 @@ end
assert_equal true, s.eos?
s.scan(/\w+/)
assert_equal true, s.eos?
+
+ s = StringScanner.new('test')
+ s.scan(/te/)
+ s.string.replace ''
+ assert_equal true, s.eos?
+ end
+
+ def test_bol?
+ s = StringScanner.new("a\nbbb\n\ncccc\nddd\r\neee")
+ assert_equal true, s.bol?
+ assert_equal true, s.bol?
+ s.scan(/a/)
+ assert_equal false, s.bol?
+ assert_equal false, s.bol?
+ s.scan(/\n/)
+ assert_equal true, s.bol?
+ s.scan(/b/)
+ assert_equal false, s.bol?
+ s.scan(/b/)
+ assert_equal false, s.bol?
+ s.scan(/b/)
+ assert_equal false, s.bol?
+ s.scan(/\n/)
+ assert_equal true, s.bol?
+ s.unscan
+ assert_equal false, s.bol?
+ s.scan(/\n/)
+ s.scan(/\n/)
+ assert_equal true, s.bol?
+ s.scan(/c+\n/)
+ assert_equal true, s.bol?
+ s.scan(/d+\r\n/)
+ assert_equal true, s.bol?
+ s.scan(/e+/)
+ assert_equal false, s.bol?
+ end
+
+ def test_string
+ s = StringScanner.new('test')
+ assert_equal 'test', s.string
+ s.string = 'a'
+ assert_equal 'a', s.string
+ s.scan(/a/)
+ s.string = 'b'
+ assert_equal 0, s.pos
end
def test_pos
@@ -98,6 +141,19 @@ end
assert_equal 11, s.pos
end
+ def test_concat
+ s = StringScanner.new('a')
+ s.scan(/a/)
+ s.concat 'b'
+ assert_equal false, s.eos?
+ assert_equal 'b', s.scan(/b/)
+ assert_equal true, s.eos?
+ s.concat 'c'
+ assert_equal false, s.eos?
+ assert_equal 'c', s.scan(/c/)
+ assert_equal true, s.eos?
+ end
+
def test_scan
s = StringScanner.new('stra strb strc', true)
tmp = s.scan(/\w+/)
@@ -139,6 +195,15 @@ end
assert_nil s.scan(/\w+/)
assert_nil s.scan(/\w+/)
+
+ s = StringScanner.new('test')
+ s.scan(/te/)
+ # This assumes #string does not duplicate string,
+ # but it is implementation specific issue.
+ # DO NOT RELY ON THIS FEATURE.
+ s.string.replace ''
+ # unspecified: assert_equal 2, s.pos
+ assert_equal nil, s.scan(/test/)
end
def test_skip
@@ -151,6 +216,11 @@ end
assert_nil s.skip(/\w+/)
assert_nil s.skip(/\s+/)
assert_equal true, s.eos?
+
+ s = StringScanner.new('test')
+ s.scan(/te/)
+ s.string.replace ''
+ assert_equal nil, s.skip(/./)
end
def test_getch
@@ -175,6 +245,11 @@ end
assert_equal "\244\242", s.getch
assert_nil s.getch
$KCODE = 'NONE'
+
+ s = StringScanner.new('test')
+ s.scan(/te/)
+ s.string.replace ''
+ assert_equal nil, s.getch
end
def test_get_byte
@@ -201,6 +276,11 @@ end
assert_equal "\242", s.get_byte
assert_nil s.get_byte
$KCODE = 'NONE'
+
+ s = StringScanner.new('test')
+ s.scan(/te/)
+ s.string.replace ''
+ assert_equal nil, s.get_byte
end
def test_matched
@@ -392,5 +472,4 @@ end
s.reset
assert_equal 0, s.pos
end
-
end