summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraamine <aamine@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2003-12-08 04:03:11 +0000
committeraamine <aamine@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2003-12-08 04:03:11 +0000
commit33565505819b313857f55a1f0b6309719e8799b3 (patch)
treea4d0ee41e4a573eae7c72738aaf5abd41aa6f635
parent62848326c88c7784b493a09f58798760971975d2 (diff)
* lib/uri/common.rb: new method URI.regexp. [ruby-dev:22121]
* test/uri/test_common.rb: add test for URI.regexp.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5136 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog6
-rw-r--r--lib/uri/common.rb52
2 files changed, 41 insertions, 17 deletions
diff --git a/ChangeLog b/ChangeLog
index 9c41c6f400..33bb1489af 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Mon Dec 8 13:02:11 2003 Minero Aoki <aamine@loveruby.net>
+
+ * lib/uri/common.rb: new method URI.regexp. [ruby-dev:22121]
+
+ * test/uri/test_common.rb: add test for URI.regexp.
+
Mon Dec 8 12:44:14 2003 Yukihiro Matsumoto <matz@ruby-lang.org>
* pack.c: define swap16 and swap32 only if they are not
diff --git a/lib/uri/common.rb b/lib/uri/common.rb
index 5d6a3b5519..fbb3558a32 100644
--- a/lib/uri/common.rb
+++ b/lib/uri/common.rb
@@ -396,28 +396,46 @@ module URI
--- URI::extract(str[, schemes])
=end
- def self.extract(str, schemes = [])
- urls = []
- regexp = ABS_URI_REF
- unless schemes.empty?
- regexp = Regexp.new('(?=' + schemes.collect{|s|
- Regexp.quote(s + ':')
- }.join('|') + ')' + PATTERN::X_ABS_URI,
- Regexp::EXTENDED, 'N')
+ def self.extract(str, schemes = nil, &block)
+ if block_given?
+ str.scan(regexp(schemes)) { yield $& }
+ nil
+ else
+ result = []
+ str.scan(regexp(schemes)) { result.push $& }
+ result
end
+ end
- str.scan(regexp) {
- if block_given?
- yield($&)
- else
- urls << $&
+=begin
+
+--- URI::regexp([match_schemes])
+
+ Returns a Regexp object which matches URI-like strings.
+ If MATCH_SCHEMES is given, the resulting regexp matches only URIs
+ whose scheme is one of the MATCH_SCHEMES.
+
+ The Regexp object returned by this method contains an arbitrary
+ number of capture groups (parentheses). Never rely on how many
+ there are.
+
+ # extract first URI from html_string
+ html_string.slice(URI.regexp)
+
+ # remove ftp URIs
+ html_string.sub(URI.regexp(['ftp']), '')
+
+ # You should not rely on the number of parentheses
+ html_string.scan(URI.regexp) do |*matches|
+ p $&
end
- }
- if block_given?
- return nil
+=end
+ def self.regexp(schemes = nil)
+ unless schemes
+ ABS_URI_REF
else
- return urls
+ /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
end
end