summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author Hiroshi SHIBATA <hsbt@ruby-lang.org> 2023-02-07 16:22:16 +0900
committer nagachika <nagachika@ruby-lang.org> 2023-03-26 14:22:38 +0900
commit da27583cf364c0d69c085db4abf358c334a8eca1 (patch)
tree bb2e5ff28214241b49e63c9f2dd28db0965dc257 /lib
parent 19af12ff195aba64bdca7a83f564f2c0e46061c0 (diff)
Merge URI-0.12.0
Diffstat (limited to 'lib')
-rw-r--r-- lib/uri.rb 1
-rw-r--r-- lib/uri/common.rb 52
-rw-r--r-- lib/uri/file.rb 6
-rw-r--r-- lib/uri/generic.rb 14
-rw-r--r-- lib/uri/mailto.rb 2
-rw-r--r-- lib/uri/rfc3986_parser.rb 5
-rw-r--r-- lib/uri/version.rb 2
7 files changed, 64 insertions, 18 deletions
diff --git a/lib/uri.rb b/lib/uri.rb
index 394c156ac5..59a7c4ad28 100644
--- a/lib/uri.rb
+++ b/lib/uri.rb
@@ -101,3 +101,4 @@ require_relative 'uri/ldap'
require_relative 'uri/ldaps'
require_relative 'uri/mailto'
require_relative 'uri/ws'
+require_relative 'uri/wss'
diff --git a/lib/uri/common.rb b/lib/uri/common.rb
index 26b179add2..ca38bec7ec 100644
--- a/lib/uri/common.rb
+++ b/lib/uri/common.rb
@@ -13,6 +13,8 @@ require_relative "rfc2396_parser"
require_relative "rfc3986_parser"
module URI
+ include RFC2396_REGEXP
+
REGEXP = RFC2396_REGEXP
Parser = RFC2396_Parser
RFC3986_PARSER = RFC3986_Parser.new
@@ -62,14 +64,17 @@ module URI
module_function :make_components_hash
end
- include REGEXP
-
module Schemes
end
private_constant :Schemes
+ #
+ # Register the given +klass+ to be instantiated when parsing URLs with the given +scheme+.
+ # Note that currently only schemes that are valid constant names after .upcase
+ # can be registered (the characters -, + and . are not allowed).
+ #
def self.register_scheme(scheme, klass)
- Schemes.const_set(scheme, klass)
+ Schemes.const_set(scheme.to_s.upcase, klass)
end
# Returns a Hash of the defined schemes.
@@ -295,6 +300,7 @@ module URI
256.times do |i|
TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
end
+ TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
TBLENCWWWCOMP_[' '] = '+'
TBLENCWWWCOMP_.freeze
TBLDECWWWCOMP_ = {} # :nodoc:
@@ -320,6 +326,33 @@ module URI
#
# See URI.decode_www_form_component, URI.encode_www_form.
def self.encode_www_form_component(str, enc=nil)
+ _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
+ end
+
+ # Decodes given +str+ of URL-encoded form data.
+ #
+ # This decodes + to SP.
+ #
+ # See URI.encode_www_form_component, URI.decode_www_form.
+ def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+ _decode_uri_component(/\+|%\h\h/, str, enc)
+ end
+
+ # Encodes +str+ using URL encoding
+ #
+ # This encodes SP to %20 instead of +.
+ def self.encode_uri_component(str, enc=nil)
+ _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc)
+ end
+
+ # Decodes given +str+ of URL-encoded data.
+ #
+ # This does not decode + to SP.
+ def self.decode_uri_component(str, enc=Encoding::UTF_8)
+ _decode_uri_component(/%\h\h/, str, enc)
+ end
+
+ def self._encode_uri_component(regexp, table, str, enc)
str = str.to_s.dup
if str.encoding != Encoding::ASCII_8BIT
if enc && enc != Encoding::ASCII_8BIT
@@ -328,19 +361,16 @@ module URI
end
str.force_encoding(Encoding::ASCII_8BIT)
end
- str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
+ str.gsub!(regexp, table)
str.force_encoding(Encoding::US_ASCII)
end
+ private_class_method :_encode_uri_component
- # Decodes given +str+ of URL-encoded form data.
- #
- # This decodes + to SP.
- #
- # See URI.encode_www_form_component, URI.decode_www_form.
- def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+ def self._decode_uri_component(regexp, str, enc)
raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
- str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
+ str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
end
+ private_class_method :_decode_uri_component
# Generates URL-encoded form data from given +enum+.
#
diff --git a/lib/uri/file.rb b/lib/uri/file.rb
index 7671ad6470..4ff0bc097e 100644
--- a/lib/uri/file.rb
+++ b/lib/uri/file.rb
@@ -33,6 +33,9 @@ module URI
# If an Array is used, the components must be passed in the
# order <code>[host, path]</code>.
#
+ # A path from e.g. the File class should be escaped before
+ # being passed.
+ #
# Examples:
#
# require 'uri'
@@ -44,6 +47,9 @@ module URI
# :path => '/ruby/src'})
# uri2.to_s # => "file://host.example.com/ruby/src"
#
+ # uri3 = URI::File.build({:path => URI::DEFAULT_PARSER.escape('/path/my file.txt')})
+ # uri3.to_s # => "file:///path/my%20file.txt"
+ #
def self.build(args)
tmp = Util::make_components_hash(self, args)
super(tmp)
diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb
index cfa0de6b74..69698c4e2d 100644
--- a/lib/uri/generic.rb
+++ b/lib/uri/generic.rb
@@ -564,16 +564,26 @@ module URI
end
end
- # Returns the user component.
+ # Returns the user component (without URI decoding).
def user
@user
end
- # Returns the password component.
+ # Returns the password component (without URI decoding).
def password
@password
end
+ # Returns the user component after URI decoding.
+ def decoded_user
+ URI.decode_uri_component(@user) if @user
+ end
+
+ # Returns the password component after URI decoding.
+ def decoded_password
+ URI.decode_uri_component(@password) if @password
+ end
+
#
# Checks the host +v+ component for RFC2396 compliance
# and against the URI::Parser Regexp for :HOST.
diff --git a/lib/uri/mailto.rb b/lib/uri/mailto.rb
index 87cb99656f..cb8024f301 100644
--- a/lib/uri/mailto.rb
+++ b/lib/uri/mailto.rb
@@ -15,7 +15,7 @@ module URI
# RFC6068, the mailto URL scheme.
#
class MailTo < Generic
- include REGEXP
+ include RFC2396_REGEXP
# A Default port of nil for URI::MailTo.
DEFAULT_PORT = nil
diff --git a/lib/uri/rfc3986_parser.rb b/lib/uri/rfc3986_parser.rb
index 3e07de4805..f3816d9ae5 100644
--- a/lib/uri/rfc3986_parser.rb
+++ b/lib/uri/rfc3986_parser.rb
@@ -2,9 +2,8 @@
module URI
class RFC3986_Parser # :nodoc:
# URI defined in RFC3986
- # this regexp is modified not to host is not empty string
- RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
- RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+)\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
+ RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
+ RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
attr_reader :regexp
def initialize
diff --git a/lib/uri/version.rb b/lib/uri/version.rb
index 82188e25ad..a9643ef8bc 100644
--- a/lib/uri/version.rb
+++ b/lib/uri/version.rb
@@ -1,6 +1,6 @@
module URI
# :stopdoc:
- VERSION_CODE = '001100'.freeze
+ VERSION_CODE = '001200'.freeze
VERSION = VERSION_CODE.scan(/../).collect{|n| n.to_i}.join('.').freeze
# :startdoc:
end