summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2018-03-22 11:20:03 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2018-03-22 11:20:03 +0000
commit234a30459cdae6aa7da6e28a1082d9c11f315696 (patch)
tree397d1b3f4deb23f81ee269b125b8154dde253404
parentb9881083f14f853a208f8e097782cd2a97d61497 (diff)
set UTF-8 if given URI string is ASCII
URIs are now normally UTF-8, and a US-ASCII URI string is considered to be an escaped UTF-8 string. https://github.com/rails/rails/issues/32294 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62897 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--lib/uri/rfc2396_parser.rb4
-rw-r--r--test/uri/test_parser.rb1
2 files changed, 4 insertions, 1 deletions
diff --git a/lib/uri/rfc2396_parser.rb b/lib/uri/rfc2396_parser.rb
index a6e5df9..6a2d819 100644
--- a/lib/uri/rfc2396_parser.rb
+++ b/lib/uri/rfc2396_parser.rb
@@ -329,7 +329,9 @@ module URI
# Removes escapes from +str+
#
def unescape(str, escaped = @regexp[:ESCAPED])
- str.gsub(escaped) { [$&[1, 2]].pack('H2').force_encoding(str.encoding) }
+ enc = str.encoding
+ enc = Encoding::UTF_8 if enc == Encoding::US_ASCII
+ str.gsub(escaped) { [$&[1, 2]].pack('H2').force_encoding(enc) }
end
@@to_s = Kernel.instance_method(:to_s)
diff --git a/test/uri/test_parser.rb b/test/uri/test_parser.rb
index b9cf4b7..088628a 100644
--- a/test/uri/test_parser.rb
+++ b/test/uri/test_parser.rb
@@ -50,6 +50,7 @@ class URI::TestParser < Test::Unit::TestCase
p1 = URI::Parser.new
assert_equal("\xe3\x83\x90", p1.unescape("\xe3\x83\x90"))
assert_equal("\xe3\x83\x90", p1.unescape('%e3%83%90'))
+ assert_equal("\u3042", p1.unescape('%e3%81%82'.force_encoding(Encoding::US_ASCII)))
assert_equal("\xe3\x83\x90\xe3\x83\x90", p1.unescape("\xe3\x83\x90%e3%83%90"))
end
end