diff options
-rw-r--r-- | ext/cgi/escape/escape.c | 353 | ||||
-rw-r--r-- | lib/cgi.rb | 4 | ||||
-rw-r--r-- | lib/cgi/cgi.gemspec | 19 | ||||
-rw-r--r-- | lib/cgi/cookie.rb | 44 | ||||
-rw-r--r-- | lib/cgi/core.rb | 45 | ||||
-rw-r--r-- | lib/cgi/util.rb | 49 | ||||
-rw-r--r-- | test/cgi/test_cgi_cookie.rb | 82 | ||||
-rw-r--r-- | test/cgi/test_cgi_header.rb | 8 | ||||
-rw-r--r-- | test/cgi/test_cgi_util.rb | 50 |
9 files changed, 460 insertions, 194 deletions
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index f88b61478b..c5b76de596 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -37,7 +37,7 @@ escaped_length(VALUE str) { const long len = RSTRING_LEN(str); if (len >= LONG_MAX / HTML_ESCAPE_MAX_LEN) { - ruby_malloc_size_overflow(len, HTML_ESCAPE_MAX_LEN); + ruby_malloc_size_overflow(len, HTML_ESCAPE_MAX_LEN); } return len * HTML_ESCAPE_MAX_LEN; } @@ -81,8 +81,8 @@ optimized_unescape_html(VALUE str) enum {UNICODE_MAX = 0x10ffff}; rb_encoding *enc = rb_enc_get(str); unsigned long charlimit = (strcasecmp(rb_enc_name(enc), "UTF-8") == 0 ? UNICODE_MAX : - strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 : - 128); + strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 : + 128); long i, len, beg = 0; size_t clen, plen; int overflow; @@ -94,89 +94,89 @@ optimized_unescape_html(VALUE str) cstr = RSTRING_PTR(str); for (i = 0; i < len; i++) { - unsigned long cc; - char c = cstr[i]; - if (c != '&') continue; - plen = i - beg; - if (++i >= len) break; - c = (unsigned char)cstr[i]; + unsigned long cc; + char c = cstr[i]; + if (c != '&') continue; + plen = i - beg; + if (++i >= len) break; + c = (unsigned char)cstr[i]; #define MATCH(s) (len - i >= (int)rb_strlen_lit(s) && \ - memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \ - (i += rb_strlen_lit(s) - 1, 1)) - switch (c) { - case 'a': - ++i; - if (MATCH("pos;")) { - c = '\''; - } - else if (MATCH("mp;")) { - c = '&'; - } - else continue; - break; - case 'q': - ++i; - if (MATCH("uot;")) { - c = '"'; - } - else continue; - break; - case 'g': - ++i; - if (MATCH("t;")) { - c = '>'; - } - else continue; - break; - case 'l': - ++i; - if (MATCH("t;")) { - c = '<'; - } - else continue; - break; - case '#': - if (len - ++i >= 2 && ISDIGIT(cstr[i])) { - cc = ruby_scan_digits(&cstr[i], len-i, 10, &clen, &overflow); - } - else if ((cstr[i] == 'x' || cstr[i] == 'X') && len - ++i >= 2 && ISXDIGIT(cstr[i])) { - cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow); - } - else continue; - i += clen; - if (overflow || cc >= charlimit || cstr[i] != ';') continue; - if (!dest) { - dest = rb_str_buf_new(len); - } - rb_str_cat(dest, cstr + beg, plen); - if (charlimit > 256) { - rb_str_cat(dest, buf, rb_enc_mbcput((OnigCodePoint)cc, buf, enc)); - } - else { - c = (unsigned char)cc; - rb_str_cat(dest, &c, 1); - } - beg = i + 1; - continue; - default: - --i; - continue; - } - if (!dest) { - dest = rb_str_buf_new(len); - } - rb_str_cat(dest, cstr + beg, plen); - rb_str_cat(dest, &c, 1); - beg = i + 1; + memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \ + (i += rb_strlen_lit(s) - 1, 1)) + switch (c) { + case 'a': + ++i; + if (MATCH("pos;")) { + c = '\''; + } + else if (MATCH("mp;")) { + c = '&'; + } + else continue; + break; + case 'q': + ++i; + if (MATCH("uot;")) { + c = '"'; + } + else continue; + break; + case 'g': + ++i; + if (MATCH("t;")) { + c = '>'; + } + else continue; + break; + case 'l': + ++i; + if (MATCH("t;")) { + c = '<'; + } + else continue; + break; + case '#': + if (len - ++i >= 2 && ISDIGIT(cstr[i])) { + cc = ruby_scan_digits(&cstr[i], len-i, 10, &clen, &overflow); + } + else if ((cstr[i] == 'x' || cstr[i] == 'X') && len - ++i >= 2 && ISXDIGIT(cstr[i])) { + cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow); + } + else continue; + i += clen; + if (overflow || cc >= charlimit || cstr[i] != ';') continue; + if (!dest) { + dest = rb_str_buf_new(len); + } + rb_str_cat(dest, cstr + beg, plen); + if (charlimit > 256) { + rb_str_cat(dest, buf, rb_enc_mbcput((OnigCodePoint)cc, buf, enc)); + } + else { + c = (unsigned char)cc; + rb_str_cat(dest, &c, 1); + } + beg = i + 1; + continue; + default: + --i; + continue; + } + if (!dest) { + dest = rb_str_buf_new(len); + } + rb_str_cat(dest, cstr + beg, plen); + rb_str_cat(dest, &c, 1); + beg = i + 1; } if (dest) { - rb_str_cat(dest, cstr + beg, len - beg); - preserve_original_state(str, dest); - return dest; + rb_str_cat(dest, cstr + beg, len - beg); + preserve_original_state(str, dest); + return dest; } else { - return rb_str_dup(str); + return rb_str_dup(str); } } @@ -200,7 +200,7 @@ url_unreserved_char(unsigned char c) } static VALUE -optimized_escape(VALUE str) +optimized_escape(VALUE str, int plus_escape) { long i, len, beg = 0; VALUE dest = 0; @@ -211,38 +211,38 @@ optimized_escape(VALUE str) cstr = RSTRING_PTR(str); for (i = 0; i < len; ++i) { - const unsigned char c = (unsigned char)cstr[i]; - if (!url_unreserved_char(c)) { - if (!dest) { - dest = rb_str_buf_new(len); - } - - rb_str_cat(dest, cstr + beg, i - beg); - beg = i + 1; - - if (c == ' ') { - rb_str_cat_cstr(dest, "+"); - } - else { - buf[1] = upper_hexdigits[(c >> 4) & 0xf]; - buf[2] = upper_hexdigits[c & 0xf]; - rb_str_cat(dest, buf, 3); - } - } + const unsigned char c = (unsigned char)cstr[i]; + if (!url_unreserved_char(c)) { + if (!dest) { + dest = rb_str_buf_new(len); + } + + rb_str_cat(dest, cstr + beg, i - beg); + beg = i + 1; + + if (plus_escape && c == ' ') { + rb_str_cat_cstr(dest, "+"); + } + else { + buf[1] = upper_hexdigits[(c >> 4) & 0xf]; + buf[2] = upper_hexdigits[c & 0xf]; + rb_str_cat(dest, buf, 3); + } + } } if (dest) { - rb_str_cat(dest, cstr + beg, len - beg); - preserve_original_state(str, dest); - return dest; + rb_str_cat(dest, cstr + beg, len - beg); + preserve_original_state(str, dest); + return dest; } else { - return rb_str_dup(str); + return rb_str_dup(str); } } static VALUE -optimized_unescape(VALUE str, VALUE encoding) +optimized_unescape(VALUE str, VALUE encoding, int unescape_plus) { long i, len, beg = 0; VALUE dest = 0; @@ -254,52 +254,52 @@ optimized_unescape(VALUE str, VALUE encoding) cstr = RSTRING_PTR(str); for (i = 0; i < len; ++i) { - char buf[1]; - const char c = cstr[i]; - int clen = 0; - if (c == '%') { - if (i + 3 > len) break; - if (!ISXDIGIT(cstr[i+1])) continue; - if (!ISXDIGIT(cstr[i+2])) continue; - buf[0] = ((char_to_number(cstr[i+1]) << 4) - | char_to_number(cstr[i+2])); - clen = 2; - } - else if (c == '+') { - buf[0] = ' '; - } - else { - continue; - } - - if (!dest) { - dest = rb_str_buf_new(len); - } - - rb_str_cat(dest, cstr + beg, i - beg); - i += clen; - beg = i + 1; - - rb_str_cat(dest, buf, 1); + char buf[1]; + const char c = cstr[i]; + int clen = 0; + if (c == '%') { + if (i + 3 > len) break; + if (!ISXDIGIT(cstr[i+1])) continue; + if (!ISXDIGIT(cstr[i+2])) continue; + buf[0] = ((char_to_number(cstr[i+1]) << 4) + | char_to_number(cstr[i+2])); + clen = 2; + } + else if (unescape_plus && c == '+') { + buf[0] = ' '; + } + else { + continue; + } + + if (!dest) { + dest = rb_str_buf_new(len); + } + + rb_str_cat(dest, cstr + beg, i - beg); + i += clen; + beg = i + 1; + + rb_str_cat(dest, buf, 1); } if (dest) { - rb_str_cat(dest, cstr + beg, len - beg); - preserve_original_state(str, dest); - cr = ENC_CODERANGE_UNKNOWN; + rb_str_cat(dest, cstr + beg, len - beg); + preserve_original_state(str, dest); + cr = ENC_CODERANGE_UNKNOWN; } else { - dest = rb_str_dup(str); - cr = ENC_CODERANGE(str); + dest = rb_str_dup(str); + cr = ENC_CODERANGE(str); } origenc = rb_enc_get_index(str); if (origenc != encidx) { - rb_enc_associate_index(dest, encidx); - if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) { - rb_enc_associate_index(dest, origenc); - if (cr != ENC_CODERANGE_UNKNOWN) - ENC_CODERANGE_SET(dest, cr); - } + rb_enc_associate_index(dest, encidx); + if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) { + rb_enc_associate_index(dest, origenc); + if (cr != ENC_CODERANGE_UNKNOWN) + ENC_CODERANGE_SET(dest, cr); + } } return dest; } @@ -317,10 +317,10 @@ cgiesc_escape_html(VALUE self, VALUE str) StringValue(str); if (rb_enc_str_asciicompat_p(str)) { - return optimized_escape_html(str); + return optimized_escape_html(str); } else { - return rb_call_super(1, &str); + return rb_call_super(1, &str); } } @@ -337,10 +337,10 @@ cgiesc_unescape_html(VALUE self, VALUE str) StringValue(str); if (rb_enc_str_asciicompat_p(str)) { - return optimized_unescape_html(str); + return optimized_unescape_html(str); } else { - return rb_call_super(1, &str); + return rb_call_super(1, &str); } } @@ -348,7 +348,7 @@ cgiesc_unescape_html(VALUE self, VALUE str) * call-seq: * CGI.escape(string) -> string * - * Returns URL-escaped string. + * Returns URL-escaped string (+application/x-www-form-urlencoded+). * */ static VALUE @@ -357,10 +357,10 @@ cgiesc_escape(VALUE self, VALUE str) StringValue(str); if (rb_enc_str_asciicompat_p(str)) { - return optimized_escape(str); + return optimized_escape(str, 1); } else { - return rb_call_super(1, &str); + return rb_call_super(1, &str); } } @@ -368,7 +368,7 @@ static VALUE accept_charset(int argc, VALUE *argv, VALUE self) { if (argc > 0) - return argv[0]; + return argv[0]; return rb_cvar_get(CLASS_OF(self), id_accept_charset); } @@ -376,7 +376,7 @@ accept_charset(int argc, VALUE *argv, VALUE self) * call-seq: * CGI.unescape(string, encoding=@@accept_charset) -> string * - * Returns URL-unescaped string. + * Returns URL-unescaped string (+application/x-www-form-urlencoded+). * */ static VALUE @@ -387,11 +387,54 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self) StringValue(str); if (rb_enc_str_asciicompat_p(str)) { - VALUE enc = accept_charset(argc-1, argv+1, self); - return optimized_unescape(str, enc); + VALUE enc = accept_charset(argc-1, argv+1, self); + return optimized_unescape(str, enc, 1); + } + else { + return rb_call_super(argc, argv); + } +} + +/* + * call-seq: + * CGI.escapeURIComponent(string) -> string + * + * Returns URL-escaped string following RFC 3986. + * + */ +static VALUE +cgiesc_escape_uri_component(VALUE self, VALUE str) +{ + StringValue(str); + + if (rb_enc_str_asciicompat_p(str)) { + return optimized_escape(str, 0); + } + else { + return rb_call_super(1, &str); + } +} + +/* + * call-seq: + * CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string + * + * Returns URL-unescaped string following RFC 3986. + * + */ +static VALUE +cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self) +{ + VALUE str = (rb_check_arity(argc, 1, 2), argv[0]); + + StringValue(str); + + if (rb_enc_str_asciicompat_p(str)) { + VALUE enc = accept_charset(argc-1, argv+1, self); + return optimized_unescape(str, enc, 0); } else { - return rb_call_super(argc, argv); + return rb_call_super(argc, argv); } } @@ -414,6 +457,8 @@ InitVM_escape(void) rb_mUtil = rb_define_module_under(rb_cCGI, "Util"); rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1); rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1); + rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1); + rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1); rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1); rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1); rb_prepend_module(rb_mUtil, rb_mEscape); diff --git a/lib/cgi.rb b/lib/cgi.rb index af466bc673..dda5eb1712 100644 --- a/lib/cgi.rb +++ b/lib/cgi.rb @@ -162,7 +162,7 @@ # cgi.has_key?('field_name') # cgi.include?('field_name') # -# CAUTION! cgi['field_name'] returned an Array with the old +# CAUTION! <code>cgi['field_name']</code> returned an Array with the old # cgi.rb(included in Ruby 1.6) # # === Get form values as hash @@ -288,7 +288,7 @@ # class CGI - VERSION = "0.3.2" + VERSION = "0.3.5" end require 'cgi/core' diff --git a/lib/cgi/cgi.gemspec b/lib/cgi/cgi.gemspec index 3ba62b93f6..381c55a5ca 100644 --- a/lib/cgi/cgi.gemspec +++ b/lib/cgi/cgi.gemspec @@ -22,10 +22,21 @@ Gem::Specification.new do |spec| spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = spec.homepage - spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do - `git ls-files -z 2>/dev/null`.split("\x0").reject { |f| f.match(%r{\A(?:(?:test|spec|features)/|\.git)}) } - end - spec.extensions = ["ext/cgi/escape/extconf.rb"] spec.executables = [] + + spec.files = [ + "LICENSE.txt", + "README.md", + *Dir["lib{.rb,/**/*.rb}", "bin/*"] ] + spec.require_paths = ["lib"] + + if Gem::Platform === spec.platform and spec.platform =~ 'java' or RUBY_ENGINE == 'jruby' + spec.platform = 'java' + spec.require_paths << "ext/java/org/jruby/ext/cgi/escape/lib" + spec.files += Dir["ext/java/**/*.{rb}", "lib/cgi/escape.jar"] + else + spec.files += Dir["ext/cgi/**/*.{rb,c,h,sh}", "ext/cgi/escape/depend", "lib/cgi/escape.so"] + spec.extensions = ["ext/cgi/escape/extconf.rb"] + end end diff --git a/lib/cgi/cookie.rb b/lib/cgi/cookie.rb index 6b0d89ca3b..1a9c1a82c1 100644 --- a/lib/cgi/cookie.rb +++ b/lib/cgi/cookie.rb @@ -40,6 +40,10 @@ class CGI class Cookie < Array @@accept_charset="UTF-8" unless defined?(@@accept_charset) + TOKEN_RE = %r"\A[[!-~]&&[^()<>@,;:\\\"/?=\[\]{}]]+\z" + PATH_VALUE_RE = %r"\A[[ -~]&&[^;]]*\z" + DOMAIN_VALUE_RE = %r"\A(?<label>(?!-)[-A-Za-z0-9]+(?<!-))(?:\.\g<label>)*\z" + # Create a new CGI::Cookie object. # # :call-seq: @@ -72,8 +76,8 @@ class CGI @domain = nil @expires = nil if name.kind_of?(String) - @name = name - @path = (%r|\A(.*/)| =~ ENV["SCRIPT_NAME"] ? $1 : "") + self.name = name + self.path = (%r|\A(.*/)| =~ ENV["SCRIPT_NAME"] ? $1 : "") @secure = false @httponly = false return super(value) @@ -84,11 +88,11 @@ class CGI raise ArgumentError, "`name' required" end - @name = options["name"] + self.name = options["name"] value = Array(options["value"]) # simple support for IE - @path = options["path"] || (%r|\A(.*/)| =~ ENV["SCRIPT_NAME"] ? $1 : "") - @domain = options["domain"] + self.path = options["path"] || (%r|\A(.*/)| =~ ENV["SCRIPT_NAME"] ? $1 : "") + self.domain = options["domain"] @expires = options["expires"] @secure = options["secure"] == true @httponly = options["httponly"] == true @@ -97,11 +101,35 @@ class CGI end # Name of this cookie, as a +String+ - attr_accessor :name + attr_reader :name + # Set name of this cookie + def name=(str) + if str and !TOKEN_RE.match?(str) + raise ArgumentError, "invalid name: #{str.dump}" + end + @name = str + end + # Path for which this cookie applies, as a +String+ - attr_accessor :path + attr_reader :path + # Set path for which this cookie applies + def path=(str) + if str and !PATH_VALUE_RE.match?(str) + raise ArgumentError, "invalid path: #{str.dump}" + end + @path = str + end + # Domain for which this cookie applies, as a +String+ - attr_accessor :domain + attr_reader :domain + # Set domain for which this cookie applies + def domain=(str) + if str and ((str = str.b).bytesize > 255 or !DOMAIN_VALUE_RE.match?(str)) + raise ArgumentError, "invalid domain: #{str.dump}" + end + @domain = str + end + # Time at which this cookie expires, as a +Time+ attr_accessor :expires # True if this cookie is secure; false otherwise diff --git a/lib/cgi/core.rb b/lib/cgi/core.rb index bec76e0749..62e606837a 100644 --- a/lib/cgi/core.rb +++ b/lib/cgi/core.rb @@ -188,17 +188,28 @@ class CGI # Using #header with the HTML5 tag maker will create a <header> element. alias :header :http_header + def _no_crlf_check(str) + if str + str = str.to_s + raise "A HTTP status or header field must not include CR and LF" if str =~ /[\r\n]/ + str + else + nil + end + end + private :_no_crlf_check + def _header_for_string(content_type) #:nodoc: buf = ''.dup if nph?() - buf << "#{$CGI_ENV['SERVER_PROTOCOL'] || 'HTTP/1.0'} 200 OK#{EOL}" + buf << "#{_no_crlf_check($CGI_ENV['SERVER_PROTOCOL']) || 'HTTP/1.0'} 200 OK#{EOL}" buf << "Date: #{CGI.rfc1123_date(Time.now)}#{EOL}" - buf << "Server: #{$CGI_ENV['SERVER_SOFTWARE']}#{EOL}" + buf << "Server: #{_no_crlf_check($CGI_ENV['SERVER_SOFTWARE'])}#{EOL}" buf << "Connection: close#{EOL}" end - buf << "Content-Type: #{content_type}#{EOL}" + buf << "Content-Type: #{_no_crlf_check(content_type)}#{EOL}" if @output_cookies - @output_cookies.each {|cookie| buf << "Set-Cookie: #{cookie}#{EOL}" } + @output_cookies.each {|cookie| buf << "Set-Cookie: #{_no_crlf_check(cookie)}#{EOL}" } end return buf end # _header_for_string @@ -213,9 +224,9 @@ class CGI ## NPH options.delete('nph') if defined?(MOD_RUBY) if options.delete('nph') || nph?() - protocol = $CGI_ENV['SERVER_PROTOCOL'] || 'HTTP/1.0' + protocol = _no_crlf_check($CGI_ENV['SERVER_PROTOCOL']) || 'HTTP/1.0' status = options.delete('status') - status = HTTP_STATUS[status] || status || '200 OK' + status = HTTP_STATUS[status] || _no_crlf_check(status) || '200 OK' buf << "#{protocol} #{status}#{EOL}" buf << "Date: #{CGI.rfc1123_date(Time.now)}#{EOL}" options['server'] ||= $CGI_ENV['SERVER_SOFTWARE'] || '' @@ -223,38 +234,38 @@ class CGI end ## common headers status = options.delete('status') - buf << "Status: #{HTTP_STATUS[status] || status}#{EOL}" if status + buf << "Status: #{HTTP_STATUS[status] || _no_crlf_check(status)}#{EOL}" if status server = options.delete('server') - buf << "Server: #{server}#{EOL}" if server + buf << "Server: #{_no_crlf_check(server)}#{EOL}" if server connection = options.delete('connection') - buf << "Connection: #{connection}#{EOL}" if connection + buf << "Connection: #{_no_crlf_check(connection)}#{EOL}" if connection type = options.delete('type') - buf << "Content-Type: #{type}#{EOL}" #if type + buf << "Content-Type: #{_no_crlf_check(type)}#{EOL}" #if type length = options.delete('length') - buf << "Content-Length: #{length}#{EOL}" if length + buf << "Content-Length: #{_no_crlf_check(length)}#{EOL}" if length language = options.delete('language') - buf << "Content-Language: #{language}#{EOL}" if language + buf << "Content-Language: #{_no_crlf_check(language)}#{EOL}" if language expires = options.delete('expires') buf << "Expires: #{CGI.rfc1123_date(expires)}#{EOL}" if expires ## cookie if cookie = options.delete('cookie') case cookie when String, Cookie - buf << "Set-Cookie: #{cookie}#{EOL}" + buf << "Set-Cookie: #{_no_crlf_check(cookie)}#{EOL}" when Array arr = cookie - arr.each {|c| buf << "Set-Cookie: #{c}#{EOL}" } + arr.each {|c| buf << "Set-Cookie: #{_no_crlf_check(c)}#{EOL}" } when Hash hash = cookie - hash.each_value {|c| buf << "Set-Cookie: #{c}#{EOL}" } + hash.each_value {|c| buf << "Set-Cookie: #{_no_crlf_check(c)}#{EOL}" } end end if @output_cookies - @output_cookies.each {|c| buf << "Set-Cookie: #{c}#{EOL}" } + @output_cookies.each {|c| buf << "Set-Cookie: #{_no_crlf_check(c)}#{EOL}" } end ## other headers options.each do |key, value| - buf << "#{key}: #{value}#{EOL}" + buf << "#{_no_crlf_check(key)}: #{_no_crlf_check(value)}#{EOL}" end return buf end # _header_for_hash diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb index 55e61bf984..5a5c77ac97 100644 --- a/lib/cgi/util.rb +++ b/lib/cgi/util.rb @@ -5,24 +5,57 @@ class CGI extend Util end module CGI::Util - @@accept_charset="UTF-8" unless defined?(@@accept_charset) - # URL-encode a string. + @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset) + + # URL-encode a string into application/x-www-form-urlencoded. + # Space characters (+" "+) are encoded with plus signs (+"+"+) # url_encoded_string = CGI.escape("'Stop!' said Fred") # # => "%27Stop%21%27+said+Fred" def escape(string) encoding = string.encoding - string.b.gsub(/([^ a-zA-Z0-9_.\-~]+)/) do |m| + buffer = string.b + buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m| '%' + m.unpack('H2' * m.bytesize).join('%').upcase - end.tr(' ', '+').force_encoding(encoding) + end + buffer.tr!(' ', '+') + buffer.force_encoding(encoding) end - # URL-decode a string with encoding(optional). + # URL-decode an application/x-www-form-urlencoded string with encoding(optional). # string = CGI.unescape("%27Stop%21%27+said+Fred") # # => "'Stop!' said Fred" - def unescape(string,encoding=@@accept_charset) - str=string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m| + def unescape(string, encoding = @@accept_charset) + str = string.tr('+', ' ') + str = str.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| + [m.delete('%')].pack('H*') + end + str.force_encoding(encoding) + str.valid_encoding? ? str : str.force_encoding(string.encoding) + end + + # URL-encode a string following RFC 3986 + # Space characters (+" "+) are encoded with (+"%20"+) + # url_encoded_string = CGI.escape("'Stop!' said Fred") + # # => "%27Stop%21%27%20said%20Fred" + def escapeURIComponent(string) + encoding = string.encoding + buffer = string.b + buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m| + '%' + m.unpack('H2' * m.bytesize).join('%').upcase + end + buffer.force_encoding(encoding) + end + + # URL-decode a string following RFC 3986 with encoding(optional). + # string = CGI.unescape("%27Stop%21%27+said%20Fred") + # # => "'Stop!'+said Fred" + def unescapeURIComponent(string, encoding = @@accept_charset) + str = string.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| [m.delete('%')].pack('H*') - end.force_encoding(encoding) + end + str.force_encoding(encoding) str.valid_encoding? ? str : str.force_encoding(string.encoding) end diff --git a/test/cgi/test_cgi_cookie.rb b/test/cgi/test_cgi_cookie.rb index 985cc0d7a1..e3ec4bea52 100644 --- a/test/cgi/test_cgi_cookie.rb +++ b/test/cgi/test_cgi_cookie.rb @@ -60,6 +60,24 @@ class CGICookieTest < Test::Unit::TestCase end + def test_cgi_cookie_new_with_domain + h = {'name'=>'name1', 'value'=>'value1'} + cookie = CGI::Cookie.new('domain'=>'a.example.com', **h) + assert_equal('a.example.com', cookie.domain) + + cookie = CGI::Cookie.new('domain'=>'1.example.com', **h) + assert_equal('1.example.com', cookie.domain, 'enhanced by RFC 1123') + + assert_raise(ArgumentError) { + CGI::Cookie.new('domain'=>'-a.example.com', **h) + } + + assert_raise(ArgumentError) { + CGI::Cookie.new('domain'=>'a-.example.com', **h) + } + end + + def test_cgi_cookie_scriptname cookie = CGI::Cookie.new('name1', 'value1') assert_equal('', cookie.path) @@ -118,6 +136,70 @@ class CGICookieTest < Test::Unit::TestCase end + def test_cgi_cookie_domain_injection_into_name + name = "a=b; domain=example.com;" + path = "/" + domain = "example.jp" + assert_raise(ArgumentError) do + CGI::Cookie.new('name' => name, + 'value' => "value", + 'domain' => domain, + 'path' => path) + end + end + + + def test_cgi_cookie_newline_injection_into_name + name = "a=b;\r\nLocation: http://example.com#" + path = "/" + domain = "example.jp" + assert_raise(ArgumentError) do + CGI::Cookie.new('name' => name, + 'value' => "value", + 'domain' => domain, + 'path' => path) + end + end + + + def test_cgi_cookie_multibyte_injection_into_name + name = "a=b;\u3042" + path = "/" + domain = "example.jp" + assert_raise(ArgumentError) do + CGI::Cookie.new('name' => name, + 'value' => "value", + 'domain' => domain, + 'path' => path) + end + end + + + def test_cgi_cookie_injection_into_path + name = "name" + path = "/; samesite=none" + domain = "example.jp" + assert_raise(ArgumentError) do + CGI::Cookie.new('name' => name, + 'value' => "value", + 'domain' => domain, + 'path' => path) + end + end + + + def test_cgi_cookie_injection_into_domain + name = "name" + path = "/" + domain = "example.jp; samesite=none" + assert_raise(ArgumentError) do + CGI::Cookie.new('name' => name, + 'value' => "value", + 'domain' => domain, + 'path' => path) + end + end + instance_methods.each do |method| private method if method =~ /^test_(.*)/ && $1 != ENV['TEST'] diff --git a/test/cgi/test_cgi_header.rb b/test/cgi/test_cgi_header.rb index bab2d0348a..ec2f4deb72 100644 --- a/test/cgi/test_cgi_header.rb +++ b/test/cgi/test_cgi_header.rb @@ -176,6 +176,14 @@ class CGIHeaderTest < Test::Unit::TestCase end + def test_cgi_http_header_crlf_injection + cgi = CGI.new + assert_raise(RuntimeError) { cgi.http_header("text/xhtml\r\nBOO") } + assert_raise(RuntimeError) { cgi.http_header("type" => "text/xhtml\r\nBOO") } + assert_raise(RuntimeError) { cgi.http_header("status" => "200 OK\r\nBOO") } + assert_raise(RuntimeError) { cgi.http_header("location" => "text/xhtml\r\nBOO") } + end + instance_methods.each do |method| private method if method =~ /^test_(.*)/ && $1 != ENV['TEST'] diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb index 5a2d07b328..a3be193a13 100644 --- a/test/cgi/test_cgi_util.rb +++ b/test/cgi/test_cgi_util.rb @@ -23,7 +23,6 @@ class CGIUtilTest < Test::Unit::TestCase ENV.update(@environ) end - def test_cgi_escape assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1)) assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding) @@ -70,6 +69,54 @@ class CGIUtilTest < Test::Unit::TestCase end; end + def test_cgi_escapeURIComponent + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escapeURIComponent(@str1)) + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escapeURIComponent(@str1).ascii_only?) if defined?(::Encoding) + end + + def test_cgi_escapeURIComponent_with_unreserved_characters + assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", + CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"), + "should not encode any unreserved characters, as per RFC3986 Section 2.3") + end + + def test_cgi_escapeURIComponent_with_invalid_byte_sequence + assert_equal('%C0%3C%3C', CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8"))) + end + + def test_cgi_escapeURIComponent_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescapeURIComponent + str = CGI.unescapeURIComponent('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93') + assert_equal(@str1, str) + return unless defined?(::Encoding) + + assert_equal("foo+bar", CGI.unescapeURIComponent("foo+bar")) + + assert_equal(@str1.encoding, str.encoding) + assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) + end + + def test_cgi_unescapeURIComponent_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescapeURIComponent_accept_charset + return unless defined?(::Encoding) + + assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)} + assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}") + begin; + assert_equal("", CGI.unescapeURIComponent('')) + end; + end + def test_cgi_pretty assert_equal("<HTML>\n <BODY>\n </BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>")) assert_equal("<HTML>\n\t<BODY>\n\t</BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>","\t")) @@ -105,6 +152,7 @@ class CGIUtilTest < Test::Unit::TestCase end def test_cgi_escape_html_large + return if RUBY_ENGINE == 'jruby' ulong_max, size_max = RbConfig::LIMITS.values_at("ULONG_MAX", "SIZE_MAX") return unless ulong_max < size_max # Platforms not concerned |