Merge CGI-0.3.5

author: Hiroshi SHIBATA <hsbt@ruby-lang.org> 2022-11-22 13:50:06 +0900
committer: nagachika <nagachika@ruby-lang.org> 2022-11-22 23:04:45 +0900
commit: 16ea2213295137bb25f10225408c54de452a507b (patch)
tree: 079b14ad168284cef887490b76b87102be48f1d6
parent: 14704023ec57546b1bed85d54f4f425986385ab4 (diff)
9 files changed, 460 insertions, 194 deletions
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c
index f88b61478b..c5b76de596 100644
--- a/ext/cgi/escape/escape.c
+++ b/ext/cgi/escape/escape.c
@@ -37,7 +37,7 @@ escaped_length(VALUE str)
 {
     const long len = RSTRING_LEN(str);
     if (len >= LONG_MAX / HTML_ESCAPE_MAX_LEN) {
-	ruby_malloc_size_overflow(len, HTML_ESCAPE_MAX_LEN);
+        ruby_malloc_size_overflow(len, HTML_ESCAPE_MAX_LEN);
     }
     return len * HTML_ESCAPE_MAX_LEN;
 }
@@ -81,8 +81,8 @@ optimized_unescape_html(VALUE str)
     enum {UNICODE_MAX = 0x10ffff};
     rb_encoding *enc = rb_enc_get(str);
     unsigned long charlimit = (strcasecmp(rb_enc_name(enc), "UTF-8") == 0 ? UNICODE_MAX :
-			       strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 :
-			       128);
+                               strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 :
+                               128);
     long i, len, beg = 0;
     size_t clen, plen;
     int overflow;
@@ -94,89 +94,89 @@ optimized_unescape_html(VALUE str)
     cstr = RSTRING_PTR(str);
 
     for (i = 0; i < len; i++) {
-	unsigned long cc;
-	char c = cstr[i];
-	if (c != '&') continue;
-	plen = i - beg;
-	if (++i >= len) break;
-	c = (unsigned char)cstr[i];
+        unsigned long cc;
+        char c = cstr[i];
+        if (c != '&') continue;
+        plen = i - beg;
+        if (++i >= len) break;
+        c = (unsigned char)cstr[i];
 #define MATCH(s) (len - i >= (int)rb_strlen_lit(s) && \
-		  memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \
-		  (i += rb_strlen_lit(s) - 1, 1))
-	switch (c) {
-	  case 'a':
-	    ++i;
-	    if (MATCH("pos;")) {
-		c = '\'';
-	    }
-	    else if (MATCH("mp;")) {
-		c = '&';
-	    }
-	    else continue;
-	    break;
-	  case 'q':
-	    ++i;
-	    if (MATCH("uot;")) {
-		c = '"';
-	    }
-	    else continue;
-	    break;
-	  case 'g':
-	    ++i;
-	    if (MATCH("t;")) {
-		c = '>';
-	    }
-	    else continue;
-	    break;
-	  case 'l':
-	    ++i;
-	    if (MATCH("t;")) {
-		c = '<';
-	    }
-	    else continue;
-	    break;
-	  case '#':
-	    if (len - ++i >= 2 && ISDIGIT(cstr[i])) {
-		cc = ruby_scan_digits(&cstr[i], len-i, 10, &clen, &overflow);
-	    }
-	    else if ((cstr[i] == 'x' || cstr[i] == 'X') && len - ++i >= 2 && ISXDIGIT(cstr[i])) {
-		cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow);
-	    }
-	    else continue;
-	    i += clen;
-	    if (overflow || cc >= charlimit || cstr[i] != ';') continue;
-	    if (!dest) {
-		dest = rb_str_buf_new(len);
-	    }
-	    rb_str_cat(dest, cstr + beg, plen);
-	    if (charlimit > 256) {
-		rb_str_cat(dest, buf, rb_enc_mbcput((OnigCodePoint)cc, buf, enc));
-	    }
-	    else {
-		c = (unsigned char)cc;
-		rb_str_cat(dest, &c, 1);
-	    }
-	    beg = i + 1;
-	    continue;
-	  default:
-	    --i;
-	    continue;
-	}
-	if (!dest) {
-	    dest = rb_str_buf_new(len);
-	}
-	rb_str_cat(dest, cstr + beg, plen);
-	rb_str_cat(dest, &c, 1);
-	beg = i + 1;
+                  memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \
+                  (i += rb_strlen_lit(s) - 1, 1))
+        switch (c) {
+          case 'a':
+            ++i;
+            if (MATCH("pos;")) {
+                c = '\'';
+            }
+            else if (MATCH("mp;")) {
+                c = '&';
+            }
+            else continue;
+            break;
+          case 'q':
+            ++i;
+            if (MATCH("uot;")) {
+                c = '"';
+            }
+            else continue;
+            break;
+          case 'g':
+            ++i;
+            if (MATCH("t;")) {
+                c = '>';
+            }
+            else continue;
+            break;
+          case 'l':
+            ++i;
+            if (MATCH("t;")) {
+                c = '<';
+            }
+            else continue;
+            break;
+          case '#':
+            if (len - ++i >= 2 && ISDIGIT(cstr[i])) {
+                cc = ruby_scan_digits(&cstr[i], len-i, 10, &clen, &overflow);
+            }
+            else if ((cstr[i] == 'x' || cstr[i] == 'X') && len - ++i >= 2 && ISXDIGIT(cstr[i])) {
+                cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow);
+            }
+            else continue;
+            i += clen;
+            if (overflow || cc >= charlimit || cstr[i] != ';') continue;
+            if (!dest) {
+                dest = rb_str_buf_new(len);
+            }
+            rb_str_cat(dest, cstr + beg, plen);
+            if (charlimit > 256) {
+                rb_str_cat(dest, buf, rb_enc_mbcput((OnigCodePoint)cc, buf, enc));
+            }
+            else {
+                c = (unsigned char)cc;
+                rb_str_cat(dest, &c, 1);
+            }
+            beg = i + 1;
+            continue;
+          default:
+            --i;
+            continue;
+        }
+        if (!dest) {
+            dest = rb_str_buf_new(len);
+        }
+        rb_str_cat(dest, cstr + beg, plen);
+        rb_str_cat(dest, &c, 1);
+        beg = i + 1;
     }
 
     if (dest) {
-	rb_str_cat(dest, cstr + beg, len - beg);
-	preserve_original_state(str, dest);
-	return dest;
+        rb_str_cat(dest, cstr + beg, len - beg);
+        preserve_original_state(str, dest);
+        return dest;
     }
     else {
-	return rb_str_dup(str);
+        return rb_str_dup(str);
     }
 }
 
@@ -200,7 +200,7 @@ url_unreserved_char(unsigned char c)
 }
 
 static VALUE
-optimized_escape(VALUE str)
+optimized_escape(VALUE str, int plus_escape)
 {
     long i, len, beg = 0;
     VALUE dest = 0;
@@ -211,38 +211,38 @@ optimized_escape(VALUE str)
     cstr = RSTRING_PTR(str);
 
     for (i = 0; i < len; ++i) {
-	const unsigned char c = (unsigned char)cstr[i];
-	if (!url_unreserved_char(c)) {
-	    if (!dest) {
-		dest = rb_str_buf_new(len);
-	    }
-
-	    rb_str_cat(dest, cstr + beg, i - beg);
-	    beg = i + 1;
-
-	    if (c == ' ') {
-		rb_str_cat_cstr(dest, "+");
-	    }
-	    else {
-		buf[1] = upper_hexdigits[(c >> 4) & 0xf];
-		buf[2] = upper_hexdigits[c & 0xf];
-		rb_str_cat(dest, buf, 3);
-	    }
-	}
+        const unsigned char c = (unsigned char)cstr[i];
+        if (!url_unreserved_char(c)) {
+            if (!dest) {
+                dest = rb_str_buf_new(len);
+            }
+
+            rb_str_cat(dest, cstr + beg, i - beg);
+            beg = i + 1;
+
+            if (plus_escape && c == ' ') {
+                rb_str_cat_cstr(dest, "+");
+            }
+            else {
+                buf[1] = upper_hexdigits[(c >> 4) & 0xf];
+                buf[2] = upper_hexdigits[c & 0xf];
+                rb_str_cat(dest, buf, 3);
+            }
+        }
     }
 
     if (dest) {
-	rb_str_cat(dest, cstr + beg, len - beg);
-	preserve_original_state(str, dest);
-	return dest;
+        rb_str_cat(dest, cstr + beg, len - beg);
+        preserve_original_state(str, dest);
+        return dest;
     }
     else {
-	return rb_str_dup(str);
+        return rb_str_dup(str);
     }
 }
 
 static VALUE
-optimized_unescape(VALUE str, VALUE encoding)
+optimized_unescape(VALUE str, VALUE encoding, int unescape_plus)
 {
     long i, len, beg = 0;
     VALUE dest = 0;
@@ -254,52 +254,52 @@ optimized_unescape(VALUE str, VALUE encoding)
     cstr = RSTRING_PTR(str);
 
     for (i = 0; i < len; ++i) {
-	char buf[1];
-	const char c = cstr[i];
-	int clen = 0;
-	if (c == '%') {
-	    if (i + 3 > len) break;
-	    if (!ISXDIGIT(cstr[i+1])) continue;
-	    if (!ISXDIGIT(cstr[i+2])) continue;
-	    buf[0] = ((char_to_number(cstr[i+1]) << 4)
-		      | char_to_number(cstr[i+2]));
-	    clen = 2;
-	}
-	else if (c == '+') {
-	    buf[0] = ' ';
-	}
-	else {
-	    continue;
-	}
-
-	if (!dest) {
-	    dest = rb_str_buf_new(len);
-	}
-
-	rb_str_cat(dest, cstr + beg, i - beg);
-	i += clen;
-	beg = i + 1;
-
-	rb_str_cat(dest, buf, 1);
+        char buf[1];
+        const char c = cstr[i];
+        int clen = 0;
+        if (c == '%') {
+            if (i + 3 > len) break;
+            if (!ISXDIGIT(cstr[i+1])) continue;
+            if (!ISXDIGIT(cstr[i+2])) continue;
+            buf[0] = ((char_to_number(cstr[i+1]) << 4)
+                      | char_to_number(cstr[i+2]));
+            clen = 2;
+        }
+        else if (unescape_plus && c == '+') {
+            buf[0] = ' ';
+        }
+        else {
+            continue;
+        }
+
+        if (!dest) {
+            dest = rb_str_buf_new(len);
+        }
+
+        rb_str_cat(dest, cstr + beg, i - beg);
+        i += clen;
+        beg = i + 1;
+
+        rb_str_cat(dest, buf, 1);
     }
 
     if (dest) {
-	rb_str_cat(dest, cstr + beg, len - beg);
-	preserve_original_state(str, dest);
-	cr = ENC_CODERANGE_UNKNOWN;
+        rb_str_cat(dest, cstr + beg, len - beg);
+        preserve_original_state(str, dest);
+        cr = ENC_CODERANGE_UNKNOWN;
     }
     else {
-	dest = rb_str_dup(str);
-	cr = ENC_CODERANGE(str);
+        dest = rb_str_dup(str);
+        cr = ENC_CODERANGE(str);
     }
     origenc = rb_enc_get_index(str);
     if (origenc != encidx) {
-	rb_enc_associate_index(dest, encidx);
-	if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) {
-	    rb_enc_associate_index(dest, origenc);
-	    if (cr != ENC_CODERANGE_UNKNOWN)
-		ENC_CODERANGE_SET(dest, cr);
-	}
+        rb_enc_associate_index(dest, encidx);
+        if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) {
+            rb_enc_associate_index(dest, origenc);
+            if (cr != ENC_CODERANGE_UNKNOWN)
+                ENC_CODERANGE_SET(dest, cr);
+        }
     }
     return dest;
 }
@@ -317,10 +317,10 @@ cgiesc_escape_html(VALUE self, VALUE str)
     StringValue(str);
 
     if (rb_enc_str_asciicompat_p(str)) {
-	return optimized_escape_html(str);
+        return optimized_escape_html(str);
     }
     else {
-	return rb_call_super(1, &str);
+        return rb_call_super(1, &str);
     }
 }
 
@@ -337,10 +337,10 @@ cgiesc_unescape_html(VALUE self, VALUE str)
     StringValue(str);
 
     if (rb_enc_str_asciicompat_p(str)) {
-	return optimized_unescape_html(str);
+        return optimized_unescape_html(str);
     }
     else {
-	return rb_call_super(1, &str);
+        return rb_call_super(1, &str);
     }
 }
 
@@ -348,7 +348,7 @@ cgiesc_unescape_html(VALUE self, VALUE str)
  *  call-seq:
  *     CGI.escape(string) -> string
  *
- *  Returns URL-escaped string.
+ *  Returns URL-escaped string (+application/x-www-form-urlencoded+).
  *
  */
 static VALUE
@@ -357,10 +357,10 @@ cgiesc_escape(VALUE self, VALUE str)
     StringValue(str);
 
     if (rb_enc_str_asciicompat_p(str)) {
-	return optimized_escape(str);
+        return optimized_escape(str, 1);
     }
     else {
-	return rb_call_super(1, &str);
+        return rb_call_super(1, &str);
     }
 }
 
@@ -368,7 +368,7 @@ static VALUE
 accept_charset(int argc, VALUE *argv, VALUE self)
 {
     if (argc > 0)
-	return argv[0];
+        return argv[0];
     return rb_cvar_get(CLASS_OF(self), id_accept_charset);
 }
 
@@ -376,7 +376,7 @@ accept_charset(int argc, VALUE *argv, VALUE self)
  *  call-seq:
  *     CGI.unescape(string, encoding=@@accept_charset) -> string
  *
- *  Returns URL-unescaped string.
+ *  Returns URL-unescaped string (+application/x-www-form-urlencoded+).
  *
  */
 static VALUE
@@ -387,11 +387,54 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self)
     StringValue(str);
 
     if (rb_enc_str_asciicompat_p(str)) {
-	VALUE enc = accept_charset(argc-1, argv+1, self);
-	return optimized_unescape(str, enc);
+        VALUE enc = accept_charset(argc-1, argv+1, self);
+        return optimized_unescape(str, enc, 1);
+    }
+    else {
+        return rb_call_super(argc, argv);
+    }
+}
+
+/*
+ *  call-seq:
+ *     CGI.escapeURIComponent(string) -> string
+ *
+ *  Returns URL-escaped string following RFC 3986.
+ *
+ */
+static VALUE
+cgiesc_escape_uri_component(VALUE self, VALUE str)
+{
+    StringValue(str);
+
+    if (rb_enc_str_asciicompat_p(str)) {
+        return optimized_escape(str, 0);
+    }
+    else {
+        return rb_call_super(1, &str);
+    }
+}
+
+/*
+ *  call-seq:
+ *     CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string
+ *
+ *  Returns URL-unescaped string following RFC 3986.
+ *
+ */
+static VALUE
+cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self)
+{
+    VALUE str = (rb_check_arity(argc, 1, 2), argv[0]);
+
+    StringValue(str);
+
+    if (rb_enc_str_asciicompat_p(str)) {
+        VALUE enc = accept_charset(argc-1, argv+1, self);
+        return optimized_unescape(str, enc, 0);
     }
     else {
-	return rb_call_super(argc, argv);
+        return rb_call_super(argc, argv);
     }
 }
 
@@ -414,6 +457,8 @@ InitVM_escape(void)
     rb_mUtil   = rb_define_module_under(rb_cCGI, "Util");
     rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
     rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
+    rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1);
+    rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
     rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
     rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
     rb_prepend_module(rb_mUtil, rb_mEscape);
diff --git a/lib/cgi.rb b/lib/cgi.rb
index af466bc673..dda5eb1712 100644
--- a/lib/cgi.rb
+++ b/lib/cgi.rb
@@ -162,7 +162,7 @@
 #   cgi.has_key?('field_name')
 #   cgi.include?('field_name')
 #
-# CAUTION! cgi['field_name'] returned an Array with the old
+# CAUTION! <code>cgi['field_name']</code> returned an Array with the old
 # cgi.rb(included in Ruby 1.6)
 #
 # === Get form values as hash
@@ -288,7 +288,7 @@
 #
 
 class CGI
-  VERSION = "0.3.2"
+  VERSION = "0.3.5"
 end
 
 require 'cgi/core'
diff --git a/lib/cgi/cgi.gemspec b/lib/cgi/cgi.gemspec
index 3ba62b93f6..381c55a5ca 100644
--- a/lib/cgi/cgi.gemspec
+++ b/lib/cgi/cgi.gemspec
@@ -22,10 +22,21 @@ Gem::Specification.new do |spec|
   spec.metadata["homepage_uri"] = spec.homepage
   spec.metadata["source_code_uri"] = spec.homepage
 
-  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
-    `git ls-files -z 2>/dev/null`.split("\x0").reject { |f| f.match(%r{\A(?:(?:test|spec|features)/|\.git)}) }
-  end
-  spec.extensions    = ["ext/cgi/escape/extconf.rb"]
   spec.executables   = []
+
+  spec.files = [
+    "LICENSE.txt",
+    "README.md",
+    *Dir["lib{.rb,/**/*.rb}", "bin/*"]  ]
+
   spec.require_paths = ["lib"]
+
+  if Gem::Platform === spec.platform and spec.platform =~ 'java' or RUBY_ENGINE == 'jruby'
+    spec.platform = 'java'
+    spec.require_paths << "ext/java/org/jruby/ext/cgi/escape/lib"
+    spec.files += Dir["ext/java/**/*.{rb}", "lib/cgi/escape.jar"]
+  else
+    spec.files += Dir["ext/cgi/**/*.{rb,c,h,sh}", "ext/cgi/escape/depend", "lib/cgi/escape.so"]
+    spec.extensions    = ["ext/cgi/escape/extconf.rb"]
+  end
 end
diff --git a/lib/cgi/cookie.rb b/lib/cgi/cookie.rb
index 6b0d89ca3b..1a9c1a82c1 100644
--- a/lib/cgi/cookie.rb
+++ b/lib/cgi/cookie.rb
@@ -40,6 +40,10 @@ class CGI
   class Cookie < Array
     @@accept_charset="UTF-8" unless defined?(@@accept_charset)
 
+    TOKEN_RE = %r"\A[[!-~]&&[^()<>@,;:\\\"/?=\[\]{}]]+\z"
+    PATH_VALUE_RE = %r"\A[[ -~]&&[^;]]*\z"
+    DOMAIN_VALUE_RE = %r"\A(?<label>(?!-)[-A-Za-z0-9]+(?<!-))(?:\.\g<label>)*\z"
+
     # Create a new CGI::Cookie object.
     #
     # :call-seq:
@@ -72,8 +76,8 @@ class CGI
       @domain = nil
       @expires = nil
       if name.kind_of?(String)
-        @name = name
-        @path = (%r|\A(.*/)| =~ ENV["SCRIPT_NAME"] ? $1 : "")
+        self.name = name
+        self.path = (%r|\A(.*/)| =~ ENV["SCRIPT_NAME"] ? $1 : "")
         @secure = false
         @httponly = false
         return super(value)
@@ -84,11 +88,11 @@ class CGI
         raise ArgumentError, "`name' required"
       end
 
-      @name = options["name"]
+      self.name = options["name"]
       value = Array(options["value"])
       # simple support for IE
-      @path = options["path"] || (%r|\A(.*/)| =~ ENV["SCRIPT_NAME"] ? $1 : "")
-      @domain = options["domain"]
+      self.path = options["path"] || (%r|\A(.*/)| =~ ENV["SCRIPT_NAME"] ? $1 : "")
+      self.domain = options["domain"]
       @expires = options["expires"]
       @secure = options["secure"] == true
       @httponly = options["httponly"] == true
@@ -97,11 +101,35 @@ class CGI
     end
 
     # Name of this cookie, as a +String+
-    attr_accessor :name
+    attr_reader :name
+    # Set name of this cookie
+    def name=(str)
+      if str and !TOKEN_RE.match?(str)
+        raise ArgumentError, "invalid name: #{str.dump}"
+      end
+      @name = str
+    end
+
     # Path for which this cookie applies, as a +String+
-    attr_accessor :path
+    attr_reader :path
+    # Set path for which this cookie applies
+    def path=(str)
+      if str and !PATH_VALUE_RE.match?(str)
+        raise ArgumentError, "invalid path: #{str.dump}"
+      end
+      @path = str
+    end
+
     # Domain for which this cookie applies, as a +String+
-    attr_accessor :domain
+    attr_reader :domain
+    # Set domain for which this cookie applies
+    def domain=(str)
+      if str and ((str = str.b).bytesize > 255 or !DOMAIN_VALUE_RE.match?(str))
+        raise ArgumentError, "invalid domain: #{str.dump}"
+      end
+      @domain = str
+    end
+
     # Time at which this cookie expires, as a +Time+
     attr_accessor :expires
     # True if this cookie is secure; false otherwise
diff --git a/lib/cgi/core.rb b/lib/cgi/core.rb
index bec76e0749..62e606837a 100644
--- a/lib/cgi/core.rb
+++ b/lib/cgi/core.rb
@@ -188,17 +188,28 @@ class CGI
   # Using #header with the HTML5 tag maker will create a <header> element.
   alias :header :http_header
 
+  def _no_crlf_check(str)
+    if str
+      str = str.to_s
+      raise "A HTTP status or header field must not include CR and LF" if str =~ /[\r\n]/
+      str
+    else
+      nil
+    end
+  end
+  private :_no_crlf_check
+
   def _header_for_string(content_type) #:nodoc:
     buf = ''.dup
     if nph?()
-      buf << "#{$CGI_ENV['SERVER_PROTOCOL'] || 'HTTP/1.0'} 200 OK#{EOL}"
+      buf << "#{_no_crlf_check($CGI_ENV['SERVER_PROTOCOL']) || 'HTTP/1.0'} 200 OK#{EOL}"
       buf << "Date: #{CGI.rfc1123_date(Time.now)}#{EOL}"
-      buf << "Server: #{$CGI_ENV['SERVER_SOFTWARE']}#{EOL}"
+      buf << "Server: #{_no_crlf_check($CGI_ENV['SERVER_SOFTWARE'])}#{EOL}"
       buf << "Connection: close#{EOL}"
     end
-    buf << "Content-Type: #{content_type}#{EOL}"
+    buf << "Content-Type: #{_no_crlf_check(content_type)}#{EOL}"
     if @output_cookies
-      @output_cookies.each {|cookie| buf << "Set-Cookie: #{cookie}#{EOL}" }
+      @output_cookies.each {|cookie| buf << "Set-Cookie: #{_no_crlf_check(cookie)}#{EOL}" }
     end
     return buf
   end # _header_for_string
@@ -213,9 +224,9 @@ class CGI
     ## NPH
     options.delete('nph') if defined?(MOD_RUBY)
     if options.delete('nph') || nph?()
-      protocol = $CGI_ENV['SERVER_PROTOCOL'] || 'HTTP/1.0'
+      protocol = _no_crlf_check($CGI_ENV['SERVER_PROTOCOL']) || 'HTTP/1.0'
       status = options.delete('status')
-      status = HTTP_STATUS[status] || status || '200 OK'
+      status = HTTP_STATUS[status] || _no_crlf_check(status) || '200 OK'
       buf << "#{protocol} #{status}#{EOL}"
       buf << "Date: #{CGI.rfc1123_date(Time.now)}#{EOL}"
       options['server'] ||= $CGI_ENV['SERVER_SOFTWARE'] || ''
@@ -223,38 +234,38 @@ class CGI
     end
     ## common headers
     status = options.delete('status')
-    buf << "Status: #{HTTP_STATUS[status] || status}#{EOL}" if status
+    buf << "Status: #{HTTP_STATUS[status] || _no_crlf_check(status)}#{EOL}" if status
     server = options.delete('server')
-    buf << "Server: #{server}#{EOL}" if server
+    buf << "Server: #{_no_crlf_check(server)}#{EOL}" if server
     connection = options.delete('connection')
-    buf << "Connection: #{connection}#{EOL}" if connection
+    buf << "Connection: #{_no_crlf_check(connection)}#{EOL}" if connection
     type = options.delete('type')
-    buf << "Content-Type: #{type}#{EOL}" #if type
+    buf << "Content-Type: #{_no_crlf_check(type)}#{EOL}" #if type
     length = options.delete('length')
-    buf << "Content-Length: #{length}#{EOL}" if length
+    buf << "Content-Length: #{_no_crlf_check(length)}#{EOL}" if length
     language = options.delete('language')
-    buf << "Content-Language: #{language}#{EOL}" if language
+    buf << "Content-Language: #{_no_crlf_check(language)}#{EOL}" if language
     expires = options.delete('expires')
     buf << "Expires: #{CGI.rfc1123_date(expires)}#{EOL}" if expires
     ## cookie
     if cookie = options.delete('cookie')
       case cookie
       when String, Cookie
-        buf << "Set-Cookie: #{cookie}#{EOL}"
+        buf << "Set-Cookie: #{_no_crlf_check(cookie)}#{EOL}"
       when Array
         arr = cookie
-        arr.each {|c| buf << "Set-Cookie: #{c}#{EOL}" }
+        arr.each {|c| buf << "Set-Cookie: #{_no_crlf_check(c)}#{EOL}" }
       when Hash
         hash = cookie
-        hash.each_value {|c| buf << "Set-Cookie: #{c}#{EOL}" }
+        hash.each_value {|c| buf << "Set-Cookie: #{_no_crlf_check(c)}#{EOL}" }
       end
     end
     if @output_cookies
-      @output_cookies.each {|c| buf << "Set-Cookie: #{c}#{EOL}" }
+      @output_cookies.each {|c| buf << "Set-Cookie: #{_no_crlf_check(c)}#{EOL}" }
     end
     ## other headers
     options.each do |key, value|
-      buf << "#{key}: #{value}#{EOL}"
+      buf << "#{_no_crlf_check(key)}: #{_no_crlf_check(value)}#{EOL}"
     end
     return buf
   end # _header_for_hash
diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb
index 55e61bf984..5a5c77ac97 100644
--- a/lib/cgi/util.rb
+++ b/lib/cgi/util.rb
@@ -5,24 +5,57 @@ class CGI
   extend Util
 end
 module CGI::Util
-  @@accept_charset="UTF-8" unless defined?(@@accept_charset)
-  # URL-encode a string.
+  @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)
+
+  # URL-encode a string into application/x-www-form-urlencoded.
+  # Space characters (+" "+) are encoded with plus signs (+"+"+)
   #   url_encoded_string = CGI.escape("'Stop!' said Fred")
   #      # => "%27Stop%21%27+said+Fred"
   def escape(string)
     encoding = string.encoding
-    string.b.gsub(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
+    buffer = string.b
+    buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
       '%' + m.unpack('H2' * m.bytesize).join('%').upcase
-    end.tr(' ', '+').force_encoding(encoding)
+    end
+    buffer.tr!(' ', '+')
+    buffer.force_encoding(encoding)
   end
 
-  # URL-decode a string with encoding(optional).
+  # URL-decode an application/x-www-form-urlencoded string with encoding(optional).
   #   string = CGI.unescape("%27Stop%21%27+said+Fred")
   #      # => "'Stop!' said Fred"
-  def unescape(string,encoding=@@accept_charset)
-    str=string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m|
+  def unescape(string, encoding = @@accept_charset)
+    str = string.tr('+', ' ')
+    str = str.b
+    str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
+      [m.delete('%')].pack('H*')
+    end
+    str.force_encoding(encoding)
+    str.valid_encoding? ? str : str.force_encoding(string.encoding)
+  end
+
+  # URL-encode a string following RFC 3986
+  # Space characters (+" "+) are encoded with (+"%20"+)
+  #   url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
+  #      # => "%27Stop%21%27%20said%20Fred"
+  def escapeURIComponent(string)
+    encoding = string.encoding
+    buffer = string.b
+    buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m|
+      '%' + m.unpack('H2' * m.bytesize).join('%').upcase
+    end
+    buffer.force_encoding(encoding)
+  end
+
+  # URL-decode a string following RFC 3986 with encoding(optional).
+  #   string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
+  #      # => "'Stop!'+said Fred"
+  def unescapeURIComponent(string, encoding = @@accept_charset)
+    str = string.b
+    str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
       [m.delete('%')].pack('H*')
-    end.force_encoding(encoding)
+    end
+    str.force_encoding(encoding)
     str.valid_encoding? ? str : str.force_encoding(string.encoding)
   end
 
diff --git a/test/cgi/test_cgi_cookie.rb b/test/cgi/test_cgi_cookie.rb
index 985cc0d7a1..e3ec4bea52 100644
--- a/test/cgi/test_cgi_cookie.rb
+++ b/test/cgi/test_cgi_cookie.rb
@@ -60,6 +60,24 @@ class CGICookieTest < Test::Unit::TestCase
   end
 
 
+  def test_cgi_cookie_new_with_domain
+    h = {'name'=>'name1', 'value'=>'value1'}
+    cookie = CGI::Cookie.new('domain'=>'a.example.com', **h)
+    assert_equal('a.example.com', cookie.domain)
+
+    cookie = CGI::Cookie.new('domain'=>'1.example.com', **h)
+    assert_equal('1.example.com', cookie.domain, 'enhanced by RFC 1123')
+
+    assert_raise(ArgumentError) {
+      CGI::Cookie.new('domain'=>'-a.example.com', **h)
+    }
+
+    assert_raise(ArgumentError) {
+      CGI::Cookie.new('domain'=>'a-.example.com', **h)
+    }
+  end
+
+
   def test_cgi_cookie_scriptname
     cookie = CGI::Cookie.new('name1', 'value1')
     assert_equal('', cookie.path)
@@ -118,6 +136,70 @@ class CGICookieTest < Test::Unit::TestCase
   end
 
 
+  def test_cgi_cookie_domain_injection_into_name
+    name = "a=b; domain=example.com;"
+    path = "/"
+    domain = "example.jp"
+    assert_raise(ArgumentError) do
+      CGI::Cookie.new('name' => name,
+                      'value' => "value",
+                      'domain' => domain,
+                      'path' => path)
+    end
+  end
+
+
+  def test_cgi_cookie_newline_injection_into_name
+    name = "a=b;\r\nLocation: http://example.com#"
+    path = "/"
+    domain = "example.jp"
+    assert_raise(ArgumentError) do
+      CGI::Cookie.new('name' => name,
+                      'value' => "value",
+                      'domain' => domain,
+                      'path' => path)
+    end
+  end
+
+
+  def test_cgi_cookie_multibyte_injection_into_name
+    name = "a=b;\u3042"
+    path = "/"
+    domain = "example.jp"
+    assert_raise(ArgumentError) do
+      CGI::Cookie.new('name' => name,
+                      'value' => "value",
+                      'domain' => domain,
+                      'path' => path)
+    end
+  end
+
+
+  def test_cgi_cookie_injection_into_path
+    name = "name"
+    path = "/; samesite=none"
+    domain = "example.jp"
+    assert_raise(ArgumentError) do
+      CGI::Cookie.new('name' => name,
+                      'value' => "value",
+                      'domain' => domain,
+                      'path' => path)
+    end
+  end
+
+
+  def test_cgi_cookie_injection_into_domain
+    name = "name"
+    path = "/"
+    domain = "example.jp; samesite=none"
+    assert_raise(ArgumentError) do
+      CGI::Cookie.new('name' => name,
+                      'value' => "value",
+                      'domain' => domain,
+                      'path' => path)
+    end
+  end
+
 
   instance_methods.each do |method|
     private method if method =~ /^test_(.*)/ && $1 != ENV['TEST']
diff --git a/test/cgi/test_cgi_header.rb b/test/cgi/test_cgi_header.rb
index bab2d0348a..ec2f4deb72 100644
--- a/test/cgi/test_cgi_header.rb
+++ b/test/cgi/test_cgi_header.rb
@@ -176,6 +176,14 @@ class CGIHeaderTest < Test::Unit::TestCase
   end
 
 
+  def test_cgi_http_header_crlf_injection
+    cgi = CGI.new
+    assert_raise(RuntimeError) { cgi.http_header("text/xhtml\r\nBOO") }
+    assert_raise(RuntimeError) { cgi.http_header("type" => "text/xhtml\r\nBOO") }
+    assert_raise(RuntimeError) { cgi.http_header("status" => "200 OK\r\nBOO") }
+    assert_raise(RuntimeError) { cgi.http_header("location" => "text/xhtml\r\nBOO") }
+  end
+
 
   instance_methods.each do |method|
     private method if method =~ /^test_(.*)/ && $1 != ENV['TEST']
diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb
index 5a2d07b328..a3be193a13 100644
--- a/test/cgi/test_cgi_util.rb
+++ b/test/cgi/test_cgi_util.rb
@@ -23,7 +23,6 @@ class CGIUtilTest < Test::Unit::TestCase
     ENV.update(@environ)
   end
 
-
   def test_cgi_escape
     assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1))
     assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding)
@@ -70,6 +69,54 @@ class CGIUtilTest < Test::Unit::TestCase
     end;
   end
 
+  def test_cgi_escapeURIComponent
+    assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escapeURIComponent(@str1))
+    assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escapeURIComponent(@str1).ascii_only?) if defined?(::Encoding)
+  end
+
+  def test_cgi_escapeURIComponent_with_unreserved_characters
+    assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~",
+                 CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"),
+                 "should not encode any unreserved characters, as per RFC3986 Section 2.3")
+  end
+
+  def test_cgi_escapeURIComponent_with_invalid_byte_sequence
+    assert_equal('%C0%3C%3C', CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")))
+  end
+
+  def test_cgi_escapeURIComponent_preserve_encoding
+    assert_equal(Encoding::US_ASCII, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding)
+    assert_equal(Encoding::ASCII_8BIT, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding)
+    assert_equal(Encoding::UTF_8, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")).encoding)
+  end
+
+  def test_cgi_unescapeURIComponent
+    str = CGI.unescapeURIComponent('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93')
+    assert_equal(@str1, str)
+    return unless defined?(::Encoding)
+
+    assert_equal("foo+bar", CGI.unescapeURIComponent("foo+bar"))
+
+    assert_equal(@str1.encoding, str.encoding)
+    assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2"))
+  end
+
+  def test_cgi_unescapeURIComponent_preserve_encoding
+    assert_equal(Encoding::US_ASCII, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding)
+    assert_equal(Encoding::ASCII_8BIT, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding)
+    assert_equal(Encoding::UTF_8, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding)
+  end
+
+  def test_cgi_unescapeURIComponent_accept_charset
+    return unless defined?(::Encoding)
+
+    assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)}
+    assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}")
+    begin;
+      assert_equal("", CGI.unescapeURIComponent(''))
+    end;
+  end
+
   def test_cgi_pretty
     assert_equal("<HTML>\n  <BODY>\n  </BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>"))
     assert_equal("<HTML>\n\t<BODY>\n\t</BODY>\n</HTML>\n",CGI.pretty("<HTML><BODY></BODY></HTML>","\t"))
@@ -105,6 +152,7 @@ class CGIUtilTest < Test::Unit::TestCase
   end
 
   def test_cgi_escape_html_large
+    return if RUBY_ENGINE == 'jruby'
     ulong_max, size_max = RbConfig::LIMITS.values_at("ULONG_MAX", "SIZE_MAX")
     return unless ulong_max < size_max # Platforms not concerned
author	Hiroshi SHIBATA <hsbt@ruby-lang.org>	2022-11-22 13:50:06 +0900
committer	nagachika <nagachika@ruby-lang.org>	2022-11-22 23:04:45 +0900
commit	16ea2213295137bb25f10225408c54de452a507b (patch)
tree	079b14ad168284cef887490b76b87102be48f1d6
parent	14704023ec57546b1bed85d54f4f425986385ab4 (diff)