1 files changed, 81 insertions, 15 deletions
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c
index 068647747d..495ad83aa3 100644
--- a/ext/cgi/escape/escape.c
+++ b/ext/cgi/escape/escape.c
@@ -83,7 +83,7 @@ optimized_unescape_html(VALUE str)
     unsigned long charlimit = (strcasecmp(rb_enc_name(enc), "UTF-8") == 0 ? UNICODE_MAX :
                                strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 :
                                128);
-    long i, len, beg = 0;
+    long i, j, len, beg = 0;
     size_t clen, plen;
     int overflow;
     const char *cstr;
@@ -100,6 +100,7 @@ optimized_unescape_html(VALUE str)
         plen = i - beg;
         if (++i >= len) break;
         c = (unsigned char)cstr[i];
+        j = i;
 #define MATCH(s) (len - i >= (int)rb_strlen_lit(s) && \
                   memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \
                   (i += rb_strlen_lit(s) - 1, 1))
@@ -112,28 +113,40 @@ optimized_unescape_html(VALUE str)
             else if (MATCH("mp;")) {
                 c = '&';
             }
-            else continue;
+            else {
+                i = j;
+                continue;
+            }
             break;
           case 'q':
             ++i;
             if (MATCH("uot;")) {
                 c = '"';
             }
-            else continue;
+            else {
+                i = j;
+                continue;
+            }
             break;
           case 'g':
             ++i;
             if (MATCH("t;")) {
                 c = '>';
             }
-            else continue;
+            else {
+                i = j;
+                continue;
+            }
             break;
           case 'l':
             ++i;
             if (MATCH("t;")) {
                 c = '<';
             }
-            else continue;
+            else {
+                i = j;
+                continue;
+            }
             break;
           case '#':
             if (len - ++i >= 2 && ISDIGIT(cstr[i])) {
@@ -142,9 +155,15 @@ optimized_unescape_html(VALUE str)
             else if ((cstr[i] == 'x' || cstr[i] == 'X') && len - ++i >= 2 && ISXDIGIT(cstr[i])) {
                 cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow);
             }
-            else continue;
+            else {
+                i = j;
+                continue;
+            }
             i += clen;
-            if (overflow || cc >= charlimit || cstr[i] != ';') continue;
+            if (overflow || cc >= charlimit || cstr[i] != ';') {
+                i = j;
+                continue;
+            }
             if (!dest) {
                 dest = rb_str_buf_new(len);
             }
@@ -200,7 +219,7 @@ url_unreserved_char(unsigned char c)
 }
 
 static VALUE
-optimized_escape(VALUE str)
+optimized_escape(VALUE str, int plus_escape)
 {
     long i, len, beg = 0;
     VALUE dest = 0;
@@ -220,7 +239,7 @@ optimized_escape(VALUE str)
             rb_str_cat(dest, cstr + beg, i - beg);
             beg = i + 1;
 
-            if (c == ' ') {
+            if (plus_escape && c == ' ') {
                 rb_str_cat_cstr(dest, "+");
             }
             else {
@@ -242,7 +261,7 @@ optimized_escape(VALUE str)
 }
 
 static VALUE
-optimized_unescape(VALUE str, VALUE encoding)
+optimized_unescape(VALUE str, VALUE encoding, int unescape_plus)
 {
     long i, len, beg = 0;
     VALUE dest = 0;
@@ -265,7 +284,7 @@ optimized_unescape(VALUE str, VALUE encoding)
                       | char_to_number(cstr[i+2]));
             clen = 2;
         }
-        else if (c == '+') {
+        else if (unescape_plus && c == '+') {
             buf[0] = ' ';
         }
         else {
@@ -348,7 +367,7 @@ cgiesc_unescape_html(VALUE self, VALUE str)
  *  call-seq:
  *     CGI.escape(string) -> string
  *
- *  Returns URL-escaped string.
+ *  Returns URL-escaped string (+application/x-www-form-urlencoded+).
  *
  */
 static VALUE
@@ -357,7 +376,7 @@ cgiesc_escape(VALUE self, VALUE str)
     StringValue(str);
 
     if (rb_enc_str_asciicompat_p(str)) {
-        return optimized_escape(str);
+        return optimized_escape(str, 1);
     }
     else {
         return rb_call_super(1, &str);
@@ -376,7 +395,7 @@ accept_charset(int argc, VALUE *argv, VALUE self)
  *  call-seq:
  *     CGI.unescape(string, encoding=@@accept_charset) -> string
  *
- *  Returns URL-unescaped string.
+ *  Returns URL-unescaped string (+application/x-www-form-urlencoded+).
  *
  */
 static VALUE
@@ -388,7 +407,50 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self)
 
     if (rb_enc_str_asciicompat_p(str)) {
         VALUE enc = accept_charset(argc-1, argv+1, self);
-        return optimized_unescape(str, enc);
+        return optimized_unescape(str, enc, 1);
+    }
+    else {
+        return rb_call_super(argc, argv);
+    }
+}
+
+/*
+ *  call-seq:
+ *     CGI.escapeURIComponent(string) -> string
+ *
+ *  Returns the string URL-escaped following RFC 3986; unlike CGI.escape,
+ *  a space is never written as "+".
+ */
+static VALUE
+cgiesc_escape_uri_component(VALUE self, VALUE str)
+{
+    StringValue(str);
+
+    /* Strings in a non-ASCII-compatible encoding are handed to super. */
+    if (!rb_enc_str_asciicompat_p(str)) {
+        return rb_call_super(1, &str);
+    }
+
+    return optimized_escape(str, 0); /* plus_escape disabled */
+}
+
+/*
+ *  call-seq:
+ *     CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string
+ *
+ *  Returns URL-unescaped string following RFC 3986 (a "+" is not decoded as a space).
+ *
+ */
+static VALUE
+cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self)
+{
+    VALUE str = (rb_check_arity(argc, 1, 2), argv[0]);
+
+    StringValue(str);
+
+    if (rb_enc_str_asciicompat_p(str)) {
+        VALUE enc = accept_charset(argc-1, argv+1, self);
+        return optimized_unescape(str, enc, 0);
     }
     else {
         return rb_call_super(argc, argv);
@@ -414,6 +476,10 @@ InitVM_escape(void)
     rb_mUtil   = rb_define_module_under(rb_cCGI, "Util");
     rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
     rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
+    rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1);
+    rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent");
+    rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
+    rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent");
     rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
     rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
     rb_prepend_module(rb_mUtil, rb_mEscape);