summary | refs | log | tree | commit | diff
path: root/ext/cgi/escape/escape.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/cgi/escape/escape.c')
-rw-r--r-- ext/cgi/escape/escape.c 96
1 files changed, 81 insertions, 15 deletions
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c
index 068647747d..495ad83aa3 100644
--- a/ext/cgi/escape/escape.c
+++ b/ext/cgi/escape/escape.c
@@ -83,7 +83,7 @@ optimized_unescape_html(VALUE str)
unsigned long charlimit = (strcasecmp(rb_enc_name(enc), "UTF-8") == 0 ? UNICODE_MAX :
strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 :
128);
- long i, len, beg = 0;
+ long i, j, len, beg = 0;
size_t clen, plen;
int overflow;
const char *cstr;
@@ -100,6 +100,7 @@ optimized_unescape_html(VALUE str)
plen = i - beg;
if (++i >= len) break;
c = (unsigned char)cstr[i];
+ j = i;
#define MATCH(s) (len - i >= (int)rb_strlen_lit(s) && \
memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \
(i += rb_strlen_lit(s) - 1, 1))
@@ -112,28 +113,40 @@ optimized_unescape_html(VALUE str)
else if (MATCH("mp;")) {
c = '&';
}
- else continue;
+ else {
+ i = j;
+ continue;
+ }
break;
case 'q':
++i;
if (MATCH("uot;")) {
c = '"';
}
- else continue;
+ else {
+ i = j;
+ continue;
+ }
break;
case 'g':
++i;
if (MATCH("t;")) {
c = '>';
}
- else continue;
+ else {
+ i = j;
+ continue;
+ }
break;
case 'l':
++i;
if (MATCH("t;")) {
c = '<';
}
- else continue;
+ else {
+ i = j;
+ continue;
+ }
break;
case '#':
if (len - ++i >= 2 && ISDIGIT(cstr[i])) {
@@ -142,9 +155,15 @@ optimized_unescape_html(VALUE str)
else if ((cstr[i] == 'x' || cstr[i] == 'X') && len - ++i >= 2 && ISXDIGIT(cstr[i])) {
cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow);
}
- else continue;
+ else {
+ i = j;
+ continue;
+ }
i += clen;
- if (overflow || cc >= charlimit || cstr[i] != ';') continue;
+ if (overflow || cc >= charlimit || cstr[i] != ';') {
+ i = j;
+ continue;
+ }
if (!dest) {
dest = rb_str_buf_new(len);
}
@@ -200,7 +219,7 @@ url_unreserved_char(unsigned char c)
}
static VALUE
-optimized_escape(VALUE str)
+optimized_escape(VALUE str, int plus_escape)
{
long i, len, beg = 0;
VALUE dest = 0;
@@ -220,7 +239,7 @@ optimized_escape(VALUE str)
rb_str_cat(dest, cstr + beg, i - beg);
beg = i + 1;
- if (c == ' ') {
+ if (plus_escape && c == ' ') {
rb_str_cat_cstr(dest, "+");
}
else {
@@ -242,7 +261,7 @@ optimized_escape(VALUE str)
}
static VALUE
-optimized_unescape(VALUE str, VALUE encoding)
+optimized_unescape(VALUE str, VALUE encoding, int unescape_plus)
{
long i, len, beg = 0;
VALUE dest = 0;
@@ -265,7 +284,7 @@ optimized_unescape(VALUE str, VALUE encoding)
| char_to_number(cstr[i+2]));
clen = 2;
}
- else if (c == '+') {
+ else if (unescape_plus && c == '+') {
buf[0] = ' ';
}
else {
@@ -348,7 +367,7 @@ cgiesc_unescape_html(VALUE self, VALUE str)
* call-seq:
* CGI.escape(string) -> string
*
- * Returns URL-escaped string.
+ * Returns URL-escaped string (+application/x-www-form-urlencoded+).
*
*/
static VALUE
@@ -357,7 +376,7 @@ cgiesc_escape(VALUE self, VALUE str)
StringValue(str);
if (rb_enc_str_asciicompat_p(str)) {
- return optimized_escape(str);
+ return optimized_escape(str, 1);
}
else {
return rb_call_super(1, &str);
@@ -376,7 +395,7 @@ accept_charset(int argc, VALUE *argv, VALUE self)
* call-seq:
* CGI.unescape(string, encoding=@@accept_charset) -> string
*
- * Returns URL-unescaped string.
+ * Returns URL-unescaped string (+application/x-www-form-urlencoded+).
*
*/
static VALUE
@@ -388,7 +407,50 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self)
if (rb_enc_str_asciicompat_p(str)) {
VALUE enc = accept_charset(argc-1, argv+1, self);
- return optimized_unescape(str, enc);
+ return optimized_unescape(str, enc, 1);
+ }
+ else {
+ return rb_call_super(argc, argv);
+ }
+}
+
+/*
+ * call-seq:
+ * CGI.escapeURIComponent(string) -> string
+ *
+ * Returns URL-escaped string following RFC 3986.
+ *
+ */
+static VALUE
+cgiesc_escape_uri_component(VALUE self, VALUE str)
+{
+ StringValue(str);
+
+ if (rb_enc_str_asciicompat_p(str)) {
+ return optimized_escape(str, 0);
+ }
+ else {
+ return rb_call_super(1, &str);
+ }
+}
+
+/*
+ * call-seq:
+ * CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string
+ *
+ * Returns URL-unescaped string following RFC 3986.
+ *
+ */
+static VALUE
+cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self)
+{
+ VALUE str = (rb_check_arity(argc, 1, 2), argv[0]);
+
+ StringValue(str);
+
+ if (rb_enc_str_asciicompat_p(str)) {
+ VALUE enc = accept_charset(argc-1, argv+1, self);
+ return optimized_unescape(str, enc, 0);
}
else {
return rb_call_super(argc, argv);
@@ -414,6 +476,10 @@ InitVM_escape(void)
rb_mUtil = rb_define_module_under(rb_cCGI, "Util");
rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
+ rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1);
+ rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent");
+ rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
+ rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent");
rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
rb_prepend_module(rb_mUtil, rb_mEscape);