diff options
Diffstat (limited to 'ext/cgi/escape/escape.c')
-rw-r--r-- | ext/cgi/escape/escape.c | 96 |
1 files changed, 81 insertions, 15 deletions
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index 068647747d..495ad83aa3 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -83,7 +83,7 @@ optimized_unescape_html(VALUE str) unsigned long charlimit = (strcasecmp(rb_enc_name(enc), "UTF-8") == 0 ? UNICODE_MAX : strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 : 128); - long i, len, beg = 0; + long i, j, len, beg = 0; size_t clen, plen; int overflow; const char *cstr; @@ -100,6 +100,7 @@ optimized_unescape_html(VALUE str) plen = i - beg; if (++i >= len) break; c = (unsigned char)cstr[i]; + j = i; #define MATCH(s) (len - i >= (int)rb_strlen_lit(s) && \ memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \ (i += rb_strlen_lit(s) - 1, 1)) @@ -112,28 +113,40 @@ optimized_unescape_html(VALUE str) else if (MATCH("mp;")) { c = '&'; } - else continue; + else { + i = j; + continue; + } break; case 'q': ++i; if (MATCH("uot;")) { c = '"'; } - else continue; + else { + i = j; + continue; + } break; case 'g': ++i; if (MATCH("t;")) { c = '>'; } - else continue; + else { + i = j; + continue; + } break; case 'l': ++i; if (MATCH("t;")) { c = '<'; } - else continue; + else { + i = j; + continue; + } break; case '#': if (len - ++i >= 2 && ISDIGIT(cstr[i])) { @@ -142,9 +155,15 @@ optimized_unescape_html(VALUE str) else if ((cstr[i] == 'x' || cstr[i] == 'X') && len - ++i >= 2 && ISXDIGIT(cstr[i])) { cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow); } - else continue; + else { + i = j; + continue; + } i += clen; - if (overflow || cc >= charlimit || cstr[i] != ';') continue; + if (overflow || cc >= charlimit || cstr[i] != ';') { + i = j; + continue; + } if (!dest) { dest = rb_str_buf_new(len); } @@ -200,7 +219,7 @@ url_unreserved_char(unsigned char c) } static VALUE -optimized_escape(VALUE str) +optimized_escape(VALUE str, int plus_escape) { long i, len, beg = 0; VALUE dest = 0; @@ -220,7 +239,7 @@ optimized_escape(VALUE str) rb_str_cat(dest, cstr + beg, i - beg); beg = i + 1; - if (c == ' ') { + if (plus_escape && c == ' ') { rb_str_cat_cstr(dest, "+"); } else { @@ -242,7 +261,7 @@ optimized_escape(VALUE str) } static VALUE -optimized_unescape(VALUE str, VALUE encoding) +optimized_unescape(VALUE str, VALUE encoding, int unescape_plus) { long i, len, beg = 0; VALUE dest = 0; @@ -265,7 +284,7 @@ optimized_unescape(VALUE str, VALUE encoding) | char_to_number(cstr[i+2])); clen = 2; } - else if (c == '+') { + else if (unescape_plus && c == '+') { buf[0] = ' '; } else { @@ -348,7 +367,7 @@ cgiesc_unescape_html(VALUE self, VALUE str) * call-seq: * CGI.escape(string) -> string * - * Returns URL-escaped string. + * Returns URL-escaped string (+application/x-www-form-urlencoded+). * */ static VALUE @@ -357,7 +376,7 @@ cgiesc_escape(VALUE self, VALUE str) StringValue(str); if (rb_enc_str_asciicompat_p(str)) { - return optimized_escape(str); + return optimized_escape(str, 1); } else { return rb_call_super(1, &str); @@ -376,7 +395,7 @@ accept_charset(int argc, VALUE *argv, VALUE self) * call-seq: * CGI.unescape(string, encoding=@@accept_charset) -> string * - * Returns URL-unescaped string. + * Returns URL-unescaped string (+application/x-www-form-urlencoded+). * */ static VALUE @@ -388,7 +407,50 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self) if (rb_enc_str_asciicompat_p(str)) { VALUE enc = accept_charset(argc-1, argv+1, self); - return optimized_unescape(str, enc); + return optimized_unescape(str, enc, 1); + } + else { + return rb_call_super(argc, argv); + } +} + +/* + * call-seq: + * CGI.escapeURIComponent(string) -> string + * + * Returns URL-escaped string following RFC 3986. + * + */ +static VALUE +cgiesc_escape_uri_component(VALUE self, VALUE str) +{ + StringValue(str); + + if (rb_enc_str_asciicompat_p(str)) { + return optimized_escape(str, 0); + } + else { + return rb_call_super(1, &str); + } +} + +/* + * call-seq: + * CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string + * + * Returns URL-unescaped string following RFC 3986. + * + */ +static VALUE +cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self) +{ + VALUE str = (rb_check_arity(argc, 1, 2), argv[0]); + + StringValue(str); + + if (rb_enc_str_asciicompat_p(str)) { + VALUE enc = accept_charset(argc-1, argv+1, self); + return optimized_unescape(str, enc, 0); } else { return rb_call_super(argc, argv); @@ -414,6 +476,10 @@ InitVM_escape(void) rb_mUtil = rb_define_module_under(rb_cCGI, "Util"); rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1); rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1); + rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1); + rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent"); + rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1); + rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent"); rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1); rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1); rb_prepend_module(rb_mUtil, rb_mEscape); |