summaryrefslogtreecommitdiff
path: root/ext/cgi
diff options
context:
space:
mode:
authorTakashi Kokubun <takashikkbn@gmail.com>2019-06-04 19:58:39 +0900
committerTakashi Kokubun <takashikkbn@gmail.com>2019-06-05 10:08:55 +0900
commit8d81e59aa7a62652caf85f9c8db371703668c149 (patch)
treec30a047f31e5a7d2fb02158831c61cd191b566b2 /ext/cgi
parentb31e1b4a7c5b97d861c2b34ed4ccd982d5c6cb82 (diff)
Optimize CGI.escapeHTML by reducing buffer extension
and switch-case branches. Buffer allocation optimization using `ALLOCA_N` would be the main benefit of patch. It eliminates the O(N) buffer extensions. It also reduces the number of branches using escape table like https://mattn.kaoriya.net/software/lang/c/20160817011915.htm. Closes: https://github.com/ruby/ruby/pull/2226 Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org> Co-authored-by: Yasuhiro MATSUMOTO <mattn.jp@gmail.com>
Diffstat (limited to 'ext/cgi')
-rw-r--r--ext/cgi/escape/escape.c82
1 files changed, 33 insertions, 49 deletions
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c
index ced1b18..9b64c35 100644
--- a/ext/cgi/escape/escape.c
+++ b/ext/cgi/escape/escape.c
@@ -11,27 +11,20 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[];
static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
static ID id_accept_charset;
-static void
-html_escaped_cat(VALUE str, char c)
-{
- switch (c) {
- case '\'':
- rb_str_cat_cstr(str, "&#39;");
- break;
- case '&':
- rb_str_cat_cstr(str, "&amp;");
- break;
- case '"':
- rb_str_cat_cstr(str, "&quot;");
- break;
- case '<':
- rb_str_cat_cstr(str, "&lt;");
- break;
- case '>':
- rb_str_cat_cstr(str, "&gt;");
- break;
- }
-}
+#define HTML_ESCAPE_MAX_LEN 6
+
+static const struct {
+ uint8_t len;
+ char str[HTML_ESCAPE_MAX_LEN+1];
+} html_escape_table[UCHAR_MAX+1] = {
+#define HTML_ESCAPE(c, str) [c] = {rb_strlen_lit(str), str}
+ HTML_ESCAPE('\'', "&#39;"),
+ HTML_ESCAPE('&', "&amp;"),
+ HTML_ESCAPE('"', "&quot;"),
+ HTML_ESCAPE('<', "&lt;"),
+ HTML_ESCAPE('>', "&gt;"),
+#undef HTML_ESCAPE
+};
static inline void
preserve_original_state(VALUE orig, VALUE dest)
@@ -44,36 +37,27 @@ preserve_original_state(VALUE orig, VALUE dest)
static VALUE
optimized_escape_html(VALUE str)
{
- long i, len, beg = 0;
- VALUE dest = 0;
- const char *cstr;
-
- len = RSTRING_LEN(str);
- cstr = RSTRING_PTR(str);
-
- for (i = 0; i < len; i++) {
- switch (cstr[i]) {
- case '\'':
- case '&':
- case '"':
- case '<':
- case '>':
- if (!dest) {
- dest = rb_str_buf_new(len);
- }
-
- rb_str_cat(dest, cstr + beg, i - beg);
- beg = i + 1;
-
- html_escaped_cat(dest, cstr[i]);
- break;
- }
+ const char *cstr = RSTRING_PTR(str);
+ const char *end = cstr + RSTRING_LEN(str);
+ char *buf = ALLOCA_N(char, RSTRING_LEN(str) * HTML_ESCAPE_MAX_LEN);
+
+ char *dest = buf;
+ while (cstr < end) {
+ const unsigned char c = *cstr++;
+ uint8_t len = html_escape_table[c].len;
+ if (len) {
+ memcpy(dest, html_escape_table[c].str, len);
+ dest += len;
+ }
+ else {
+ *dest++ = c;
+ }
}
- if (dest) {
- rb_str_cat(dest, cstr + beg, len - beg);
- preserve_original_state(str, dest);
- return dest;
+ if (RSTRING_LEN(str) < (dest - buf)) {
+ VALUE escaped = rb_str_new(buf, dest - buf);
+ preserve_original_state(str, escaped);
+ return escaped;
}
else {
return rb_str_dup(str);