summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author: Takashi Kokubun <takashikkbn@gmail.com> 2019-06-05 19:28:51 +0900
committer: Takashi Kokubun <takashikkbn@gmail.com> 2019-06-05 21:07:04 +0900
commit: 0a29dc87e62c701db56816cb430daf07a4f02bea (patch)
tree: 8c4afbace1e6a42d36e0401d2f8b1ddc7bdea373 /ext
parent: f3c877e8deaea91ff27c0fca837c9388d030a896 (diff)
Optimize CGI.escapeHTML by reducing buffer extension
and switch-case branches.

The buffer allocation optimization using `ALLOCV_N` is the main benefit of this patch: the output buffer is allocated once at its worst-case size (input length times HTML_ESCAPE_MAX_LEN), which eliminates the O(N) buffer extensions.

It also reduces the number of branches by using an escape table, like https://mattn.kaoriya.net/software/lang/c/20160817011915.htm.

Closes: https://github.com/ruby/ruby/pull/2226

Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
Co-authored-by: Yasuhiro MATSUMOTO <mattn.jp@gmail.com>
Diffstat (limited to 'ext')
-rw-r--r-- ext/cgi/escape/escape.c 85
1 files changed, 36 insertions, 49 deletions
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c
index 78d196db71..76d8f0d067 100644
--- a/ext/cgi/escape/escape.c
+++ b/ext/cgi/escape/escape.c
@@ -11,27 +11,20 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[];
static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
static ID id_accept_charset;
-static void
-html_escaped_cat(VALUE str, char c)
-{
- switch (c) {
- case '\'':
- rb_str_cat_cstr(str, "&#39;");
- break;
- case '&':
- rb_str_cat_cstr(str, "&amp;");
- break;
- case '"':
- rb_str_cat_cstr(str, "&quot;");
- break;
- case '<':
- rb_str_cat_cstr(str, "&lt;");
- break;
- case '>':
- rb_str_cat_cstr(str, "&gt;");
- break;
- }
-}
+#define HTML_ESCAPE_MAX_LEN 6
+
+static const struct {
+ uint8_t len;
+ char str[HTML_ESCAPE_MAX_LEN+1];
+} html_escape_table[UCHAR_MAX+1] = {
+#define HTML_ESCAPE(c, str) [c] = {rb_strlen_lit(str), str}
+ HTML_ESCAPE('\'', "&#39;"),
+ HTML_ESCAPE('&', "&amp;"),
+ HTML_ESCAPE('"', "&quot;"),
+ HTML_ESCAPE('<', "&lt;"),
+ HTML_ESCAPE('>', "&gt;"),
+#undef HTML_ESCAPE
+};
static inline void
preserve_original_state(VALUE orig, VALUE dest)
@@ -44,40 +37,34 @@ preserve_original_state(VALUE orig, VALUE dest)
static VALUE
optimized_escape_html(VALUE str)
{
- long i, len, beg = 0;
- VALUE dest = 0;
- const char *cstr;
-
- len = RSTRING_LEN(str);
- cstr = RSTRING_PTR(str);
-
- for (i = 0; i < len; i++) {
- switch (cstr[i]) {
- case '\'':
- case '&':
- case '"':
- case '<':
- case '>':
- if (!dest) {
- dest = rb_str_buf_new(len);
- }
-
- rb_str_cat(dest, cstr + beg, i - beg);
- beg = i + 1;
-
- html_escaped_cat(dest, cstr[i]);
- break;
+ VALUE vbuf;
+ char *buf = ALLOCV_N(char, vbuf, RSTRING_LEN(str) * HTML_ESCAPE_MAX_LEN);
+ const char *cstr = RSTRING_PTR(str);
+ const char *end = cstr + RSTRING_LEN(str);
+
+ char *dest = buf;
+ while (cstr < end) {
+ const unsigned char c = *cstr++;
+ uint8_t len = html_escape_table[c].len;
+ if (len) {
+ memcpy(dest, html_escape_table[c].str, len);
+ dest += len;
+ }
+ else {
+ *dest++ = c;
}
}
- if (dest) {
- rb_str_cat(dest, cstr + beg, len - beg);
- preserve_original_state(str, dest);
- return dest;
+ VALUE escaped;
+ if (RSTRING_LEN(str) < (dest - buf)) {
+ escaped = rb_str_new(buf, dest - buf);
+ preserve_original_state(str, escaped);
}
else {
- return rb_str_dup(str);
+ escaped = rb_str_dup(str);
}
+ ALLOCV_END(vbuf);
+ return escaped;
}
static VALUE