summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYusuke Endoh <mame@ruby-lang.org>2022-03-24 16:59:11 +0900
committerYusuke Endoh <mame@ruby-lang.org>2022-03-30 16:50:46 +0900
commitffc3b37f969a779f93b8f8a5b3591b4ef7de1538 (patch)
tree25b2d942e8eb2c4a73043773edfcfd6c0d709155
parent23530d68cb04aed9c2f59a050523b0193ee2d0c1 (diff)
re.c: Add Regexp.timeout= and Regexp.timeout
[Feature #17837]
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/5703
-rw-r--r--include/ruby/onigmo.h7
-rw-r--r--re.c88
-rw-r--r--regcomp.c3
-rw-r--r--regexec.c2
-rw-r--r--regint.h8
-rw-r--r--test/ruby/test_regexp.rb17
6 files changed, 125 insertions, 0 deletions
diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h
index 6187b37dc3..a7ef59c7c8 100644
--- a/include/ruby/onigmo.h
+++ b/include/ruby/onigmo.h
@@ -793,6 +793,13 @@ typedef struct re_pattern_buffer {
OnigDistance dmin; /* min-distance of exact or map */
OnigDistance dmax; /* max-distance of exact or map */
+ /* rb_hrtime_t from hrtime.h */
+#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL
+ int128_t timelimit;
+#else
+ uint64_t timelimit;
+#endif
+
/* regex_t link chain */
struct re_pattern_buffer* chain; /* escape compile-conflict */
} OnigRegexType;
diff --git a/re.c b/re.c
index 9c1adbb0ff..50fa140fd0 100644
--- a/re.c
+++ b/re.c
@@ -14,12 +14,14 @@
#include <ctype.h>
#include "encindex.h"
+#include "hrtime.h"
#include "internal.h"
#include "internal/hash.h"
#include "internal/imemo.h"
#include "internal/re.h"
#include "internal/string.h"
#include "internal/variable.h"
+#include "ractor_core.h"
#include "regint.h"
#include "ruby/encoding.h"
#include "ruby/re.h"
@@ -1593,6 +1595,9 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
}
+ // inherit the timeout settings
+ rb_hrtime_t timelimit = reg->timelimit;
+
const char *ptr;
long len;
RSTRING_GETMEM(unescaped, ptr, len);
@@ -1604,6 +1609,8 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
}
+ reg->timelimit = timelimit;
+
RB_GC_GUARD(unescaped);
return reg;
}
@@ -4091,6 +4098,84 @@ re_warn(const char *s)
rb_warn("%s", s);
}
+// The process-global time limit for regexp matching, written by Regexp.timeout=; 0 means no global limit is set
+rb_hrtime_t rb_reg_match_time_limit = 0;
+
+// Called periodically during regexp matching (from the regint.h check that fires when msa->counter reaches 128); end_time_ points at the match's deadline slot, which MATCH_ARG_INIT zeroes, and RuntimeError is raised once that deadline has passed
+void
+rb_reg_check_timeout(regex_t *reg, void *end_time_)
+{
+ rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
+
+ if (*end_time == 0) {
+ // First check for this match (the slot still holds its initial 0):
+ // no timeout test is done yet; only the deadline is computed from
+ // the current time and the effective time limit, if one is set.
+ rb_hrtime_t timelimit = reg->timelimit;
+
+ if (!timelimit) {
+ // no per-object timeout; fall back to the process-global limit.
+ timelimit = rb_reg_match_time_limit;
+ }
+
+ if (timelimit) {
+ *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
+ }
+ else {
+ // no timeout is set; presumably RB_HRTIME_MAX is non-zero and never earlier than "now", so later checks skip this branch and never raise -- TODO confirm
+ *end_time = RB_HRTIME_MAX;
+ }
+ }
+ else {
+ if (*end_time < rb_hrtime_now()) {
+ // the deadline has passed: abort the match by raising
+ rb_raise(rb_eRuntimeError, "regexp match timeout");
+ }
+ }
+}
+
+/*
+ * call-seq:
+ * Regexp.timeout -> float or nil
+ *
+ * Returns the current default timeout interval for Regexp matching, in seconds.
+ * +nil+ means no default timeout is configured.
+ */
+
+static VALUE
+rb_reg_s_timeout_get(VALUE dummy)
+{
+ double d = hrtime2double(rb_reg_match_time_limit);
+ if (d == 0.0) return Qnil; // a limit that converts to 0.0 is reported as "no timeout"
+ return DBL2NUM(d);
+}
+
+/*
+ * call-seq:
+ * Regexp.timeout = int or float or nil
+ *
+ * Sets the default timeout interval for Regexp matching, in seconds.
+ * +nil+ means no default timeout is configured.
+ * This configuration is process-global. If you want to set a timeout for
+ * each Regexp, use the +timeout+ keyword of <code>Regexp.new</code>.
+ *
+ * Regexp.timeout = 1
+ * /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (RuntimeError)
+ */
+
+static VALUE
+rb_reg_s_timeout_set(VALUE dummy, VALUE limit)
+{
+ double timeout = NIL_P(limit) ? 0.0 : NUM2DBL(limit); // nil maps to 0.0; the numeric conversion runs before the Ractor check below
+
+ rb_ractor_ensure_main_ractor("can not access Regexp.timeout from non-main Ractors");
+
+ if (timeout < 0) timeout = 0; // negative values are clamped to 0, the same value nil maps to
+ double2hrtime(&rb_reg_match_time_limit, timeout);
+
+ return limit; // the caller's argument is returned unchanged, not the clamped value
+}
+
/*
* Document-class: RegexpError
*
@@ -4170,6 +4255,9 @@ Init_Regexp(void)
rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
+ rb_define_singleton_method(rb_cRegexp, "timeout", rb_reg_s_timeout_get, 0);
+ rb_define_singleton_method(rb_cRegexp, "timeout=", rb_reg_s_timeout_set, 1);
+
/* see Regexp.options and Regexp.new */
rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
/* see Regexp.options and Regexp.new */
diff --git a/regcomp.c b/regcomp.c
index d51163103e..3e65c9d2e3 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -5973,6 +5973,9 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
(reg)->name_table = (void* )NULL;
(reg)->case_fold_flag = case_fold_flag;
+
+ (reg)->timelimit = 0;
+
return 0;
}
diff --git a/regexec.c b/regexec.c
index da17c04a55..c77d48b1d9 100644
--- a/regexec.c
+++ b/regexec.c
@@ -422,6 +422,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
(msa).gpos = (arg_gpos);\
(msa).best_len = ONIG_MISMATCH;\
(msa).counter = 0;\
+ (msa).end_time = 0;\
} while(0)
#else
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
@@ -431,6 +432,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
(msa).start = (arg_start);\
(msa).gpos = (arg_gpos);\
(msa).counter = 0;\
+ (msa).end_time = 0;\
} while(0)
#endif
diff --git a/regint.h b/regint.h
index 0e9777cc1e..6c88f278c1 100644
--- a/regint.h
+++ b/regint.h
@@ -152,6 +152,7 @@
msa->counter++; \
if (msa->counter >= 128) { \
msa->counter = 0; \
+ rb_reg_check_timeout(reg, &msa->end_time); \
rb_thread_check_ints(); \
} \
} while(0)
@@ -877,6 +878,12 @@ typedef struct {
int state_check_buff_size;
#endif
int counter;
+ /* rb_hrtime_t from hrtime.h */
+#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL
+ int128_t end_time;
+#else
+ uint64_t end_time;
+#endif
} OnigMatchArg;
@@ -942,6 +949,7 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c
#ifdef RUBY
extern size_t onig_memsize(const regex_t *reg);
extern size_t onig_region_memsize(const struct re_registers *regs);
+void rb_reg_check_timeout(regex_t *reg, void *end_time);
#endif
RUBY_SYMBOL_EXPORT_END
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index 94098a850d..7bcddc6e07 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -1457,4 +1457,21 @@ class TestRegexp < Test::Unit::TestCase
}
assert_empty(errs, msg)
end
+
+ def test_s_timeout # In a separate process: set Regexp.timeout to 0.2, check it reads back, and expect a backtracking-heavy match to raise RuntimeError after 0.2 s (+/- 0.1 s)
+ assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}")
+ begin;
+ Regexp.timeout = 0.2
+ assert_equal(0.2, Regexp.timeout)
+
+ t = Time.now
+ assert_raise_with_message(RuntimeError, "regexp match timeout") do
+ # A typical ReDoS case
+ /^(a*)*$/ =~ "a" * 1000000 + "x"
+ end
+ t = Time.now - t
+
+ assert_in_delta(0.2, t, 0.1)
+ end;
+ end
end