summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
author normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-07-18 02:29:59 +0000
committer normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-07-18 02:29:59 +0000
commit d04c085b3c39d7e3893620c8c64aaf89269d2d2c (patch)
tree 187b42aea525bc09a90e51111c161aee5cc0043f /string.c
parent ed831a6c5b6a4f7331ffba5f8b4a2f1d079217ab (diff)
hash: keep fstrings of tainted strings for string keys
The same hash keys may be loaded from tainted data sources
frequently (e.g. parsing headers from socket or loading
YAML data from a file). If a non-tainted fstring already
exists (because the application expects the hash key),
cache and deduplicate the tainted version in the new
tainted_frozen_strings table. For non-embedded strings,
this also allows sharing with the underlying malloc-ed data.

* vm_core.h (rb_vm_struct): add tainted_frozen_strings
* vm.c (ruby_vm_destruct): free tainted_frozen_strings
  (Init_vm_objects): initialize tainted_frozen_strings
  (rb_vm_tfstring_table): accessor for tainted_frozen_strings
* internal.h: declare rb_fstring_existing, rb_vm_tfstring_table
* hash.c (fstring_existing_str): remove (moved to string.c)
  (hash_aset_str): use rb_fstring_existing
* string.c (rb_fstring_existing): new, based on fstring_existing_str
  (tainted_fstr_update): new
  (rb_fstring_existing0): new, based on fstring_existing_str
  (rb_tainted_fstring_existing): new, special case for tainted strings
  (rb_str_free): delete from tainted_frozen_strings table
* test/ruby/test_optimization.rb (test_hash_reuse_fstring): new test
  [ruby-core:82012] [Bug #13737]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59354 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 94
1 files changed, 94 insertions, 0 deletions
diff --git a/string.c b/string.c
index 2012a281d6..72fb65087c 100644
--- a/string.c
+++ b/string.c
@@ -349,6 +349,99 @@ register_fstring(VALUE str)
return ret;
}
+/*
+ * st_update() callback used on the tainted fstring table.
+ *
+ * `arg' is a pointer to a VALUE slot through which the outcome is handed
+ * back to the caller:
+ *   - entry present and not garbage: the slot receives the stored key and
+ *     ST_STOP is returned;
+ *   - entry present but already a garbage object: the slot receives
+ *     Qundef and ST_DELETE is returned;
+ *   - no entry: the key is passed through rb_str_resurrect(), flagged as
+ *     tainted, fstring and frozen, written to both *key and *val as well
+ *     as the slot, and ST_CONTINUE is returned.
+ */
+static int
+tainted_fstr_update(st_data_t *key, st_data_t *val, st_data_t arg, int existing)
+{
+    VALUE *result = (VALUE *)arg;
+    VALUE candidate = (VALUE)*key;
+
+    if (!existing) {
+        VALUE tainted = rb_str_resurrect(candidate);
+
+        RB_OBJ_TAINT_RAW(tainted);
+        RB_FL_SET_RAW(tainted, RSTRING_FSTR);
+        RB_OBJ_FREEZE_RAW(tainted);
+
+        *result = tainted;
+        *val = tainted;
+        *key = tainted;
+        return ST_CONTINUE;
+    }
+
+    /* because of lazy sweep, the stored string may be unmarked already
+     * and swept at next time; do not hand it out */
+    if (rb_objspace_garbage_object_p(candidate)) {
+        *result = Qundef;
+        return ST_DELETE;
+    }
+
+    *result = candidate;
+    return ST_STOP;
+}
+
+/*
+ * Look `str' up in the table returned by rb_vm_fstring_table().
+ *
+ * Returns Qnil when the table has no entry, the result of
+ * register_fstring(str) when the stored entry is a garbage object, and
+ * the stored entry otherwise.
+ */
+static VALUE
+rb_fstring_existing0(VALUE str)
+{
+    st_table *table = rb_vm_fstring_table();
+    st_data_t found;
+
+    if (!st_lookup(table, str, &found)) {
+        return Qnil;
+    }
+    if (rb_objspace_garbage_object_p(found)) {
+        return register_fstring(str);
+    }
+    return (VALUE)found;
+}
+/*
+ * Return the tainted fstring matching `str', or Qnil.
+ *
+ * The table returned by rb_vm_tfstring_table() is consulted first; a hit
+ * that is not a garbage object is returned directly.  Otherwise the lookup
+ * falls back to rb_fstring_existing0(): when that yields Qnil, or a string
+ * without the RSTRING_FSTR flag, Qnil is returned.  Otherwise that string
+ * is passed through st_update() with tainted_fstr_update on the tainted
+ * table, and the string the callback stores into `fstr' is returned.
+ */
+static VALUE
+rb_tainted_fstring_existing(VALUE str)
+{
+ VALUE ret;
+ st_data_t fstr;
+ st_table *tfstrings = rb_vm_tfstring_table();
+
+ if (st_lookup(tfstrings, str, &fstr)) { /* already present in the tainted table? */
+ ret = (VALUE)fstr;
+ if (!rb_objspace_garbage_object_p(ret)) {
+ return ret;
+ }
+ }
+ ret = rb_fstring_existing0(str); /* fall back to the regular fstring table */
+ if (NIL_P(ret)) {
+ return Qnil;
+ }
+ if (!RB_FL_TEST_RAW(ret, RSTRING_FSTR)) {
+ return Qnil;
+ }
+ do { /* tainted_fstr_update writes Qundef into fstr when it deletes a garbage entry; retry then */
+ fstr = (st_data_t)ret; /* reset the key: the callback overwrites fstr through the pointer passed as arg */
+ st_update(tfstrings, fstr, tainted_fstr_update, (st_data_t)&fstr);
+ } while ((VALUE)fstr == Qundef);
+
+ ret = (VALUE)fstr;
+ assert(OBJ_FROZEN_RAW(ret)); /* sanity checks on the string about to be returned */
+ assert(!FL_TEST_RAW(ret, STR_FAKESTR));
+ assert(!FL_TEST_RAW(ret, FL_EXIVAR));
+ assert(FL_TEST_RAW(ret, RSTRING_FSTR));
+ assert(FL_TEST_RAW(ret, FL_TAINT));
+ assert(RBASIC_CLASS(ret) == rb_cString);
+
+ return ret;
+}
+
+/*
+ * Return an already-registered fstring for `str'.
+ *
+ * A string that carries the RSTRING_FSTR flag is returned as is.
+ * Otherwise a tainted string is handled by rb_tainted_fstring_existing()
+ * and an untainted one by rb_fstring_existing0(); the result of the
+ * chosen helper is returned unchanged.
+ */
+VALUE
+rb_fstring_existing(VALUE str)
+{
+    if (FL_TEST_RAW(str, RSTRING_FSTR)) {
+        return str;
+    }
+
+    return RB_OBJ_TAINTED_RAW(str)
+        ? rb_tainted_fstring_existing(str)
+        : rb_fstring_existing0(str);
+}
+
static VALUE
setup_fake_str(struct RString *fake_str, const char *name, long len, int encidx)
{
@@ -1311,6 +1404,7 @@ rb_str_free(VALUE str)
if (FL_TEST(str, RSTRING_FSTR)) {
st_data_t fstr = (st_data_t)str;
st_delete(rb_vm_fstring_table(), &fstr, NULL);
+ st_delete(rb_vm_tfstring_table(), &fstr, NULL);
RB_DEBUG_COUNTER_INC(obj_str_fstr);
}