summaryrefslogtreecommitdiff
path: root/ext/objspace/objspace_dump.c
diff options
context:
space:
mode:
authorJean Boussier <jean.boussier@gmail.com>2022-06-29 15:21:11 +0200
committerJean Boussier <jean.boussier@gmail.com>2022-07-04 20:04:59 +0200
commit890df5f81271e031ae86775b8b8b8e4e4d4640d8 (patch)
tree3e33ab2b8fafbf05e5117973140e4557770b552e /ext/objspace/objspace_dump.c
parentb92fb7869624cb58475516361505f65f4f8a24d0 (diff)
ObjectSpace.dump: Include string coderange
I suspect that some shared pages are invalidated because some static string don't have their coderange set eagerly. So the first time they are scanned, the entire memory page is invalidated. Being able to see the coderange in `ObjectSpace` would help debug this. And in addition `dump` currently call `is_broken_string()` and `is_ascii_string()` which both end up scanning the string and assigning coderange. I think it's undesirable as `dump` should be read only.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/6076
Diffstat (limited to 'ext/objspace/objspace_dump.c')
-rw-r--r--ext/objspace/objspace_dump.c47
1 files changed, 42 insertions, 5 deletions
diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c
index b570acbd95..18e052f210 100644
--- a/ext/objspace/objspace_dump.c
+++ b/ext/objspace/objspace_dump.c
@@ -313,6 +313,16 @@ reachable_object_i(VALUE ref, void *data)
dc->cur_obj_references++;
}
+bool
+dump_string_ascii_only(const char *str, long size) {
+ for (long i = 0; i < size; i++) {
+ if (str[i] & 0x80) {
+ return false;
+ }
+ }
+ return true;
+}
+
static void
dump_append_string_content(struct dump_config *dc, VALUE obj)
{
@@ -323,9 +333,17 @@ dump_append_string_content(struct dump_config *dc, VALUE obj)
dump_append_sizet(dc, rb_str_capacity(obj));
}
- if (is_ascii_string(obj)) {
- dump_append(dc, ", \"value\":");
- dump_append_string_value(dc, obj);
+ if (RSTRING_LEN(obj) && rb_enc_asciicompat(rb_enc_from_index(ENCODING_GET(obj)))) {
+ int cr = ENC_CODERANGE(obj);
+ if (cr == RUBY_ENC_CODERANGE_UNKNOWN) {
+ if (dump_string_ascii_only(RSTRING_PTR(obj), RSTRING_LEN(obj))) {
+ cr = RUBY_ENC_CODERANGE_7BIT;
+ }
+ }
+ if (cr == RUBY_ENC_CODERANGE_7BIT) {
+ dump_append(dc, ", \"value\":");
+ dump_append_string_value(dc, obj);
+ }
}
}
@@ -389,8 +407,6 @@ dump_object(VALUE obj, struct dump_config *dc)
case T_STRING:
if (STR_EMBED_P(obj))
dump_append(dc, ", \"embedded\":true");
- if (is_broken_string(obj))
- dump_append(dc, ", \"broken\":true");
if (FL_TEST(obj, RSTRING_FSTR))
dump_append(dc, ", \"fstring\":true");
if (STR_SHARED_P(obj))
@@ -403,6 +419,27 @@ dump_object(VALUE obj, struct dump_config *dc)
dump_append(dc, rb_enc_name(rb_enc_from_index(ENCODING_GET(obj))));
dump_append(dc, "\"");
}
+
+ dump_append(dc, ", \"coderange\":\"");
+ switch (RB_ENC_CODERANGE(obj)) {
+ case RUBY_ENC_CODERANGE_UNKNOWN:
+ dump_append(dc, "unknown");
+ break;
+ case RUBY_ENC_CODERANGE_7BIT:
+ dump_append(dc, "7bit");
+ break;
+ case RUBY_ENC_CODERANGE_VALID:
+ dump_append(dc, "valid");
+ break;
+ case RUBY_ENC_CODERANGE_BROKEN:
+ dump_append(dc, "broken");
+ break;
+ }
+ dump_append(dc, "\"");
+
+ if (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_BROKEN)
+ dump_append(dc, ", \"broken\":true");
+
break;
case T_HASH: