diff options
Diffstat (limited to 'benchmark/string_scrub.yml')
| -rw-r--r-- | benchmark/string_scrub.yml | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/benchmark/string_scrub.yml b/benchmark/string_scrub.yml
new file mode 100644
index 0000000000..4b5faaad8e
--- /dev/null
+++ b/benchmark/string_scrub.yml
@@ -0,0 +1,48 @@
+prelude: |  # Ruby setup code: builds the fixture strings that the benchmark entries below read
+
+  STRING_SIZE = 1024  # length, in characters, requested from duplicate_to_length for every fixture
+  def duplicate_to_length(str, target_length)  # repeat and/or truncate str to target_length characters (str must be non-empty: the repeat count divides by str.length)
+    return "" if target_length <= 0
+    return str[0, target_length] if str.length >= target_length  # already long enough: only truncate
+
+    (str * ((target_length / str.length) + 1))[0, target_length]  # over-repeat by one extra copy, then cut to size
+  end
+  base = "Hello \u{1f600} world! \u{00e9}\u{00f1}"  # mixes ASCII with multibyte characters (U+1F600, U+00E9, U+00F1)
+  padding = duplicate_to_length(base, STRING_SIZE)
+
+  valid_utf8 = (padding.b + "OK".b).force_encoding("UTF-8")  # concatenated as binary, then relabelled as UTF-8
+  valid_utf8.valid_encoding?  # result unused; presumably primes the cached coderange so this fixture is "known" valid - NOTE(review): confirm
+  unknown_but_valid_utf8 = valid_utf8.dup.b.force_encoding("UTF-8")  # same bytes via a binary round-trip; presumably leaves the coderange unscanned - NOTE(review): confirm
+  invalid_utf8 = (padding.b + "\x80\xFF".b).force_encoding("UTF-8")  # trailing bytes 0x80 0xFF are not valid UTF-8
+  invalid_utf8.valid_encoding?  # result unused; presumably primes the cached coderange so this fixture is "known" invalid - NOTE(review): confirm
+  unknown_but_invalid_utf8 = (padding.b + "\x80\xFF".b).force_encoding("UTF-8")  # same construction as invalid_utf8, but valid_encoding? is never called on it
+
+  worst_case_utf8 = duplicate_to_length("\u{1f600}\u{00e9}\u{00f1}", STRING_SIZE).b.force_encoding("UTF-8")  # built only from multibyte characters, no ASCII; only used to derive the two fixtures below
+
+  unknown_but_valid_utf8_worst_case = worst_case_utf8.dup.b.force_encoding("UTF-8")  # all-multibyte content, valid_encoding? never called
+  unknown_but_invalid_utf8_worst_case = (worst_case_utf8.b + "\x80\xFF".b).force_encoding("UTF-8")  # all-multibyte content plus the invalid 0x80 0xFF tail
+
+benchmark:  # every entry copies one prelude fixture with dup and calls scrub! on the copy, so the fixture itself is not mutated
+  scrub_known_valid: |
+    string = valid_utf8.dup
+    string.scrub!
+
+  scrub_known_invalid: |
+    string = invalid_utf8.dup
+    string.scrub!
+
+  scrub_unknown_but_valid_coderange: |
+    string = unknown_but_valid_utf8.dup
+    string.scrub!
+
+  scrub_unknown_and_invalid_coderange: |
+    string = unknown_but_invalid_utf8.dup
+    string.scrub!
+
+  scrub_unknown_but_valid_coderange_worst_case: |
+    string = unknown_but_valid_utf8_worst_case.dup
+    string.scrub!
+
+  scrub_unknown_and_invalid_coderange_worst_case: |
+    string = unknown_but_invalid_utf8_worst_case.dup
+    string.scrub!
\ No newline at end of file
