summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
author: Jean Boussier <jean.boussier@gmail.com> 2022-06-29 10:35:41 +0200
committer: Jean Boussier <jean.boussier@gmail.com> 2022-07-19 10:41:40 +0200
commit: 0ae8dbbee0631721e820e29d0a613485cdcb3507 (patch)
tree: ca3159a389e5480c1135167f7282363eb6bfdb1f /string.c
parent: ee1d2b276abbf6b03d862952a949302672a470a9 (diff)
rb_str_buf_append: fastpath to str_buf_cat
If the LHS is ASCII-compatible and the RHS is 7BIT, we can concatenate directly without being concerned about anything else.

Benchmark:

```
compare-ruby: ruby 3.2.0dev (2022-07-12T15:01:11Z master 71aec68566) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-13T10:13:53Z faster-buffer-conc.. a04c10476d) [arm64-darwin21]
warming up...

|                      |compare-ruby|built-ruby|
|:---------------------|-----------:|---------:|
|binary_append_utf8    |    385.315k|  573.663k|
|                      |           -|     1.49x|
|binary_append_binary  |    446.579k|  574.898k|
|                      |           -|     1.29x|
|utf8_append_utf8      |    430.936k|  573.394k|
|                      |           -|     1.33x|
```

Note that in the benchmark, the RHS always has a precomputed coderange, so the benchmark never enters the slow path of having to scan the RHS. However, it's extremely likely that we'll end up scanning it anyway in rb_enc_cr_str_buf_cat.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/6120
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 22
1 files changed, 19 insertions, 3 deletions
diff --git a/string.c b/string.c
index 4cca9b1eac..d6c79c7988 100644
--- a/string.c
+++ b/string.c
@@ -3303,12 +3303,28 @@ rb_str_buf_cat_ascii(VALUE str, const char *ptr)
}
}
+static inline bool
+str_enc_fastpath(VALUE str)
+{
+ // The overwhelming majority of strings are in one of these 3 encodings.
+ switch (ENCODING_GET_INLINED(str)) {
+ case ENCINDEX_ASCII_8BIT:
+ case ENCINDEX_UTF_8:
+ case ENCINDEX_US_ASCII:
+ return true;
+ default:
+ return false;
+ }
+}
+
VALUE
rb_str_buf_append(VALUE str, VALUE str2)
{
- int str2_cr;
-
- str2_cr = ENC_CODERANGE(str2);
+ int str2_cr = rb_enc_str_coderange(str2);
+ if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) {
+ str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2));
+ return str;
+ }
rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
ENCODING_GET(str2), str2_cr, &str2_cr);