[ruby/json] parser.c: Extract `json_string_cacheable_p`

We can share that logic between the two functions (`rstring_cache_fetch` and `rsymbol_cache_fetch`). https://github.com/ruby/json/commit/ac580458e0
author: Jean Boussier <jean.boussier@gmail.com> 2025-11-03 11:32:58 +0100
committer: git <svn-admin@ruby-lang.org> 2025-11-03 10:33:44 +0000
commit: 0832e954c9ef181563be0e70ba089ed0a8c0d02e (patch)
tree: 27785557d2f2a846dd365e627c35afbe00b4034e
parent: ea0a411f2552ec89c7121ceeee7e23fbafc7bab6 (diff)
1 files changed, 10 insertions, 27 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index caa4f9fa05..1e83dad915 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -90,19 +90,6 @@ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rs
 
 static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
 {
-    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
-        // Common names aren't likely to be very long. So we just don't
-        // cache names above an arbitrary threshold.
-        return Qfalse;
-    }
-
-    if (RB_UNLIKELY(!rb_isalpha((unsigned char)str[0]))) {
-        // Simple heuristic, if the first character isn't a letter,
-        // we're much less likely to see this string again.
-        // We mostly want to cache strings that are likely to be repeated.
-        return Qfalse;
-    }
-
     int low = 0;
     int high = cache->length - 1;
 
@@ -130,19 +117,6 @@ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const lon
 
 static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
 {
-    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
-        // Common names aren't likely to be very long. So we just don't
-        // cache names above an arbitrary threshold.
-        return Qfalse;
-    }
-
-    if (RB_UNLIKELY(!rb_isalpha((unsigned char)str[0]))) {
-        // Simple heuristic, if the first character isn't a letter,
-        // we're much less likely to see this string again.
-        // We mostly want to cache strings that are likely to be repeated.
-        return Qfalse;
-    }
-
     int low = 0;
     int high = cache->length - 1;
 
@@ -600,11 +574,20 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
     return result;
 }
 
+static inline bool json_string_cacheable_p(const char *string, size_t length)
+{
+    // Returns true when a string is worth looking up in the cache; we mostly want to cache strings that are likely to be repeated.
+    // Simple heuristics:
+    //  - Common names aren't likely to be very long, so we don't cache names longer than JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH (an arbitrary threshold).
+    //  - If the first character isn't a letter, we're much less likely to see this string again. Note: string[0] is read whenever the length check passes, even for length 0.
+    return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
+}
+
 static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
 {
     size_t bufferSize = stringEnd - string;
 
-    if (is_name && state->in_array) {
+    if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
         VALUE cached_key;
         if (RB_UNLIKELY(symbolize)) {
             cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
author	Jean Boussier <jean.boussier@gmail.com>	2025-11-03 11:32:58 +0100
committer	git <svn-admin@ruby-lang.org>	2025-11-03 10:33:44 +0000
commit	0832e954c9ef181563be0e70ba089ed0a8c0d02e (patch)
tree	27785557d2f2a846dd365e627c35afbe00b4034e
parent	ea0a411f2552ec89c7121ceeee7e23fbafc7bab6 (diff)