diff options
| author | Takashi Kokubun <takashikkbn@gmail.com> | 2026-02-09 13:44:42 -0800 |
|---|---|---|
| committer | Takashi Kokubun <takashikkbn@gmail.com> | 2026-02-09 13:44:42 -0800 |
| commit | 306930ae1ac62fb3b7f96581f4a6e9ab4c083e84 (patch) | |
| tree | ba2fa6f33143ef066aa871f8ba27771eec278b46 | |
| parent | c6d9ba58c50fd9c07023453d71cb55b4b9c36957 (diff) | |
merge revision(s) 78b7646bdb91285873ac26bca060591e06c45afe, b4a62a1ca949d93332ad8bce0fcc273581160cc5: [Backport #21842]
[PATCH] [Bug #21842] Let `rb_interned_str` return US-ASCII if possible
[PATCH] [DOC] Update docs for rb_interned_str and related functions (#15897)
Related to [Bug #21842].
* rb_interned_str: document what decides whether the returned string is
in US-ASCII or BINARY encoding.
* rb_interned_str_cstr: include the same description as rb_interned_str
for the encoding. This one was still missing the update for US-ASCII
and erroneously said the returned string was always in BINARY encoding.
* rb_str_to_interned_str: document how the encoding of the result is
defined.
Co-authored-by: Herwin <herwinw@users.noreply.github.com>
| -rw-r--r-- | include/ruby/internal/intern/string.h | 14 | ||||
| -rw-r--r-- | string.c | 10 | ||||
| -rw-r--r-- | test/-ext-/string/test_interned_str.rb | 5 |
3 files changed, 22 insertions, 7 deletions
diff --git a/include/ruby/internal/intern/string.h b/include/ruby/internal/intern/string.h index 75a28143fb..8bd1ffcfb4 100644 --- a/include/ruby/internal/intern/string.h +++ b/include/ruby/internal/intern/string.h @@ -412,8 +412,8 @@ VALUE rb_utf8_str_new_static(const char *ptr, long len); /** * Identical to rb_interned_str(), except it takes a Ruby's string instead of - * C's. It can also be seen as a routine identical to rb_str_new_shared(), - * except it returns an infamous "f"string. + * C's and preserves its encoding. It can also be seen as a routine identical + * to rb_str_new_shared(), except it returns an infamous "f"string. * * @param[in] str An object of ::RString. * @return An instance of ::rb_cString, either cached or allocated, which @@ -444,8 +444,9 @@ VALUE rb_str_to_interned_str(VALUE str); * terminating NUL character. * @exception rb_eArgError `len` is negative. * @return A found or created instance of ::rb_cString, of `len` bytes - * length, of "binary" encoding, whose contents are identical to - * that of `ptr`. + * length, whose contents are identical to that of `ptr`. Its + * encoding will be US-ASCII if all bytes are lower ASCII, BINARY + * otherwise. * @pre At least `len` bytes of continuous memory region shall be * accessible via `ptr`. */ @@ -461,8 +462,9 @@ RBIMPL_ATTR_NONNULL(()) * * @param[in] ptr A C string. * @exception rb_eNoMemError Failed to allocate memory. - * @return An instance of ::rb_cString, of "binary" encoding, whose - * contents are verbatim copy of `ptr`. + * @return An instance of ::rb_cString, whose contents are verbatim copy + * of `ptr`. Its encoding will be US-ASCII if all bytes are lower + * ASCII, BINARY otherwise. * @pre `ptr` must not be a null pointer. 
*/ VALUE rb_interned_str_cstr(const char *ptr); @@ -12705,7 +12705,15 @@ VALUE rb_interned_str(const char *ptr, long len) { struct RString fake_str = {RBASIC_INIT}; - return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), true, false); + int encidx = ENCINDEX_US_ASCII; + int coderange = ENC_CODERANGE_7BIT; + if (len > 0 && search_nonascii(ptr, ptr + len)) { + encidx = ENCINDEX_ASCII_8BIT; + coderange = ENC_CODERANGE_VALID; + } + VALUE str = setup_fake_str(&fake_str, ptr, len, encidx); + ENC_CODERANGE_SET(str, coderange); + return register_fstring(str, true, false); } VALUE diff --git a/test/-ext-/string/test_interned_str.rb b/test/-ext-/string/test_interned_str.rb index 340dba41e8..a81cb59aa5 100644 --- a/test/-ext-/string/test_interned_str.rb +++ b/test/-ext-/string/test_interned_str.rb @@ -9,4 +9,9 @@ class Test_RbInternedStr < Test::Unit::TestCase src << "b" * 20 assert_equal "a" * 20, interned_str end + + def test_interned_str_encoding + src = :ascii.name + assert_equal Encoding::US_ASCII, Bug::String.rb_interned_str_dup(src).encoding + end end |
