diff options
| -rw-r--r-- | include/ruby/internal/intern/string.h | 14 | ||||
| -rw-r--r-- | string.c | 10 | ||||
| -rw-r--r-- | test/-ext-/string/test_interned_str.rb | 5 |
3 files changed, 22 insertions, 7 deletions
diff --git a/include/ruby/internal/intern/string.h b/include/ruby/internal/intern/string.h index 75a28143fb..8bd1ffcfb4 100644 --- a/include/ruby/internal/intern/string.h +++ b/include/ruby/internal/intern/string.h @@ -412,8 +412,8 @@ VALUE rb_utf8_str_new_static(const char *ptr, long len); /** * Identical to rb_interned_str(), except it takes a Ruby's string instead of - * C's. It can also be seen as a routine identical to rb_str_new_shared(), - * except it returns an infamous "f"string. + * C's and preserves its encoding. It can also be seen as a routine identical + * to rb_str_new_shared(), except it returns an infamous "f"string. * * @param[in] str An object of ::RString. * @return An instance of ::rb_cString, either cached or allocated, which @@ -444,8 +444,9 @@ VALUE rb_str_to_interned_str(VALUE str); * terminating NUL character. * @exception rb_eArgError `len` is negative. * @return A found or created instance of ::rb_cString, of `len` bytes - * length, of "binary" encoding, whose contents are identical to - * that of `ptr`. + * length, whose contents are identical to that of `ptr`. Its + * encoding will be US-ASCII if all bytes are lower ASCII, BINARY + * otherwise. * @pre At least `len` bytes of continuous memory region shall be * accessible via `ptr`. */ @@ -461,8 +462,9 @@ RBIMPL_ATTR_NONNULL(()) * * @param[in] ptr A C string. * @exception rb_eNoMemError Failed to allocate memory. - * @return An instance of ::rb_cString, of "binary" encoding, whose - * contents are verbatim copy of `ptr`. + * @return An instance of ::rb_cString, whose contents are verbatim copy + * of `ptr`. Its encoding will be US-ASCII if all bytes are lower + * ASCII, BINARY otherwise. * @pre `ptr` must not be a null pointer. */ VALUE rb_interned_str_cstr(const char *ptr); @@ -12705,7 +12705,15 @@ VALUE rb_interned_str(const char *ptr, long len) { struct RString fake_str = {RBASIC_INIT}; - return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), true, false); + int encidx = ENCINDEX_US_ASCII; + int coderange = ENC_CODERANGE_7BIT; + if (len > 0 && search_nonascii(ptr, ptr + len)) { + encidx = ENCINDEX_ASCII_8BIT; + coderange = ENC_CODERANGE_VALID; + } + VALUE str = setup_fake_str(&fake_str, ptr, len, encidx); + ENC_CODERANGE_SET(str, coderange); + return register_fstring(str, true, false); } VALUE diff --git a/test/-ext-/string/test_interned_str.rb b/test/-ext-/string/test_interned_str.rb index 340dba41e8..a81cb59aa5 100644 --- a/test/-ext-/string/test_interned_str.rb +++ b/test/-ext-/string/test_interned_str.rb @@ -9,4 +9,9 @@ class Test_RbInternedStr < Test::Unit::TestCase src << "b" * 20 assert_equal "a" * 20, interned_str end + + def test_interned_str_encoding + src = :ascii.name + assert_equal Encoding::US_ASCII, Bug::String.rb_interned_str_dup(src).encoding + end end |
