summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nobuyoshi Nakada <nobu@ruby-lang.org> 2021-09-29 19:59:31 +0900
committer: Nobuyoshi Nakada <nobu@ruby-lang.org> 2021-10-01 20:28:44 +0900
commit: 842b0008c132dd587f09766a228041afb7fed24f (patch)
tree: b7324862c67cfc7e28c0d4e4866b77f548019e6a
parent: 409dbc951b9875d27bd73748c88e15386473cffb (diff)
Skip broken strings as the locale encoding
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/4915
-rw-r--r-- internal/string.h 1
-rw-r--r-- ruby.c 11
-rw-r--r-- string.c 6
3 files changed, 14 insertions, 4 deletions
diff --git a/internal/string.h b/internal/string.h
index 546a0ac9a7..d010669ca8 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -42,6 +42,7 @@ size_t rb_str_memsize(VALUE);
char *rb_str_to_cstr(VALUE str);
const char *ruby_escaped_char(int c);
void rb_str_make_independent(VALUE str);
+int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
static inline bool STR_EMBED_P(VALUE str);
static inline bool STR_SHARED_P(VALUE str);
diff --git a/ruby.c b/ruby.c
index 3358068bbb..818161710c 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1680,7 +1680,11 @@ tty_enabled(void)
static VALUE
copy_str(VALUE str, rb_encoding *enc, bool intern)
{
- if (!intern) return rb_enc_associate(rb_str_dup(str), enc);
+ if (!intern) {
+ if (rb_enc_str_coderange_scan(str, enc) == ENC_CODERANGE_BROKEN)
+ return 0;
+ return rb_enc_associate(rb_str_dup(str), enc);
+ }
return rb_enc_interned_str(RSTRING_PTR(str), RSTRING_LEN(str), enc);
}
@@ -1916,7 +1920,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
if (newpath == path) continue;
path = newpath;
#else
- path = copy_str(path, lenc, !mark);
+ if (!(path = copy_str(path, lenc, !mark))) continue;
#endif
if (mark) rb_ivar_set(path, id_initial_load_path_mark, path);
if (!modifiable) {
@@ -1934,8 +1938,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
bool modified = false;
for (long i = loaded_before_enc; i < RARRAY_LEN(loaded_features); ++i) {
VALUE path = RARRAY_AREF(loaded_features, i);
- if (rb_enc_get(path) == IF_UTF8_PATH(uenc, lenc)) continue;
- path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true);
+ if (!(path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true))) continue;
modified = true;
RARRAY_ASET(loaded_features, i, path);
}
diff --git a/string.c b/string.c
index 299d506004..78e2ba923f 100644
--- a/string.c
+++ b/string.c
@@ -725,6 +725,12 @@ enc_coderange_scan(VALUE str, rb_encoding *enc, int encidx)
}
int
+rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc)
+{
+ return enc_coderange_scan(str, enc, rb_enc_to_index(enc));
+}
+
+int
rb_enc_str_coderange(VALUE str)
{
int cr = ENC_CODERANGE(str);