summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- parse.y 18
-rw-r--r-- ruby_parser.c 2
-rw-r--r-- rubyparser.h 2
-rw-r--r-- spec/ruby/language/hash_spec.rb 28
-rw-r--r-- spec/ruby/language/symbol_spec.rb 14
-rw-r--r-- test/ruby/test_syntax.rb 4
-rw-r--r-- universal_parser.c 2
7 files changed, 49 insertions, 21 deletions
diff --git a/parse.y b/parse.y
index dd96d6136e..3eadb0bdf5 100644
--- a/parse.y
+++ b/parse.y
@@ -12846,9 +12846,22 @@ new_defined(struct parser_params *p, NODE *expr, const YYLTYPE *loc)
return NEW_DEFINED(n, loc);
}
+static VALUE
+str_to_sym_check(struct parser_params *p, VALUE lit, const YYLTYPE *loc)
+{
+ if (rb_enc_str_coderange(lit) == ENC_CODERANGE_BROKEN) {
+ yyerror1(loc, "invalid symbol");
+ lit = STR_NEW0();
+ }
+
+ return lit;
+}
+
static NODE*
symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
{
+ VALUE lit;
+
enum node_type type = nd_type(symbol);
switch (type) {
case NODE_DSTR:
@@ -12856,7 +12869,8 @@ symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
break;
case NODE_STR:
nd_set_type(symbol, NODE_LIT);
- RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(symbol)->nd_lit = rb_str_intern(RNODE_LIT(symbol)->nd_lit));
+ lit = str_to_sym_check(p, RNODE_LIT(symbol)->nd_lit, &RNODE(symbol)->nd_loc);
+ RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(symbol)->nd_lit = rb_str_intern(lit));
break;
default:
compile_error(p, "unexpected node as symbol: %s", parser_node_name(type));
@@ -14553,7 +14567,7 @@ dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc)
nd_set_loc(node, loc);
break;
case NODE_STR:
- lit = RNODE_STR(node)->nd_lit;
+ lit = str_to_sym_check(p, RNODE_STR(node)->nd_lit, &RNODE(node)->nd_loc);
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_STR(node)->nd_lit = ID2SYM(rb_intern_str(lit)));
nd_set_type(node, NODE_LIT);
nd_set_loc(node, loc);
diff --git a/ruby_parser.c b/ruby_parser.c
index 17fc352bed..10286b51e7 100644
--- a/ruby_parser.c
+++ b/ruby_parser.c
@@ -679,12 +679,14 @@ rb_parser_config_initialize(rb_parser_config_t *config)
config->enc_isspace = enc_isspace;
config->enc_coderange_7bit = ENC_CODERANGE_7BIT;
config->enc_coderange_unknown = ENC_CODERANGE_UNKNOWN;
+ config->enc_coderange_broken = ENC_CODERANGE_BROKEN;
config->enc_compatible = enc_compatible;
config->enc_from_encoding = enc_from_encoding;
config->encoding_get = encoding_get;
config->encoding_set = encoding_set;
config->encoding_is_ascii8bit = encoding_is_ascii8bit;
config->usascii_encoding = usascii_encoding;
+ config->enc_str_coderange = rb_enc_str_coderange;
config->ractor_make_shareable = rb_ractor_make_shareable;
diff --git a/rubyparser.h b/rubyparser.h
index 47b3b9c10b..d1f499a060 100644
--- a/rubyparser.h
+++ b/rubyparser.h
@@ -1295,12 +1295,14 @@ typedef struct rb_parser_config_struct {
int (*enc_isspace)(OnigCodePoint c, rb_encoding *enc);
int enc_coderange_7bit;
int enc_coderange_unknown;
+ int enc_coderange_broken;
rb_encoding *(*enc_compatible)(VALUE str1, VALUE str2);
VALUE (*enc_from_encoding)(rb_encoding *enc);
int (*encoding_get)(VALUE obj);
void (*encoding_set)(VALUE obj, int encindex);
int (*encoding_is_ascii8bit)(VALUE obj);
rb_encoding *(*usascii_encoding)(void);
+ int (*enc_str_coderange)(VALUE str);
/* Ractor */
VALUE (*ractor_make_shareable)(VALUE obj);
diff --git a/spec/ruby/language/hash_spec.rb b/spec/ruby/language/hash_spec.rb
index 6ac382c42c..1a5b5d0a71 100644
--- a/spec/ruby/language/hash_spec.rb
+++ b/spec/ruby/language/hash_spec.rb
@@ -191,20 +191,22 @@ describe "Hash literal" do
usascii_hash.keys.first.encoding.should == Encoding::US_ASCII
end
- it "raises an EncodingError at parse time when Symbol key with invalid bytes" do
- ScratchPad.record []
- -> {
- eval 'ScratchPad << 1; {:"\xC3" => 1}'
- }.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
- ScratchPad.recorded.should == []
- end
+ ruby_bug "#20280", ""..."3.3" do
+ it "raises a SyntaxError at parse time when Symbol key with invalid bytes" do
+ ScratchPad.record []
+ -> {
+ eval 'ScratchPad << 1; {:"\xC3" => 1}'
+ }.should raise_error(SyntaxError, /invalid symbol/)
+ ScratchPad.recorded.should == []
+ end
- it "raises an EncodingError at parse time when Symbol key with invalid bytes and 'key: value' syntax used" do
- ScratchPad.record []
- -> {
- eval 'ScratchPad << 1; {"\xC3": 1}'
- }.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
- ScratchPad.recorded.should == []
+ it "raises a SyntaxError at parse time when Symbol key with invalid bytes and 'key: value' syntax used" do
+ ScratchPad.record []
+ -> {
+ eval 'ScratchPad << 1; {"\xC3": 1}'
+ }.should raise_error(SyntaxError, /invalid symbol/)
+ ScratchPad.recorded.should == []
+ end
end
end
diff --git a/spec/ruby/language/symbol_spec.rb b/spec/ruby/language/symbol_spec.rb
index 7c1898efc2..ea6d541b8b 100644
--- a/spec/ruby/language/symbol_spec.rb
+++ b/spec/ruby/language/symbol_spec.rb
@@ -96,11 +96,13 @@ describe "A Symbol literal" do
%I{a b #{"c"}}.should == [:a, :b, :c]
end
- it "raises an EncodingError at parse time when Symbol with invalid bytes" do
- ScratchPad.record []
- -> {
- eval 'ScratchPad << 1; :"\xC3"'
- }.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
- ScratchPad.recorded.should == []
+ ruby_bug "#20280", ""..."3.3" do
+ it "raises a SyntaxError at parse time when Symbol with invalid bytes" do
+ ScratchPad.record []
+ -> {
+ eval 'ScratchPad << 1; :"\xC3"'
+ }.should raise_error(SyntaxError, /invalid symbol/)
+ ScratchPad.recorded.should == []
+ end
end
end
diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb
index a7a25ef3c0..cc332a9412 100644
--- a/test/ruby/test_syntax.rb
+++ b/test/ruby/test_syntax.rb
@@ -1354,6 +1354,10 @@ eom
assert_valid_syntax 'p :foo, {proc do end => proc do end, b: proc do end}', bug13073
end
+ def test_invalid_encoding_symbol
+ assert_syntax_error('{"\xC3": 1}', "invalid symbol")
+ end
+
def test_do_after_local_variable
obj = Object.new
def obj.m; yield; end
diff --git a/universal_parser.c b/universal_parser.c
index 14759ad56f..29cc7de6ac 100644
--- a/universal_parser.c
+++ b/universal_parser.c
@@ -283,12 +283,14 @@ struct rb_imemo_tmpbuf_struct {
#define rb_enc_isspace p->config->enc_isspace
#define ENC_CODERANGE_7BIT p->config->enc_coderange_7bit
#define ENC_CODERANGE_UNKNOWN p->config->enc_coderange_unknown
+#define ENC_CODERANGE_BROKEN p->config->enc_coderange_broken
#define rb_enc_compatible p->config->enc_compatible
#define rb_enc_from_encoding p->config->enc_from_encoding
#define ENCODING_GET p->config->encoding_get
#define ENCODING_SET p->config->encoding_set
#define ENCODING_IS_ASCII8BIT p->config->encoding_is_ascii8bit
#define rb_usascii_encoding p->config->usascii_encoding
+#define rb_enc_str_coderange p->config->enc_str_coderange
#define rb_ractor_make_shareable p->config->ractor_make_shareable