diff options
| -rw-r--r-- | parse.y | 18 | ||||
| -rw-r--r-- | ruby_parser.c | 2 | ||||
| -rw-r--r-- | rubyparser.h | 2 | ||||
| -rw-r--r-- | spec/ruby/language/hash_spec.rb | 28 | ||||
| -rw-r--r-- | spec/ruby/language/symbol_spec.rb | 14 | ||||
| -rw-r--r-- | test/ruby/test_syntax.rb | 4 | ||||
| -rw-r--r-- | universal_parser.c | 2 |
7 files changed, 49 insertions, 21 deletions
@@ -12846,9 +12846,22 @@ new_defined(struct parser_params *p, NODE *expr, const YYLTYPE *loc) return NEW_DEFINED(n, loc); } +static VALUE +str_to_sym_check(struct parser_params *p, VALUE lit, const YYLTYPE *loc) +{ + if (rb_enc_str_coderange(lit) == ENC_CODERANGE_BROKEN) { + yyerror1(loc, "invalid symbol"); + lit = STR_NEW0(); + } + + return lit; +} + static NODE* symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol) { + VALUE lit; + enum node_type type = nd_type(symbol); switch (type) { case NODE_DSTR: @@ -12856,7 +12869,8 @@ symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol) break; case NODE_STR: nd_set_type(symbol, NODE_LIT); - RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(symbol)->nd_lit = rb_str_intern(RNODE_LIT(symbol)->nd_lit)); + lit = str_to_sym_check(p, RNODE_LIT(symbol)->nd_lit, &RNODE(symbol)->nd_loc); + RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(symbol)->nd_lit = rb_str_intern(lit)); break; default: compile_error(p, "unexpected node as symbol: %s", parser_node_name(type)); @@ -14553,7 +14567,7 @@ dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc) nd_set_loc(node, loc); break; case NODE_STR: - lit = RNODE_STR(node)->nd_lit; + lit = str_to_sym_check(p, RNODE_STR(node)->nd_lit, &RNODE(node)->nd_loc); RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_STR(node)->nd_lit = ID2SYM(rb_intern_str(lit))); nd_set_type(node, NODE_LIT); nd_set_loc(node, loc); diff --git a/ruby_parser.c b/ruby_parser.c index 17fc352bed..10286b51e7 100644 --- a/ruby_parser.c +++ b/ruby_parser.c @@ -679,12 +679,14 @@ rb_parser_config_initialize(rb_parser_config_t *config) config->enc_isspace = enc_isspace; config->enc_coderange_7bit = ENC_CODERANGE_7BIT; config->enc_coderange_unknown = ENC_CODERANGE_UNKNOWN; + config->enc_coderange_broken = ENC_CODERANGE_BROKEN; config->enc_compatible = enc_compatible; config->enc_from_encoding = enc_from_encoding; config->encoding_get = encoding_get; config->encoding_set = encoding_set; config->encoding_is_ascii8bit = encoding_is_ascii8bit; config->usascii_encoding = usascii_encoding; + config->enc_str_coderange = rb_enc_str_coderange; config->ractor_make_shareable = rb_ractor_make_shareable; diff --git a/rubyparser.h b/rubyparser.h index 47b3b9c10b..d1f499a060 100644 --- a/rubyparser.h +++ b/rubyparser.h @@ -1295,12 +1295,14 @@ typedef struct rb_parser_config_struct { int (*enc_isspace)(OnigCodePoint c, rb_encoding *enc); int enc_coderange_7bit; int enc_coderange_unknown; + int enc_coderange_broken; rb_encoding *(*enc_compatible)(VALUE str1, VALUE str2); VALUE (*enc_from_encoding)(rb_encoding *enc); int (*encoding_get)(VALUE obj); void (*encoding_set)(VALUE obj, int encindex); int (*encoding_is_ascii8bit)(VALUE obj); rb_encoding *(*usascii_encoding)(void); + int (*enc_str_coderange)(VALUE str); /* Ractor */ VALUE (*ractor_make_shareable)(VALUE obj); diff --git a/spec/ruby/language/hash_spec.rb b/spec/ruby/language/hash_spec.rb index 6ac382c42c..1a5b5d0a71 100644 --- a/spec/ruby/language/hash_spec.rb +++ b/spec/ruby/language/hash_spec.rb @@ -191,20 +191,22 @@ describe "Hash literal" do usascii_hash.keys.first.encoding.should == Encoding::US_ASCII end - it "raises an EncodingError at parse time when Symbol key with invalid bytes" do - ScratchPad.record [] - -> { - eval 'ScratchPad << 1; {:"\xC3" => 1}' - }.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"') - ScratchPad.recorded.should == [] - end + ruby_bug "#20280", ""..."3.3" do + it "raises a SyntaxError at parse time when Symbol key with invalid bytes" do + ScratchPad.record [] + -> { + eval 'ScratchPad << 1; {:"\xC3" => 1}' + }.should raise_error(SyntaxError, /invalid symbol/) + ScratchPad.recorded.should == [] + end - it "raises an EncodingError at parse time when Symbol key with invalid bytes and 'key: value' syntax used" do - ScratchPad.record [] - -> { - eval 'ScratchPad << 1; {"\xC3": 1}' - }.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"') - ScratchPad.recorded.should == [] + it "raises a SyntaxError at parse time when Symbol key with invalid bytes and 'key: value' syntax used" do + ScratchPad.record [] + -> { + eval 'ScratchPad << 1; {"\xC3": 1}' + }.should raise_error(SyntaxError, /invalid symbol/) + ScratchPad.recorded.should == [] + end end end diff --git a/spec/ruby/language/symbol_spec.rb b/spec/ruby/language/symbol_spec.rb index 7c1898efc2..ea6d541b8b 100644 --- a/spec/ruby/language/symbol_spec.rb +++ b/spec/ruby/language/symbol_spec.rb @@ -96,11 +96,13 @@ describe "A Symbol literal" do %I{a b #{"c"}}.should == [:a, :b, :c] end - it "raises an EncodingError at parse time when Symbol with invalid bytes" do - ScratchPad.record [] - -> { - eval 'ScratchPad << 1; :"\xC3"' - }.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"') - ScratchPad.recorded.should == [] + ruby_bug "#20280", ""..."3.3" do + it "raises a SyntaxError at parse time when Symbol with invalid bytes" do + ScratchPad.record [] + -> { + eval 'ScratchPad << 1; :"\xC3"' + }.should raise_error(SyntaxError, /invalid symbol/) + ScratchPad.recorded.should == [] + end end end diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb index a7a25ef3c0..cc332a9412 100644 --- a/test/ruby/test_syntax.rb +++ b/test/ruby/test_syntax.rb @@ -1354,6 +1354,10 @@ eom assert_valid_syntax 'p :foo, {proc do end => proc do end, b: proc do end}', bug13073 end + def test_invalid_encoding_symbol + assert_syntax_error('{"\xC3": 1}', "invalid symbol") + end + def test_do_after_local_variable obj = Object.new def obj.m; yield; end diff --git a/universal_parser.c b/universal_parser.c index 14759ad56f..29cc7de6ac 100644 --- a/universal_parser.c +++ b/universal_parser.c @@ -283,12 +283,14 @@ struct rb_imemo_tmpbuf_struct { #define rb_enc_isspace p->config->enc_isspace #define ENC_CODERANGE_7BIT p->config->enc_coderange_7bit #define ENC_CODERANGE_UNKNOWN p->config->enc_coderange_unknown +#define ENC_CODERANGE_BROKEN p->config->enc_coderange_broken #define rb_enc_compatible p->config->enc_compatible #define rb_enc_from_encoding p->config->enc_from_encoding #define ENCODING_GET p->config->encoding_get #define ENCODING_SET p->config->encoding_set #define ENCODING_IS_ASCII8BIT p->config->encoding_is_ascii8bit #define rb_usascii_encoding p->config->usascii_encoding +#define rb_enc_str_coderange p->config->enc_str_coderange #define rb_ractor_make_shareable p->config->ractor_make_shareable |
