summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Zhu <peter@peterzhu.ca> 2024-01-26 15:18:21 -0500
committer Kevin Newton <kddnewton@gmail.com> 2024-01-29 13:56:54 -0500
commit f634c7a268a175a917bdb8542b4495219196a0e2 (patch)
tree 1f16eadb54ab7b927db902113dc9233cc6aa6361
parent d39d9e066fb55694abf007da86f658c6c4855a89 (diff)
[PRISM] Support UTF-8 symbols
Fixes ruby/prism#2242.
-rw-r--r-- prism_compile.c 19
-rw-r--r-- test/ruby/test_compile_prism.rb 6
2 files changed, 16 insertions, 9 deletions
diff --git a/prism_compile.c b/prism_compile.c
index 1d9f6f9e62..71ca6540b0 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -200,23 +200,24 @@ parse_string_encoded(const pm_node_t *node, const pm_string_t *string, const pm_
}
static inline ID
-parse_symbol(const uint8_t *start, const uint8_t *end, const pm_parser_t *parser)
+parse_symbol(const uint8_t *start, const uint8_t *end, const char *encoding)
{
- rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding->name));
+ rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(encoding));
return rb_intern3((const char *) start, end - start, enc);
}
static inline ID
-parse_string_symbol(const pm_string_t *string, const pm_parser_t *parser)
+parse_string_symbol(const pm_symbol_node_t *symbol, const pm_parser_t *parser)
{
- const uint8_t *start = pm_string_source(string);
- return parse_symbol(start, start + pm_string_length(string), parser);
+ const char *encoding = symbol->base.flags & PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING ? "UTF-8" : parser->encoding->name;
+ const uint8_t *start = pm_string_source(&symbol->unescaped);
+ return parse_symbol(start, start + pm_string_length(&symbol->unescaped), encoding);
}
static inline ID
parse_location_symbol(const pm_location_t *location, const pm_parser_t *parser)
{
- return parse_symbol(location->start, location->end, parser);
+ return parse_symbol(location->start, location->end, parser->encoding->name);
}
static int
@@ -395,7 +396,7 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node
case PM_STRING_NODE:
return parse_string(&((pm_string_node_t *) node)->unescaped, parser);
case PM_SYMBOL_NODE:
- return ID2SYM(parse_string_symbol(&((pm_symbol_node_t *) node)->unescaped, parser));
+ return ID2SYM(parse_string_symbol((pm_symbol_node_t *)node, parser));
case PM_TRUE_NODE:
return Qtrue;
default:
@@ -1870,7 +1871,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t
const pm_node_t *key = ((const pm_assoc_node_t *) element)->key;
assert(PM_NODE_TYPE_P(key, PM_SYMBOL_NODE));
- VALUE symbol = ID2SYM(parse_string_symbol(&((const pm_symbol_node_t *) key)->unescaped, scope_node->parser));
+ VALUE symbol = ID2SYM(parse_string_symbol((const pm_symbol_node_t *)key, scope_node->parser));
rb_ary_push(keys, symbol);
}
}
@@ -1915,7 +1916,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t
const pm_node_t *key = assoc->key;
assert(PM_NODE_TYPE_P(key, PM_SYMBOL_NODE));
- VALUE symbol = ID2SYM(parse_string_symbol(&((const pm_symbol_node_t *) key)->unescaped, scope_node->parser));
+ VALUE symbol = ID2SYM(parse_string_symbol((const pm_symbol_node_t *)key, scope_node->parser));
ADD_INSN(ret, &line.node, dup);
ADD_INSN1(ret, &line.node, putobject, symbol);
ADD_SEND(ret, &line.node, rb_intern("key?"), INT2FIX(1));
diff --git a/test/ruby/test_compile_prism.rb b/test/ruby/test_compile_prism.rb
index 93dcd42450..075dffc1b4 100644
--- a/test/ruby/test_compile_prism.rb
+++ b/test/ruby/test_compile_prism.rb
@@ -779,6 +779,12 @@ module Prism
def test_SymbolNode
assert_prism_eval(":pit")
+
+ # Test UTF-8 symbol in a US-ASCII file
+ assert_prism_eval(<<~'RUBY', raw: true)
+ # -*- coding: us-ascii -*-
+ :"\u{e9}"
+ RUBY
end
def test_XStringNode