diff options
| -rw-r--r-- | prism/extension.c | 2 | ||||
| -rw-r--r-- | test/prism/lex_test.rb | 18 |
2 files changed, 19 insertions, 1 deletions
diff --git a/prism/extension.c b/prism/extension.c
index 9f9169cfff..27df8dac50 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -793,7 +793,7 @@ parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *o
     parse_lex_data_t parse_lex_data = {
         .source = source,
         .tokens = rb_ary_new(),
-        .encoding = rb_utf8_encoding(),
+        .encoding = rb_enc_find(pm_parser_encoding_name(parser)),
         .freeze = pm_options_freeze(options),
     };
 
diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
index 8ea7ce7e9b..1e06d52184 100644
--- a/test/prism/lex_test.rb
+++ b/test/prism/lex_test.rb
@@ -47,6 +47,24 @@ module Prism
       end
     end
 
+    def test_lex_encoding
+      tokens = Prism.lex('"わたし"', encoding: Encoding::Windows_31J).value
+      tokens.each do |t|
+        assert_equal(Encoding::Windows_31J, t[0].value.encoding)
+      end
+
+      # Shebangs must appear on the first line. For these cases, the encoding
+      # comment may appear second, but it should still change encoding.
+      tokens = Prism.lex(<<~RUBY, encoding: Encoding::Windows_31J).value
+        #! /usr/bin/env ruby
+        # encoding: utf-8
+        "わたし"
+      RUBY
+      tokens.each do |t|
+        assert_equal(Encoding::UTF_8, t[0].value.encoding)
+      end
+    end
+
     if RUBY_VERSION >= "3.3"
       def test_lex_compat
         source = "foo bar"
