summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEarlopain <14981592+Earlopain@users.noreply.github.com>2026-05-08 21:04:08 +0200
committergit <svn-admin@ruby-lang.org>2026-05-08 20:12:59 +0000
commitdc90c26a103ad62df73464cc1896edbcc90bd0c7 (patch)
tree721c23616309fbd17f62fd21a1a2d80c787c0cf3
parent3a5bfb8a144cbc1d8021fd3709e007fc6f5b1261 (diff)
[ruby/prism] Respect `encoding` option in `Prism.lex` and friends
UTF-8 is the default encoding for source files, but it can be overridden via options. https://github.com/ruby/prism/commit/355f451528
-rw-r--r--prism/extension.c2
-rw-r--r--test/prism/lex_test.rb18
2 files changed, 19 insertions, 1 deletions
diff --git a/prism/extension.c b/prism/extension.c
index 9f9169cfff..27df8dac50 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -793,7 +793,7 @@ parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *o
parse_lex_data_t parse_lex_data = {
.source = source,
.tokens = rb_ary_new(),
- .encoding = rb_utf8_encoding(),
+ .encoding = rb_enc_find(pm_parser_encoding_name(parser)),
.freeze = pm_options_freeze(options),
};
diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
index 8ea7ce7e9b..1e06d52184 100644
--- a/test/prism/lex_test.rb
+++ b/test/prism/lex_test.rb
@@ -47,6 +47,24 @@ module Prism
end
end
+ def test_lex_encoding
+ tokens = Prism.lex('"わたし"', encoding: Encoding::Windows_31J).value
+ tokens.each do |t|
+ assert_equal(Encoding::Windows_31J, t[0].value.encoding)
+ end
+
+ # A shebang has to be on the first line, which pushes the encoding magic
+ # comment to line two; it must still take precedence over the `encoding:` option.
+ tokens = Prism.lex(<<~RUBY, encoding: Encoding::Windows_31J).value
+ #! /usr/bin/env ruby
+ # encoding: utf-8
+ "わたし"
+ RUBY
+ tokens.each do |t|
+ assert_equal(Encoding::UTF_8, t[0].value.encoding)
+ end
+ end
+
if RUBY_VERSION >= "3.3"
def test_lex_compat
source = "foo bar"