diff options
| author | Earlopain <14981592+Earlopain@users.noreply.github.com> | 2026-05-08 21:04:08 +0200 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2026-05-08 20:12:59 +0000 |
| commit | dc90c26a103ad62df73464cc1896edbcc90bd0c7 (patch) | |
| tree | 721c23616309fbd17f62fd21a1a2d80c787c0cf3 | |
| parent | 3a5bfb8a144cbc1d8021fd3709e007fc6f5b1261 (diff) | |
[ruby/prism] Respect `encoding` option in `Prism.lex` and friends
UTF-8 is the default encoding for source files, but it can be overridden via options.
https://github.com/ruby/prism/commit/355f451528
| -rw-r--r-- | prism/extension.c | 2 | ||||
| -rw-r--r-- | test/prism/lex_test.rb | 18 |
2 files changed, 19 insertions, 1 deletions
diff --git a/prism/extension.c b/prism/extension.c index 9f9169cfff..27df8dac50 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -793,7 +793,7 @@ parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *o parse_lex_data_t parse_lex_data = { .source = source, .tokens = rb_ary_new(), - .encoding = rb_utf8_encoding(), + .encoding = rb_enc_find(pm_parser_encoding_name(parser)), .freeze = pm_options_freeze(options), }; diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb index 8ea7ce7e9b..1e06d52184 100644 --- a/test/prism/lex_test.rb +++ b/test/prism/lex_test.rb @@ -47,6 +47,24 @@ module Prism end end + def test_lex_encoding + tokens = Prism.lex('"わたし"', encoding: Encoding::Windows_31J).value + tokens.each do |t| + assert_equal(Encoding::Windows_31J, t[0].value.encoding) + end + + # Shebangs must appear on the first line. For these cases, the encoding + # comment may appear second, but it should still change encoding. + tokens = Prism.lex(<<~RUBY, encoding: Encoding::Windows_31J).value + #! /usr/bin/env ruby + # encoding: utf-8 + "わたし" + RUBY + tokens.each do |t| + assert_equal(Encoding::UTF_8, t[0].value.encoding) + end + end + if RUBY_VERSION >= "3.3" def test_lex_compat source = "foo bar" |
