From 07ae044b0dd4968b4ef6dd072cc0a2a851d79902 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Fri, 8 May 2026 21:10:27 +0200 Subject: [ruby/prism] Take the string's encoding as the initial encoding in the ripper translator When no magic encoding comment is present, it does not default to utf-8, and takes the encoding of the string that contains the source code instead. Most of the time that will be utf-8, but not always. https://github.com/ruby/prism/commit/1a273db780 --- lib/prism/translation/ripper.rb | 5 +++-- test/prism/ruby/ripper_test.rb | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index ddcec997b9..f179a149a1 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -57,7 +57,8 @@ module Prism # [[1, 13], :on_kw, "end", END ]] # def self.lex(src, filename = "-", lineno = 1, raise_errors: false) - result = Prism.lex_compat(coerce_source(src), filepath: filename, line: lineno, version: "current") + coerced = coerce_source(src) + result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding) if result.failure? && raise_errors raise SyntaxError, result.errors.first.message @@ -4077,7 +4078,7 @@ module Prism # Lazily initialize the parse result. 
def result - @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true) + @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding) end def line_and_column_cache diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 05be087868..4fff630561 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -224,6 +224,12 @@ module Prism assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source)) end + def test_encoding + source = '"わたし"'.encode(Encoding::Windows_31J) + assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source)) + assert_equal(Ripper.sexp(source), Translation::Ripper.sexp(source)) + end + def test_sexp_coercion string_like = Object.new def string_like.to_str -- cgit v1.2.3