summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Earlopain <14981592+Earlopain@users.noreply.github.com> 2026-05-08 21:10:27 +0200
committer git <svn-admin@ruby-lang.org> 2026-05-08 20:12:59 +0000
commit 07ae044b0dd4968b4ef6dd072cc0a2a851d79902 (patch)
tree 2a126468f5bf45d6bcb03b99dc197d2908e4ef80
parent dc90c26a103ad62df73464cc1896edbcc90bd0c7 (diff)
[ruby/prism] Take the string's encoding as the initial encoding in the ripper translator
When no magic encoding comment is present, Ripper does not default to UTF-8; it takes the encoding of the string that contains the source code instead. Most of the time that will be UTF-8, but not always. Upstream commit: https://github.com/ruby/prism/commit/1a273db780
-rw-r--r-- lib/prism/translation/ripper.rb 5
-rw-r--r-- test/prism/ruby/ripper_test.rb 6
2 files changed, 9 insertions, 2 deletions
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index ddcec997b9..f179a149a1 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -57,7 +57,8 @@ module Prism
# [[1, 13], :on_kw, "end", END ]]
#
def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
- result = Prism.lex_compat(coerce_source(src), filepath: filename, line: lineno, version: "current")
+ coerced = coerce_source(src)
+ result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding)
if result.failure? && raise_errors
raise SyntaxError, result.errors.first.message
@@ -4077,7 +4078,7 @@ module Prism
# Lazily initialize the parse result.
def result
- @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true)
+ @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding)
end
def line_and_column_cache
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 05be087868..4fff630561 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -224,6 +224,12 @@ module Prism
assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))
end
+ def test_encoding
+ source = '"わたし"'.encode(Encoding::Windows_31J)
+ assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))
+ assert_equal(Ripper.sexp(source), Translation::Ripper.sexp(source))
+ end
+
def test_sexp_coercion
string_like = Object.new
def string_like.to_str