From 2ea1950510003dbce5a761a712039ed69fa499f2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 16 Sep 2024 14:40:19 -0400 Subject: [ruby/prism] Do not leak explicit encoding Fixes [Bug #20744] https://github.com/ruby/prism/commit/f1b8b1b2a2 --- lib/prism/translation/ruby_parser.rb | 8 ++++- prism/prism.c | 1 + test/prism/fixtures/regex_escape_encoding.txt | 3 ++ test/prism/locals_test.rb | 4 +++ test/prism/snapshots/regex_escape_encoding.txt | 43 ++++++++++++++++++++++++++ test/prism/test_helper.rb | 2 +- 6 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 test/prism/fixtures/regex_escape_encoding.txt create mode 100644 test/prism/snapshots/regex_escape_encoding.txt diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb index 9cd39075ec..4ccff0b600 100644 --- a/lib/prism/translation/ruby_parser.rb +++ b/lib/prism/translation/ruby_parser.rb @@ -1428,7 +1428,13 @@ module Prism # "foo" # ^^^^^ def visit_string_node(node) - s(node, :str, node.unescaped) + unescaped = node.unescaped + + if node.forced_binary_encoding? + unescaped.force_encoding(Encoding::BINARY) + end + + s(node, :str, unescaped) end # super(foo) diff --git a/prism/prism.c b/prism/prism.c index 6d4d2f8199..abbeb1fd36 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -168,6 +168,7 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato breakpoints[index++] = incrementor; } + parser->explicit_encoding = NULL; return lex_mode_push(parser, lex_mode); } diff --git a/test/prism/fixtures/regex_escape_encoding.txt b/test/prism/fixtures/regex_escape_encoding.txt new file mode 100644 index 0000000000..74e1647d67 --- /dev/null +++ b/test/prism/fixtures/regex_escape_encoding.txt @@ -0,0 +1,3 @@ +# encoding: US-ASCII +str = "hello \xFC" +str =~ /hello \u{fc}/ diff --git a/test/prism/locals_test.rb b/test/prism/locals_test.rb index ea61fd9499..2c0036289c 100644 --- a/test/prism/locals_test.rb +++ b/test/prism/locals_test.rb @@ -9,6 +9,10 @@ # to test on the most recent versions. return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.4.0" +# If we're on Ruby 3.4.0 and the default parser is Prism, then there is no point +# in comparing the locals because they will be the same. +return if RubyVM::InstructionSequence.compile("").to_a[4][:parser] == :prism + # In Ruby 3.4.0, the local table for method forwarding changed. But 3.4.0 can # refer to the dev version, so while 3.4.0 still isn't released, we need to # check if we have a high enough revision. diff --git a/test/prism/snapshots/regex_escape_encoding.txt b/test/prism/snapshots/regex_escape_encoding.txt new file mode 100644 index 0000000000..0f2dcc2c54 --- /dev/null +++ b/test/prism/snapshots/regex_escape_encoding.txt @@ -0,0 +1,43 @@ +@ ProgramNode (location: (2,0)-(3,21)) +├── flags: ∅ +├── locals: [:str] +└── statements: + @ StatementsNode (location: (2,0)-(3,21)) + ├── flags: ∅ + └── body: (length: 2) + ├── @ LocalVariableWriteNode (location: (2,0)-(2,18)) + │ ├── flags: newline + │ ├── name: :str + │ ├── depth: 0 + │ ├── name_loc: (2,0)-(2,3) = "str" + │ ├── value: + │ │ @ StringNode (location: (2,6)-(2,18)) + │ │ ├── flags: forced_binary_encoding + │ │ ├── opening_loc: (2,6)-(2,7) = "\"" + │ │ ├── content_loc: (2,7)-(2,17) = "hello \\xFC" + │ │ ├── closing_loc: (2,17)-(2,18) = "\"" + │ │ └── unescaped: "hello \xFC" + │ └── operator_loc: (2,4)-(2,5) = "=" + └── @ CallNode (location: (3,0)-(3,21)) + ├── flags: newline + ├── receiver: + │ @ LocalVariableReadNode (location: (3,0)-(3,3)) + │ ├── flags: ∅ + │ ├── name: :str + │ └── depth: 0 + ├── call_operator_loc: ∅ + ├── name: :=~ + ├── message_loc: (3,4)-(3,6) = "=~" + ├── opening_loc: ∅ + ├── arguments: + │ @ ArgumentsNode (location: (3,7)-(3,21)) + │ ├── flags: ∅ + │ └── arguments: (length: 1) + │ └── @ RegularExpressionNode (location: (3,7)-(3,21)) + │ ├── flags: static_literal, forced_utf8_encoding + │ ├── opening_loc: (3,7)-(3,8) = "/" + │ ├── content_loc: (3,8)-(3,20) = "hello \\u{fc}" + │ ├── closing_loc: (3,20)-(3,21) = "/" + │ └── unescaped: "hello \\u{fc}" + ├── closing_loc: ∅ + └── block: ∅ diff --git a/test/prism/test_helper.rb b/test/prism/test_helper.rb index d6d0abf548..b848500283 100644 --- a/test/prism/test_helper.rb +++ b/test/prism/test_helper.rb @@ -209,7 +209,7 @@ module Prism private - if RUBY_ENGINE == "ruby" + if RUBY_ENGINE == "ruby" && RubyVM::InstructionSequence.compile("").to_a[4][:parser] != :prism # Check that the given source is valid syntax by compiling it with RubyVM. def check_syntax(source) ignore_warnings { RubyVM::InstructionSequence.compile(source) } -- cgit v1.2.3