summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2024-09-16 14:40:19 -0400
committer: git <svn-admin@ruby-lang.org> 2024-09-16 18:57:54 +0000
commit: 2ea1950510003dbce5a761a712039ed69fa499f2 (patch)
tree: 1a5cfc65a27c437422a7fd0b17b83354b69274f6
parent: 1e53e46275e2f49a711ff90adddc804d11a347b1 (diff)
[ruby/prism] Do not leak explicit encoding
Fixes [Bug #20744] https://github.com/ruby/prism/commit/f1b8b1b2a2
-rw-r--r-- lib/prism/translation/ruby_parser.rb 8
-rw-r--r-- prism/prism.c 1
-rw-r--r-- test/prism/fixtures/regex_escape_encoding.txt 3
-rw-r--r-- test/prism/locals_test.rb 4
-rw-r--r-- test/prism/snapshots/regex_escape_encoding.txt 43
-rw-r--r-- test/prism/test_helper.rb 2
6 files changed, 59 insertions, 2 deletions
diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb
index 9cd39075ec..4ccff0b600 100644
--- a/lib/prism/translation/ruby_parser.rb
+++ b/lib/prism/translation/ruby_parser.rb
@@ -1428,7 +1428,13 @@ module Prism
# "foo"
# ^^^^^
def visit_string_node(node)
- s(node, :str, node.unescaped)
+ unescaped = node.unescaped
+
+ if node.forced_binary_encoding?
+ unescaped.force_encoding(Encoding::BINARY)
+ end
+
+ s(node, :str, unescaped)
end
# super(foo)
diff --git a/prism/prism.c b/prism/prism.c
index 6d4d2f8199..abbeb1fd36 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -168,6 +168,7 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
breakpoints[index++] = incrementor;
}
+ parser->explicit_encoding = NULL;
return lex_mode_push(parser, lex_mode);
}
diff --git a/test/prism/fixtures/regex_escape_encoding.txt b/test/prism/fixtures/regex_escape_encoding.txt
new file mode 100644
index 0000000000..74e1647d67
--- /dev/null
+++ b/test/prism/fixtures/regex_escape_encoding.txt
@@ -0,0 +1,3 @@
+# encoding: US-ASCII
+str = "hello \xFC"
+str =~ /hello \u{fc}/
diff --git a/test/prism/locals_test.rb b/test/prism/locals_test.rb
index ea61fd9499..2c0036289c 100644
--- a/test/prism/locals_test.rb
+++ b/test/prism/locals_test.rb
@@ -9,6 +9,10 @@
# to test on the most recent versions.
return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.4.0"
+# If we're on Ruby 3.4.0 and the default parser is Prism, then there is no point
+# in comparing the locals because they will be the same.
+return if RubyVM::InstructionSequence.compile("").to_a[4][:parser] == :prism
+
# In Ruby 3.4.0, the local table for method forwarding changed. But 3.4.0 can
# refer to the dev version, so while 3.4.0 still isn't released, we need to
# check if we have a high enough revision.
diff --git a/test/prism/snapshots/regex_escape_encoding.txt b/test/prism/snapshots/regex_escape_encoding.txt
new file mode 100644
index 0000000000..0f2dcc2c54
--- /dev/null
+++ b/test/prism/snapshots/regex_escape_encoding.txt
@@ -0,0 +1,43 @@
+@ ProgramNode (location: (2,0)-(3,21))
+├── flags: ∅
+├── locals: [:str]
+└── statements:
+ @ StatementsNode (location: (2,0)-(3,21))
+ ├── flags: ∅
+ └── body: (length: 2)
+ ├── @ LocalVariableWriteNode (location: (2,0)-(2,18))
+ │ ├── flags: newline
+ │ ├── name: :str
+ │ ├── depth: 0
+ │ ├── name_loc: (2,0)-(2,3) = "str"
+ │ ├── value:
+ │ │ @ StringNode (location: (2,6)-(2,18))
+ │ │ ├── flags: forced_binary_encoding
+ │ │ ├── opening_loc: (2,6)-(2,7) = "\""
+ │ │ ├── content_loc: (2,7)-(2,17) = "hello \\xFC"
+ │ │ ├── closing_loc: (2,17)-(2,18) = "\""
+ │ │ └── unescaped: "hello \xFC"
+ │ └── operator_loc: (2,4)-(2,5) = "="
+ └── @ CallNode (location: (3,0)-(3,21))
+ ├── flags: newline
+ ├── receiver:
+ │ @ LocalVariableReadNode (location: (3,0)-(3,3))
+ │ ├── flags: ∅
+ │ ├── name: :str
+ │ └── depth: 0
+ ├── call_operator_loc: ∅
+ ├── name: :=~
+ ├── message_loc: (3,4)-(3,6) = "=~"
+ ├── opening_loc: ∅
+ ├── arguments:
+ │ @ ArgumentsNode (location: (3,7)-(3,21))
+ │ ├── flags: ∅
+ │ └── arguments: (length: 1)
+ │ └── @ RegularExpressionNode (location: (3,7)-(3,21))
+ │ ├── flags: static_literal, forced_utf8_encoding
+ │ ├── opening_loc: (3,7)-(3,8) = "/"
+ │ ├── content_loc: (3,8)-(3,20) = "hello \\u{fc}"
+ │ ├── closing_loc: (3,20)-(3,21) = "/"
+ │ └── unescaped: "hello \\u{fc}"
+ ├── closing_loc: ∅
+ └── block: ∅
diff --git a/test/prism/test_helper.rb b/test/prism/test_helper.rb
index d6d0abf548..b848500283 100644
--- a/test/prism/test_helper.rb
+++ b/test/prism/test_helper.rb
@@ -209,7 +209,7 @@ module Prism
private
- if RUBY_ENGINE == "ruby"
+ if RUBY_ENGINE == "ruby" && RubyVM::InstructionSequence.compile("").to_a[4][:parser] != :prism
# Check that the given source is valid syntax by compiling it with RubyVM.
def check_syntax(source)
ignore_warnings { RubyVM::InstructionSequence.compile(source) }