summaryrefslogtreecommitdiff
path: root/lib/prism/translation/parser
diff options
context:
space:
mode:
author: Koichi ITO <koic.ito@gmail.com> 2024-03-15 01:22:14 +0900
committer: git <svn-admin@ruby-lang.org> 2024-03-15 12:31:40 +0000
commit: c0b8dee95a5412f395486a9bcb4959f93509cecb (patch)
tree: 6ec4ec6c14027b24669c2ecd9d51419055e20681 /lib/prism/translation/parser
parent: c45ad17fa1269aa882ed170760a5603a814d7c36 (diff)
[ruby/prism] Fix an AST and token incompatibility for `Prism::Translation::Parser`
This PR fixes an AST and token incompatibility between the Parser gem and `Prism::Translation::Parser` for a dstring literal:

```ruby
"foo
  #{bar}"
```

## Parser gem (Expected)

```console
$ bundle exec ruby -Ilib -rparser/ruby33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Parser::Ruby33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr, s(:str, "foo\n"), s(:str, "  "), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, ["  ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```

## `Prism::Translation::Parser` (Actual)

Previously, the AST and tokens differed from those returned by the Parser gem.
In this case, the `dstr` node should not be nested:

```console
$ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr, s(:dstr, s(:str, "foo\n"), s(:str, "  ")), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, ["  ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```

After this correction, the AST and tokens are the same as those returned by the Parser gem:

```console
$ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr, s(:str, "foo\n"), s(:str, "  "), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, ["  ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```

https://github.com/ruby/prism/commit/c1652a9ee7
Diffstat (limited to 'lib/prism/translation/parser')
-rw-r--r-- lib/prism/translation/parser/compiler.rb 41
-rw-r--r-- lib/prism/translation/parser/lexer.rb 20
2 files changed, 49 insertions, 12 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 4eb2c4a8da..0503d003f5 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -953,14 +953,35 @@ module Prism
def visit_interpolated_string_node(node)
if node.heredoc?
children, closing = visit_heredoc(node)
- builder.string_compose(token(node.opening_loc), children, closing)
+
+ return builder.string_compose(token(node.opening_loc), children, closing)
+ end
+
+ parts = if node.parts.one? { |part| part.type == :string_node }
+ node.parts.flat_map do |node|
+ if node.type == :string_node && node.unescaped.lines.count >= 2
+ start_offset = node.content_loc.start_offset
+
+ node.unescaped.lines.map do |line|
+ end_offset = start_offset + line.length
+ offsets = srange_offsets(start_offset, end_offset)
+ start_offset = end_offset
+
+ builder.string_internal([line, offsets])
+ end
+ else
+ visit(node)
+ end
+ end
else
- builder.string_compose(
- token(node.opening_loc),
- visit_all(node.parts),
- token(node.closing_loc)
- )
+ visit_all(node.parts)
end
+
+ builder.string_compose(
+ token(node.opening_loc),
+ parts,
+ token(node.closing_loc)
+ )
end
# :"foo #{bar}"
@@ -1492,17 +1513,17 @@ module Prism
elsif node.opening == "?"
builder.character([node.unescaped, srange(node.location)])
else
- parts = if node.unescaped.lines.count <= 1
+ parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
else
start_offset = node.content_loc.start_offset
- node.unescaped.lines.map do |line|
- end_offset = start_offset + line.length
+ [node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line|
+ end_offset = start_offset + content_line.length
offsets = srange_offsets(start_offset, end_offset)
start_offset = end_offset
- builder.string_internal([line, offsets])
+ builder.string_internal([unescaped_line, offsets])
end
end
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 9cf86476ba..7febca449e 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -295,8 +295,24 @@ module Prism
unless (lines = token.value.lines).one?
start_offset = offset_cache[token.location.start_offset]
lines.map do |line|
- end_offset = start_offset + line.length
- tokens << [:tSTRING_CONTENT, [line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
+ newline = line.end_with?("\r\n") ? "\r\n" : "\n"
+ chomped_line = line.chomp
+ if match = chomped_line.match(/(?<backslashes>\\+)\z/)
+ adjustment = match[:backslashes].size / 2
+ adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
+ if match[:backslashes].size.odd?
+ adjusted_line.delete_suffix!("\\")
+ adjustment += 2
+ else
+ adjusted_line << newline
+ end
+ else
+ adjusted_line = line
+ adjustment = 0
+ end
+
+ end_offset = start_offset + adjusted_line.length + adjustment
+ tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
start_offset = end_offset
end
next