summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEarlopain <14981592+Earlopain@users.noreply.github.com>2025-01-03 19:14:15 +0100
committergit <svn-admin@ruby-lang.org>2025-01-05 18:12:44 +0000
commitc037f5a28c54b86139ef17db061fdbf7dc82fd32 (patch)
tree04efc5d50028d650499e8b5127c447591c8c8fef /lib
parent179e2cfa918c8bd418ca68b99b174f00f1ec205e (diff)
[ruby/prism] Fix parser translator ast for heredoc with written newlines
Heredocs that contain "\\n" don't start a new string node. https://github.com/ruby/prism/commit/61d9d3a15e
Diffstat (limited to 'lib')
-rw-r--r--lib/prism/translation/parser/compiler.rb34
1 files changed, 28 insertions, 6 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index d66e553fa3..0a95e44f85 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -2079,27 +2079,49 @@ module Prism
escaped_lengths = []
normalized_lengths = []
+ # Keeps track of where an unescaped line should start a new token. An unescaped
+ # \n would otherwise be indistinguishable from the actual newline at the end of
+ # the line. The parser gem only emits a new string node at "real" newlines;
+ # line continuations don't start a new node either.
+ do_next_tokens = []
if node.opening.end_with?("'")
escaped.each do |line|
escaped_lengths << line.bytesize
normalized_lengths << chomped_bytesize(line)
+ do_next_tokens << true
end
else
escaped
- .chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
+ .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
.each do |lines|
escaped_lengths << lines.sum(&:bytesize)
normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
+ unescaped_lines_count = lines.sum do |line|
+ line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
+ end
+ do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false))
+ do_next_tokens[-1] = true
end
end
start_offset = part.location.start_offset
-
- unescaped.map.with_index do |unescaped_line, index|
- inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
- start_offset += escaped_lengths.fetch(index, 0)
- inner_part
+ current_line = +""
+ current_normalized_length = 0
+
+ unescaped.filter_map.with_index do |unescaped_line, index|
+ current_line << unescaped_line
+ current_normalized_length += normalized_lengths.fetch(index, 0)
+
+ if do_next_tokens[index]
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
+ start_offset += escaped_lengths.fetch(index, 0)
+ current_line = +""
+ current_normalized_length = 0
+ inner_part
+ else
+ nil
+ end
end
else
[visit(part)]