diff options
author | Koichi ITO <koic.ito@gmail.com> | 2024-03-15 01:22:14 +0900 |
---|---|---|
committer | git <svn-admin@ruby-lang.org> | 2024-03-15 12:31:40 +0000 |
commit | c0b8dee95a5412f395486a9bcb4959f93509cecb (patch) | |
tree | 6ec4ec6c14027b24669c2ecd9d51419055e20681 /lib/prism/translation/parser | |
parent | c45ad17fa1269aa882ed170760a5603a814d7c36 (diff) |
[ruby/prism] Fix an AST and token incompatibility for `Prism::Translation::Parser`
This PR fixes an AST and token incompatibility between Parser gem and `Prism::Translation::Parser`
for dstring literal:
```ruby
"foo
#{bar}"
```
## Parser gem (Expected)
```console
$ bundle exec ruby -Ilib -rparser/ruby33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Parser::Ruby33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr,
s(:str, "foo\n"),
s(:str, " "),
s(:begin,
s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]],
[:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]],
[:tSTRING_CONTENT, [" ", #<Parser::Source::Range example.rb 5...7>]],
[:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]],
[:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]],
[:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]],
[:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```
## `Prism::Translation::Parser` (Actual)
Previously, the AST and tokens returned by the Parser gem were different. In this case, `dstr` node should not be nested:
```console
$ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr,
s(:dstr,
s(:str, "foo\n"),
s(:str, " ")),
s(:begin,
s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]],
[:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]],
[:tSTRING_CONTENT, [" ", #<Parser::Source::Range example.rb 5...7>]],
[:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]],
[:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]],
[:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]],
[:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```
After this correction, the AST and tokens returned by the Parser gem are the same:
```console
$ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr,
s(:str, "foo\n"),
s(:str, " "),
s(:begin,
s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]],
[:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]],
[:tSTRING_CONTENT, [" ", #<Parser::Source::Range example.rb 5...7>]],
[:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]],
[:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]],
[:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]],
[:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```
https://github.com/ruby/prism/commit/c1652a9ee7
Diffstat (limited to 'lib/prism/translation/parser')
-rw-r--r-- | lib/prism/translation/parser/compiler.rb | 41 | ||||
-rw-r--r-- | lib/prism/translation/parser/lexer.rb | 20 |
2 files changed, 49 insertions, 12 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index 4eb2c4a8da..0503d003f5 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -953,14 +953,35 @@ module Prism def visit_interpolated_string_node(node) if node.heredoc? children, closing = visit_heredoc(node) - builder.string_compose(token(node.opening_loc), children, closing) + + return builder.string_compose(token(node.opening_loc), children, closing) + end + + parts = if node.parts.one? { |part| part.type == :string_node } + node.parts.flat_map do |node| + if node.type == :string_node && node.unescaped.lines.count >= 2 + start_offset = node.content_loc.start_offset + + node.unescaped.lines.map do |line| + end_offset = start_offset + line.length + offsets = srange_offsets(start_offset, end_offset) + start_offset = end_offset + + builder.string_internal([line, offsets]) + end + else + visit(node) + end + end else - builder.string_compose( - token(node.opening_loc), - visit_all(node.parts), - token(node.closing_loc) - ) + visit_all(node.parts) end + + builder.string_compose( + token(node.opening_loc), + parts, + token(node.closing_loc) + ) end # :"foo #{bar}" @@ -1492,17 +1513,17 @@ module Prism elsif node.opening == "?" builder.character([node.unescaped, srange(node.location)]) else - parts = if node.unescaped.lines.count <= 1 + parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1 [builder.string_internal([node.unescaped, srange(node.content_loc)])] else start_offset = node.content_loc.start_offset - node.unescaped.lines.map do |line| - end_offset = start_offset + line.length + [node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line| + end_offset = start_offset + content_line.length offsets = srange_offsets(start_offset, end_offset) start_offset = end_offset - builder.string_internal([line, offsets]) + builder.string_internal([unescaped_line, offsets]) end end diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 9cf86476ba..7febca449e 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -295,8 +295,24 @@ module Prism unless (lines = token.value.lines).one? start_offset = offset_cache[token.location.start_offset] lines.map do |line| - end_offset = start_offset + line.length - tokens << [:tSTRING_CONTENT, [line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]] + newline = line.end_with?("\r\n") ? "\r\n" : "\n" + chomped_line = line.chomp + if match = chomped_line.match(/(?<backslashes>\\+)\z/) + adjustment = match[:backslashes].size / 2 + adjusted_line = chomped_line.delete_suffix("\\" * adjustment) + if match[:backslashes].size.odd? + adjusted_line.delete_suffix!("\\") + adjustment += 2 + else + adjusted_line << newline + end + else + adjusted_line = line + adjustment = 0 + end + + end_offset = start_offset + adjusted_line.length + adjustment + tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]] start_offset = end_offset end next |