summaryrefslogtreecommitdiff
path: root/lib/rdoc/markup/parser.rb
diff options
context:
space:
mode:
authordrbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2010-12-20 03:22:49 +0000
committerdrbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2010-12-20 03:22:49 +0000
commit2ef9c50c6e405717d06362787c4549ca4f1c6485 (patch)
treeee99486567461dd5796f3d6edcc9e204187f2666 /lib/rdoc/markup/parser.rb
parentd7effd506f5b91a636f2e6452ef1946b923007c7 (diff)
Import RDoc 3
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30249 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rdoc/markup/parser.rb')
-rw-r--r--lib/rdoc/markup/parser.rb426
1 files changed, 190 insertions, 236 deletions
diff --git a/lib/rdoc/markup/parser.rb b/lib/rdoc/markup/parser.rb
index 9fba69dc29..ea02ee3c5b 100644
--- a/lib/rdoc/markup/parser.rb
+++ b/lib/rdoc/markup/parser.rb
@@ -52,13 +52,13 @@ class RDoc::Markup::Parser
attr_reader :tokens
##
- # Parsers +str+ into a Document
+ # Parses +str+ into a Document
def self.parse str
parser = new
- #parser.debug = true
parser.tokenize str
- RDoc::Markup::Document.new(*parser.parse)
+ doc = RDoc::Markup::Document.new
+ parser.parse doc
end
##
@@ -86,6 +86,7 @@ class RDoc::Markup::Parser
# Builds a Heading of +level+
def build_heading level
+ _, text, = get # TEXT
heading = RDoc::Markup::Heading.new level, text
skip :NEWLINE
@@ -105,38 +106,69 @@ class RDoc::Markup::Parser
case type
when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
- list_type = type
- if column < margin then
+ if column < margin || (list.type && list.type != type) then
unget
break
end
- if list.type and list.type != list_type then
- unget
- break
- end
-
- list.type = list_type
+ list.type = type
+ peek_type, _, column, = peek_token
case type
when :NOTE, :LABEL then
- _, indent, = get # SPACE
- if :NEWLINE == peek_token.first then
- get
- peek_type, new_indent, peek_column, = peek_token
- indent = new_indent if
- peek_type == :INDENT and peek_column >= column
- unget
+ if peek_type == :NEWLINE then
+ # description not on the same line as LABEL/NOTE
+ # skip the trailing newline & any blank lines below
+ while peek_type == :NEWLINE
+ get
+ peek_type, _, column, = peek_token
+ end
+
+ # we may be:
+ # - at end of stream
+ # - at a column < margin:
+ # [text]
+ # blah blah blah
+ # - at the same column, but with a different type of list item
+ # [text]
+ # * blah blah
+ # - at the same column, with the same type of list item
+ # [one]
+ # [two]
+ # In all cases, we have an empty description.
+ # In the last case only, we continue.
+ if peek_type.nil? || column < margin then
+ empty = 1
+ elsif column == margin then
+ case peek_type
+ when type
+ empty = 2 # continue
+ when *LIST_TOKENS
+ empty = 1
+ else
+ empty = 0
+ end
+ else
+ empty = 0
+ end
+
+ if empty > 0 then
+ item = RDoc::Markup::ListItem.new(data)
+ item << RDoc::Markup::BlankLine.new
+ list << item
+ break if empty == 1
+ next
+ end
end
else
data = nil
- _, indent, = get
end
- list_item = build_list_item(margin + indent, data)
+ list_item = RDoc::Markup::ListItem.new data
+ parse list_item, column
+ list << list_item
- list << list_item if list_item
else
unget
break
@@ -151,54 +183,6 @@ class RDoc::Markup::Parser
end
##
- # Builds a ListItem that is flush to +indent+ with type +item_type+
-
- def build_list_item indent, item_type = nil
- p :list_item_start => [indent, item_type] if @debug
-
- list_item = RDoc::Markup::ListItem.new item_type
-
- until @tokens.empty? do
- type, data, column = get
-
- if column < indent and
- not type == :NEWLINE and
- (type != :INDENT or data < indent) then
- unget
- break
- end
-
- case type
- when :INDENT then
- unget
- list_item.push(*parse(indent))
- when :TEXT then
- unget
- list_item << build_paragraph(indent)
- when :HEADER then
- list_item << build_heading(data)
- when :NEWLINE then
- list_item << RDoc::Markup::BlankLine.new
- when *LIST_TOKENS then
- unget
- list_item << build_list(column)
- else
- raise ParseError, "Unhandled token #{@current_token.inspect}"
- end
- end
-
- p :list_item_end => [indent, item_type] if @debug
-
- return nil if list_item.empty?
-
- list_item.parts.shift if
- RDoc::Markup::BlankLine === list_item.parts.first and
- list_item.length > 1
-
- list_item
- end
-
- ##
# Builds a Paragraph that is flush to +margin+
def build_paragraph margin
@@ -209,18 +193,7 @@ class RDoc::Markup::Parser
until @tokens.empty? do
type, data, column, = get
- case type
- when :INDENT then
- next if data == margin and peek_token[0] == :TEXT
-
- unget
- break
- when :TEXT then
- if column != margin then
- unget
- break
- end
-
+ if type == :TEXT && column == margin then
paragraph << data
skip :NEWLINE
else
@@ -235,67 +208,81 @@ class RDoc::Markup::Parser
end
##
- # Builds a Verbatim that is flush to +margin+
+ # Builds a Verbatim that is indented from +margin+.
+ #
+ # The verbatim block is shifted left (the least indented lines start in
+ # column 0). Each part of the verbatim is one line of text, always
+ # terminated by a newline. Blank lines always consist of a single newline
+ # character, and there is never a single newline at the end of the verbatim.
def build_verbatim margin
p :verbatim_begin => margin if @debug
verbatim = RDoc::Markup::Verbatim.new
+ min_indent = nil
+ generate_leading_spaces = true
+ line = ''
+
until @tokens.empty? do
type, data, column, = get
- case type
- when :INDENT then
- if margin >= data then
- unget
- break
- end
+ if type == :NEWLINE then
+ line << data
+ verbatim << line
+ line = ''
+ generate_leading_spaces = true
+ next
+ end
- indent = data - margin
+ if column <= margin
+ unget
+ break
+ end
- verbatim << ' ' * indent
- when :HEADER then
- verbatim << '=' * data
+ if generate_leading_spaces then
+ indent = column - margin
+ line << ' ' * indent
+ min_indent = indent if min_indent.nil? || indent < min_indent
+ generate_leading_spaces = false
+ end
+ case type
+ when :HEADER then
+ line << '=' * data
_, _, peek_column, = peek_token
peek_column ||= column + data
- verbatim << ' ' * (peek_column - column - data)
+ indent = peek_column - column - data
+ line << ' ' * indent
when :RULE then
width = 2 + data
- verbatim << '-' * width
-
+ line << '-' * width
_, _, peek_column, = peek_token
- peek_column ||= column + data + 2
- verbatim << ' ' * (peek_column - column - width)
+ peek_column ||= column + width
+ indent = peek_column - column - width
+ line << ' ' * indent
when :TEXT then
- verbatim << data
- when *LIST_TOKENS then
- if column <= margin then
- unget
- break
- end
-
+ line << data
+ else # *LIST_TOKENS
list_marker = case type
- when :BULLET then '*'
- when :LABEL then "[#{data}]"
- when :LALPHA, :NUMBER, :UALPHA then "#{data}."
- when :NOTE then "#{data}::"
+ when :BULLET then data
+ when :LABEL then "[#{data}]"
+ when :NOTE then "#{data}::"
+ else # :LALPHA, :NUMBER, :UALPHA
+ "#{data}."
end
-
- verbatim << list_marker
-
- _, data, = get
-
- verbatim << ' ' * (data - list_marker.length)
- when :NEWLINE then
- verbatim << data
- break unless [:INDENT, :NEWLINE].include? peek_token[0]
- else
- unget
- break
+ line << list_marker
+ peek_type, _, peek_column = peek_token
+ unless peek_type == :NEWLINE then
+ peek_column ||= column + list_marker.length
+ indent = peek_column - column - list_marker.length
+ line << ' ' * indent
+ end
end
+
end
+ verbatim << line << "\n" unless line.empty?
+ verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
verbatim.normalize
p :verbatim_end => margin if @debug
@@ -313,65 +300,60 @@ class RDoc::Markup::Parser
end
##
- # Parses the tokens into a Document
-
- def parse indent = 0
+ # Parses the tokens into an array of RDoc::Markup::XXX objects,
+ # and appends them to the passed +parent+ RDoc::Markup::YYY object.
+ #
+ # Exits at the end of the token stream, or when it encounters a token
+ # in a column less than +indent+ (unless it is a NEWLINE).
+ #
+ # Returns +parent+.
+
+ def parse parent, indent = 0
p :parse_start => indent if @debug
- document = []
-
until @tokens.empty? do
type, data, column, = get
- if type != :INDENT and column < indent then
+ if type == :NEWLINE then
+ # trailing newlines are skipped below, so this is a blank line
+ parent << RDoc::Markup::BlankLine.new
+ skip :NEWLINE, false
+ next
+ end
+
+ # indentation change: break or verbattim
+ if column < indent then
unget
break
+ elsif column > indent then
+ unget
+ parent << build_verbatim(indent)
+ next
end
+ # indentation is the same
case type
when :HEADER then
- document << build_heading(data)
- when :INDENT then
- if indent > data then
- unget
- break
- elsif indent == data then
- next
- end
-
- unget
- document << build_verbatim(indent)
- when :NEWLINE then
- document << RDoc::Markup::BlankLine.new
- skip :NEWLINE, false
+ parent << build_heading(data)
when :RULE then
- document << RDoc::Markup::Rule.new(data)
+ parent << RDoc::Markup::Rule.new(data)
skip :NEWLINE
when :TEXT then
unget
- document << build_paragraph(indent)
-
- # we're done with this paragraph (indent mismatch)
- break if peek_token[0] == :TEXT
+ parent << build_paragraph(indent)
when *LIST_TOKENS then
unget
-
- list = build_list(indent)
-
- document << list if list
-
- # we're done with this list (indent mismatch)
- break if LIST_TOKENS.include? peek_token.first and indent > 0
+ parent << build_list(indent)
else
type, data, column, line = @current_token
- raise ParseError,
- "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
+ raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
end
end
p :parse_end => indent if @debug
- document
+ parent
+
end
##
@@ -384,63 +366,16 @@ class RDoc::Markup::Parser
end
##
- # Skips a token of +token_type+, optionally raising an error.
+ # Skips the next token if its type is +token_type+.
+ #
+ # Optionally raises an error if the next token is not of the expected type.
def skip token_type, error = true
type, = get
-
return unless type # end of stream
-
return @current_token if token_type == type
-
unget
-
- raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if
- error
- end
-
- ##
- # Consumes tokens until NEWLINE and turns them back into text
-
- def text
- text = ''
-
- loop do
- type, data, = get
-
- text << case type
- when :BULLET then
- _, space, = get # SPACE
- "*#{' ' * (space - 1)}"
- when :LABEL then
- _, space, = get # SPACE
- "[#{data}]#{' ' * (space - data.length - 2)}"
- when :LALPHA, :NUMBER, :UALPHA then
- _, space, = get # SPACE
- "#{data}.#{' ' * (space - 2)}"
- when :NOTE then
- _, space = get # SPACE
- "#{data}::#{' ' * (space - data.length - 2)}"
- when :TEXT then
- data
- when :NEWLINE then
- unget
- break
- when nil then
- break
- else
- raise ParseError, "unhandled token #{@current_token.inspect}"
- end
- end
-
- text
- end
-
- ##
- # Calculates the column and line of the current token based on +offset+.
-
- def token_pos offset
- [offset - @line_pos, @line]
+ raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
end
##
@@ -455,51 +390,62 @@ class RDoc::Markup::Parser
until s.eos? do
pos = s.pos
+ # leading spaces will be reflected by the column of the next token
+ # the only thing we loose are trailing spaces at the end of the file
+ next if s.scan(/ +/)
+
+ # note: after BULLET, LABEL, etc.,
+ # indent will be the column of the next non-newline token
+
@tokens << case
+ # [CR]LF => :NEWLINE
when s.scan(/\r?\n/) then
token = [:NEWLINE, s.matched, *token_pos(pos)]
@line_pos = s.pos
@line += 1
token
- when s.scan(/ +/) then
- [:INDENT, s.matched_size, *token_pos(pos)]
+ # === text => :HEADER then :TEXT
when s.scan(/(=+)\s*/) then
level = s[1].length
level = 6 if level > 6
@tokens << [:HEADER, level, *token_pos(pos)]
-
pos = s.pos
s.scan(/.*/)
- [:TEXT, s.matched, *token_pos(pos)]
- when s.scan(/^(-{3,}) *$/) then
+ [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
+ # --- (at least 3) and nothing else on the line => :RULE
+ when s.scan(/(-{3,}) *$/) then
[:RULE, s[1].length - 2, *token_pos(pos)]
- when s.scan(/([*-])\s+/) then
- @tokens << [:BULLET, :BULLET, *token_pos(pos)]
- [:SPACE, s.matched_size, *token_pos(pos)]
- when s.scan(/([a-z]|\d+)\.[ \t]+\S/i) then
+ # * or - followed by white space and text => :BULLET
+ when s.scan(/([*-]) +(\S)/) then
+ s.pos -= s[2].bytesize # unget \S
+ [:BULLET, s[1], *token_pos(pos)]
+ # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
+ when s.scan(/([a-z]|\d+)\. +(\S)/i) then
+ # FIXME if tab(s), the column will be wrong
+ # either support tabs everywhere by first expanding them to
+ # spaces, or assume that they will have been replaced
+ # before (and provide a check for that at least in debug
+ # mode)
list_label = s[1]
- width = s.matched_size - 1
-
- s.pos -= 1 # unget \S
-
- list_type = case list_label
- when /[a-z]/ then :LALPHA
- when /[A-Z]/ then :UALPHA
- when /\d/ then :NUMBER
- else
- raise ParseError, "BUG token #{list_label}"
- end
-
- @tokens << [list_type, list_label, *token_pos(pos)]
- [:SPACE, width, *token_pos(pos)]
+ s.pos -= s[2].bytesize # unget \S
+ list_type =
+ case list_label
+ when /[a-z]/ then :LALPHA
+ when /[A-Z]/ then :UALPHA
+ when /\d/ then :NUMBER
+ else
+ raise ParseError, "BUG token #{list_label}"
+ end
+ [list_type, list_label, *token_pos(pos)]
+ # [text] followed by spaces or end of line => :LABEL
when s.scan(/\[(.*?)\]( +|$)/) then
- @tokens << [:LABEL, s[1], *token_pos(pos)]
- [:SPACE, s.matched_size, *token_pos(pos)]
+ [:LABEL, s[1], *token_pos(pos)]
+ # text:: followed by spaces or end of line => :NOTE
when s.scan(/(.*?)::( +|$)/) then
- @tokens << [:NOTE, s[1], *token_pos(pos)]
- [:SPACE, s.matched_size, *token_pos(pos)]
+ [:NOTE, s[1], *token_pos(pos)]
+ # anything else: :TEXT
else s.scan(/.*/)
- [:TEXT, s.matched, *token_pos(pos)]
+ [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
end
end
@@ -507,9 +453,17 @@ class RDoc::Markup::Parser
end
##
- # Returns the current token or +token+ to the token stream
+ # Calculates the column and line of the current token based on +offset+.
+
+ def token_pos offset
+ [offset - @line_pos, @line]
+ end
+
+ ##
+ # Returns the current token to the token stream
- def unget token = @current_token
+ def unget
+ token = @current_token
p :unget => token if @debug
raise Error, 'too many #ungets' if token == @tokens.first
@tokens.unshift token if token