diff options
author | drbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-12-20 03:22:49 +0000 |
---|---|---|
committer | drbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-12-20 03:22:49 +0000 |
commit | 2ef9c50c6e405717d06362787c4549ca4f1c6485 (patch) | |
tree | ee99486567461dd5796f3d6edcc9e204187f2666 /lib/rdoc/markup/parser.rb | |
parent | d7effd506f5b91a636f2e6452ef1946b923007c7 (diff) |
Import RDoc 3
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30249 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rdoc/markup/parser.rb')
-rw-r--r-- | lib/rdoc/markup/parser.rb | 426 |
1 files changed, 190 insertions, 236 deletions
diff --git a/lib/rdoc/markup/parser.rb b/lib/rdoc/markup/parser.rb index 9fba69dc29..ea02ee3c5b 100644 --- a/lib/rdoc/markup/parser.rb +++ b/lib/rdoc/markup/parser.rb @@ -52,13 +52,13 @@ class RDoc::Markup::Parser attr_reader :tokens ## - # Parsers +str+ into a Document + # Parses +str+ into a Document def self.parse str parser = new - #parser.debug = true parser.tokenize str - RDoc::Markup::Document.new(*parser.parse) + doc = RDoc::Markup::Document.new + parser.parse doc end ## @@ -86,6 +86,7 @@ class RDoc::Markup::Parser # Builds a Heading of +level+ def build_heading level + _, text, = get # TEXT heading = RDoc::Markup::Heading.new level, text skip :NEWLINE @@ -105,38 +106,69 @@ class RDoc::Markup::Parser case type when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then - list_type = type - if column < margin then + if column < margin || (list.type && list.type != type) then unget break end - if list.type and list.type != list_type then - unget - break - end - - list.type = list_type + list.type = type + peek_type, _, column, = peek_token case type when :NOTE, :LABEL then - _, indent, = get # SPACE - if :NEWLINE == peek_token.first then - get - peek_type, new_indent, peek_column, = peek_token - indent = new_indent if - peek_type == :INDENT and peek_column >= column - unget + if peek_type == :NEWLINE then + # description not on the same line as LABEL/NOTE + # skip the trailing newline & any blank lines below + while peek_type == :NEWLINE + get + peek_type, _, column, = peek_token + end + + # we may be: + # - at end of stream + # - at a column < margin: + # [text] + # blah blah blah + # - at the same column, but with a different type of list item + # [text] + # * blah blah + # - at the same column, with the same type of list item + # [one] + # [two] + # In all cases, we have an empty description. + # In the last case only, we continue. + if peek_type.nil? 
|| column < margin then + empty = 1 + elsif column == margin then + case peek_type + when type + empty = 2 # continue + when *LIST_TOKENS + empty = 1 + else + empty = 0 + end + else + empty = 0 + end + + if empty > 0 then + item = RDoc::Markup::ListItem.new(data) + item << RDoc::Markup::BlankLine.new + list << item + break if empty == 1 + next + end end else data = nil - _, indent, = get end - list_item = build_list_item(margin + indent, data) + list_item = RDoc::Markup::ListItem.new data + parse list_item, column + list << list_item - list << list_item if list_item else unget break @@ -151,54 +183,6 @@ class RDoc::Markup::Parser end ## - # Builds a ListItem that is flush to +indent+ with type +item_type+ - - def build_list_item indent, item_type = nil - p :list_item_start => [indent, item_type] if @debug - - list_item = RDoc::Markup::ListItem.new item_type - - until @tokens.empty? do - type, data, column = get - - if column < indent and - not type == :NEWLINE and - (type != :INDENT or data < indent) then - unget - break - end - - case type - when :INDENT then - unget - list_item.push(*parse(indent)) - when :TEXT then - unget - list_item << build_paragraph(indent) - when :HEADER then - list_item << build_heading(data) - when :NEWLINE then - list_item << RDoc::Markup::BlankLine.new - when *LIST_TOKENS then - unget - list_item << build_list(column) - else - raise ParseError, "Unhandled token #{@current_token.inspect}" - end - end - - p :list_item_end => [indent, item_type] if @debug - - return nil if list_item.empty? - - list_item.parts.shift if - RDoc::Markup::BlankLine === list_item.parts.first and - list_item.length > 1 - - list_item - end - - ## # Builds a Paragraph that is flush to +margin+ def build_paragraph margin @@ -209,18 +193,7 @@ class RDoc::Markup::Parser until @tokens.empty? 
do type, data, column, = get - case type - when :INDENT then - next if data == margin and peek_token[0] == :TEXT - - unget - break - when :TEXT then - if column != margin then - unget - break - end - + if type == :TEXT && column == margin then paragraph << data skip :NEWLINE else @@ -235,67 +208,81 @@ class RDoc::Markup::Parser end ## - # Builds a Verbatim that is flush to +margin+ + # Builds a Verbatim that is indented from +margin+. + # + # The verbatim block is shifted left (the least indented lines start in + # column 0). Each part of the verbatim is one line of text, always + # terminated by a newline. Blank lines always consist of a single newline + # character, and there is never a single newline at the end of the verbatim. def build_verbatim margin p :verbatim_begin => margin if @debug verbatim = RDoc::Markup::Verbatim.new + min_indent = nil + generate_leading_spaces = true + line = '' + until @tokens.empty? do type, data, column, = get - case type - when :INDENT then - if margin >= data then - unget - break - end + if type == :NEWLINE then + line << data + verbatim << line + line = '' + generate_leading_spaces = true + next + end - indent = data - margin + if column <= margin + unget + break + end - verbatim << ' ' * indent - when :HEADER then - verbatim << '=' * data + if generate_leading_spaces then + indent = column - margin + line << ' ' * indent + min_indent = indent if min_indent.nil? 
|| indent < min_indent + generate_leading_spaces = false + end + case type + when :HEADER then + line << '=' * data _, _, peek_column, = peek_token peek_column ||= column + data - verbatim << ' ' * (peek_column - column - data) + indent = peek_column - column - data + line << ' ' * indent when :RULE then width = 2 + data - verbatim << '-' * width - + line << '-' * width _, _, peek_column, = peek_token - peek_column ||= column + data + 2 - verbatim << ' ' * (peek_column - column - width) + peek_column ||= column + width + indent = peek_column - column - width + line << ' ' * indent when :TEXT then - verbatim << data - when *LIST_TOKENS then - if column <= margin then - unget - break - end - + line << data + else # *LIST_TOKENS list_marker = case type - when :BULLET then '*' - when :LABEL then "[#{data}]" - when :LALPHA, :NUMBER, :UALPHA then "#{data}." - when :NOTE then "#{data}::" + when :BULLET then data + when :LABEL then "[#{data}]" + when :NOTE then "#{data}::" + else # :LALPHA, :NUMBER, :UALPHA + "#{data}." end - - verbatim << list_marker - - _, data, = get - - verbatim << ' ' * (data - list_marker.length) - when :NEWLINE then - verbatim << data - break unless [:INDENT, :NEWLINE].include? peek_token[0] - else - unget - break + line << list_marker + peek_type, _, peek_column = peek_token + unless peek_type == :NEWLINE then + peek_column ||= column + list_marker.length + indent = peek_column - column - list_marker.length + line << ' ' * indent + end end + end + verbatim << line << "\n" unless line.empty? + verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0 verbatim.normalize p :verbatim_end => margin if @debug @@ -313,65 +300,60 @@ class RDoc::Markup::Parser end ## - # Parses the tokens into a Document - - def parse indent = 0 + # Parses the tokens into an array of RDoc::Markup::XXX objects, + # and appends them to the passed +parent+ RDoc::Markup::YYY object. 
+ # + # Exits at the end of the token stream, or when it encounters a token + # in a column less than +indent+ (unless it is a NEWLINE). + # + # Returns +parent+. + + def parse parent, indent = 0 p :parse_start => indent if @debug - document = [] - until @tokens.empty? do type, data, column, = get - if type != :INDENT and column < indent then + if type == :NEWLINE then + # trailing newlines are skipped below, so this is a blank line + parent << RDoc::Markup::BlankLine.new + skip :NEWLINE, false + next + end + + # indentation change: break or verbatim + if column < indent then unget break + elsif column > indent then + unget + parent << build_verbatim(indent) + next end + # indentation is the same case type when :HEADER then - document << build_heading(data) - when :INDENT then - if indent > data then - unget - break - elsif indent == data then - next - end - - unget - document << build_verbatim(indent) - when :NEWLINE then - document << RDoc::Markup::BlankLine.new - skip :NEWLINE, false + parent << build_heading(data) when :RULE then - document << RDoc::Markup::Rule.new(data) + parent << RDoc::Markup::Rule.new(data) skip :NEWLINE when :TEXT then unget - document << build_paragraph(indent) - - # we're done with this paragraph (indent mismatch) - break if peek_token[0] == :TEXT + parent << build_paragraph(indent) when *LIST_TOKENS then unget - - list = build_list(indent) - - document << list if list - - # we're done with this list (indent mismatch) - break if LIST_TOKENS.include? peek_token.first and indent > 0 + parent << build_list(indent) else type, data, column, line = @current_token - raise ParseError, - "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}" + raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}" end end p :parse_end => indent if @debug - document + parent + end ## @@ -384,63 +366,16 @@ class RDoc::Markup::Parser end ## - # Skips a token of +token_type+, optionally raising an error. 
+ # Skips the next token if its type is +token_type+. + # + # Optionally raises an error if the next token is not of the expected type. def skip token_type, error = true type, = get - return unless type # end of stream - return @current_token if token_type == type - unget - - raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if - error - end - - ## - # Consumes tokens until NEWLINE and turns them back into text - - def text - text = '' - - loop do - type, data, = get - - text << case type - when :BULLET then - _, space, = get # SPACE - "*#{' ' * (space - 1)}" - when :LABEL then - _, space, = get # SPACE - "[#{data}]#{' ' * (space - data.length - 2)}" - when :LALPHA, :NUMBER, :UALPHA then - _, space, = get # SPACE - "#{data}.#{' ' * (space - 2)}" - when :NOTE then - _, space = get # SPACE - "#{data}::#{' ' * (space - data.length - 2)}" - when :TEXT then - data - when :NEWLINE then - unget - break - when nil then - break - else - raise ParseError, "unhandled token #{@current_token.inspect}" - end - end - - text - end - - ## - # Calculates the column and line of the current token based on +offset+. - - def token_pos offset - [offset - @line_pos, @line] + raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error end ## @@ -455,51 +390,62 @@ class RDoc::Markup::Parser until s.eos? 
do pos = s.pos + # leading spaces will be reflected by the column of the next token + # the only thing we lose are trailing spaces at the end of the file + next if s.scan(/ +/) + + # note: after BULLET, LABEL, etc., + # indent will be the column of the next non-newline token + @tokens << case + # [CR]LF => :NEWLINE when s.scan(/\r?\n/) then token = [:NEWLINE, s.matched, *token_pos(pos)] @line_pos = s.pos @line += 1 token - when s.scan(/ +/) then - [:INDENT, s.matched_size, *token_pos(pos)] + # === text => :HEADER then :TEXT when s.scan(/(=+)\s*/) then level = s[1].length level = 6 if level > 6 @tokens << [:HEADER, level, *token_pos(pos)] - pos = s.pos s.scan(/.*/) - [:TEXT, s.matched, *token_pos(pos)] - when s.scan(/^(-{3,}) *$/) then + [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)] + # --- (at least 3) and nothing else on the line => :RULE + when s.scan(/(-{3,}) *$/) then [:RULE, s[1].length - 2, *token_pos(pos)] - when s.scan(/([*-])\s+/) then - @tokens << [:BULLET, :BULLET, *token_pos(pos)] - [:SPACE, s.matched_size, *token_pos(pos)] - when s.scan(/([a-z]|\d+)\.[ \t]+\S/i) then + # * or - followed by white space and text => :BULLET + when s.scan(/([*-]) +(\S)/) then + s.pos -= s[2].bytesize # unget \S + [:BULLET, s[1], *token_pos(pos)] + # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER + when s.scan(/([a-z]|\d+)\. 
+(\S)/i) then + # FIXME if tab(s), the column will be wrong + # either support tabs everywhere by first expanding them to + # spaces, or assume that they will have been replaced + # before (and provide a check for that at least in debug + # mode) list_label = s[1] - width = s.matched_size - 1 - - s.pos -= 1 # unget \S - - list_type = case list_label - when /[a-z]/ then :LALPHA - when /[A-Z]/ then :UALPHA - when /\d/ then :NUMBER - else - raise ParseError, "BUG token #{list_label}" - end - - @tokens << [list_type, list_label, *token_pos(pos)] - [:SPACE, width, *token_pos(pos)] + s.pos -= s[2].bytesize # unget \S + list_type = + case list_label + when /[a-z]/ then :LALPHA + when /[A-Z]/ then :UALPHA + when /\d/ then :NUMBER + else + raise ParseError, "BUG token #{list_label}" + end + [list_type, list_label, *token_pos(pos)] + # [text] followed by spaces or end of line => :LABEL when s.scan(/\[(.*?)\]( +|$)/) then - @tokens << [:LABEL, s[1], *token_pos(pos)] - [:SPACE, s.matched_size, *token_pos(pos)] + [:LABEL, s[1], *token_pos(pos)] + # text:: followed by spaces or end of line => :NOTE when s.scan(/(.*?)::( +|$)/) then - @tokens << [:NOTE, s[1], *token_pos(pos)] - [:SPACE, s.matched_size, *token_pos(pos)] + [:NOTE, s[1], *token_pos(pos)] + # anything else: :TEXT else s.scan(/.*/) - [:TEXT, s.matched, *token_pos(pos)] + [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)] end end @@ -507,9 +453,17 @@ class RDoc::Markup::Parser end ## - # Returns the current token or +token+ to the token stream + # Calculates the column and line of the current token based on +offset+. + + def token_pos offset + [offset - @line_pos, @line] + end + + ## + # Returns the current token to the token stream - def unget token = @current_token + def unget + token = @current_token p :unget => token if @debug raise Error, 'too many #ungets' if token == @tokens.first @tokens.unshift token if token |