diff options
Diffstat (limited to 'lib/rdoc/markup/parser.rb')
| -rw-r--r-- | lib/rdoc/markup/parser.rb | 529 |
1 files changed, 0 insertions, 529 deletions
require 'strscan'
require 'rdoc/text'

##
# A recursive-descent parser for RDoc markup.
#
# The parser tokenizes an input string then parses the tokens into a Document.
# Documents can be converted into output formats by writing a visitor like
# RDoc::Markup::ToHTML.
#
# The parser only handles the block-level constructs Paragraph, List,
# ListItem, Heading, Verbatim, BlankLine and Rule.  Inline markup such as
# <tt>\+blah\+</tt> is handled separately by RDoc::Markup::AttributeManager.
#
# To see what markup the Parser implements read RDoc.  To see how to use
# RDoc markup to format text in your program read RDoc::Markup.
#
# Every token produced by #tokenize is an Array of the form
# <tt>[type, data, column, line]</tt>.

class RDoc::Markup::Parser

  include RDoc::Text

  ##
  # List token types

  LIST_TOKENS = [
    :BULLET,
    :LABEL,
    :LALPHA,
    :NOTE,
    :NUMBER,
    :UALPHA,
  ]

  ##
  # Parser error subclass

  class Error < RuntimeError; end

  ##
  # Raised when the parser is unable to handle the given markup

  class ParseError < Error; end

  ##
  # Enables display of debugging information

  attr_accessor :debug

  ##
  # Token accessor

  attr_reader :tokens

  ##
  # Parses +str+ into a Document

  def self.parse str
    parser = new
    #parser.debug = true
    parser.tokenize str
    RDoc::Markup::Document.new(*parser.parse)
  end

  ##
  # Returns a token stream for +str+, for testing

  def self.tokenize str
    parser = new
    parser.tokenize str
    parser.tokens
  end

  ##
  # Creates a new Parser.  See also ::parse

  def initialize
    @tokens = []
    @current_token = nil
    @debug = false

    @line = 0
    @line_pos = 0
  end

  ##
  # Builds a Heading of +level+.  The heading text is everything up to the
  # next NEWLINE token, which is consumed as well.

  def build_heading level
    heading = RDoc::Markup::Heading.new level, text
    skip :NEWLINE

    heading
  end

  ##
  # Builds a List flush to +margin+.
  #
  # Stops at the first token that is not a list token, that starts left of
  # +margin+, or whose list type differs from the type of the list being
  # built.  Returns nil when no list item was collected.

  def build_list margin
    p :list_start => margin if @debug

    list = RDoc::Markup::List.new

    until @tokens.empty? do
      type, data, column, = get

      case type
      when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
        list_type = type

        if column < margin then
          unget
          break
        end

        if list.type and list.type != list_type then
          unget
          break
        end

        list.type = list_type

        case type
        when :NOTE, :LABEL then
          _, indent, = get # SPACE
          if :NEWLINE == peek_token.first then
            # The label stands alone on its line: take the item indent from
            # the INDENT token on the following line when it is at least as
            # deep as the label itself.
            get
            peek_type, new_indent, peek_column, = peek_token
            indent = new_indent if
              peek_type == :INDENT and peek_column >= column
            unget
          end
        else
          # Only NOTE and LABEL items carry their label text into the item.
          data = nil
          _, indent, = get
        end

        list_item = build_list_item(margin + indent, data)

        list << list_item if list_item
      else
        unget
        break
      end
    end

    p :list_end => margin if @debug

    return nil if list.empty?

    list
  end

  ##
  # Builds a ListItem that is flush to +indent+ with type +item_type+.
  #
  # Returns nil when the item has no content.  Raises ParseError for a token
  # type the list item cannot contain.

  def build_list_item indent, item_type = nil
    p :list_item_start => [indent, item_type] if @debug

    list_item = RDoc::Markup::ListItem.new item_type

    until @tokens.empty? do
      type, data, column = get

      # A token left of the item's indent ends the item, except for NEWLINE
      # tokens and INDENT tokens that reach at least +indent+.
      if column < indent and
         not type == :NEWLINE and
         (type != :INDENT or data < indent) then
        unget
        break
      end

      case type
      when :INDENT then
        unget
        list_item.push(*parse(indent))
      when :TEXT then
        unget
        list_item << build_paragraph(indent)
      when :HEADER then
        list_item << build_heading(data)
      when :NEWLINE then
        list_item << RDoc::Markup::BlankLine.new
      when *LIST_TOKENS then
        unget
        list_item << build_list(column)
      else
        raise ParseError, "Unhandled token #{@current_token.inspect}"
      end
    end

    p :list_item_end => [indent, item_type] if @debug

    return nil if list_item.empty?

    # Drop a leading blank line unless it is the only part of the item.
    list_item.parts.shift if
      RDoc::Markup::BlankLine === list_item.parts.first and
      list_item.length > 1

    list_item
  end

  ##
  # Builds a Paragraph that is flush to +margin+.  Consecutive TEXT lines
  # starting at column +margin+ are collected into the same paragraph.

  def build_paragraph margin
    p :paragraph_start => margin if @debug

    paragraph = RDoc::Markup::Paragraph.new

    until @tokens.empty? do
      type, data, column, = get

      case type
      when :INDENT then
        # Indentation equal to the margin followed by text continues the
        # paragraph.
        next if data == margin and peek_token[0] == :TEXT

        unget
        break
      when :TEXT then
        if column != margin then
          unget
          break
        end

        paragraph << data
        skip :NEWLINE
      else
        unget
        break
      end
    end

    p :paragraph_end => margin if @debug

    paragraph
  end

  ##
  # Builds a Verbatim that is flush to +margin+.
  #
  # Tokens inside the verbatim section are turned back into their textual
  # form (headers into <tt>=</tt>, rules into <tt>-</tt>, list markers into
  # their marker text) and appended to the Verbatim.

  def build_verbatim margin
    p :verbatim_begin => margin if @debug
    verbatim = RDoc::Markup::Verbatim.new

    until @tokens.empty? do
      type, data, column, = get

      case type
      when :INDENT then
        if margin >= data then
          unget
          break
        end

        indent = data - margin

        verbatim << ' ' * indent
      when :HEADER then
        verbatim << '=' * data

        # Restore the spacing between the marker and the following token.
        _, _, peek_column, = peek_token
        peek_column ||= column + data
        verbatim << ' ' * (peek_column - column - data)
      when :RULE then
        # RULE data is the number of dashes minus two, see #tokenize.
        width = 2 + data
        verbatim << '-' * width

        _, _, peek_column, = peek_token
        peek_column ||= column + data + 2
        verbatim << ' ' * (peek_column - column - width)
      when :TEXT then
        verbatim << data
      when *LIST_TOKENS then
        if column <= margin then
          unget
          break
        end

        list_marker = case type
                      when :BULLET                   then '*'
                      when :LABEL                    then "[#{data}]"
                      when :LALPHA, :NUMBER, :UALPHA then "#{data}."
                      when :NOTE                     then "#{data}::"
                      end

        verbatim << list_marker

        _, data, = get # SPACE

        verbatim << ' ' * (data - list_marker.length)
      when :NEWLINE then
        verbatim << data
        break unless [:INDENT, :NEWLINE].include? peek_token[0]
      else
        unget
        break
      end
    end

    verbatim.normalize

    p :verbatim_end => margin if @debug

    verbatim
  end

  ##
  # Pulls the next token from the stream.  Returns nil at the end of the
  # stream.

  def get
    @current_token = @tokens.shift
    p :get => @current_token if @debug
    @current_token
  end

  ##
  # Parses the tokens into a Document.
  #
  # Returns an Array of the parsed block-level parts.  Parsing stops at the
  # first token that starts left of +indent+.  Raises ParseError for a token
  # type that is not handled.

  def parse indent = 0
    p :parse_start => indent if @debug

    document = []

    until @tokens.empty? do
      type, data, column, = get

      if type != :INDENT and column < indent then
        unget
        break
      end

      case type
      when :HEADER then
        document << build_heading(data)
      when :INDENT then
        if indent > data then
          unget
          break
        elsif indent == data then
          next
        end

        # Deeper than the current indent: a verbatim section.
        unget
        document << build_verbatim(indent)
      when :NEWLINE then
        document << RDoc::Markup::BlankLine.new
        skip :NEWLINE, false
      when :RULE then
        document << RDoc::Markup::Rule.new(data)
        skip :NEWLINE
      when :TEXT then
        unget
        document << build_paragraph(indent)

        # we're done with this paragraph (indent mismatch)
        break if peek_token[0] == :TEXT
      when *LIST_TOKENS then
        unget

        list = build_list(indent)

        document << list if list

        # we're done with this list (indent mismatch)
        break if LIST_TOKENS.include? peek_token.first and indent > 0
      else
        type, data, column, line = @current_token
        raise ParseError,
              "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
      end
    end

    p :parse_end => indent if @debug

    document
  end

  ##
  # Returns the next token on the stream without modifying the stream.
  # Returns an empty Array at the end of the stream.

  def peek_token
    token = @tokens.first || []
    p :peek => token if @debug
    token
  end

  ##
  # Skips a token of +token_type+, optionally raising an error.
  #
  # Returns the skipped token, or nil at the end of the stream.  When the
  # next token has a different type it is put back and, if +error+ is true,
  # ParseError is raised.

  def skip token_type, error = true
    type, = get

    return unless type # end of stream

    return @current_token if token_type == type

    unget

    raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if
      error
  end

  ##
  # Consumes tokens until NEWLINE and turns them back into text.  The
  # NEWLINE token itself is left on the stream.

  def text
    text = ''

    loop do
      type, data, = get

      text << case type
              when :BULLET then
                _, space, = get # SPACE
                "*#{' ' * (space - 1)}"
              when :LABEL then
                _, space, = get # SPACE
                "[#{data}]#{' ' * (space - data.length - 2)}"
              when :LALPHA, :NUMBER, :UALPHA then
                _, space, = get # SPACE
                "#{data}.#{' ' * (space - 2)}"
              when :NOTE then
                _, space = get # SPACE
                "#{data}::#{' ' * (space - data.length - 2)}"
              when :TEXT then
                data
              when :NEWLINE then
                unget
                break
              when nil then
                break
              else
                raise ParseError, "unhandled token #{@current_token.inspect}"
              end
    end

    text
  end

  ##
  # Calculates the column and line of the current token based on +offset+.

  def token_pos offset
    [offset - @line_pos, @line]
  end

  ##
  # Turns text +input+ into a stream of tokens and returns self.  Tokens are
  # appended to #tokens.
  #
  # Lines may end in either "\n" or "\r\n".  A carriage return that ends a
  # line is never part of a TEXT token, and RULE, LABEL and NOTE markup is
  # recognized in front of either line ending.

  def tokenize input
    s = StringScanner.new input

    @line = 0
    @line_pos = 0

    until s.eos? do
      pos = s.pos

      @tokens << case
                 when s.scan(/\r?\n/) then
                   token = [:NEWLINE, s.matched, *token_pos(pos)]
                   @line_pos = s.pos
                   @line += 1
                   token
                 when s.scan(/ +/) then
                   [:INDENT, s.matched_size, *token_pos(pos)]
                 when s.scan(/(=+)\s*/) then
                   level = s[1].length
                   level = 6 if level > 6
                   @tokens << [:HEADER, level, *token_pos(pos)]

                   pos = s.pos
                   s.scan(/.*/)
                   # "." also matches "\r"; drop the CR of a CRLF line ending
                   [:TEXT, s.matched.chomp("\r"), *token_pos(pos)]
                 when s.scan(/^(-{3,}) *\r?$/) then
                   # "$" only matches in front of "\n", hence the "\r?"
                   [:RULE, s[1].length - 2, *token_pos(pos)]
                 when s.scan(/([*-])\s+/) then
                   @tokens << [:BULLET, :BULLET, *token_pos(pos)]
                   [:SPACE, s.matched_size, *token_pos(pos)]
                 when s.scan(/([a-z]|\d+)\.[ \t]+(\S)/i) then
                   list_label = s[1]

                   # StringScanner positions are byte offsets, so the
                   # look-ahead character is measured in bytes to keep the
                   # scan pointer on a character boundary.
                   lookahead_size = s[2].bytesize
                   width = s.matched_size - lookahead_size

                   s.pos -= lookahead_size # unget \S

                   list_type = case list_label
                               when /[a-z]/ then :LALPHA
                               when /[A-Z]/ then :UALPHA
                               when /\d/    then :NUMBER
                               else
                                 raise ParseError, "BUG token #{list_label}"
                               end

                   @tokens << [list_type, list_label, *token_pos(pos)]
                   [:SPACE, width, *token_pos(pos)]
                 when s.scan(/\[(.*?)\]( +|\r?$)/) then
                   @tokens << [:LABEL, s[1], *token_pos(pos)]
                   # a consumed CR does not count towards the marker width
                   [:SPACE, s.matched.chomp("\r").bytesize, *token_pos(pos)]
                 when s.scan(/(.*?)::( +|\r?$)/) then
                   @tokens << [:NOTE, s[1], *token_pos(pos)]
                   # a consumed CR does not count towards the marker width
                   [:SPACE, s.matched.chomp("\r").bytesize, *token_pos(pos)]
                 else s.scan(/.*/)
                   # "." also matches "\r"; drop the CR of a CRLF line ending
                   [:TEXT, s.matched.chomp("\r"), *token_pos(pos)]
                 end
    end

    self
  end

  ##
  # Returns the current token or +token+ to the token stream.
  #
  # Raises Error when +token+ is already at the front of the stream.

  def unget token = @current_token
    p :unget => token if @debug
    raise Error, 'too many #ungets' if token == @tokens.first
    @tokens.unshift token if token
  end

end

require 'rdoc/markup/blank_line'
require 'rdoc/markup/document'
require 'rdoc/markup/heading'
require 'rdoc/markup/list'
require 'rdoc/markup/list_item'
require 'rdoc/markup/raw'
require 'rdoc/markup/paragraph'
require 'rdoc/markup/rule'
require 'rdoc/markup/verbatim'
