diff options
Diffstat (limited to 'ruby_1_9_3/lib/rdoc/markup/parser.rb')
-rw-r--r-- | ruby_1_9_3/lib/rdoc/markup/parser.rb | 497 |
1 file changed, 0 insertions(+), 497 deletions(-)
require 'strscan'
require 'rdoc/text'

##
# A recursive-descent parser for RDoc markup.
#
# The parser tokenizes an input string then parses the tokens into a Document.
# Documents can be converted into output formats by writing a visitor like
# RDoc::Markup::ToHTML.
#
# The parser only handles the block-level constructs Paragraph, List,
# ListItem, Heading, Verbatim, BlankLine and Rule.  Inline markup such as
# <tt>\+blah\+</tt> is handled separately by RDoc::Markup::AttributeManager.
#
# To see what markup the Parser implements read RDoc.  To see how to use
# RDoc markup to format text in your program read RDoc::Markup.

class RDoc::Markup::Parser

  include RDoc::Text

  ##
  # List token types

  LIST_TOKENS = [
    :BULLET,
    :LABEL,
    :LALPHA,
    :NOTE,
    :NUMBER,
    :UALPHA,
  ]

  ##
  # Parser error subclass

  class Error < RuntimeError; end

  ##
  # Raised when the parser is unable to handle the given markup

  class ParseError < Error; end

  ##
  # Enables display of debugging information

  attr_accessor :debug

  ##
  # Token accessor

  attr_reader :tokens

  ##
  # Parses +str+ into a Document

  def self.parse str
    parser = new
    parser.tokenize str
    doc = RDoc::Markup::Document.new
    parser.parse doc
  end

  ##
  # Returns a token stream for +str+, for testing

  def self.tokenize str
    parser = new
    parser.tokenize str
    parser.tokens
  end

  ##
  # Creates a new Parser.  See also ::parse

  def initialize
    @tokens = []
    @current_token = nil
    @debug = false

    # line/offset bookkeeping used by #token_pos to compute (column, line)
    @line = 0
    @line_pos = 0
  end

  ##
  # Builds a Heading of +level+

  def build_heading level
    type, text, = get

    text = case type
           when :TEXT then
             skip :NEWLINE
             text
           else
             # a bare "===" with nothing after it: put the token back and
             # produce an empty heading
             unget
             ''
           end

    RDoc::Markup::Heading.new level, text
  end

  ##
  # Builds a List flush to +margin+

  def build_list margin
    p :list_start => margin if @debug

    list = RDoc::Markup::List.new

    until @tokens.empty? do
      type, data, column, = get

      case type
      when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then

        # a dedented or differently-typed item terminates this list
        if column < margin || (list.type && list.type != type) then
          unget
          break
        end

        list.type = type
        peek_type, _, column, = peek_token

        case type
        when :NOTE, :LABEL then
          if peek_type == :NEWLINE then
            # description not on the same line as LABEL/NOTE
            # skip the trailing newline & any blank lines below
            while peek_type == :NEWLINE
              get
              peek_type, _, column, = peek_token
            end

            # we may be:
            #   - at end of stream
            #   - at a column < margin:
            #         [text]
            #       blah blah blah
            #   - at the same column, but with a different type of list item
            #       [text]
            #       * blah blah
            #   - at the same column, with the same type of list item
            #       [one]
            #       [two]
            # In all cases, we have an empty description.
            # In the last case only, we continue.
            if peek_type.nil? || column < margin then
              empty = 1
            elsif column == margin then
              case peek_type
              when type
                empty = 2 # continue
              when *LIST_TOKENS
                empty = 1
              else
                empty = 0
              end
            else
              empty = 0
            end

            if empty > 0 then
              item = RDoc::Markup::ListItem.new(data)
              item << RDoc::Markup::BlankLine.new
              list << item
              break if empty == 1
              next
            end
          end
        else
          data = nil
        end

        # recursively parse the item body indented to this item's column
        list_item = RDoc::Markup::ListItem.new data
        parse list_item, column
        list << list_item

      else
        unget
        break
      end
    end

    p :list_end => margin if @debug

    return nil if list.empty?

    list
  end

  ##
  # Builds a Paragraph that is flush to +margin+

  def build_paragraph margin
    p :paragraph_start => margin if @debug

    paragraph = RDoc::Markup::Paragraph.new

    until @tokens.empty? do
      type, data, column, = get

      if type == :TEXT && column == margin then
        paragraph << data
        skip :NEWLINE
      else
        unget
        break
      end
    end

    p :paragraph_end => margin if @debug

    paragraph
  end

  ##
  # Builds a Verbatim that is indented from +margin+.
  #
  # The verbatim block is shifted left (the least indented lines start in
  # column 0).  Each part of the verbatim is one line of text, always
  # terminated by a newline.  Blank lines always consist of a single newline
  # character, and there is never a single newline at the end of the verbatim.

  def build_verbatim margin
    p :verbatim_begin => margin if @debug
    verbatim = RDoc::Markup::Verbatim.new

    min_indent = nil
    generate_leading_spaces = true
    line = ''

    until @tokens.empty? do
      type, data, column, = get

      if type == :NEWLINE then
        line << data
        verbatim << line
        line = ''
        generate_leading_spaces = true
        next
      end

      # tokens at or left of the margin end the verbatim block
      if column <= margin
        unget
        break
      end

      if generate_leading_spaces then
        indent = column - margin
        line << ' ' * indent
        min_indent = indent if min_indent.nil? || indent < min_indent
        generate_leading_spaces = false
      end

      case type
      when :HEADER then
        # re-expand markup tokens back to their literal text, padding to the
        # column of the following token so spacing is preserved
        line << '=' * data
        _, _, peek_column, = peek_token
        peek_column ||= column + data
        indent = peek_column - column - data
        line << ' ' * indent
      when :RULE then
        width = 2 + data
        line << '-' * width
        _, _, peek_column, = peek_token
        peek_column ||= column + width
        indent = peek_column - column - width
        line << ' ' * indent
      when :TEXT then
        line << data
      else # *LIST_TOKENS
        list_marker = case type
                      when :BULLET then data
                      when :LABEL  then "[#{data}]"
                      when :NOTE   then "#{data}::"
                      else # :LALPHA, :NUMBER, :UALPHA
                        "#{data}."
                      end
        line << list_marker
        peek_type, _, peek_column = peek_token
        unless peek_type == :NEWLINE then
          peek_column ||= column + list_marker.length
          indent = peek_column - column - list_marker.length
          line << ' ' * indent
        end
      end

    end

    verbatim << line << "\n" unless line.empty?
    # shift the whole block left so the least-indented line is at column 0
    verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
    verbatim.normalize

    p :verbatim_end => margin if @debug

    verbatim
  end

  ##
  # Pulls the next token from the stream.

  def get
    @current_token = @tokens.shift
    p :get => @current_token if @debug
    @current_token
  end

  ##
  # Parses the tokens into an array of RDoc::Markup::XXX objects,
  # and appends them to the passed +parent+ RDoc::Markup::YYY object.
  #
  # Exits at the end of the token stream, or when it encounters a token
  # in a column less than +indent+ (unless it is a NEWLINE).
  #
  # Returns +parent+.

  def parse parent, indent = 0
    p :parse_start => indent if @debug

    until @tokens.empty? do
      type, data, column, = get

      if type == :NEWLINE then
        # trailing newlines are skipped below, so this is a blank line
        parent << RDoc::Markup::BlankLine.new
        skip :NEWLINE, false
        next
      end

      # indentation change: break or verbatim
      if column < indent then
        unget
        break
      elsif column > indent then
        unget
        parent << build_verbatim(indent)
        next
      end

      # indentation is the same
      case type
      when :HEADER then
        parent << build_heading(data)
      when :RULE then
        parent << RDoc::Markup::Rule.new(data)
        skip :NEWLINE
      when :TEXT then
        unget
        parent << build_paragraph(indent)
      when *LIST_TOKENS then
        unget
        parent << build_list(indent)
      else
        type, data, column, line = @current_token
        raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
      end
    end

    p :parse_end => indent if @debug

    parent
  end

  ##
  # Returns the next token on the stream without modifying the stream

  def peek_token
    token = @tokens.first || []
    p :peek => token if @debug
    token
  end

  ##
  # Skips the next token if its type is +token_type+.
  #
  # Optionally raises an error if the next token is not of the expected type.

  def skip token_type, error = true
    type, = get
    return unless type # end of stream
    return @current_token if token_type == type
    unget
    raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
  end

  ##
  # Turns text +input+ into a stream of tokens

  def tokenize input
    s = StringScanner.new input

    @line = 0
    @line_pos = 0

    until s.eos? do
      pos = s.pos

      # leading spaces will be reflected by the column of the next token
      # the only thing we loose are trailing spaces at the end of the file
      next if s.scan(/ +/)

      # note: after BULLET, LABEL, etc.,
      # indent will be the column of the next non-newline token

      @tokens << case
                 # [CR]LF => :NEWLINE
                 when s.scan(/\r?\n/) then
                   token = [:NEWLINE, s.matched, *token_pos(pos)]
                   @line_pos = s.pos
                   @line += 1
                   token
                 # === text => :HEADER then :TEXT
                 when s.scan(/(=+)(\s*)/) then
                   level = s[1].length
                   header = [:HEADER, level, *token_pos(pos)]

                   if s[2] =~ /^\r?\n/ then
                     s.pos -= s[2].length
                     header
                   else
                     pos = s.pos
                     s.scan(/.*/)
                     @tokens << header
                     [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
                   end
                 # --- (at least 3) and nothing else on the line => :RULE
                 when s.scan(/(-{3,}) *$/) then
                   [:RULE, s[1].length - 2, *token_pos(pos)]
                 # * or - followed by white space and text => :BULLET
                 when s.scan(/([*-]) +(\S)/) then
                   s.pos -= s[2].bytesize # unget \S
                   [:BULLET, s[1], *token_pos(pos)]
                 # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
                 when s.scan(/([a-z]|\d+)\. +(\S)/i) then
                   # FIXME if tab(s), the column will be wrong
                   # either support tabs everywhere by first expanding them to
                   # spaces, or assume that they will have been replaced
                   # before (and provide a check for that at least in debug
                   # mode)
                   list_label = s[1]
                   s.pos -= s[2].bytesize # unget \S
                   list_type =
                     case list_label
                     when /[a-z]/ then :LALPHA
                     when /[A-Z]/ then :UALPHA
                     when /\d/    then :NUMBER
                     else
                       raise ParseError, "BUG token #{list_label}"
                     end
                   [list_type, list_label, *token_pos(pos)]
                 # [text] followed by spaces or end of line => :LABEL
                 when s.scan(/\[(.*?)\]( +|$)/) then
                   [:LABEL, s[1], *token_pos(pos)]
                 # text:: followed by spaces or end of line => :NOTE
                 when s.scan(/(.*?)::( +|$)/) then
                   [:NOTE, s[1], *token_pos(pos)]
                 # anything else: :TEXT
                 else s.scan(/.*/)
                   [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
                 end
    end

    self
  end

  ##
  # Calculates the column and line of the current token based on +offset+.

  def token_pos offset
    [offset - @line_pos, @line]
  end

  ##
  # Returns the current token to the token stream

  def unget
    token = @current_token
    p :unget => token if @debug
    raise Error, 'too many #ungets' if token == @tokens.first
    @tokens.unshift token if token
  end

end

require 'rdoc/markup/blank_line'
require 'rdoc/markup/document'
require 'rdoc/markup/heading'
require 'rdoc/markup/list'
require 'rdoc/markup/list_item'
require 'rdoc/markup/raw'
require 'rdoc/markup/paragraph'
require 'rdoc/markup/indented_paragraph'
require 'rdoc/markup/rule'
require 'rdoc/markup/verbatim'