path: root/ruby_1_9_3/lib/rdoc/markup/parser.rb
Diffstat (limited to 'ruby_1_9_3/lib/rdoc/markup/parser.rb')
-rw-r--r--  ruby_1_9_3/lib/rdoc/markup/parser.rb  497
1 file changed, 497 insertions, 0 deletions
diff --git a/ruby_1_9_3/lib/rdoc/markup/parser.rb b/ruby_1_9_3/lib/rdoc/markup/parser.rb
new file mode 100644
index 0000000000..c18ce821fb
--- /dev/null
+++ b/ruby_1_9_3/lib/rdoc/markup/parser.rb
@@ -0,0 +1,497 @@
+require 'strscan'
+require 'rdoc/text'
+
+##
+# A recursive-descent parser for RDoc markup.
+#
+# The parser tokenizes an input string then parses the tokens into a Document.
+# Documents can be converted into output formats by writing a visitor like
+# RDoc::Markup::ToHTML.
+#
+# The parser only handles the block-level constructs Paragraph, List,
+# ListItem, Heading, Verbatim, BlankLine and Rule. Inline markup such as
+# <tt>\+blah\+</tt> is handled separately by RDoc::Markup::AttributeManager.
+#
+# To see what markup the Parser implements, read RDoc.  To see how to use
+# RDoc markup to format text in your program, read RDoc::Markup.
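+#
+# A rough usage sketch (assuming the rest of the RDoc markup classes are
+# loaded, for example via <tt>require 'rdoc/markup'</tt>):
+#
+#   doc = RDoc::Markup::Parser.parse "= Heading\n\nSome paragraph text.\n"
+#   # doc is an RDoc::Markup::Document holding a Heading, a BlankLine and
+#   # a Paragraph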
+
+class RDoc::Markup::Parser
+
+ include RDoc::Text
+
+ ##
+ # List token types
+
+ LIST_TOKENS = [
+ :BULLET,
+ :LABEL,
+ :LALPHA,
+ :NOTE,
+ :NUMBER,
+ :UALPHA,
+ ]
+
+ ##
+ # Parser error subclass
+
+ class Error < RuntimeError; end
+
+ ##
+ # Raised when the parser is unable to handle the given markup
+
+ class ParseError < Error; end
+
+ ##
+ # Enables display of debugging information
+
+ attr_accessor :debug
+
+ ##
+ # Token accessor
+
+ attr_reader :tokens
+
+ ##
+ # Parses +str+ into a Document
+
+ def self.parse str
+ parser = new
+ parser.tokenize str
+ doc = RDoc::Markup::Document.new
+ parser.parse doc
+ end
+
+ ##
+ # Returns a token stream for +str+, for testing
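+  #
+  # Each token is an array of the form <tt>[type, data, column, line]</tt>.
+  # A rough sketch of the expected output for a one-item bullet list:
+  #
+  #   RDoc::Markup::Parser.tokenize "* item\n"
+  #   # => [[:BULLET, "*", 0, 0], [:TEXT, "item", 2, 0], [:NEWLINE, "\n", 6, 0]]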
+
+ def self.tokenize str
+ parser = new
+ parser.tokenize str
+ parser.tokens
+ end
+
+ ##
+ # Creates a new Parser. See also ::parse
+
+ def initialize
+ @tokens = []
+ @current_token = nil
+ @debug = false
+
+ @line = 0
+ @line_pos = 0
+ end
+
+ ##
+ # Builds a Heading of +level+
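+  #
+  # For illustration, with +level+ 2 and a pending TEXT token "Tuesday"
+  # (as produced for the input <tt>"== Tuesday\n"</tt>), this builds the
+  # equivalent of <tt>RDoc::Markup::Heading.new(2, "Tuesday")</tt>.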
+
+ def build_heading level
+ type, text, = get
+
+ text = case type
+ when :TEXT then
+ skip :NEWLINE
+ text
+ else
+ unget
+ ''
+ end
+
+ RDoc::Markup::Heading.new level, text
+ end
+
+ ##
+ # Builds a List flush to +margin+
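+  #
+  # A rough sketch: for the input <tt>"* one\n* two\n"</tt> this builds a
+  # List of type :BULLET containing two ListItems, each of which holds a
+  # Paragraph for its text.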
+
+ def build_list margin
+ p :list_start => margin if @debug
+
+ list = RDoc::Markup::List.new
+
+ until @tokens.empty? do
+ type, data, column, = get
+
+ case type
+ when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
+
+ if column < margin || (list.type && list.type != type) then
+ unget
+ break
+ end
+
+ list.type = type
+ peek_type, _, column, = peek_token
+
+ case type
+ when :NOTE, :LABEL then
+ if peek_type == :NEWLINE then
+ # description not on the same line as LABEL/NOTE
+ # skip the trailing newline & any blank lines below
+ while peek_type == :NEWLINE
+ get
+ peek_type, _, column, = peek_token
+ end
+
+ # we may be:
+ # - at end of stream
+ # - at a column < margin:
+ # [text]
+ # blah blah blah
+ # - at the same column, but with a different type of list item
+ # [text]
+ # * blah blah
+ # - at the same column, with the same type of list item
+ # [one]
+ # [two]
+ # In all cases, we have an empty description.
+ # In the last case only, we continue.
+ if peek_type.nil? || column < margin then
+ empty = 1
+ elsif column == margin then
+ case peek_type
+ when type
+ empty = 2 # continue
+ when *LIST_TOKENS
+ empty = 1
+ else
+ empty = 0
+ end
+ else
+ empty = 0
+ end
+
+ if empty > 0 then
+ item = RDoc::Markup::ListItem.new(data)
+ item << RDoc::Markup::BlankLine.new
+ list << item
+ break if empty == 1
+ next
+ end
+ end
+ else
+ data = nil
+ end
+
+ list_item = RDoc::Markup::ListItem.new data
+ parse list_item, column
+ list << list_item
+
+ else
+ unget
+ break
+ end
+ end
+
+ p :list_end => margin if @debug
+
+ return nil if list.empty?
+
+ list
+ end
+
+ ##
+ # Builds a Paragraph that is flush to +margin+
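+  #
+  # A rough sketch: consecutive TEXT tokens at the same column are collected
+  # into one Paragraph, so <tt>"foo\nbar\n"</tt> becomes a single Paragraph
+  # containing "foo" and "bar".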
+
+ def build_paragraph margin
+ p :paragraph_start => margin if @debug
+
+ paragraph = RDoc::Markup::Paragraph.new
+
+ until @tokens.empty? do
+ type, data, column, = get
+
+ if type == :TEXT && column == margin then
+ paragraph << data
+ skip :NEWLINE
+ else
+ unget
+ break
+ end
+ end
+
+ p :paragraph_end => margin if @debug
+
+ paragraph
+ end
+
+ ##
+ # Builds a Verbatim that is indented from +margin+.
+ #
+ # The verbatim block is shifted left (the least indented lines start in
+ # column 0). Each part of the verbatim is one line of text, always
+ # terminated by a newline. Blank lines always consist of a single newline
+ # character, and there is never a single newline at the end of the verbatim.
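+  #
+  # A rough sketch: with a margin of 0, the input <tt>"  code\n    more\n"</tt>
+  # becomes a Verbatim whose parts are <tt>"code\n"</tt> and
+  # <tt>"  more\n"</tt> after the two-space minimum indent is stripped.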
+
+ def build_verbatim margin
+ p :verbatim_begin => margin if @debug
+ verbatim = RDoc::Markup::Verbatim.new
+
+ min_indent = nil
+ generate_leading_spaces = true
+ line = ''
+
+ until @tokens.empty? do
+ type, data, column, = get
+
+ if type == :NEWLINE then
+ line << data
+ verbatim << line
+ line = ''
+ generate_leading_spaces = true
+ next
+ end
+
+ if column <= margin
+ unget
+ break
+ end
+
+ if generate_leading_spaces then
+ indent = column - margin
+ line << ' ' * indent
+ min_indent = indent if min_indent.nil? || indent < min_indent
+ generate_leading_spaces = false
+ end
+
+ case type
+ when :HEADER then
+ line << '=' * data
+ _, _, peek_column, = peek_token
+ peek_column ||= column + data
+ indent = peek_column - column - data
+ line << ' ' * indent
+ when :RULE then
+ width = 2 + data
+ line << '-' * width
+ _, _, peek_column, = peek_token
+ peek_column ||= column + width
+ indent = peek_column - column - width
+ line << ' ' * indent
+ when :TEXT then
+ line << data
+ else # *LIST_TOKENS
+ list_marker = case type
+ when :BULLET then data
+ when :LABEL then "[#{data}]"
+ when :NOTE then "#{data}::"
+ else # :LALPHA, :NUMBER, :UALPHA
+ "#{data}."
+ end
+ line << list_marker
+ peek_type, _, peek_column = peek_token
+ unless peek_type == :NEWLINE then
+ peek_column ||= column + list_marker.length
+ indent = peek_column - column - list_marker.length
+ line << ' ' * indent
+ end
+ end
+
+ end
+
+ verbatim << line << "\n" unless line.empty?
+ verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
+ verbatim.normalize
+
+ p :verbatim_end => margin if @debug
+
+ verbatim
+ end
+
+ ##
+ # Pulls the next token from the stream.
+
+ def get
+ @current_token = @tokens.shift
+ p :get => @current_token if @debug
+ @current_token
+ end
+
+ ##
+  # Parses the tokens into a series of block-level objects (Paragraph,
+  # List, Heading, Verbatim, BlankLine and Rule) and appends them to the
+  # given +parent+ container (a Document or ListItem).
+ #
+ # Exits at the end of the token stream, or when it encounters a token
+ # in a column less than +indent+ (unless it is a NEWLINE).
+ #
+ # Returns +parent+.
+
+ def parse parent, indent = 0
+ p :parse_start => indent if @debug
+
+ until @tokens.empty? do
+ type, data, column, = get
+
+ if type == :NEWLINE then
+ # trailing newlines are skipped below, so this is a blank line
+ parent << RDoc::Markup::BlankLine.new
+ skip :NEWLINE, false
+ next
+ end
+
+ # indentation change: break or verbatim
+ if column < indent then
+ unget
+ break
+ elsif column > indent then
+ unget
+ parent << build_verbatim(indent)
+ next
+ end
+
+ # indentation is the same
+ case type
+ when :HEADER then
+ parent << build_heading(data)
+ when :RULE then
+ parent << RDoc::Markup::Rule.new(data)
+ skip :NEWLINE
+ when :TEXT then
+ unget
+ parent << build_paragraph(indent)
+ when *LIST_TOKENS then
+ unget
+ parent << build_list(indent)
+ else
+ type, data, column, line = @current_token
+ raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
+ end
+ end
+
+ p :parse_end => indent if @debug
+
+ parent
+
+ end
+
+ ##
+ # Returns the next token on the stream without modifying the stream
+
+ def peek_token
+ token = @tokens.first || []
+ p :peek => token if @debug
+ token
+ end
+
+ ##
+ # Skips the next token if its type is +token_type+.
+ #
+ # Optionally raises an error if the next token is not of the expected type.
+
+ def skip token_type, error = true
+ type, = get
+ return unless type # end of stream
+ return @current_token if token_type == type
+ unget
+ raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
+ end
+
+ ##
+ # Turns text +input+ into a stream of tokens
+
+ def tokenize input
+ s = StringScanner.new input
+
+ @line = 0
+ @line_pos = 0
+
+ until s.eos? do
+ pos = s.pos
+
+ # leading spaces will be reflected by the column of the next token
+      # the only thing we lose is trailing spaces at the end of the file
+ next if s.scan(/ +/)
+
+ # note: after BULLET, LABEL, etc.,
+ # indent will be the column of the next non-newline token
+
+ @tokens << case
+ # [CR]LF => :NEWLINE
+ when s.scan(/\r?\n/) then
+ token = [:NEWLINE, s.matched, *token_pos(pos)]
+ @line_pos = s.pos
+ @line += 1
+ token
+ # === text => :HEADER then :TEXT
+ when s.scan(/(=+)(\s*)/) then
+ level = s[1].length
+ header = [:HEADER, level, *token_pos(pos)]
+
+ if s[2] =~ /^\r?\n/ then
+ s.pos -= s[2].length
+ header
+ else
+ pos = s.pos
+ s.scan(/.*/)
+ @tokens << header
+ [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
+ end
+ # --- (at least 3) and nothing else on the line => :RULE
+ when s.scan(/(-{3,}) *$/) then
+ [:RULE, s[1].length - 2, *token_pos(pos)]
+ # * or - followed by white space and text => :BULLET
+ when s.scan(/([*-]) +(\S)/) then
+ s.pos -= s[2].bytesize # unget \S
+ [:BULLET, s[1], *token_pos(pos)]
+ # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
+ when s.scan(/([a-z]|\d+)\. +(\S)/i) then
+ # FIXME if tab(s), the column will be wrong
+ # either support tabs everywhere by first expanding them to
+ # spaces, or assume that they will have been replaced
+ # before (and provide a check for that at least in debug
+ # mode)
+ list_label = s[1]
+ s.pos -= s[2].bytesize # unget \S
+ list_type =
+ case list_label
+ when /[a-z]/ then :LALPHA
+ when /[A-Z]/ then :UALPHA
+ when /\d/ then :NUMBER
+ else
+ raise ParseError, "BUG token #{list_label}"
+ end
+ [list_type, list_label, *token_pos(pos)]
+ # [text] followed by spaces or end of line => :LABEL
+ when s.scan(/\[(.*?)\]( +|$)/) then
+ [:LABEL, s[1], *token_pos(pos)]
+ # text:: followed by spaces or end of line => :NOTE
+ when s.scan(/(.*?)::( +|$)/) then
+ [:NOTE, s[1], *token_pos(pos)]
+ # anything else: :TEXT
+ else s.scan(/.*/)
+ [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
+ end
+ end
+
+ self
+ end
+
+ ##
+ # Calculates the column and line of the current token based on +offset+.
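+  #
+  # For example, if the current line started at byte 6 (<tt>@line_pos</tt>)
+  # on line 2 (<tt>@line</tt>), then <tt>token_pos(10)</tt> returns
+  # <tt>[4, 2]</tt>.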
+
+ def token_pos offset
+ [offset - @line_pos, @line]
+ end
+
+ ##
+ # Returns the current token to the token stream
+
+ def unget
+ token = @current_token
+ p :unget => token if @debug
+ raise Error, 'too many #ungets' if token == @tokens.first
+ @tokens.unshift token if token
+ end
+
+end
+
+require 'rdoc/markup/blank_line'
+require 'rdoc/markup/document'
+require 'rdoc/markup/heading'
+require 'rdoc/markup/list'
+require 'rdoc/markup/list_item'
+require 'rdoc/markup/raw'
+require 'rdoc/markup/paragraph'
+require 'rdoc/markup/indented_paragraph'
+require 'rdoc/markup/rule'
+require 'rdoc/markup/verbatim'
+