Diffstat (limited to 'ruby_1_9_3/lib/rdoc/markup/parser.rb')
-rw-r--r--  ruby_1_9_3/lib/rdoc/markup/parser.rb  497
1 file changed, 0 insertions, 497 deletions
diff --git a/ruby_1_9_3/lib/rdoc/markup/parser.rb b/ruby_1_9_3/lib/rdoc/markup/parser.rb
deleted file mode 100644
index c18ce821fb..0000000000
--- a/ruby_1_9_3/lib/rdoc/markup/parser.rb
+++ /dev/null
@@ -1,497 +0,0 @@
-require 'strscan'
-require 'rdoc/text'
-
-##
-# A recursive-descent parser for RDoc markup.
-#
-# The parser tokenizes an input string then parses the tokens into a Document.
-# Documents can be converted into output formats by writing a visitor like
-# RDoc::Markup::ToHTML.
-#
-# The parser only handles the block-level constructs Paragraph, List,
-# ListItem, Heading, Verbatim, BlankLine and Rule. Inline markup such as
-# <tt>\+blah\+</tt> is handled separately by RDoc::Markup::AttributeManager.
-#
-# To see what markup the Parser implements read RDoc. To see how to use
-# RDoc markup to format text in your program read RDoc::Markup.
-
-class RDoc::Markup::Parser
-
- include RDoc::Text
-
- ##
- # List token types
-
- LIST_TOKENS = [
- :BULLET,
- :LABEL,
- :LALPHA,
- :NOTE,
- :NUMBER,
- :UALPHA,
- ]
-
- ##
- # Parser error subclass
-
- class Error < RuntimeError; end
-
- ##
- # Raised when the parser is unable to handle the given markup
-
- class ParseError < Error; end
-
- ##
- # Enables display of debugging information
-
- attr_accessor :debug
-
- ##
- # Token accessor
-
- attr_reader :tokens
-
- ##
- # Parses +str+ into a Document
-
- def self.parse str
- parser = new
- parser.tokenize str
- doc = RDoc::Markup::Document.new
- parser.parse doc
- end
-
- ##
- # Returns a token stream for +str+, for testing
-
- def self.tokenize str
- parser = new
- parser.tokenize str
- parser.tokens
- end
-
- ##
- # Creates a new Parser. See also ::parse
-
- def initialize
- @tokens = []
- @current_token = nil
- @debug = false
-
- @line = 0
- @line_pos = 0
- end
-
- ##
- # Builds a Heading of +level+
-
- def build_heading level
- type, text, = get
-
- text = case type
- when :TEXT then
- skip :NEWLINE
- text
- else
- unget
- ''
- end
-
- RDoc::Markup::Heading.new level, text
- end
-
- ##
- # Builds a List flush to +margin+
-
- def build_list margin
- p :list_start => margin if @debug
-
- list = RDoc::Markup::List.new
-
- until @tokens.empty? do
- type, data, column, = get
-
- case type
- when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
-
- if column < margin || (list.type && list.type != type) then
- unget
- break
- end
-
- list.type = type
- peek_type, _, column, = peek_token
-
- case type
- when :NOTE, :LABEL then
- if peek_type == :NEWLINE then
- # description not on the same line as LABEL/NOTE
- # skip the trailing newline & any blank lines below
- while peek_type == :NEWLINE
- get
- peek_type, _, column, = peek_token
- end
-
- # we may be:
- # - at end of stream
- # - at a column < margin:
- # [text]
- # blah blah blah
- # - at the same column, but with a different type of list item
- # [text]
- # * blah blah
- # - at the same column, with the same type of list item
- # [one]
- # [two]
- # In all cases, we have an empty description.
- # In the last case only, we continue.
- if peek_type.nil? || column < margin then
- empty = 1
- elsif column == margin then
- case peek_type
- when type
- empty = 2 # continue
- when *LIST_TOKENS
- empty = 1
- else
- empty = 0
- end
- else
- empty = 0
- end
-
- if empty > 0 then
- item = RDoc::Markup::ListItem.new(data)
- item << RDoc::Markup::BlankLine.new
- list << item
- break if empty == 1
- next
- end
- end
- else
- data = nil
- end
-
- list_item = RDoc::Markup::ListItem.new data
- parse list_item, column
- list << list_item
-
- else
- unget
- break
- end
- end
-
- p :list_end => margin if @debug
-
- return nil if list.empty?
-
- list
- end
-
- ##
- # Builds a Paragraph that is flush to +margin+
-
- def build_paragraph margin
- p :paragraph_start => margin if @debug
-
- paragraph = RDoc::Markup::Paragraph.new
-
- until @tokens.empty? do
- type, data, column, = get
-
- if type == :TEXT && column == margin then
- paragraph << data
- skip :NEWLINE
- else
- unget
- break
- end
- end
-
- p :paragraph_end => margin if @debug
-
- paragraph
- end
-
- ##
- # Builds a Verbatim that is indented from +margin+.
- #
- # The verbatim block is shifted left (the least indented lines start in
- # column 0). Each part of the verbatim is one line of text, always
- # terminated by a newline. Blank lines always consist of a single newline
- # character, and there is never a single newline at the end of the verbatim.
-
- def build_verbatim margin
- p :verbatim_begin => margin if @debug
- verbatim = RDoc::Markup::Verbatim.new
-
- min_indent = nil
- generate_leading_spaces = true
- line = ''
-
- until @tokens.empty? do
- type, data, column, = get
-
- if type == :NEWLINE then
- line << data
- verbatim << line
- line = ''
- generate_leading_spaces = true
- next
- end
-
- if column <= margin
- unget
- break
- end
-
- if generate_leading_spaces then
- indent = column - margin
- line << ' ' * indent
- min_indent = indent if min_indent.nil? || indent < min_indent
- generate_leading_spaces = false
- end
-
- case type
- when :HEADER then
- line << '=' * data
- _, _, peek_column, = peek_token
- peek_column ||= column + data
- indent = peek_column - column - data
- line << ' ' * indent
- when :RULE then
- width = 2 + data
- line << '-' * width
- _, _, peek_column, = peek_token
- peek_column ||= column + width
- indent = peek_column - column - width
- line << ' ' * indent
- when :TEXT then
- line << data
- else # *LIST_TOKENS
- list_marker = case type
- when :BULLET then data
- when :LABEL then "[#{data}]"
- when :NOTE then "#{data}::"
- else # :LALPHA, :NUMBER, :UALPHA
- "#{data}."
- end
- line << list_marker
- peek_type, _, peek_column = peek_token
- unless peek_type == :NEWLINE then
- peek_column ||= column + list_marker.length
- indent = peek_column - column - list_marker.length
- line << ' ' * indent
- end
- end
-
- end
-
- verbatim << line << "\n" unless line.empty?
- verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
- verbatim.normalize
-
- p :verbatim_end => margin if @debug
-
- verbatim
- end
-
- ##
- # Pulls the next token from the stream.
-
- def get
- @current_token = @tokens.shift
- p :get => @current_token if @debug
- @current_token
- end
-
- ##
- # Parses the tokens into an array of block-level RDoc::Markup objects and
- # appends them to the passed +parent+ container (a Document or ListItem).
- #
- # Exits at the end of the token stream, or when it encounters a token
- # in a column less than +indent+ (unless it is a NEWLINE).
- #
- # Returns +parent+.
-
- def parse parent, indent = 0
- p :parse_start => indent if @debug
-
- until @tokens.empty? do
- type, data, column, = get
-
- if type == :NEWLINE then
- # trailing newlines are skipped below, so this is a blank line
- parent << RDoc::Markup::BlankLine.new
- skip :NEWLINE, false
- next
- end
-
- # indentation change: break or verbatim
- if column < indent then
- unget
- break
- elsif column > indent then
- unget
- parent << build_verbatim(indent)
- next
- end
-
- # indentation is the same
- case type
- when :HEADER then
- parent << build_heading(data)
- when :RULE then
- parent << RDoc::Markup::Rule.new(data)
- skip :NEWLINE
- when :TEXT then
- unget
- parent << build_paragraph(indent)
- when *LIST_TOKENS then
- unget
- parent << build_list(indent)
- else
- type, data, column, line = @current_token
- raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
- end
- end
-
- p :parse_end => indent if @debug
-
- parent
-
- end
-
- ##
- # Returns the next token on the stream without modifying the stream
-
- def peek_token
- token = @tokens.first || []
- p :peek => token if @debug
- token
- end
-
- ##
- # Skips the next token if its type is +token_type+.
- #
- # Optionally raises an error if the next token is not of the expected type.
-
- def skip token_type, error = true
- type, = get
- return unless type # end of stream
- return @current_token if token_type == type
- unget
- raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
- end
-
- ##
- # Turns text +input+ into a stream of tokens
-
- def tokenize input
- s = StringScanner.new input
-
- @line = 0
- @line_pos = 0
-
- until s.eos? do
- pos = s.pos
-
- # leading spaces will be reflected by the column of the next token
- # the only thing we lose is trailing spaces at the end of the file
- next if s.scan(/ +/)
-
- # note: after BULLET, LABEL, etc.,
- # indent will be the column of the next non-newline token
-
- @tokens << case
- # [CR]LF => :NEWLINE
- when s.scan(/\r?\n/) then
- token = [:NEWLINE, s.matched, *token_pos(pos)]
- @line_pos = s.pos
- @line += 1
- token
- # === text => :HEADER then :TEXT
- when s.scan(/(=+)(\s*)/) then
- level = s[1].length
- header = [:HEADER, level, *token_pos(pos)]
-
- if s[2] =~ /^\r?\n/ then
- s.pos -= s[2].length
- header
- else
- pos = s.pos
- s.scan(/.*/)
- @tokens << header
- [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
- end
- # --- (at least 3) and nothing else on the line => :RULE
- when s.scan(/(-{3,}) *$/) then
- [:RULE, s[1].length - 2, *token_pos(pos)]
- # * or - followed by white space and text => :BULLET
- when s.scan(/([*-]) +(\S)/) then
- s.pos -= s[2].bytesize # unget \S
- [:BULLET, s[1], *token_pos(pos)]
- # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
- when s.scan(/([a-z]|\d+)\. +(\S)/i) then
- # FIXME if tab(s), the column will be wrong
- # either support tabs everywhere by first expanding them to
- # spaces, or assume that they will have been replaced
- # before (and provide a check for that at least in debug
- # mode)
- list_label = s[1]
- s.pos -= s[2].bytesize # unget \S
- list_type =
- case list_label
- when /[a-z]/ then :LALPHA
- when /[A-Z]/ then :UALPHA
- when /\d/ then :NUMBER
- else
- raise ParseError, "BUG token #{list_label}"
- end
- [list_type, list_label, *token_pos(pos)]
- # [text] followed by spaces or end of line => :LABEL
- when s.scan(/\[(.*?)\]( +|$)/) then
- [:LABEL, s[1], *token_pos(pos)]
- # text:: followed by spaces or end of line => :NOTE
- when s.scan(/(.*?)::( +|$)/) then
- [:NOTE, s[1], *token_pos(pos)]
- # anything else: :TEXT
- else s.scan(/.*/)
- [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
- end
- end
-
- self
- end
-
- ##
- # Calculates the column and line of the current token based on +offset+.
-
- def token_pos offset
- [offset - @line_pos, @line]
- end
-
- ##
- # Returns the current token to the token stream
-
- def unget
- token = @current_token
- p :unget => token if @debug
- raise Error, 'too many #ungets' if token == @tokens.first
- @tokens.unshift token if token
- end
-
-end
-
-require 'rdoc/markup/blank_line'
-require 'rdoc/markup/document'
-require 'rdoc/markup/heading'
-require 'rdoc/markup/list'
-require 'rdoc/markup/list_item'
-require 'rdoc/markup/raw'
-require 'rdoc/markup/paragraph'
-require 'rdoc/markup/indented_paragraph'
-require 'rdoc/markup/rule'
-require 'rdoc/markup/verbatim'
-
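
For context on what was removed, here is a minimal sketch of how this parser is driven, based only on the class comment and the ::parse/::tokenize methods in the deleted file above. The sample markup string, the variable names, and the ToHtml note are illustrative assumptions, not part of the commit.

  require 'rdoc/markup'   # loads RDoc::Markup::Parser and the block node classes

  markup = "= Example heading\n\n" \
           "A paragraph of plain text.\n\n" \
           "* first bullet\n* second bullet\n"

  # Tokenize only (intended for testing): returns an array of
  # [type, data, column, line] arrays, e.g. [:HEADER, 1, 0, 0]
  # for the "=" on the first line.
  tokens = RDoc::Markup::Parser.tokenize markup

  # Full parse: returns an RDoc::Markup::Document containing the
  # block-level nodes (Heading, Paragraph, List, BlankLine, ...).
  doc = RDoc::Markup::Parser.parse markup

  # A visitor such as RDoc::Markup::ToHtml can then render the document;
  # the ToHtml constructor takes extra arguments in later RDoc versions,
  # so the exact call is version-dependent:
  # html = doc.accept RDoc::Markup::ToHtml.new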