summaryrefslogtreecommitdiff
path: root/ruby_2_2/lib/rdoc/markup/parser.rb
diff options
context:
space:
mode:
Diffstat (limited to 'ruby_2_2/lib/rdoc/markup/parser.rb')
-rw-r--r--ruby_2_2/lib/rdoc/markup/parser.rb558
1 files changed, 0 insertions, 558 deletions
diff --git a/ruby_2_2/lib/rdoc/markup/parser.rb b/ruby_2_2/lib/rdoc/markup/parser.rb
deleted file mode 100644
index cc828a4513..0000000000
--- a/ruby_2_2/lib/rdoc/markup/parser.rb
+++ /dev/null
@@ -1,558 +0,0 @@
-require 'strscan'
-
-##
-# A recursive-descent parser for RDoc markup.
-#
-# The parser tokenizes an input string then parses the tokens into a Document.
-# Documents can be converted into output formats by writing a visitor like
-# RDoc::Markup::ToHTML.
-#
-# The parser only handles the block-level constructs Paragraph, List,
-# ListItem, Heading, Verbatim, BlankLine and Rule. Inline markup such as
-# <tt>\+blah\+</tt> is handled separately by RDoc::Markup::AttributeManager.
-#
-# To see what markup the Parser implements read RDoc. To see how to use
-# RDoc markup to format text in your program read RDoc::Markup.
-
-class RDoc::Markup::Parser
-
- include RDoc::Text
-
- ##
- # List token types
-
- LIST_TOKENS = [
- :BULLET,
- :LABEL,
- :LALPHA,
- :NOTE,
- :NUMBER,
- :UALPHA,
- ]
-
- ##
- # Parser error subclass
-
- class Error < RuntimeError; end
-
- ##
- # Raised when the parser is unable to handle the given markup
-
- class ParseError < Error; end
-
- ##
- # Enables display of debugging information
-
- attr_accessor :debug
-
- ##
- # Token accessor
-
- attr_reader :tokens
-
- ##
- # Parses +str+ into a Document.
- #
- # Use RDoc::Markup#parse instead of this method.
-
- def self.parse str
- parser = new
- parser.tokenize str
- doc = RDoc::Markup::Document.new
- parser.parse doc
- end
-
- ##
- # Returns a token stream for +str+, for testing
-
- def self.tokenize str
- parser = new
- parser.tokenize str
- parser.tokens
- end
-
- ##
- # Creates a new Parser. See also ::parse
-
- def initialize
- @binary_input = nil
- @current_token = nil
- @debug = false
- @have_encoding = Object.const_defined? :Encoding
- @have_byteslice = ''.respond_to? :byteslice
- @input = nil
- @input_encoding = nil
- @line = 0
- @line_pos = 0
- @s = nil
- @tokens = []
- end
-
- ##
- # Builds a Heading of +level+
-
- def build_heading level
- type, text, = get
-
- text = case type
- when :TEXT then
- skip :NEWLINE
- text
- else
- unget
- ''
- end
-
- RDoc::Markup::Heading.new level, text
- end
-
- ##
- # Builds a List flush to +margin+
-
- def build_list margin
- p :list_start => margin if @debug
-
- list = RDoc::Markup::List.new
- label = nil
-
- until @tokens.empty? do
- type, data, column, = get
-
- case type
- when *LIST_TOKENS then
- if column < margin || (list.type && list.type != type) then
- unget
- break
- end
-
- list.type = type
- peek_type, _, column, = peek_token
-
- case type
- when :NOTE, :LABEL then
- label = [] unless label
-
- if peek_type == :NEWLINE then
- # description not on the same line as LABEL/NOTE
- # skip the trailing newline & any blank lines below
- while peek_type == :NEWLINE
- get
- peek_type, _, column, = peek_token
- end
-
- # we may be:
- # - at end of stream
- # - at a column < margin:
- # [text]
- # blah blah blah
- # - at the same column, but with a different type of list item
- # [text]
- # * blah blah
- # - at the same column, with the same type of list item
- # [one]
- # [two]
- # In all cases, we have an empty description.
- # In the last case only, we continue.
- if peek_type.nil? || column < margin then
- empty = true
- elsif column == margin then
- case peek_type
- when type
- empty = :continue
- when *LIST_TOKENS
- empty = true
- else
- empty = false
- end
- else
- empty = false
- end
-
- if empty then
- label << data
- next if empty == :continue
- break
- end
- end
- else
- data = nil
- end
-
- if label then
- data = label << data
- label = nil
- end
-
- list_item = RDoc::Markup::ListItem.new data
- parse list_item, column
- list << list_item
-
- else
- unget
- break
- end
- end
-
- p :list_end => margin if @debug
-
- if list.empty? then
- return nil unless label
- return nil unless [:LABEL, :NOTE].include? list.type
-
- list_item = RDoc::Markup::ListItem.new label, RDoc::Markup::BlankLine.new
- list << list_item
- end
-
- list
- end
-
- ##
- # Builds a Paragraph that is flush to +margin+
-
- def build_paragraph margin
- p :paragraph_start => margin if @debug
-
- paragraph = RDoc::Markup::Paragraph.new
-
- until @tokens.empty? do
- type, data, column, = get
-
- if type == :TEXT and column == margin then
- paragraph << data
-
- break if peek_token.first == :BREAK
-
- data << ' ' if skip :NEWLINE
- else
- unget
- break
- end
- end
-
- paragraph.parts.last.sub!(/ \z/, '') # cleanup
-
- p :paragraph_end => margin if @debug
-
- paragraph
- end
-
- ##
- # Builds a Verbatim that is indented from +margin+.
- #
- # The verbatim block is shifted left (the least indented lines start in
- # column 0). Each part of the verbatim is one line of text, always
- # terminated by a newline. Blank lines always consist of a single newline
- # character, and there is never a single newline at the end of the verbatim.
-
- def build_verbatim margin
- p :verbatim_begin => margin if @debug
- verbatim = RDoc::Markup::Verbatim.new
-
- min_indent = nil
- generate_leading_spaces = true
- line = ''
-
- until @tokens.empty? do
- type, data, column, = get
-
- if type == :NEWLINE then
- line << data
- verbatim << line
- line = ''
- generate_leading_spaces = true
- next
- end
-
- if column <= margin
- unget
- break
- end
-
- if generate_leading_spaces then
- indent = column - margin
- line << ' ' * indent
- min_indent = indent if min_indent.nil? || indent < min_indent
- generate_leading_spaces = false
- end
-
- case type
- when :HEADER then
- line << '=' * data
- _, _, peek_column, = peek_token
- peek_column ||= column + data
- indent = peek_column - column - data
- line << ' ' * indent
- when :RULE then
- width = 2 + data
- line << '-' * width
- _, _, peek_column, = peek_token
- peek_column ||= column + width
- indent = peek_column - column - width
- line << ' ' * indent
- when :BREAK, :TEXT then
- line << data
- else # *LIST_TOKENS
- list_marker = case type
- when :BULLET then data
- when :LABEL then "[#{data}]"
- when :NOTE then "#{data}::"
- else # :LALPHA, :NUMBER, :UALPHA
- "#{data}."
- end
- line << list_marker
- peek_type, _, peek_column = peek_token
- unless peek_type == :NEWLINE then
- peek_column ||= column + list_marker.length
- indent = peek_column - column - list_marker.length
- line << ' ' * indent
- end
- end
-
- end
-
- verbatim << line << "\n" unless line.empty?
- verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
- verbatim.normalize
-
- p :verbatim_end => margin if @debug
-
- verbatim
- end
-
- ##
- # The character offset for the input string at the given +byte_offset+
-
- def char_pos byte_offset
- if @have_byteslice then
- @input.byteslice(0, byte_offset).length
- elsif @have_encoding then
- matched = @binary_input[0, byte_offset]
- matched.force_encoding @input_encoding
- matched.length
- else
- byte_offset
- end
- end
-
- ##
- # Pulls the next token from the stream.
-
- def get
- @current_token = @tokens.shift
- p :get => @current_token if @debug
- @current_token
- end
-
- ##
- # Parses the tokens into an array of RDoc::Markup::XXX objects,
- # and appends them to the passed +parent+ RDoc::Markup::YYY object.
- #
- # Exits at the end of the token stream, or when it encounters a token
- # in a column less than +indent+ (unless it is a NEWLINE).
- #
- # Returns +parent+.
-
- def parse parent, indent = 0
- p :parse_start => indent if @debug
-
- until @tokens.empty? do
- type, data, column, = get
-
- case type
- when :BREAK then
- parent << RDoc::Markup::BlankLine.new
- skip :NEWLINE, false
- next
- when :NEWLINE then
- # trailing newlines are skipped below, so this is a blank line
- parent << RDoc::Markup::BlankLine.new
- skip :NEWLINE, false
- next
- end
-
- # indentation change: break or verbatim
- if column < indent then
- unget
- break
- elsif column > indent then
- unget
- parent << build_verbatim(indent)
- next
- end
-
- # indentation is the same
- case type
- when :HEADER then
- parent << build_heading(data)
- when :RULE then
- parent << RDoc::Markup::Rule.new(data)
- skip :NEWLINE
- when :TEXT then
- unget
- parse_text parent, indent
- when *LIST_TOKENS then
- unget
- parent << build_list(indent)
- else
- type, data, column, line = @current_token
- raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
- end
- end
-
- p :parse_end => indent if @debug
-
- parent
-
- end
-
- ##
- # Small hook that is overridden by RDoc::TomDoc
-
- def parse_text parent, indent # :nodoc:
- parent << build_paragraph(indent)
- end
-
- ##
- # Returns the next token on the stream without modifying the stream
-
- def peek_token
- token = @tokens.first || []
- p :peek => token if @debug
- token
- end
-
- ##
- # Creates the StringScanner
-
- def setup_scanner input
- @line = 0
- @line_pos = 0
- @input = input.dup
-
- if @have_encoding and not @have_byteslice then
- @input_encoding = @input.encoding
- @binary_input = @input.force_encoding Encoding::BINARY
- end
-
- @s = StringScanner.new input
- end
-
- ##
- # Skips the next token if its type is +token_type+.
- #
- # Optionally raises an error if the next token is not of the expected type.
-
- def skip token_type, error = true
- type, = get
- return unless type # end of stream
- return @current_token if token_type == type
- unget
- raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
- end
-
- ##
- # Turns text +input+ into a stream of tokens
-
- def tokenize input
- setup_scanner input
-
- until @s.eos? do
- pos = @s.pos
-
- # leading spaces will be reflected by the column of the next token
- # the only thing we loose are trailing spaces at the end of the file
- next if @s.scan(/ +/)
-
- # note: after BULLET, LABEL, etc.,
- # indent will be the column of the next non-newline token
-
- @tokens << case
- # [CR]LF => :NEWLINE
- when @s.scan(/\r?\n/) then
- token = [:NEWLINE, @s.matched, *token_pos(pos)]
- @line_pos = char_pos @s.pos
- @line += 1
- token
- # === text => :HEADER then :TEXT
- when @s.scan(/(=+)(\s*)/) then
- level = @s[1].length
- header = [:HEADER, level, *token_pos(pos)]
-
- if @s[2] =~ /^\r?\n/ then
- @s.pos -= @s[2].length
- header
- else
- pos = @s.pos
- @s.scan(/.*/)
- @tokens << header
- [:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(pos)]
- end
- # --- (at least 3) and nothing else on the line => :RULE
- when @s.scan(/(-{3,}) *\r?$/) then
- [:RULE, @s[1].length - 2, *token_pos(pos)]
- # * or - followed by white space and text => :BULLET
- when @s.scan(/([*-]) +(\S)/) then
- @s.pos -= @s[2].bytesize # unget \S
- [:BULLET, @s[1], *token_pos(pos)]
- # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
- when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
- # FIXME if tab(s), the column will be wrong
- # either support tabs everywhere by first expanding them to
- # spaces, or assume that they will have been replaced
- # before (and provide a check for that at least in debug
- # mode)
- list_label = @s[1]
- @s.pos -= @s[2].bytesize # unget \S
- list_type =
- case list_label
- when /[a-z]/ then :LALPHA
- when /[A-Z]/ then :UALPHA
- when /\d/ then :NUMBER
- else
- raise ParseError, "BUG token #{list_label}"
- end
- [list_type, list_label, *token_pos(pos)]
- # [text] followed by spaces or end of line => :LABEL
- when @s.scan(/\[(.*?)\]( +|\r?$)/) then
- [:LABEL, @s[1], *token_pos(pos)]
- # text:: followed by spaces or end of line => :NOTE
- when @s.scan(/(.*?)::( +|\r?$)/) then
- [:NOTE, @s[1], *token_pos(pos)]
- # anything else: :TEXT
- else @s.scan(/(.*?)( )?\r?$/)
- token = [:TEXT, @s[1], *token_pos(pos)]
-
- if @s[2] then
- @tokens << token
- [:BREAK, @s[2], *token_pos(pos + @s[1].length)]
- else
- token
- end
- end
- end
-
- self
- end
-
- ##
- # Calculates the column (by character) and line of the current token based
- # on +byte_offset+.
-
- def token_pos byte_offset
- offset = char_pos byte_offset
-
- [offset - @line_pos, @line]
- end
-
- ##
- # Returns the current token to the token stream
-
- def unget
- token = @current_token
- p :unget => token if @debug
- raise Error, 'too many #ungets' if token == @tokens.first
- @tokens.unshift token if token
- end
-
-end
-