diff options
Diffstat (limited to 'ruby_2_2/lib/rdoc/markup/parser.rb')
-rw-r--r-- | ruby_2_2/lib/rdoc/markup/parser.rb | 558 |
1 files changed, 0 insertions, 558 deletions
require 'strscan'

##
# A recursive-descent parser for RDoc markup.
#
# The parser tokenizes an input string then parses the tokens into a Document.
# Documents can be converted into output formats by writing a visitor like
# RDoc::Markup::ToHTML.
#
# The parser only handles the block-level constructs Paragraph, List,
# ListItem, Heading, Verbatim, BlankLine and Rule.  Inline markup such as
# <tt>\+blah\+</tt> is handled separately by RDoc::Markup::AttributeManager.
#
# To see what markup the Parser implements read RDoc.  To see how to use
# RDoc markup to format text in your program read RDoc::Markup.
#
# Every token produced by #tokenize is an Array of the form
# <tt>[type, data, column, line]</tt> where +column+ and +line+ are
# zero-based character positions.

class RDoc::Markup::Parser

  include RDoc::Text

  ##
  # List token types

  LIST_TOKENS = [
    :BULLET,
    :LABEL,
    :LALPHA,
    :NOTE,
    :NUMBER,
    :UALPHA,
  ].freeze

  ##
  # Parser error subclass

  class Error < RuntimeError; end

  ##
  # Raised when the parser is unable to handle the given markup

  class ParseError < Error; end

  ##
  # Enables display of debugging information

  attr_accessor :debug

  ##
  # Token accessor

  attr_reader :tokens

  ##
  # Parses +str+ into a Document.
  #
  # Use RDoc::Markup#parse instead of this method.

  def self.parse str
    parser = new
    parser.tokenize str
    doc = RDoc::Markup::Document.new
    parser.parse doc
  end

  ##
  # Returns a token stream for +str+, for testing

  def self.tokenize str
    parser = new
    parser.tokenize str
    parser.tokens
  end

  ##
  # Creates a new Parser.  See also ::parse

  def initialize
    @binary_input   = nil
    @current_token  = nil
    @debug          = false
    @have_encoding  = Object.const_defined? :Encoding
    @have_byteslice = ''.respond_to? :byteslice
    @input          = nil
    @input_encoding = nil
    @line           = 0
    @line_pos       = 0
    @s              = nil
    @tokens         = []
  end

  ##
  # Builds a Heading of +level+.
  #
  # The heading text is taken from the next token when it is a :TEXT token
  # (its trailing :NEWLINE is consumed too); otherwise the token is pushed
  # back and the heading text is empty.

  def build_heading level
    type, text, = get

    text = case type
           when :TEXT then
             skip :NEWLINE
             text
           else
             unget
             ''
           end

    RDoc::Markup::Heading.new level, text
  end

  ##
  # Builds a List flush to +margin+.
  #
  # Returns the List, or +nil+ when no item could be built and there is no
  # pending label to attach to an empty LABEL/NOTE item.

  def build_list margin
    p :list_start => margin if @debug

    list = RDoc::Markup::List.new
    label = nil

    until @tokens.empty? do
      type, data, column, = get

      case type
      when *LIST_TOKENS then
        # a dedent or a change of list type terminates this list
        if column < margin || (list.type && list.type != type) then
          unget
          break
        end

        list.type = type
        peek_type, _, column, = peek_token

        case type
        when :NOTE, :LABEL then
          label = [] unless label

          if peek_type == :NEWLINE then
            # description not on the same line as LABEL/NOTE
            # skip the trailing newline & any blank lines below
            while peek_type == :NEWLINE
              get
              peek_type, _, column, = peek_token
            end

            # we may be:
            #   - at end of stream
            #   - at a column < margin:
            #         [text]
            #       blah blah blah
            #   - at the same column, but with a different type of list item
            #       [text]
            #       * blah blah
            #   - at the same column, with the same type of list item
            #       [one]
            #       [two]
            # In all cases, we have an empty description.
            # In the last case only, we continue.
            if peek_type.nil? || column < margin then
              empty = true
            elsif column == margin then
              case peek_type
              when type
                empty = :continue
              when *LIST_TOKENS
                empty = true
              else
                empty = false
              end
            else
              empty = false
            end

            if empty then
              label << data
              next if empty == :continue
              break
            end
          end
        else
          data = nil
        end

        if label then
          data = label << data
          label = nil
        end

        list_item = RDoc::Markup::ListItem.new data
        parse list_item, column
        list << list_item

      else
        unget
        break
      end
    end

    p :list_end => margin if @debug

    if list.empty? then
      return nil unless label
      return nil unless [:LABEL, :NOTE].include? list.type

      list_item = RDoc::Markup::ListItem.new label, RDoc::Markup::BlankLine.new
      list << list_item
    end

    list
  end

  ##
  # Builds a Paragraph that is flush to +margin+.
  #
  # Consecutive :TEXT tokens at +margin+ are joined with a single space.  A
  # :BREAK token following a text token ends the paragraph.

  def build_paragraph margin
    p :paragraph_start => margin if @debug

    paragraph = RDoc::Markup::Paragraph.new

    until @tokens.empty? do
      type, data, column, = get

      if type == :TEXT && column == margin then
        paragraph << data

        break if peek_token.first == :BREAK

        # +data+ is the object stored in the paragraph, so this appends the
        # joining space to the stored part
        data << ' ' if skip :NEWLINE
      else
        unget
        break
      end
    end

    # cleanup: drop the joining space added after the final line.  The
    # paragraph is empty when the first token was not TEXT at +margin+.
    last_part = paragraph.parts.last
    last_part.sub!(/ \z/, '') if last_part

    p :paragraph_end => margin if @debug

    paragraph
  end

  ##
  # Builds a Verbatim that is indented from +margin+.
  #
  # The verbatim block is shifted left (the least indented lines start in
  # column 0).  Each part of the verbatim is one line of text, always
  # terminated by a newline.  Blank lines always consist of a single newline
  # character, and there is never a single newline at the end of the verbatim.

  def build_verbatim margin
    p :verbatim_begin => margin if @debug
    verbatim = RDoc::Markup::Verbatim.new

    min_indent = nil
    generate_leading_spaces = true
    line = ''

    until @tokens.empty? do
      type, data, column, = get

      if type == :NEWLINE then
        line << data
        verbatim << line
        line = ''
        generate_leading_spaces = true
        next
      end

      # back at (or left of) the margin: the verbatim block is over
      if column <= margin
        unget
        break
      end

      if generate_leading_spaces then
        indent = column - margin
        line << ' ' * indent
        min_indent = indent if min_indent.nil? || indent < min_indent
        generate_leading_spaces = false
      end

      # re-create the source text of the token, padding with spaces up to the
      # column of the following token
      case type
      when :HEADER then
        line << '=' * data
        _, _, peek_column, = peek_token
        peek_column ||= column + data
        indent = peek_column - column - data
        line << ' ' * indent
      when :RULE then
        width = 2 + data
        line << '-' * width
        _, _, peek_column, = peek_token
        peek_column ||= column + width
        indent = peek_column - column - width
        line << ' ' * indent
      when :BREAK, :TEXT then
        line << data
      else # *LIST_TOKENS
        list_marker = case type
                      when :BULLET then data
                      when :LABEL  then "[#{data}]"
                      when :NOTE   then "#{data}::"
                      else # :LALPHA, :NUMBER, :UALPHA
                        "#{data}."
                      end
        line << list_marker
        peek_type, _, peek_column = peek_token
        unless peek_type == :NEWLINE then
          peek_column ||= column + list_marker.length
          indent = peek_column - column - list_marker.length
          line << ' ' * indent
        end
      end

    end

    verbatim << line << "\n" unless line.empty?

    # min_indent is nil when no indented token was consumed
    if min_indent && min_indent > 0 then
      verbatim.parts.each do |part|
        part.slice!(0, min_indent) unless part == "\n"
      end
    end

    verbatim.normalize

    p :verbatim_end => margin if @debug

    verbatim
  end

  ##
  # The character offset for the input string at the given +byte_offset+

  def char_pos byte_offset
    if @have_byteslice then
      @input.byteslice(0, byte_offset).length
    elsif @have_encoding then
      matched = @binary_input[0, byte_offset]
      matched.force_encoding @input_encoding
      matched.length
    else
      byte_offset
    end
  end

  ##
  # Pulls the next token from the stream.
  #
  # Returns +nil+ at the end of the stream.

  def get
    @current_token = @tokens.shift
    p :get => @current_token if @debug
    @current_token
  end

  ##
  # Parses the tokens into an array of RDoc::Markup::XXX objects,
  # and appends them to the passed +parent+ RDoc::Markup::YYY object.
  #
  # Exits at the end of the token stream, or when it encounters a token
  # in a column less than +indent+ (unless it is a NEWLINE).
  #
  # Raises ParseError for a token type it does not know how to handle.
  #
  # Returns +parent+.

  def parse parent, indent = 0
    p :parse_start => indent if @debug

    until @tokens.empty? do
      type, data, column, = get

      case type
      when :BREAK then
        parent << RDoc::Markup::BlankLine.new
        skip :NEWLINE, false
        next
      when :NEWLINE then
        # trailing newlines are skipped below, so this is a blank line
        parent << RDoc::Markup::BlankLine.new
        skip :NEWLINE, false
        next
      end

      # indentation change: break or verbatim
      if column < indent then
        unget
        break
      elsif column > indent then
        unget
        parent << build_verbatim(indent)
        next
      end

      # indentation is the same
      case type
      when :HEADER then
        parent << build_heading(data)
      when :RULE then
        parent << RDoc::Markup::Rule.new(data)
        skip :NEWLINE
      when :TEXT then
        unget
        parse_text parent, indent
      when *LIST_TOKENS then
        unget
        parent << build_list(indent)
      else
        type, data, column, line = @current_token
        raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
      end
    end

    p :parse_end => indent if @debug

    parent

  end

  ##
  # Small hook that is overridden by RDoc::TomDoc

  def parse_text parent, indent # :nodoc:
    parent << build_paragraph(indent)
  end

  ##
  # Returns the next token on the stream without modifying the stream.
  #
  # Returns an empty Array at the end of the stream.

  def peek_token
    token = @tokens.first || []
    p :peek => token if @debug
    token
  end

  ##
  # Creates the StringScanner for +input+ and resets the line bookkeeping.

  def setup_scanner input
    @line     = 0
    @line_pos = 0
    @input    = input.dup

    if @have_encoding && !@have_byteslice then
      @input_encoding = @input.encoding
      # force_encoding changes the receiver in place, so @input and
      # @binary_input are the same (binary) String from here on; #char_pos
      # only reads @binary_input on this code path
      @binary_input   = @input.force_encoding Encoding::BINARY
    end

    @s = StringScanner.new input
  end

  ##
  # Skips the next token if its type is +token_type+.
  #
  # Returns the skipped token, or +nil+ at the end of the stream or when the
  # next token has a different type and +error+ is false.
  #
  # Optionally raises an error if the next token is not of the expected type.

  def skip token_type, error = true
    type, = get
    return unless type # end of stream
    return @current_token if token_type == type
    unget
    raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
  end

  ##
  # Turns text +input+ into a stream of tokens.
  #
  # Returns the parser itself; the tokens are available through #tokens.

  def tokenize input
    setup_scanner input

    until @s.eos? do
      # byte offset of the token about to be scanned
      pos = @s.pos

      # leading spaces will be reflected by the column of the next token
      # the only thing we lose are trailing spaces at the end of the file
      next if @s.scan(/ +/)

      # note: after BULLET, LABEL, etc.,
      # indent will be the column of the next non-newline token

      @tokens << case
                 # [CR]LF => :NEWLINE
                 when @s.scan(/\r?\n/) then
                   token = [:NEWLINE, @s.matched, *token_pos(pos)]
                   @line_pos = char_pos @s.pos
                   @line += 1
                   token
                 # === text => :HEADER then :TEXT
                 when @s.scan(/(=+)(\s*)/) then
                   level = @s[1].length
                   header = [:HEADER, level, *token_pos(pos)]

                   if @s[2] =~ /^\r?\n/ then
                     # nothing but whitespace after the markers: give the
                     # whitespace back so the newline is tokenized normally
                     @s.pos -= @s[2].length
                     header
                   else
                     pos = @s.pos
                     @s.scan(/.*/)
                     @tokens << header
                     [:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(pos)]
                   end
                 # --- (at least 3) and nothing else on the line => :RULE
                 when @s.scan(/(-{3,}) *\r?$/) then
                   [:RULE, @s[1].length - 2, *token_pos(pos)]
                 # * or - followed by white space and text => :BULLET
                 when @s.scan(/([*-]) +(\S)/) then
                   @s.pos -= @s[2].bytesize # unget \S
                   [:BULLET, @s[1], *token_pos(pos)]
                 # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
                 when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
                   # FIXME if tab(s), the column will be wrong
                   # either support tabs everywhere by first expanding them to
                   # spaces, or assume that they will have been replaced
                   # before (and provide a check for that at least in debug
                   # mode)
                   list_label = @s[1]
                   @s.pos -= @s[2].bytesize # unget \S
                   list_type =
                     case list_label
                     when /[a-z]/ then :LALPHA
                     when /[A-Z]/ then :UALPHA
                     when /\d/    then :NUMBER
                     else
                       raise ParseError, "BUG token #{list_label}"
                     end
                   [list_type, list_label, *token_pos(pos)]
                 # [text] followed by spaces or end of line => :LABEL
                 when @s.scan(/\[(.*?)\]( +|\r?$)/) then
                   [:LABEL, @s[1], *token_pos(pos)]
                 # text:: followed by spaces or end of line => :NOTE
                 when @s.scan(/(.*?)::( +|\r?$)/) then
                   [:NOTE, @s[1], *token_pos(pos)]
                 # anything else: :TEXT
                 else @s.scan(/(.*?)( )?\r?$/)
                   token = [:TEXT, @s[1], *token_pos(pos)]

                   if @s[2] then
                     @tokens << token
                     # +pos+ is a byte offset, so advance it by the byte size
                     # of the text (not its character count) to keep the
                     # column right for multibyte text
                     [:BREAK, @s[2], *token_pos(pos + @s[1].bytesize)]
                   else
                     token
                   end
                 end
    end

    self
  end

  ##
  # Calculates the column (by character) and line of the current token based
  # on +byte_offset+.
  #
  # Returns a two-element Array <tt>[column, line]</tt>.

  def token_pos byte_offset
    offset = char_pos byte_offset

    [offset - @line_pos, @line]
  end

  ##
  # Returns the current token to the token stream.
  #
  # Raises Error when the current token is already at the head of the stream.

  def unget
    token = @current_token
    p :unget => token if @debug
    raise Error, 'too many #ungets' if token == @tokens.first
    @tokens.unshift token if token
  end

end