diff options
Diffstat (limited to 'trunk/lib/rdoc/markup.rb')
-rw-r--r-- | trunk/lib/rdoc/markup.rb | 473 |
1 files changed, 0 insertions, 473 deletions
require 'rdoc'

##
# RDoc::Markup parses plain text documents and attempts to decompose them into
# their constituent parts. Some of these parts are high-level: paragraphs,
# chunks of verbatim text, list entries and the like. Other parts happen at
# the character level: a piece of bold text, a word in code font. This markup
# is similar in spirit to that used on WikiWiki webs, where folks create web
# pages using a simple set of formatting rules.
#
# RDoc::Markup itself does no output formatting: this is left to a different
# set of classes.
#
# RDoc::Markup is extendable at runtime: you can add \new markup elements to
# be recognised in the documents that RDoc::Markup parses.
#
# RDoc::Markup is intended to be the basis for a family of tools which share
# the common requirement that simple, plain-text should be rendered in a
# variety of different output formats and media. It is envisaged that
# RDoc::Markup could be the basis for formatting RDoc style comment blocks,
# Wiki entries, and online FAQs.
#
# = Basic Formatting
#
# * RDoc::Markup looks for a document's natural left margin. This is
#   used as the initial margin for the document.
#
# * Consecutive lines starting at this margin are considered to be a
#   paragraph.
#
# * If a paragraph starts with a "*", "-", or with "<digit>.", then it is
#   taken to be the start of a list. The margin is increased to be the first
#   non-space following the list start flag. Subsequent lines should be
#   indented to this \new margin until the list ends. For example:
#
#      * this is a list with three paragraphs in
#        the first item. This is the first paragraph.
#
#        And this is the second paragraph.
#
#        1. This is an indented, numbered list.
#        2. This is the second item in that list
#
#        This is the third conventional paragraph in the
#        first list item.
#
#      * This is the second item in the original list
#
# * You can also construct labeled lists, sometimes called description
#   or definition lists. Do this by putting the label in square brackets
#   and indenting the list body:
#
#       [cat]  a small furry mammal
#              that seems to sleep a lot
#
#       [ant]  a little insect that is known
#              to enjoy picnics
#
#   A minor variation on labeled lists uses two colons to separate the
#   label from the list body:
#
#       cat::  a small furry mammal
#              that seems to sleep a lot
#
#       ant::  a little insect that is known
#              to enjoy picnics
#
#   This latter style guarantees that the list bodies' left margins are
#   aligned: think of them as a two column table.
#
# * Any line that starts to the right of the current margin is treated
#   as verbatim text. This is useful for code listings. The example of a
#   list above is also verbatim text.
#
# * A line starting with an equals sign (=) is treated as a
#   heading. Level one headings have one equals sign, level two headings
#   have two, and so on.
#
# * A line starting with three or more hyphens (at the current indent)
#   generates a horizontal rule. The more hyphens, the thicker the rule
#   (within reason, and if supported by the output device)
#
# * You can use markup within text (except verbatim) to change the
#   appearance of parts of that text. Out of the box, RDoc::Markup
#   supports word-based and general markup.
#
#   Word-based markup uses flag characters around individual words:
#
#   [\*word*]  displays word in a *bold* font
#   [\_word_]  displays word in an _emphasized_ font
#   [\+word+]  displays word in a +code+ font
#
#   General markup affects text between a start delimiter and an end
#   delimiter. Not surprisingly, these delimiters look like HTML markup.
#
#   [\<b>text...</b>]    displays word in a *bold* font
#   [\<em>text...</em>]  displays word in an _emphasized_ font
#   [\<i>text...</i>]    displays word in an _emphasized_ font
#   [\<tt>text...</tt>]  displays word in a +code+ font
#
#   Unlike conventional Wiki markup, general markup can cross line
#   boundaries. You can turn off the interpretation of markup by
#   preceding the first character with a backslash, so \\\<b>bold
#   text</b> and \\\*bold* produce \<b>bold text</b> and \*bold*
#   respectively.
#
# * Hyperlinks to the web starting http:, mailto:, ftp:, or www. are
#   recognized. An HTTP url that references an external image file is
#   converted into an inline <IMG..>. Hyperlinks starting 'link:' are
#   assumed to refer to local files whose path is relative to the --op
#   directory.
#
#   Hyperlinks can also be of the form <tt>label</tt>[url], in which
#   case the label is used in the displayed text, and <tt>url</tt> is
#   used as the target. If <tt>label</tt> contains multiple words,
#   put it in braces: <em>{multi word label}[</em>url<em>]</em>.
#
# == Synopsis
#
# This code converts +input_string+ to HTML. The conversion takes place in
# the +convert+ method, so you can use the same RDoc::Markup converter to
# convert multiple input strings.
#
#   require 'rdoc/markup/to_html'
#
#   h = RDoc::Markup::ToHtml.new
#
#   puts h.convert(input_string)
#
# You can extend the RDoc::Markup parser to recognise new markup
# sequences, and to add special processing for text that matches a
# regular expression. Here we make WikiWords significant to the parser,
# and also make the sequences {word} and \<no>text...</no> signify
# strike-through text. We then subclass the HTML output class to deal
# with these:
#
#   require 'rdoc/markup'
#   require 'rdoc/markup/to_html'
#
#   class WikiHtml < RDoc::Markup::ToHtml
#     def handle_special_WIKIWORD(special)
#       "<font color=red>" + special.text + "</font>"
#     end
#   end
#
#   m = RDoc::Markup.new
#   m.add_word_pair("{", "}", :STRIKE)
#   m.add_html("no", :STRIKE)
#
#   m.add_special(/\b([A-Z][a-z]+[A-Z]\w+)/, :WIKIWORD)
#
#   wh = WikiHtml.new
#   wh.add_tag(:STRIKE, "<strike>", "</strike>")
#
#   puts "<body>#{wh.convert ARGF.read}</body>"
#
#--
# Author:: Dave Thomas, dave@pragmaticprogrammer.com
# License:: Ruby license

class RDoc::Markup

  # Character literal that line text is compared against, one position at a
  # time, when measuring indentation.

  SPACE = ?\s

  # List entries look like:
  #   *       text
  #   1.      text
  #   [label] text
  #   label:: text
  #
  # Flag it as a list entry, and work out the indent for subsequent lines
  #
  # SIMPLE_LIST_RE captures the marker plus its trailing whitespace in group
  # 1 and the bare marker in group 2; it only matches when some non-space
  # text follows the marker.

  SIMPLE_LIST_RE = /^(
                (  \*          (?# bullet)
                  |-           (?# bullet)
                  |\d+\.       (?# numbered )
                  |[A-Za-z]\.  (?# alphabetically numbered )
                )
                \s+
              )\S/x

  # LABEL_LIST_RE captures the label plus its trailing whitespace in group 1
  # and the bare label (<tt>[label]</tt> or <tt>label::</tt>) in group 2.
  # Unlike SIMPLE_LIST_RE the label may be the last thing on the line.

  LABEL_LIST_RE = /^(
                      (  \[.*?\]    (?# labeled  )
                        |\S.*::     (?# note     )
                      )(?:\s+|$)
                    )/x

  ##
  # Take a block of text and use various heuristics to determine its
  # structure (paragraphs, lists, and so on). Invoke an event handler as we
  # identify significant chunks.
  #
  # Creating the parser only sets up a fresh AttributeManager; no text is
  # processed until #convert is called.

  def initialize
    @am = RDoc::Markup::AttributeManager.new
    @output = nil
  end

  ##
  # Add to the sequences used to add formatting to an individual word (such
  # as *bold*). Matching entries will generate attributes that the output
  # formatters can recognize by their +name+.
  #
  # +start+ and +stop+ are the delimiters placed around the word. The call is
  # forwarded unchanged to the attribute manager.

  def add_word_pair(start, stop, name)
    @am.add_word_pair(start, stop, name)
  end

  ##
  # Add to the sequences recognized as general markup.
  #
  # +tag+ is the tag name (for example "no" for \<no>...</no>) and +name+ is
  # the attribute it maps to. The call is forwarded unchanged to the
  # attribute manager.

  def add_html(tag, name)
    @am.add_html(tag, name)
  end

  ##
  # Add to other inline sequences. For example, we could add WikiWords using
  # something like:
  #
  #   parser.add_special(/\b([A-Z][a-z]+[A-Z]\w+)/, :WIKIWORD)
  #
  # Each wiki word will be presented to the output formatter via the
  # accept_special method.

  def add_special(pattern, name)
    @am.add_special(pattern, name)
  end

  ##
  # We take a string, split it into lines, work out the type of each line,
  # and from there deduce groups of lines (for example all lines in a
  # paragraph). We then invoke the output formatter using a Visitor to
  # display the result.
  #
  # +str+ is the text to convert; it is split on LF or CRLF. +op+ is handed,
  # together with the attribute manager, to the +accept+ call on the grouped
  # lines (presumably +op+ is the output formatter -- confirm against
  # callers).
  #
  # Returns an empty string when there are no lines to process, otherwise
  # whatever the +accept+ call returns.

  def convert(str, op)
    lines = str.split(/\r?\n/).map { |line| Line.new line }
    @lines = Lines.new lines

    return "" if @lines.empty?
    @lines.normalize
    assign_types_to_lines
    group = group_lines
    # call the output formatter to handle the result
    #group.each { |line| p line }
    group.accept @am, op
  end

  private

  ##
  # Look through the text at line indentation. We flag each line as being
  # Blank, a paragraph, a list element, or verbatim text.
  #
  # +margin+ is the number of leading columns that must be blank for a line
  # to belong to the current nesting level, and +level+ is the nesting depth
  # stamped onto each line. The method recurses (with a larger margin and
  # level + 1) for the body of every list entry, and returns as soon as it
  # meets a line that has text to the left of +margin+, leaving that line to
  # be re-read by the caller.

  def assign_types_to_lines(margin = 0, level = 0)
    while line = @lines.next
      if line.blank? then
        line.stamp :BLANK, level
        next
      end

      # if a line contains non-blanks before the margin, then it must belong
      # to an outer level

      text = line.text

      for i in 0...margin
        if text[i] != SPACE
          @lines.unget
          return
        end
      end

      active_line = text[margin..-1]

      # Rules (horizontal lines) look like
      #
      #  ---   (three or more hyphens)
      #
      # The more hyphens, the thicker the rule
      #

      if /^(---+)\s*$/ =~ active_line
        # the parameter stamped on the line is the hyphen count minus two, so
        # the minimal three-hyphen rule gets 1
        line.stamp :RULE, level, $1.length-2
        next
      end

      # Then look for list entries. First the ones that have to have
      # text following them (* xxx, - xxx, and dd. xxx)

      if SIMPLE_LIST_RE =~ active_line
        offset = margin + $1.length
        prefix = $2
        prefix_length = prefix.length

        flag = case prefix
               when "*","-" then :BULLET
               when /^\d/   then :NUMBER
               when /^[A-Z]/ then :UPPERALPHA
               when /^[a-z]/ then :LOWERALPHA
               else raise "Invalid List Type: #{self.inspect}"
               end

        line.stamp :LIST, level+1, prefix, flag
        # overwrite the list marker with spaces, in place, so that only the
        # entry's text remains to the right of the margin
        text[margin, prefix_length] = " " * prefix_length
        assign_types_to_lines(offset, level + 1)
        next
      end

      if LABEL_LIST_RE =~ active_line
        offset = margin + $1.length
        prefix = $2

        # a false result means the label was not accepted as a list entry;
        # fall through and classify the line with the remaining rules
        next if handled_labeled_list(line, level, margin, offset, prefix)
      end

      # Headings look like
      # = Main heading
      # == Second level
      # === Third
      #
      # Headings reset the level to 0

      if active_line[0] == ?= and active_line =~ /^(=+)\s*(.*)/
        prefix_length = $1.length
        # heading levels are capped at 6
        prefix_length = 6 if prefix_length > 6
        line.stamp :HEADING, 0, prefix_length
        line.strip_leading(margin + prefix_length)
        next
      end

      # If the character's a space, then we have verbatim text,
      # otherwise it is a paragraph line

      if active_line[0] == SPACE
        line.strip_leading(margin) if margin > 0
        line.stamp :VERBATIM, level
      else
        line.stamp :PARAGRAPH, level
      end
    end
  end

  ##
  # Handle labeled list entries. We have a special case to deal with.
  # Because the labels can be long, they force the remaining block of text
  # over to the right:
  #
  #   this is a long label that I wrote:: and here is the
  #                                       block of text with
  #                                       a silly margin
  #
  # So we allow the special case. If the label is followed by nothing, and
  # if the following line is indented, then we take the indent of that line
  # as the new margin.
  #
  #   this is a long label that I wrote::
  #       here is a more reasonably indented block which
  #       will be attached to the label.
  #
  # +line+ is the line holding the label, +level+ and +margin+ are those of
  # the enclosing block, +offset+ is the column just past the label and its
  # trailing whitespace, and +prefix+ is the label as matched by
  # LABEL_LIST_RE.
  #
  # Returns true when the line was stamped as a list entry (and its body
  # processed), false when it was not, in which case any look-ahead line has
  # been pushed back. Raises RuntimeError if +prefix+ is neither a
  # <tt>[label]</tt> nor ends in a colon.

  def handled_labeled_list(line, level, margin, offset, prefix)
    prefix_length = prefix.length
    text = line.text
    flag = nil

    case prefix
    when /^\[/ then
      flag = :LABELED
      # drop the surrounding square brackets
      prefix = prefix[1, prefix.length-2]
    when /:$/ then
      flag = :NOTE
      # removes the final character only, so one of the two colons remains
      prefix.chop!
    else
      raise "Invalid List Type: #{self.inspect}"
    end

    # body is on the next line
    if text.length <= offset then
      original_line = line
      line = @lines.next
      return false unless line
      text = line.text

      # the range is inclusive: the column at +margin+ itself must be blank
      # too, i.e. the body has to be indented further than the label
      for i in 0..margin
        if text[i] != SPACE
          @lines.unget
          return false
        end
      end

      # find the first non-space column of the body line
      i = margin
      i += 1 while text[i] == SPACE

      if i >= text.length then
        @lines.unget
        return false
      else
        offset = i
        # the body line carries no label, so nothing needs blanking out below
        prefix_length = 0

        if text[offset..-1] =~ SIMPLE_LIST_RE then
          # the body starts a nested list: keep the label line (with its text
          # emptied) as the entry and let the recursion re-read the body line
          @lines.unget
          line = original_line
          line.text = ''
        else
          # the body line becomes the entry; the label-only line is removed
          @lines.delete original_line
        end
      end
    end

    line.stamp :LIST, level+1, prefix, flag
    text[margin, prefix_length] = " " * prefix_length
    assign_types_to_lines(offset, level + 1)
    return true
  end

  ##
  # Return a block consisting of fragments which are paragraphs, list
  # entries or verbatim text. We merge consecutive lines of the same type
  # and level together. We are also slightly tricky with lists: the lines
  # following a list introduction look like paragraph lines at the next
  # level, and we remap them into list entries instead.
  #
  # Rewinds the line collection before walking it and returns the
  # LineCollection after calling +normalize+ on it.

  def group_lines
    @lines.rewind

    wanted_type = wanted_level = nil

    block = LineCollection.new
    group = nil

    while line = @lines.next
      if line.level == wanted_level and line.type == wanted_type
        group.add_text(line.text)
      else
        group = block.fragment_for(line)
        block.add(group)

        # paragraph lines that follow a list entry are folded into that entry
        if line.type == :LIST
          wanted_type = :PARAGRAPH
        else
          wanted_type = line.type
        end

        # for headings the level to match is the heading's stamped parameter
        # rather than the line's own level
        wanted_level = line.type == :HEADING ? line.param : line.level
      end
    end

    block.normalize
    block
  end

  ##
  # For debugging, we allow access to our line contents as text.

  def content
    @lines.as_text
  end
  public :content

  ##
  # For debugging, return the list of line types.

  def get_line_types
    @lines.line_types
  end
  public :get_line_types

end

require 'rdoc/markup/fragments'
require 'rdoc/markup/inline'
require 'rdoc/markup/lines'