12 files changed, 2708 insertions, 0 deletions
diff --git a/lib/rdoc/markup/sample/rdoc2latex.rb b/lib/rdoc/markup/sample/rdoc2latex.rb
new file mode 100644
index 0000000000..26563b75da
--- /dev/null
+++ b/lib/rdoc/markup/sample/rdoc2latex.rb
@@ -0,0 +1,16 @@
+#!/usr/local/bin/ruby
+# Sample script: read RDoc-style markup from the files named on the
+# command line (or from standard input) and print it as LaTeX
+
+require 'rdoc/markup/simple_markup'
+require 'rdoc/markup/simple_markup/to_latex'
+
+markup    = SM::SimpleMarkup.new
+formatter = SM::ToLaTeX.new
+
+#puts "\\documentclass{report}"
+#puts "\\usepackage{tabularx}"
+#puts "\\usepackage{parskip}"
+#puts "\\begin{document}"
+puts markup.convert(ARGF.read, formatter)
+#puts "\\end{document}"
diff --git a/lib/rdoc/markup/sample/sample.rb b/lib/rdoc/markup/sample/sample.rb
new file mode 100644
index 0000000000..a375b54564
--- /dev/null
+++ b/lib/rdoc/markup/sample/sample.rb
@@ -0,0 +1,42 @@
+# This program illustrates the basic use of the SimpleMarkup
+# class. It extracts the first comment block from the 
+# simple_markup.rb file and converts it into HTML on
+# standard output. Run it using
+#
+#  % ruby sample.rb
+#
+# You should be in the sample/ directory when you do this,
+# as it hardwires the path to the files it needs to require.
+# This isn't necessary in the code you write once you've 
+# installed the package.
+#
+# For a better way of formatting code comment blocks (and more)
+# see the rdoc package.
+#
+
+$:.unshift "../../.."
+
+require 'rdoc/markup/simple_markup'
+require 'rdoc/markup/simple_markup/to_html'
+
+# Collect the leading comment block of the source file, minus its
+# comment markers. The first line that is not a comment ends it.
+
+comment_text = ""
+
+File.foreach("../simple_markup.rb") do |source_line|
+  break unless source_line.gsub!(/^\# ?/, '')
+  comment_text << source_line
+end
+
+# Create the parser and the HTML formatter it will drive
+
+parser    = SM::SimpleMarkup.new
+formatter = SM::ToHtml.new
+
+# Convert our comment block to HTML, wrapped in a body
+# tag pair so that browsers can view it
+
+puts "<html><body>"
+puts parser.convert(comment_text, formatter)
+puts "</body></html>"
diff --git a/lib/rdoc/markup/simple_markup.rb b/lib/rdoc/markup/simple_markup.rb
new file mode 100644
index 0000000000..18971e23e1
--- /dev/null
+++ b/lib/rdoc/markup/simple_markup.rb
@@ -0,0 +1,477 @@
+# = Introduction
+#
+# SimpleMarkup parses plain text documents and attempts to decompose
+# them into their constituent parts. Some of these parts are high-level:
+# paragraphs, chunks of verbatim text, list entries and the like. Other
+# parts happen at the character level: a piece of bold text, a word in
+# code font. This markup is similar in spirit to that used on WikiWiki
+# webs, where folks create web pages using a simple set of formatting
+# rules.
+#
+# SimpleMarkup itself does no output formatting: this is left to a
+# different set of classes.
+#
+# SimpleMarkup is extendable at runtime: you can add new markup
+# elements to be recognised in the documents that SimpleMarkup parses.
+#
+# SimpleMarkup is intended to be the basis for a family of tools which
+# share the common requirement that simple, plain-text should be
+# rendered in a variety of different output formats and media. It is
+# envisaged that SimpleMarkup could be the basis for formatting RDoc
+# style comment blocks, Wiki entries, and online FAQs.
+#
+# = Basic Formatting
+#
+# * SimpleMarkup looks for a document's natural left margin. This is
+#   used as the initial margin for the document.
+#
+# * Consecutive lines starting at this margin are considered to be a
+#   paragraph.
+#
+# * If a paragraph starts with a "*", "-", or with "<digit>.", then it is
+#   taken to be the start of a list. The margin is increased to be the
+#   first non-space following the list start flag. Subsequent lines
+#   should be indented to this new margin until the list ends. For
+#   example:
+#
+#      * this is a list with three paragraphs in
+#        the first item. This is the first paragraph.
+#
+#        And this is the second paragraph.
+#
+#        1. This is an indented, numbered list.
+#        2. This is the second item in that list
+#
+#        This is the third conventional paragraph in the
+#        first list item.
+#
+#      * This is the second item in the original list
+#
+# * You can also construct labeled lists, sometimes called description
+#   or definition lists. Do this by putting the label in square brackets
+#   and indenting the list body:
+#
+#       [cat]  a small furry mammal
+#              that seems to sleep a lot
+#
+#       [ant]  a little insect that is known
+#              to enjoy picnics
+#
+#   A minor variation on labeled lists uses two colons to separate the
+#   label from the list body:
+#
+#       cat::  a small furry mammal
+#              that seems to sleep a lot
+#
+#       ant::  a little insect that is known
+#              to enjoy picnics
+#     
+#   This latter style guarantees that the list bodies' left margins are
+#   aligned: think of them as a two column table.
+#
+# * Any line that starts to the right of the current margin is treated
+#   as verbatim text. This is useful for code listings. The example of a
+#   list above is also verbatim text.
+#
+# * A line starting with an equals sign (=) is treated as a
+#   heading. Level one headings have one equals sign, level two headings
+#   have two, and so on.
+#
+# * A line starting with three or more hyphens (at the current indent)
+#   generates a horizontal rule. The more hyphens, the thicker the rule
+#   (within reason, and if supported by the output device)
+#
+# * You can use markup within text (except verbatim) to change the
+#   appearance of parts of that text. Out of the box, SimpleMarkup
+#   supports word-based and general markup.
+#
+#   Word-based markup uses flag characters around individual words:
+#
+#   [\*word*]  displays word in a *bold* font
+#   [\_word_]  displays word in an _emphasized_ font
+#   [\+word+]  displays word in a +code+ font
+#
+#   General markup affects text between a start delimiter and an end
+#   delimiter. Not surprisingly, these delimiters look like HTML markup.
+#
+#   [\<b>text...</b>]    displays word in a *bold* font
+#   [\<em>text...</em>]  displays word in an _emphasized_ font
+#   [\<i>text...</i>]    displays word in an _emphasized_ font
+#   [\<tt>text...</tt>]  displays word in a +code+ font
+#
+#   Unlike conventional Wiki markup, general markup can cross line
+#   boundaries. You can turn off the interpretation of markup by
+#   preceding the first character with a backslash, so \\\<b>bold
+#   text</b> and \\\*bold* produce \<b>bold text</b> and \*bold*
+#   respectively.
+#
+# = Using SimpleMarkup
+#
+# For information on using SimpleMarkup programmatically,
+# see SM::SimpleMarkup.
+#
+# Author::   Dave Thomas,  dave@pragmaticprogrammer.com
+# Version::  0.0
+# License::  Ruby license
+
+
+
+require 'rdoc/markup/simple_markup/fragments'
+require 'rdoc/markup/simple_markup/lines.rb'
+
+module SM  #:nodoc:
+
+  # == Synopsis
+  #
+  # This code converts <tt>input_string</tt>, which is in the format
+  # described in markup/simple_markup.rb, to HTML. The conversion
+  # takes place in the +convert+ method, so you can use the same
+  # SimpleMarkup object to convert multiple input strings.
+  #
+  #   require 'rdoc/markup/simple_markup'
+  #   require 'rdoc/markup/simple_markup/to_html'
+  #
+  #   p = SM::SimpleMarkup.new
+  #   h = SM::ToHtml.new
+  #
+  #   puts p.convert(input_string, h)
+  #
+  # You can extend the SimpleMarkup parser to recognise new markup
+  # sequences, and to add special processing for text that matches a
+  # regular expression. Here we make WikiWords significant to the parser,
+  # and also make the sequences {word} and \<no>text...</no> signify
+  # strike-through text. We then subclass the HTML output class to deal
+  # with these:
+  #
+  #   require 'rdoc/markup/simple_markup'
+  #   require 'rdoc/markup/simple_markup/to_html'
+  #
+  #   class WikiHtml < SM::ToHtml
+  #     def handle_special_WIKIWORD(special)
+  #       "<font color=red>" + special.text + "</font>"
+  #     end
+  #   end
+  #
+  #   p = SM::SimpleMarkup.new
+  #   p.add_word_pair("{", "}", :STRIKE)
+  #   p.add_html("no", :STRIKE)
+  #
+  #   p.add_special(/\b([A-Z][a-z]+[A-Z]\w+)/, :WIKIWORD)
+  #
+  #   h = WikiHtml.new
+  #   h.add_tag(:STRIKE, "<strike>", "</strike>")
+  #
+  #   puts "<body>" + p.convert(ARGF.read, h) + "</body>"
+  #
+  # == Output Formatters
+  #
+  # _missing_
+  #
+  #
+
+  class SimpleMarkup
+
+    SPACE = ?\s
+
+    # List entries look like:
+    #  *       text
+    #  1.      text
+    #  [label] text
+    #  label:: text
+    #
+    # Flag it as a list entry, and
+    # work out the indent for subsequent lines
+
+    SIMPLE_LIST_RE = /^(
+                  (  \*          (?# bullet)
+                    |-           (?# bullet)
+                    |\d+\.       (?# numbered )
+                    |[A-Za-z]\.  (?# alphabetically numbered )
+                  )
+                  \s+
+                )\S/x
+
+    LABEL_LIST_RE = /^(
+                        (  \[.*?\]    (?# labeled  )
+                          |\S.*::     (?# note     )
+                        )(?=\s|$)
+                        \s*
+                      )/x
+
+
+    ##
+    # take a block of text and use various heuristics to determine
+    # it's structure (paragraphs, lists, and so on). Invoke an
+    # event handler as we identify significant chunks.
+    #
+
+    def initialize
+      @am = AttributeManager.new
+      @output = nil
+    end
+
+    ##
+    # Add to the sequences used to add formatting to an individual word 
+    # (such as *bold*). Matching entries will generate attributes
+    # that the output formatters can recognize by their +name+
+
+    def add_word_pair(start, stop, name)
+      @am.add_word_pair(start, stop, name)
+    end
+
+    ##
+    # Add to the sequences recognized as general markup
+    #
+
+    def add_html(tag, name)
+      @am.add_html(tag, name)
+    end
+
+    ##
+    # Add to other inline sequences. For example, we could add
+    # WikiWords using something like:
+    #
+    #    parser.add_special(/\b([A-Z][a-z]+[A-Z]\w+)/, :WIKIWORD)
+    #
+    # Each wiki word will be presented to the output formatter 
+    # via the accept_special method
+    #
+
+    def add_special(pattern, name)
+      @am.add_special(pattern, name)
+    end
+
+
+    # Convert +str+ with the output formatter +op+: split the text into
+    # lines, classify each line, gather related lines into fragments
+    # (for example all the lines of one paragraph), then let the
+    # formatter visit those fragments. Returns "" when there are no lines
+
+    def convert(str, op)
+      raw_lines = str.split(/\r?\n/)
+      @lines = Lines.new(raw_lines.map { |text| Line.new(text) })
+      return "" if @lines.empty?
+
+      @lines.normalize
+      assign_types_to_lines
+      # hand the grouped fragments over to the output formatter
+      #      group_lines.to_a.each {|i| p i}
+      group_lines.accept(@am, op)
+    end
+
+
+    #######
+    private
+    #######
+
+
+    ##
+    # Look through the text at line indentation. We flag each line as being
+    # Blank, a paragraph, a list element, or verbatim text
+    #
+
+    def assign_types_to_lines(margin = 0, level = 0)
+
+      while line = @lines.next
+        if line.isBlank?
+          line.stamp(Line::BLANK, level)
+          next
+        end
+        
+        # if a line contains non-blanks before the margin, then it must belong
+        # to an outer level
+
+        text = line.text
+        
+        for i in 0...margin
+          if text[i] != SPACE
+            @lines.unget
+            return
+          end
+        end
+
+        active_line = text[margin..-1]
+
+        # Rules (horizontal lines) look like
+        #
+        #  ---   (three or more hyphens)
+        #
+        # The more hyphens, the thicker the rule
+        #
+
+        if /^(---+)\s*$/ =~ active_line
+          line.stamp(Line::RULE, level, $1.length-2)
+          next
+        end
+
+        # Then look for list entries. First the ones that have to have
+        # text following them (* xxx, - xxx, and dd. xxx)
+
+        if SIMPLE_LIST_RE =~ active_line
+
+          offset = margin + $1.length
+          prefix = $2
+          prefix_length = prefix.length
+
+          flag = case prefix
+                 when "*","-" then ListBase::BULLET
+                 when /^\d/   then ListBase::NUMBER
+                 when /^[A-Z]/ then ListBase::UPPERALPHA
+                 when /^[a-z]/ then ListBase::LOWERALPHA
+                 else raise "Invalid List Type: #{self.inspect}"
+                 end
+
+          line.stamp(Line::LIST, level+1, prefix, flag)
+          text[margin, prefix_length] = " " * prefix_length
+          assign_types_to_lines(offset, level + 1)
+          next
+        end
+
+
+        if LABEL_LIST_RE =~ active_line
+          offset = margin + $1.length
+          prefix = $2
+          prefix_length = prefix.length
+
+          next if handled_labeled_list(line, level, margin, offset, prefix)
+        end
+
+        # Headings look like
+        # = Main heading
+        # == Second level
+        # === Third
+        #
+        # Headings reset the level to 0
+
+        if active_line[0] == ?= and active_line =~ /^(=+)\s*(.*)/
+          prefix_length = $1.length
+          prefix_length = 6 if prefix_length > 6
+          line.stamp(Line::HEADING, 0, prefix_length)
+          line.strip_leading(margin + prefix_length)
+          next
+        end
+        
+        # If the character's a space, then we have verbatim text,
+        # otherwise it's a paragraph
+
+        if active_line[0] == SPACE
+          line.strip_leading(margin) if margin > 0
+          line.stamp(Line::VERBATIM, level)
+        else
+          line.stamp(Line::PARAGRAPH, level)
+        end
+      end
+    end
+
+    # Handle labeled list entries, We have a special case
+    # to deal with. Because the labels can be long, they force
+    # the remaining block of text over to the right:
+    #
+    # this is a long label that I wrote:: and here is the
+    #                                     block of text with
+    #                                     a silly margin
+    #
+    # So we allow the special case. If the label is followed
+    # by nothing, and if the following line is indented, then
+    # we take the indent of that line as the new margin
+    #
+    # this is a long label that I wrote::
+    #     here is a more reasonably indented block which
+    #     will be attached to the label.
+    #
+    
+    def handled_labeled_list(line, level, margin, offset, prefix)
+      prefix_length = prefix.length
+      text = line.text
+      flag = nil
+      case prefix
+      when /^\[/
+        flag = ListBase::LABELED
+        prefix = prefix[1, prefix.length-2]
+      when /:$/
+        flag = ListBase::NOTE
+        prefix.chop!
+      else raise "Invalid List Type: #{self.inspect}"
+      end
+      
+      # body is on the next line
+      
+      if text.length <= offset
+        original_line = line
+        line = @lines.next
+        return(false) unless line
+        text = line.text
+        
+        for i in 0..margin
+          if text[i] != SPACE
+            @lines.unget
+            return false
+          end
+        end
+        i = margin
+        i += 1 while text[i] == SPACE
+        if i >= text.length
+          @lines.unget
+          return false
+        else
+          offset = i
+          prefix_length = 0
+          @lines.delete(original_line)
+        end
+      end
+      
+      line.stamp(Line::LIST, level+1, prefix, flag)
+      text[margin, prefix_length] = " " * prefix_length
+      assign_types_to_lines(offset, level + 1)
+      return true
+    end
+
+    # Return a block consisting of fragments which are
+    # paragraphs, list entries or verbatim text. We merge consecutive
+    # lines of the same type and level together. We are also slightly
+    # tricky with lists: the lines following a list introduction
+    # look like paragraph lines at the next level, and we remap them
+    # into list entries instead
+
+    def group_lines
+      @lines.rewind
+
+      block = LineCollection.new
+      group = nil
+      wanted_type = wanted_level = nil
+
+      while (line = @lines.next)
+        if line.level == wanted_level && line.type == wanted_type
+          # continuation of the current fragment
+          group.add_text(line.text)
+          next
+        end
+
+        # anything else opens a new fragment
+        group = block.fragment_for(line)
+        block.add(group)
+
+        # the lines following a list introduction look like paragraph
+        # lines at the entry's level, so those are what we merge into it
+        wanted_type  = line.type == Line::LIST ? Line::PARAGRAPH : line.type
+        wanted_level = line.level
+      end
+
+      block.normalize
+      block
+    end
+
+    ## for debugging, we allow access to our line contents as text
+    def content
+      @lines.as_text
+    end
+    public :content
+
+    ## for debugging, return the list of line types
+    def get_line_types
+      @lines.line_types
+    end
+    public :get_line_types
+  end
+
+end
diff --git a/lib/rdoc/markup/simple_markup/fragments.rb b/lib/rdoc/markup/simple_markup/fragments.rb
new file mode 100644
index 0000000000..83388fcc0b
--- /dev/null
+++ b/lib/rdoc/markup/simple_markup/fragments.rb
@@ -0,0 +1,328 @@
+require 'rdoc/markup/simple_markup/lines.rb'
+require 'rdoc/markup/simple_markup/inline.rb'
+
+module SM
+
+  ##
+  # A Fragment is a chunk of text, subclassed as a paragraph, a list
+  # entry, or verbatim text
+
+  class Fragment
+    attr_reader   :level, :param, :txt
+    attr_accessor :type
+
+    def initialize(level, param, type, txt)
+      @level, @param, @type = level, param, type
+      @txt = ""
+      add_text(txt) if txt
+    end
+
+    # Append +txt+, one space after any text already held. Newlines
+    # become spaces, runs of them are squeezed, the ends are stripped
+    def add_text(txt)
+      @txt << " " unless @txt.empty?
+      @txt << txt.tr_s("\n ", "  ").strip
+    end
+
+    def to_s
+      "L#@level: #{self.class.name.split('::').last}\n#@txt"
+    end
+
+    ######
+    # This is a simple factory system that lets us associate line
+    # types with the subclass of fragment that represents them
+
+    TYPE_MAP = {}
+
+    def self.type_name(name)
+      TYPE_MAP[name] = self
+    end
+
+    def self.for(line)
+      klass = TYPE_MAP[line.type]
+      raise("Unknown line type: '#{line.type.inspect}:' '#{line.text}'") unless klass
+      klass.new(line.level, line.param, line.flag, line.text)
+    end
+  end
+
+  ##
+  # A paragraph is a fragment which gets wrapped to fit. We remove all
+  # newlines when we're created, and have them put back on output
+
+  class Paragraph < Fragment
+    type_name Line::PARAGRAPH
+  end
+
+  class BlankLine < Paragraph
+    type_name Line::BLANK
+  end
+
+  class Heading < Paragraph
+    type_name Line::HEADING
+
+    def head_level
+      @param.to_i
+    end
+  end
+
+  ##
+  # A List is a fragment with some kind of label
+  #
+
+  class ListBase < Paragraph
+    # List types
+    BULLET  = :BULLET
+    NUMBER  = :NUMBER
+    UPPERALPHA  = :UPPERALPHA
+    LOWERALPHA  = :LOWERALPHA
+    LABELED = :LABELED
+    NOTE    = :NOTE
+  end
+
+  class ListItem < ListBase
+    type_name Line::LIST
+
+    #  def label
+    #    am = AttributeManager.new(@param)
+    #    am.flow
+    #  end
+  end
+
+  class ListStart < ListBase
+    def initialize(level, param, type)
+      super(level, param, type, nil)
+    end
+  end
+
+  class ListEnd < ListBase
+    def initialize(level, type)
+      super(level, "", type, nil)
+    end
+  end
+
+  ##
+  # Verbatim code contains lines that don't get wrapped.
+
+  class Verbatim < Fragment
+    type_name  Line::VERBATIM
+
+    def add_text(txt)
+      @txt << txt.chomp << "\n"
+    end
+
+  end
+
+  ##
+  # A horizontal rule
+  class Rule < Fragment
+    type_name Line::RULE
+  end
+
+
+  # Collect groups of lines together. Each group
+  # will end up containing a flow of text
+
+  class LineCollection
+    
+    def initialize
+      @fragments = []
+    end
+
+    def add(fragment)
+      @fragments << fragment
+    end
+
+    def each(&b)
+      @fragments.each(&b)
+    end
+
+    # For testing
+    def to_a
+      @fragments.map {|fragment| fragment.to_s}
+    end
+
+    # Factory for different fragment types
+    def fragment_for(*args)
+      Fragment.for(*args)
+    end
+
+    # tidy up at the end
+    def normalize
+      change_verbatim_blank_lines
+      add_list_start_and_ends
+      add_list_breaks
+      tidy_blank_lines
+    end
+
+    def to_s
+      @fragments.join("\n----\n")
+    end
+
+    def accept(am, visitor)
+
+      visitor.start_accepting
+
+      @fragments.each do |fragment|
+        case fragment
+        when Verbatim
+          visitor.accept_verbatim(am, fragment)
+        when Rule
+          visitor.accept_rule(am, fragment)
+        when ListStart
+          visitor.accept_list_start(am, fragment)
+        when ListEnd
+          visitor.accept_list_end(am, fragment)
+        when ListItem
+          visitor.accept_list_item(am, fragment)
+        when BlankLine
+          visitor.accept_blank_line(am, fragment)
+        when Heading
+          visitor.accept_heading(am, fragment)
+        when Paragraph
+          visitor.accept_paragraph(am, fragment)
+        end
+      end
+
+      visitor.end_accepting
+    end
+    #######
+    private
+    #######
+
+    # If you have:
+    #
+    #    normal paragraph text.
+    #
+    #       this is code
+    #   
+    #       and more code
+    #
+    # You'll end up with the fragments Paragraph, BlankLine, 
+    # Verbatim, BlankLine, Verbatim, BlankLine, etc
+    #
+    # The BlankLine in the middle of the verbatim chunk needs to
+    # be changed to a real verbatim newline, and the two
+    # verbatim blocks merged
+    #
+    #    
+    def change_verbatim_blank_lines
+      frag_block = nil
+      blank_count = 0
+      @fragments.each_with_index do |frag, i|
+        if frag_block.nil?
+          frag_block = frag if Verbatim === frag
+        else
+          case frag
+          when Verbatim
+            blank_count.times { frag_block.add_text("\n") }
+            blank_count = 0
+            frag_block.add_text(frag.txt)
+            @fragments[i] = nil    # remove our current fragment
+          when BlankLine
+            if frag_block
+              blank_count += 1
+              @fragments[i] = nil
+            end
+          else
+            frag_block = nil
+            blank_count = 0
+          end
+        end
+      end
+      @fragments.compact!
+    end
+
+    # List nesting is implicit given the level of each fragment.
+    # Make it explicit, just to make life a tad
+    # easier for the output processors
+
+    def add_list_start_and_ends
+      level = 0
+      res = []
+      type_stack = []
+
+      @fragments.each do |fragment|
+        # $stderr.puts "#{level} : #{fragment.class.name} : #{fragment.level}"
+        new_level = fragment.level
+        while (level < new_level)
+          level += 1
+          type = fragment.type
+          res << ListStart.new(level, fragment.param, type) if type
+          type_stack.push type
+          # $stderr.puts "Start: #{level}"
+        end
+
+        while level > new_level
+          type = type_stack.pop
+          res << ListEnd.new(level, type) if type
+          level -= 1
+          # $stderr.puts "End: #{level}, #{type}"
+        end
+
+        res << fragment
+        level = fragment.level
+      end
+      level.downto(1) do |i|
+        type = type_stack.pop
+        res << ListEnd.new(i, type) if type
+      end
+
+      @fragments = res
+    end
+
+    # now insert start/ends between list entries at the
+    # same level that have different element types
+
+    def add_list_breaks
+      res = @fragments
+
+      @fragments = []
+      list_stack = []
+
+      res.each do |fragment|
+        case fragment
+        when ListStart
+          list_stack.push fragment
+        when ListEnd
+          start = list_stack.pop
+          fragment.type = start.type
+        when ListItem
+          l = list_stack.last
+          if fragment.type != l.type
+            @fragments << ListEnd.new(l.level, l.type)
+            start = ListStart.new(l.level, fragment.param, fragment.type)
+            @fragments << start
+            list_stack.pop
+            list_stack.push start
+          end
+        else
+          ;
+        end
+        @fragments << fragment
+      end
+    end
+
+    # Finally tidy up the blank lines:
+    # * a blank line directly before a ListEnd changes places with
+    #   it, so the blank ends up after the list has been closed
+    # * blank lines at the very front are removed
+
+    def tidy_blank_lines
+      0.upto(@fragments.size - 2) do |i|
+        here, following = @fragments[i], @fragments[i + 1]
+        next unless here.kind_of?(BlankLine) && following.kind_of?(ListEnd)
+        @fragments[i], @fragments[i + 1] = following, here
+      end
+
+      # nil out the leading run of blanks, then squeeze the nils away
+      i = 0
+      while @fragments[i].kind_of?(BlankLine)
+        @fragments[i] = nil
+        i += 1
+      end
+      @fragments.compact!
+    end
+
+  end
+  
+end
diff --git a/lib/rdoc/markup/simple_markup/inline.rb b/lib/rdoc/markup/simple_markup/inline.rb
new file mode 100644
index 0000000000..684ff4b275
--- /dev/null
+++ b/lib/rdoc/markup/simple_markup/inline.rb
@@ -0,0 +1,348 @@
+module SM
+
+  # We manage a set of attributes. Each attribute has a symbol name
+  # and a bit value
+
+  class Attribute
+    SPECIAL = 1
+
+    @@name_to_bitmap = { :_SPECIAL_ => SPECIAL }
+    @@next_bitmap = 2
+
+    # Return the bit that stands for +name+. A name seen for the
+    # first time is given the next unused bit
+    def self.bitmap_for(name)
+      @@name_to_bitmap[name] ||= begin
+        fresh = @@next_bitmap
+        @@next_bitmap <<= 1
+        fresh
+      end
+    end
+
+    # Comma separated names of the bits set in +bitmap+, or "none"
+    def self.as_string(bitmap)
+      return "none" if bitmap.zero?
+      set = @@name_to_bitmap.select { |_name, bit| (bitmap & bit) != 0 }
+      set.map { |name, _bit| name }.join(",")
+    end
+
+    # Yield the name (as a string) of each bit set in +bitmap+,
+    # leaving out the SPECIAL bit
+    def self.each_name_of(bitmap)
+      @@name_to_bitmap.each do |name, bit|
+        next if bit == SPECIAL || (bitmap & bit).zero?
+        yield name.to_s
+      end
+    end
+  end
+
+
+  # An AttrChanger records a change in attributes. It contains
+  # a bitmap of the attributes to turn on, and a bitmap of those to
+  # turn off. Both are Struct members, read through their accessors.
+
+  AttrChanger = Struct.new(:turn_on, :turn_off)
+  class AttrChanger
+    def to_s # names turned on after "+", names turned off after "-"
+      "Attr: +#{Attribute.as_string(turn_on)}/-#{Attribute.as_string(turn_off)}"
+    end
+  end
+
+  # An array of attributes which parallels the characters in a string
+  class AttrSpan
+    def initialize(length)
+      @attrs = Array.new(length, 0)
+    end
+
+    # OR +bits+ into the +length+ entries beginning at +start+
+    def set_attrs(start, length, bits)
+      (start...(start + length)).each { |pos| @attrs[pos] |= bits }
+    end
+
+    # the attribute bits recorded for character +n+
+    def [](n)
+      @attrs[n]
+    end
+  end
+
+  ##
+  # A special sequence found in the text: its type and the text matched
+
+  class Special
+    attr_reader   :type
+    attr_accessor :text
+
+    def initialize(type, text)
+      @type, @text = type, text
+    end
+
+    def ==(o)
+      text == o.text && type == o.type
+    end
+
+    def to_s
+      format("Special: type=%s, text=%s", type, text.dump)
+    end
+  end
+  
+  class AttributeManager
+
+    NULL = "\000".freeze
+
+    ##
+    # We work by substituting non-printing characters in to the
+    # text. For now I'm assuming that I can substitute
+    # a character in the range 0..8 for a 7 bit character
+    # without damaging the encoded string, but this might
+    # be optimistic
+    #
+
+=begin
+    ATTR_FLAG  = 001
+    A_START    = 002
+    A_END      = 003
+    A_SPECIAL_START = 005
+    A_SPECIAL_END   = 006
+
+    START_ATTR   = ATTR_FLAG.chr + A_START.chr
+    END_ATTR     = ATTR_FLAG.chr + A_END.chr
+
+    START_SPECIAL = ATTR_FLAG.chr + A_SPECIAL_START.chr
+    END_SPECIAL   = ATTR_FLAG.chr + A_SPECIAL_END.chr
+
+=end
+    A_PROTECT  = 004
+    PROTECT_ATTR  = A_PROTECT.chr
+
+    # This maps delimiters that occur around words (such as
+    # *bold* or +tt+) where the start and end delimiters
+    # are the same. This lets us optimize the regexp
+    MATCHING_WORD_PAIRS = {}
+
+    # And this is used when the delimiters aren't the same. In this
+    # case the hash maps a pattern to the attribute character
+    WORD_PAIR_MAP = {}
+
+    # This maps HTML tags to the corresponding attribute char
+    HTML_TAGS = {}
+
+    # And this maps _special_ sequences to a name. A special sequence
+    # is something like a WikiWord
+    SPECIAL = {}
+
+    # Return an attribute object with the given turn_on
+    # and turn_off bits set
+
+    def attribute(turn_on, turn_off)
+      AttrChanger.new(turn_on, turn_off)
+    end
+
+
+    def change_attribute(current, new)
+      diff = current ^ new
+      attribute(new & diff, current & diff)
+    end
+
+    def changed_attribute_by_name(current_set, new_set)
+      current = new = 0
+      current_set.each {|name| current |= Attribute.bitmap_for(name) }
+      new_set.each {|name| new |= Attribute.bitmap_for(name) }
+      change_attribute(current, new)
+    end
+
+    def copy_string(start_pos, end_pos)
+      # the NULLs stand in for markup characters that were blanked
+      # out, so they are left out of the copy
+      @str[start_pos...end_pos].delete("\000")
+    end
+
+    # Find word-based markup such as *bold* or {strike}. The delimiters
+    # are overwritten with NULLs, so the string keeps its length, and
+    # the attribute bits of the enclosed word are recorded in +attrs+
+
+    def convert_attrs(str, attrs)
+      # first do matching ones. The delimiters are supplied by callers of
+      # add_word_pair, so escape them before they go into the character
+      # class: an unescaped "-" or "]" would change what the class matches
+      tags = MATCHING_WORD_PAIRS.keys.map { |tag| Regexp.escape(tag) }.join("")
+      re = Regexp.new("(^|\\W)([#{tags}])([A-Za-z_]+?)\\2(\\W|\$)")
+      1 while str.gsub!(re) {
+        attr = MATCHING_WORD_PAIRS[$2];
+        attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr)
+        $1 + NULL*$2.length + $3 + NULL*$2.length + $4
+      }
+
+      # then non-matching
+      WORD_PAIR_MAP.each do |regexp, attr|
+        str.gsub!(regexp) {
+          attrs.set_attrs($`.length + $1.length, $2.length, attr)
+          NULL*$1.length + $2 + NULL*$3.length
+        }
+      end
+    end
+
+    def convert_html(str, attrs)
+      # one pattern for every registered tag, matched whatever its case
+      alternatives = HTML_TAGS.keys.join("|")
+      tag_re = Regexp.new("<(#{alternatives})>(.*?)</\\1>", Regexp::IGNORECASE)
+      1 while str.gsub!(tag_re) {
+        open_len = $1.length + 2                      # "<" + tag + ">"
+        attrs.set_attrs($`.length + open_len, $2.length, HTML_TAGS[$1.downcase])
+        # the closing tag is one longer than the opening one ("/")
+        NULL * open_len + $2 + NULL * (open_len + 1)
+      }
+    end
+
+    def convert_specials(str, attrs)
+      unless SPECIAL.empty?
+        SPECIAL.each do |regexp, attr|
+          str.scan(regexp) do
+            attrs.set_attrs($`.length, $1.length, attr | Attribute::SPECIAL)
+          end
+        end
+      end
+    end
+
+    # A \ in front of a character that would normally be
+    # processed turns off processing. We do this by turning
+    # \< into <#{PROTECT}
+    
+    PROTECTABLE = [ "<" << "\\" ]  #"
+
+
+    def mask_protected_sequences
+      protect_pattern = Regexp.new("\\\\([#{Regexp.escape(PROTECTABLE.join(''))}])")
+      @str.gsub!(protect_pattern, "\\1#{PROTECT_ATTR}")
+    end
+
+    def unmask_protected_sequences # marker becomes NULL: @str must stay as long as @attrs
+      @str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000")
+    end
+
+    def initialize
+      add_word_pair("*", "*", :BOLD)
+      add_word_pair("_", "_", :EM)
+      add_word_pair("+", "+", :TT)
+      
+      add_html("em", :EM)
+      add_html("i",  :EM)
+      add_html("b",  :BOLD)
+      add_html("tt", :TT)
+    end
+
+    def add_word_pair(start, stop, name)
+      raise "Word flags may not start '<'" if start[0] == ?<
+      bitmap = Attribute.bitmap_for(name)
+      if start == stop
+        MATCHING_WORD_PAIRS[start] = bitmap
+      else
+        pattern = Regexp.new("(" + Regexp.escape(start) + ")" +
+#                             "([A-Za-z]+)" +
+                             "(\\S+)" +
+                             "(" + Regexp.escape(stop) +")")
+        WORD_PAIR_MAP[pattern] = bitmap
+      end
+      PROTECTABLE << start[0,1]
+      PROTECTABLE.uniq!
+    end
+
+    def add_html(tag, name)
+      HTML_TAGS[tag.downcase] = Attribute.bitmap_for(name)
+    end
+
+    def add_special(pattern, name)
+      SPECIAL[pattern] = Attribute.bitmap_for(name)
+    end
+
+    def flow(str)
+      @str = str
+      @attrs = AttrSpan.new(str.length)
+
+      puts("Before flow, str='#{@str.dump}'") if $DEBUG
+      mask_protected_sequences
+      convert_attrs(@str, @attrs)
+      convert_html(@str, @attrs)
+      convert_specials(str, @attrs)
+      unmask_protected_sequences
+      puts("After flow, str='#{@str.dump}'") if $DEBUG
+      return split_into_flow
+    end
+
+    def display_attributes
+      puts
+      puts @str.tr(NULL, "!")
+      bit = 1
+      16.times do |bno|
+        line = ""
+        @str.length.times do |i|
+          if (@attrs[i] & bit) == 0
+            line << " "
+          else
+            if bno.zero?
+              line << "S"
+            else
+              line << ("%d" % (bno+1))
+            end
+          end
+        end
+        puts(line) unless line =~ /^ *$/
+        bit <<= 1
+      end
+    end
+
+    def split_into_flow
+      # Returns an array in which pieces of visible text alternate with
+      # the attribute changes (and Specials) that apply between them
+      display_attributes if $DEBUG
+
+      res = []
+      current_attr = 0
+      str_len = @str.length
+
+      # skip leading invisible text. A one character slice is compared
+      # with NULL because String#[] with a single index returns a number
+      # on Ruby 1.8 and a string on later versions
+      i = 0
+      i += 1 while i < str_len and @str[i, 1] == NULL
+      start_pos = i
+
+      # then scan the string, chunking it on attribute changes
+      while i < str_len
+        new_attr = @attrs[i]
+        if new_attr != current_attr
+          if i > start_pos
+            res << copy_string(start_pos, i)
+            start_pos = i
+          end
+
+          res << change_attribute(current_attr, new_attr)
+          current_attr = new_attr
+
+          if (current_attr & Attribute::SPECIAL) != 0
+            i += 1 while i < str_len and (@attrs[i] & Attribute::SPECIAL) != 0
+            res << Special.new(current_attr, copy_string(start_pos, i))
+            start_pos = i
+            next
+          end
+        end
+
+        # move on, skipping any invisible characters
+        begin
+          i += 1
+        end while i < str_len and @str[i, 1] == NULL
+      end
+
+      # tidy up trailing text
+      if start_pos < str_len
+        res << copy_string(start_pos, str_len)
+      end
+
+      # and reset to all attributes off
+      res << change_attribute(current_attr, 0) if current_attr != 0
+
+      return res
+    end
+
+  end
+
+end
diff --git a/lib/rdoc/markup/simple_markup/lines.rb b/lib/rdoc/markup/simple_markup/lines.rb
new file mode 100644
index 0000000000..4e294f27dc
--- /dev/null
+++ b/lib/rdoc/markup/simple_markup/lines.rb
@@ -0,0 +1,151 @@
+##########################################################################
+#
+# We store the lines we're working on as objects of class Line.
+# These contain the text of the line, along with a flag indicating the
+# line type, and an indentation level
+
+module SM
+
+  class Line
+    INFINITY = 9999
+
+    BLANK     = :BLANK
+    HEADING   = :HEADING
+    LIST      = :LIST
+    RULE      = :RULE
+    PARAGRAPH = :PARAGRAPH
+    VERBATIM  = :VERBATIM
+    
+    # line type
+    attr_accessor :type
+
+    # The indentation nesting level
+    attr_accessor :level
+
+    # The contents
+    attr_accessor :text
+
+    # A prefix or parameter. For LIST lines, this is
+    # the text that introduced the list item (the label)
+    attr_accessor  :param
+
+    # A flag. For list lines, this is the type of the list
+    attr_accessor :flag
+
+    # the number of leading spaces
+    attr_accessor :leading_spaces
+
+    # true if this line has been deleted from the list of lines
+    attr_accessor :deleted
+    
+
+    def initialize(text)
+      @text    = text.dup
+      @deleted = false
+
+      # expand tabs
+      1 while @text.gsub!(/\t+/) { ' ' * (8*$&.length - $`.length % 8)}  && $~ #`
+
+      # Strip trailing whitespace
+      @text.sub!(/\s+$/, '')
+
+      # and look for leading whitespace
+      if @text.length > 0
+        @text =~ /^(\s*)/
+        @leading_spaces = $1.length
+      else
+        @leading_spaces = INFINITY
+      end
+    end
+
+    # Return true if this line is blank
+    def isBlank?
+      @text.length.zero?
+    end
+
+    # stamp a line with a type, a level, a prefix, and a flag
+    def stamp(type, level, param="", flag=nil)
+      @type, @level, @param, @flag = type, level, param, flag
+    end
+
+    ##
+    # Strip off the leading margin
+    #
+
+    def strip_leading(size)
+      if @text.size > size
+        @text[0,size] = ""
+      else
+        @text = ""
+      end
+    end
+
+    def to_s
+      "#@type#@level: #@text"
+    end
+  end
+
+  ###############################################################################
+  #
+  # A container for all the lines
+  #
+
+  class Lines
+    include Enumerable
+
+    attr_reader :lines   # for debugging
+
+    def initialize(lines)
+      @lines = lines
+      rewind
+    end
+
+    def empty?
+      @lines.size.zero?
+    end
+
+    def each
+      @lines.each do |line|
+        yield line unless line.deleted
+      end
+    end
+
+#    def [](index)
+#      @lines[index]
+#    end
+
+    def rewind
+      @nextline = 0
+    end
+
+    def next
+      begin
+        res = @lines[@nextline]
+        @nextline += 1 if @nextline < @lines.size
+      end while res and res.deleted and @nextline < @lines.size
+      res
+    end
+
+    def unget
+      @nextline -= 1
+    end
+
+    def delete(a_line)
+      a_line.deleted = true
+    end
+
+    def normalize
+      margin = @lines.collect{|l| l.leading_spaces}.min
+      margin = 0 if margin == Line::INFINITY
+      @lines.each {|line| line.strip_leading(margin) } if margin > 0
+    end
+
+    def as_text
+      @lines.map {|l| l.text}.join("\n")
+    end
+
+    def line_types
+      @lines.map {|l| l.type }
+    end
+  end
+end
diff --git a/lib/rdoc/markup/simple_markup/preprocess.rb b/lib/rdoc/markup/simple_markup/preprocess.rb
new file mode 100644
index 0000000000..09892c2b6c
--- /dev/null
+++ b/lib/rdoc/markup/simple_markup/preprocess.rb
@@ -0,0 +1,68 @@
+module SM
+
+  ## 
+  # Handle common directives that can occur in a block of text:
+  #
+  # : include : filename
+  #
+
+  class PreProcess
+
+    def initialize(input_file_name, include_path)
+      @input_file_name = input_file_name
+      @include_path = include_path
+    end
+
+    # Look for common options in a chunk of text. Options that
+    # we don't handle are passed back to our caller
+    # as |directive, param| 
+
+    def handle(text)
+      text.gsub!(/^([ \t#]*):(\w+):\s*(.+)?\n/) do 
+
+        directive = $2.downcase
+        param     = $3
+
+        case directive
+
+        when "include"
+          include_file($3, $1)
+
+        else
+          yield(directive, param)
+        end
+      end
+    end
+
+    #######
+    private
+    #######
+
+    # Include a file, indenting it correctly
+
+    def include_file(name, indent)
+      if (full_name = find_include_file(name))
+        content = File.open(full_name) {|f| f.read}
+        res = content.gsub(/^#?/, indent)
+      else
+        $stderr.puts "Couldn't find file to include: '#{name}'"
+        ''
+      end
+    end
+
+    # Look for the given file in the directory containing the current
+    # file, and then in each of the directories specified in the
+    # RDOC_INCLUDE path
+
+    def find_include_file(name)
+      to_search = [ File.dirname(@input_file_name) ].concat @include_path
+      to_search.each do |dir|
+        full_name = File.join(dir, name)
+        stat = File.stat(full_name) rescue next
+        return full_name if stat.readable?
+      end
+      nil
+    end
+
+  end
+end
diff --git a/lib/rdoc/markup/simple_markup/to_html.rb b/lib/rdoc/markup/simple_markup/to_html.rb
new file mode 100644
index 0000000000..26b5f4ce70
--- /dev/null
+++ b/lib/rdoc/markup/simple_markup/to_html.rb
@@ -0,0 +1,289 @@
+require 'rdoc/markup/simple_markup/fragments'
+require 'rdoc/markup/simple_markup/inline'
+
+require 'cgi'
+
+module SM
+
+  class ToHtml
+
+    LIST_TYPE_TO_HTML = {
+      ListBase::BULLET =>  [ "<ul>", "</ul>" ],
+      ListBase::NUMBER =>  [ "<ol>", "</ol>" ],
+      ListBase::UPPERALPHA =>  [ "<ol>", "</ol>" ],
+      ListBase::LOWERALPHA =>  [ "<ol>", "</ol>" ],
+      ListBase::LABELED => [ "<dl>", "</dl>" ],
+      ListBase::NOTE    => [ "<table>", "</table>" ],
+    }
+
+    InlineTag = Struct.new(:bit, :on, :off)
+
+    def initialize
+      init_tags
+    end
+
+    ##
+    # Set up the standard mapping of attributes to HTML tags
+    #
+    def init_tags
+      @attr_tags = [
+        InlineTag.new(SM::Attribute.bitmap_for(:BOLD), "<b>", "</b>"),
+        InlineTag.new(SM::Attribute.bitmap_for(:TT),   "<tt>", "</tt>"),
+        InlineTag.new(SM::Attribute.bitmap_for(:EM),   "<em>", "</em>"),
+      ]
+    end
+
+    ##
+    # Add a new set of HTML tags for an attribute. We allow
+    # separate start and end tags for flexibility
+    #
+    def add_tag(name, start, stop)
+      @attr_tags << InlineTag.new(SM::Attribute.bitmap_for(name), start, stop)
+    end
+
+    ##
+    # Given an HTML tag, decorate it with class information
+    # and the like if required. This is a no-op in the base
+    # class, but is overridden in HTML output classes that
+    # implement style sheets
+
+    def annotate(tag)
+      tag
+    end
+
+    ## 
+    # Here's the client side of the visitor pattern
+
+    def start_accepting
+      @res = ""
+      @in_list_entry = []
+    end
+
+    def end_accepting
+      @res
+    end
+
+    def accept_paragraph(am, fragment)
+      @res << annotate("<p>") + "\n"
+      @res << wrap(convert_flow(am.flow(fragment.txt)))
+      @res << annotate("</p>") + "\n"
+    end
+
+    def accept_verbatim(am, fragment)
+      @res << annotate("<pre>") + "\n"
+      @res << CGI.escapeHTML(fragment.txt)
+      @res << annotate("</pre>") << "\n"
+    end
+
+    def accept_rule(am, fragment)
+      size = fragment.param
+      size = 10 if size > 10
+      @res << "<hr size=\"#{size}\"></hr>"
+    end
+
+    def accept_list_start(am, fragment)
+      @res << html_list_name(fragment.type, true) <<"\n"
+      @in_list_entry.push false
+    end
+
+    def accept_list_end(am, fragment)
+      if tag = @in_list_entry.pop
+        @res << annotate(tag) << "\n"
+      end
+      @res << html_list_name(fragment.type, false) <<"\n"
+    end
+
+    def accept_list_item(am, fragment)
+      if tag = @in_list_entry.last
+        @res << annotate(tag) << "\n"
+      end
+      @res << list_item_start(am, fragment)
+      @res << wrap(convert_flow(am.flow(fragment.txt))) << "\n"
+      @in_list_entry[-1] = list_end_for(fragment.type)
+    end
+
+    def accept_blank_line(am, fragment)
+      # @res << annotate("<p />") << "\n"
+    end
+
+    def accept_heading(am, fragment)
+      @res << convert_heading(fragment.head_level, am.flow(fragment.txt))
+    end
+
+    # This is a higher speed (if messier) version of wrap
+
+    def wrap(txt, line_len = 76)
+      res = ""
+      sp = 0
+      ep = txt.length
+      while sp < ep
+        # scan back for a space
+        p = sp + line_len - 1
+        if p >= ep
+          p = ep
+        else
+          while p > sp and txt[p] != ?\s
+            p -= 1
+          end
+          if p <= sp
+            p = sp + line_len
+            while p < ep and txt[p] != ?\s
+              p += 1
+            end
+          end
+        end
+        res << txt[sp...p] << "\n"
+        sp = p
+        sp += 1 while sp < ep and txt[sp] == ?\s
+      end
+      res
+    end
+
+    #######################################################################
+
+    private
+
+    #######################################################################
+
+    def on_tags(res, item)
+      attr_mask = item.turn_on
+      return if attr_mask.zero?
+
+      @attr_tags.each do |tag|
+        if attr_mask & tag.bit != 0
+          res << annotate(tag.on)
+        end
+      end
+    end
+
+    def off_tags(res, item)
+      attr_mask = item.turn_off
+      return if attr_mask.zero?
+
+      @attr_tags.reverse_each do |tag|
+        if attr_mask & tag.bit != 0
+          res << annotate(tag.off)
+        end
+      end
+    end
+
+    def convert_flow(flow)
+      res = ""
+      flow.each do |item|
+        case item
+        when String
+          res << convert_string(item)
+        when AttrChanger
+          off_tags(res, item)
+          on_tags(res,  item)
+        when Special
+          res << convert_special(item)
+        else
+          raise "Unknown flow element: #{item.inspect}"
+        end
+      end
+      res
+    end
+
+    # some of these patterns are taken from SmartyPants...
+
+    def convert_string(item)
+      CGI.escapeHTML(item).
+      
+      
+      # convert -- to em-dash, (-- to en-dash)
+        gsub(/---?/, '&#8212;'). #gsub(/--/, '&#8211;').
+
+      # convert ... to elipsis (and make sure .... becomes .<elipsis>)
+        gsub(/\.\.\.\./, '.&#8230;').gsub(/\.\.\./, '&#8230;').
+
+      # convert single closing quote
+        gsub(%r{([^ \t\r\n\[\{\(])\'}) { "#$1&#8217;" }.
+        gsub(%r{\'(?=\W|s\b)}) { "&#8217;" }.
+
+      # convert single opening quote
+        gsub(/'/, '&#8216;').
+
+      # convert double closing quote
+        gsub(%r{([^ \t\r\n\[\{\(])\'(?=\W)}) { "#$1&#8221;" }.
+
+      # convert double opening quote
+        gsub(/'/, '&#8220;').
+
+      # convert copyright
+        gsub(/\(c\)/, '&#169;').
+
+      # convert and registered trademark
+        gsub(/\(r\)/, '&#174;')
+
+    end
+
+    def convert_special(special)
+      handled = false
+      Attribute.each_name_of(special.type) do |name|
+        method_name = "handle_special_#{name}"
+        if self.respond_to? method_name
+          special.text = send(method_name, special)
+          handled = true
+        end
+      end
+      raise "Unhandled special: #{special}" unless handled
+      special.text
+    end
+
+    def convert_heading(level, flow)
+      res =
+        annotate("<h#{level}>") + 
+        convert_flow(flow) + 
+        annotate("</h#{level}>\n")
+    end
+
+    def html_list_name(list_type, is_open_tag)
+      tags = LIST_TYPE_TO_HTML[list_type] || raise("Invalid list type: #{list_type.inspect}")
+      annotate(tags[ is_open_tag ? 0 : 1])
+    end
+
+    def list_item_start(am, fragment)
+      case fragment.type
+      when ListBase::BULLET, ListBase::NUMBER
+        annotate("<li>")
+
+      when ListBase::UPPERALPHA
+	annotate("<li type=\"A\">")
+
+      when ListBase::LOWERALPHA
+	annotate("<li type=\"a\">")
+
+      when ListBase::LABELED
+        annotate("<dt>") +
+          convert_flow(am.flow(fragment.param)) + 
+          annotate("</dt>") +
+          annotate("<dd>")
+
+      when ListBase::NOTE
+        annotate("<tr>") +
+          annotate("<td valign=\"top\">") +
+          convert_flow(am.flow(fragment.param)) + 
+          annotate("</td>") +
+          annotate("<td>")
+      else
+        raise "Invalid list type"
+      end
+    end
+
+    def list_end_for(fragment_type)
+      case fragment_type
+      when ListBase::BULLET, ListBase::NUMBER, ListBase::UPPERALPHA, ListBase::LOWERALPHA
+        "</li>"
+      when ListBase::LABELED
+        "</dd>"
+      when ListBase::NOTE
+        "</td></tr>"
+      else
+        raise "Invalid list type"
+      end
+    end
+
+  end
+
+end
diff --git a/lib/rdoc/markup/simple_markup/to_latex.rb b/lib/rdoc/markup/simple_markup/to_latex.rb
new file mode 100644
index 0000000000..6c16278652
--- /dev/null
+++ b/lib/rdoc/markup/simple_markup/to_latex.rb
@@ -0,0 +1,333 @@
+require 'rdoc/markup/simple_markup/fragments'
+require 'rdoc/markup/simple_markup/inline'
+
+require 'cgi'
+
+module SM
+
+  # Convert SimpleMarkup to basic LaTeX report format
+
+  class ToLaTeX
+
+    BS = "\020"   # \
+    OB = "\021"   # {
+    CB = "\022"   # }
+    DL = "\023"   # Dollar
+
+    BACKSLASH   = "#{BS}symbol#{OB}92#{CB}"
+    HAT         = "#{BS}symbol#{OB}94#{CB}"
+    BACKQUOTE   = "#{BS}symbol#{OB}0#{CB}"
+    TILDE       = "#{DL}#{BS}sim#{DL}"
+    LESSTHAN    = "#{DL}<#{DL}"
+    GREATERTHAN = "#{DL}>#{DL}"
+
+    def self.l(str)
+      str.tr('\\', BS).tr('{', OB).tr('}', CB).tr('$', DL)
+    end
+
+    def l(arg)
+      SM::ToLaTeX.l(arg)
+    end
+
+    LIST_TYPE_TO_LATEX = {
+      ListBase::BULLET =>  [ l("\\begin{itemize}"), l("\\end{itemize}") ],
+      ListBase::NUMBER =>  [ l("\\begin{enumerate}"), l("\\end{enumerate}"), "\\arabic" ],
+      ListBase::UPPERALPHA =>  [ l("\\begin{enumerate}"), l("\\end{enumerate}"), "\\Alph" ],
+      ListBase::LOWERALPHA =>  [ l("\\begin{enumerate}"), l("\\end{enumerate}"), "\\alph" ],
+      ListBase::LABELED => [ l("\\begin{description}"), l("\\end{description}") ],
+      ListBase::NOTE    => [
+        l("\\begin{tabularx}{\\linewidth}{@{} l X @{}}"), 
+        l("\\end{tabularx}") ],
+    }
+
+    InlineTag = Struct.new(:bit, :on, :off)
+
+    def initialize
+      init_tags
+      @list_depth = 0
+      @prev_list_types = []
+    end
+
+    ##
+    # Set up the standard mapping of attributes to LaTeX
+    #
+    def init_tags
+      @attr_tags = [
+        InlineTag.new(SM::Attribute.bitmap_for(:BOLD), l("\\textbf{"), l("}")),
+        InlineTag.new(SM::Attribute.bitmap_for(:TT),   l("\\texttt{"), l("}")),
+        InlineTag.new(SM::Attribute.bitmap_for(:EM),   l("\\emph{"), l("}")),
+      ]
+    end
+
+    ##
+    # Escape a LaTeX string
+    def escape(str)
+# $stderr.print "FE: ", str
+      s = str.
+#        sub(/\s+$/, '').
+        gsub(/([_\${}&%#])/, "#{BS}\\1").
+        gsub(/\\/, BACKSLASH).
+        gsub(/\^/, HAT).
+        gsub(/~/,  TILDE).
+        gsub(/</,  LESSTHAN).
+        gsub(/>/,  GREATERTHAN).
+        gsub(/,,/, ",{},").
+        gsub(/\`/,  BACKQUOTE)
+# $stderr.print "-> ", s, "\n"
+      s
+    end
+
+    ##
+    # Add a new set of LaTeX tags for an attribute. We allow
+    # separate start and end tags for flexibility
+    #
+    def add_tag(name, start, stop)
+      @attr_tags << InlineTag.new(SM::Attribute.bitmap_for(name), start, stop)
+    end
+
+
+    ## 
+    # Here's the client side of the visitor pattern
+
+    def start_accepting
+      @res = ""
+      @in_list_entry = []
+    end
+
+    def end_accepting
+      @res.tr(BS, '\\').tr(OB, '{').tr(CB, '}').tr(DL, '$')
+    end
+
+    def accept_paragraph(am, fragment)
+      @res << wrap(convert_flow(am.flow(fragment.txt)))
+      @res << "\n"
+    end
+
+    def accept_verbatim(am, fragment)
+      @res << "\n\\begin{code}\n"
+      @res << fragment.txt.sub(/[\n\s]+\Z/, '')
+      @res << "\n\\end{code}\n\n"
+    end
+
+    def accept_rule(am, fragment)
+      size = fragment.param
+      size = 10 if size > 10
+      @res << "\n\n\\rule{\\linewidth}{#{size}pt}\n\n"
+    end
+
+    def accept_list_start(am, fragment)
+      @res << list_name(fragment.type, true) <<"\n"
+      @in_list_entry.push false
+    end
+
+    def accept_list_end(am, fragment)
+      if tag = @in_list_entry.pop
+        @res << tag << "\n"
+      end
+      @res << list_name(fragment.type, false) <<"\n"
+    end
+
+    def accept_list_item(am, fragment)
+      if tag = @in_list_entry.last
+        @res << tag << "\n"
+      end
+      @res << list_item_start(am, fragment)
+      @res << wrap(convert_flow(am.flow(fragment.txt))) << "\n"
+      @in_list_entry[-1] = list_end_for(fragment.type)
+    end
+
+    def accept_blank_line(am, fragment)
+      # @res << "\n"
+    end
+
+    def accept_heading(am, fragment)
+      @res << convert_heading(fragment.head_level, am.flow(fragment.txt))
+    end
+
+    # This is a higher speed (if messier) version of wrap
+
+    def wrap(txt, line_len = 76)
+      res = ""
+      sp = 0
+      ep = txt.length
+      while sp < ep
+        # scan back for a space
+        p = sp + line_len - 1
+        if p >= ep
+          p = ep
+        else
+          while p > sp and txt[p] != ?\s
+            p -= 1
+          end
+          if p <= sp
+            p = sp + line_len
+            while p < ep and txt[p] != ?\s
+              p += 1
+            end
+          end
+        end
+        res << txt[sp...p] << "\n"
+        sp = p
+        sp += 1 while sp < ep and txt[sp] == ?\s
+      end
+      res
+    end
+
+    #######################################################################
+
+    private
+
+    #######################################################################
+
+    def on_tags(res, item)
+      attr_mask = item.turn_on
+      return if attr_mask.zero?
+
+      @attr_tags.each do |tag|
+        if attr_mask & tag.bit != 0
+          res << tag.on
+        end
+      end
+    end
+
+    def off_tags(res, item)
+      attr_mask = item.turn_off
+      return if attr_mask.zero?
+
+      @attr_tags.reverse_each do |tag|
+        if attr_mask & tag.bit != 0
+          res << tag.off
+        end
+      end
+    end
+
+    def convert_flow(flow)
+      res = ""
+      flow.each do |item|
+        case item
+        when String
+#          $stderr.puts "Converting '#{item}'"
+          res << convert_string(item)
+        when AttrChanger
+          off_tags(res, item)
+          on_tags(res,  item)
+        when Special
+          res << convert_special(item)
+        else
+          raise "Unknown flow element: #{item.inspect}"
+        end
+      end
+      res
+    end
+
+    # some of these patterns are taken from SmartyPants...
+
+    def convert_string(item)
+
+      escape(item).
+      
+      
+      # convert ... to elipsis (and make sure .... becomes .<elipsis>)
+        gsub(/\.\.\.\./, '.\ldots{}').gsub(/\.\.\./, '\ldots{}').
+
+      # convert single closing quote
+        gsub(%r{([^ \t\r\n\[\{\(])\'}) { "#$1'" }.
+        gsub(%r{\'(?=\W|s\b)}) { "'" }.
+
+      # convert single opening quote
+        gsub(/'/, '`').
+
+      # convert double closing quote
+        gsub(%r{([^ \t\r\n\[\{\(])\"(?=\W)}) { "#$1''" }.
+
+      # convert double opening quote
+        gsub(/"/, "``").
+
+      # convert copyright
+        gsub(/\(c\)/, '\copyright{}')
+
+    end
+
+    def convert_special(special)
+      handled = false
+      Attribute.each_name_of(special.type) do |name|
+        method_name = "handle_special_#{name}"
+        if self.respond_to? method_name
+          special.text = send(method_name, special)
+          handled = true
+        end
+      end
+      raise "Unhandled special: #{special}" unless handled
+      special.text
+    end
+
+    def convert_heading(level, flow)
+      res =
+        case level
+        when 1 then "\\chapter{"
+        when 2 then "\\section{"
+        when 3 then "\\subsection{"
+        when 4 then "\\subsubsection{"
+        else  "\\paragraph{"
+        end +
+        convert_flow(flow) + 
+        "}\n"
+    end
+
+    def list_name(list_type, is_open_tag)
+      tags = LIST_TYPE_TO_LATEX[list_type] || raise("Invalid list type: #{list_type.inspect}")
+      if tags[2] # enumerate
+        if is_open_tag
+          @list_depth += 1
+          if @prev_list_types[@list_depth] != tags[2]
+            case @list_depth
+            when 1
+              roman = "i"
+            when 2
+              roman = "ii"
+            when 3
+              roman = "iii"
+            when 4
+              roman = "iv"
+            else
+              raise("Too deep list: level #{@list_depth}")
+            end
+            @prev_list_types[@list_depth] = tags[2]
+            return l("\\renewcommand{\\labelenum#{roman}}{#{tags[2]}{enum#{roman}}}") + "\n" + tags[0]
+          end
+        else
+          @list_depth -= 1
+        end
+      end
+      tags[ is_open_tag ? 0 : 1]
+    end
+
+    def list_item_start(am, fragment)
+      case fragment.type
+      when ListBase::BULLET, ListBase::NUMBER, ListBase::UPPERALPHA, ListBase::LOWERALPHA
+        "\\item "
+
+      when ListBase::LABELED
+        "\\item[" + convert_flow(am.flow(fragment.param)) + "] "
+
+      when ListBase::NOTE
+          convert_flow(am.flow(fragment.param)) + " & "
+      else
+        raise "Invalid list type"
+      end
+    end
+
+    def list_end_for(fragment_type)
+      case fragment_type
+      when ListBase::BULLET, ListBase::NUMBER, ListBase::UPPERALPHA, ListBase::LOWERALPHA, ListBase::LABELED
+        ""
+      when ListBase::NOTE
+        "\\\\\n"
+      else
+        raise "Invalid list type"
+      end
+    end
+
+  end
+
+end
diff --git a/lib/rdoc/markup/test/AllTests.rb b/lib/rdoc/markup/test/AllTests.rb
new file mode 100644
index 0000000000..b9c8c9dfcc
--- /dev/null
+++ b/lib/rdoc/markup/test/AllTests.rb
@@ -0,0 +1,2 @@
+require 'TestParse.rb'
+require 'TestInline.rb'
diff --git a/lib/rdoc/markup/test/TestInline.rb b/lib/rdoc/markup/test/TestInline.rb
new file mode 100644
index 0000000000..c76f21dce5
--- /dev/null
+++ b/lib/rdoc/markup/test/TestInline.rb
@@ -0,0 +1,151 @@
+require "test/unit"
+
+$:.unshift "../../.."
+
+require "rdoc/markup/simple_markup/inline"
+
+# Unit tests for the flow produced by SM::AttributeManager#flow: each
+# test feeds marked-up text to the manager and compares the returned
+# array (strings, attribute changes and specials) with the expected one.
+class TestInline < Test::Unit::TestCase
+
+
+  # Create a fresh attribute manager and precompute the attribute-change
+  # objects the expectations below are built from. Also registers the
+  # word pair "{" / "}" for a custom :WOMBAT attribute.
+  def setup
+    @am = SM::AttributeManager.new
+
+    @bold_on  = @am.changed_attribute_by_name([], [:BOLD])
+    @bold_off = @am.changed_attribute_by_name([:BOLD], [])
+    
+    @tt_on    = @am.changed_attribute_by_name([], [:TT])
+    @tt_off   = @am.changed_attribute_by_name([:TT], [])
+    
+    @em_on    = @am.changed_attribute_by_name([], [:EM])
+    @em_off   = @am.changed_attribute_by_name([:EM], [])
+    
+    @bold_em_on   = @am.changed_attribute_by_name([], [:BOLD] | [:EM])
+    @bold_em_off  = @am.changed_attribute_by_name([:BOLD] | [:EM], [])
+    
+    @em_then_bold = @am.changed_attribute_by_name([:EM], [:EM] | [:BOLD])
+    
+    @em_to_bold   = @am.changed_attribute_by_name([:EM], [:BOLD])
+    
+    @am.add_word_pair("{", "}", :WOMBAT)
+    @wombat_on    = @am.changed_attribute_by_name([], [:WOMBAT])
+    @wombat_off   = @am.changed_attribute_by_name([:WOMBAT], [])
+  end
+
+  # Expected flow elements for a cross-reference special wrapping +text+:
+  # attribute change on, the Special itself, attribute change off.
+  # NOTE(review): 33 is presumably the combined bitmap of CROSSREF and
+  # _SPECIAL_ as assigned in test_special -- confirm against Attribute.
+  def crossref(text)
+    [ @am.changed_attribute_by_name([], [:CROSSREF] | [:_SPECIAL_]),
+      SM::Special.new(33, text),
+      @am.changed_attribute_by_name([:CROSSREF] | [:_SPECIAL_], [])
+    ]
+  end
+
+  # A registered special pattern is reported as a Special element at the
+  # start, in the middle and at the end of the text.
+  def test_special
+    # class names, variable names, file names, or instance variables
+    @am.add_special(/(
+                       \b([A-Z]\w+(::\w+)*)
+                       | \#\w+[!?=]?
+                       | \b\w+([_\/\.]+\w+)+[!?=]?
+                      )/x, 
+                    :CROSSREF)
+    
+    assert_equal(["cat"], @am.flow("cat"))
+
+    assert_equal(["cat ", crossref("#fred"), " dog"].flatten,
+                  @am.flow("cat #fred dog"))
+
+    assert_equal([crossref("#fred"), " dog"].flatten,
+                  @am.flow("#fred dog"))
+
+    assert_equal(["cat ", crossref("#fred")].flatten, @am.flow("cat #fred"))
+  end
+
+  # Single-word *bold*, _em_ and +tt+ markup, plus inputs where the
+  # markers must be left in the text untouched.
+  def test_basic
+    assert_equal(["cat"], @am.flow("cat"))
+
+    assert_equal(["cat ", @bold_on, "and", @bold_off, " dog"],
+                  @am.flow("cat *and* dog"))
+
+    assert_equal(["cat ", @bold_on, "AND", @bold_off, " dog"],
+                  @am.flow("cat *AND* dog"))
+
+    assert_equal(["cat ", @em_on, "And", @em_off, " dog"],
+                  @am.flow("cat _And_ dog"))
+
+    assert_equal(["cat *and dog*"], @am.flow("cat *and dog*"))
+
+    assert_equal(["*cat and* dog"], @am.flow("*cat and* dog"))
+
+    assert_equal(["cat *and ", @bold_on, "dog", @bold_off],
+                  @am.flow("cat *and *dog*"))
+
+    assert_equal(["cat ", @em_on, "and", @em_off, " dog"],
+                  @am.flow("cat _and_ dog"))
+
+    assert_equal(["cat_and_dog"],
+                  @am.flow("cat_and_dog"))
+
+    assert_equal(["cat ", @tt_on, "and", @tt_off, " dog"],
+                  @am.flow("cat +and+ dog"))
+
+    assert_equal(["cat ", @bold_on, "a_b_c", @bold_off, " dog"],
+                  @am.flow("cat *a_b_c* dog"))
+
+    assert_equal(["cat __ dog"],
+                  @am.flow("cat __ dog"))
+
+    assert_equal(["cat ", @em_on, "_", @em_off, " dog"],
+                  @am.flow("cat ___ dog"))
+
+  end
+
+  # Two different word-level attributes in the same text.
+  def test_combined
+    assert_equal(["cat ", @em_on, "and", @em_off, " ", @bold_on, "dog", @bold_off],
+                  @am.flow("cat _and_ *dog*"))
+
+    assert_equal(["cat ", @em_on, "a__nd", @em_off, " ", @bold_on, "dog", @bold_off], 
+                  @am.flow("cat _a__nd_ *dog*"))
+  end
+
+  # HTML-style tags in mixed case, including nested and overlapping
+  # pairs.
+  def test_html_like
+    assert_equal(["cat ", @tt_on, "dog", @tt_off], @am.flow("cat <tt>dog</Tt>"))
+
+    assert_equal(["cat ", @em_on, "and", @em_off, " ", @bold_on, "dog", @bold_off], 
+                  @am.flow("cat <i>and</i> <B>dog</b>"))
+    
+    assert_equal(["cat ", @em_on, "and ", @em_then_bold, "dog", @bold_em_off], 
+                  @am.flow("cat <i>and <B>dog</B></I>"))
+    
+    assert_equal(["cat ", @em_on, "and ", @em_to_bold, "dog", @bold_off], 
+                  @am.flow("cat <i>and </i><b>dog</b>"))
+    
+    assert_equal(["cat ", @em_on, "and ", @em_to_bold, "dog", @bold_off], 
+                  @am.flow("cat <i>and <b></i>dog</b>"))
+    
+    assert_equal([@tt_on, "cat", @tt_off, " ", @em_on, "and ", @em_to_bold, "dog", @bold_off], 
+                  @am.flow("<tt>cat</tt> <i>and <b></i>dog</b>"))
+
+    assert_equal(["cat ", @em_on, "and ", @em_then_bold, "dog", @bold_em_off], 
+                  @am.flow("cat <i>and <b>dog</b></i>"))
+    
+    assert_equal(["cat ", @bold_em_on, "and", @bold_em_off, " dog"], 
+                  @am.flow("cat <i><b>and</b></i> dog"))
+    
+    
+  end
+
+  # A backslash in front of markup suppresses it; the backslash itself
+  # is dropped, while a backslash not followed by markup is kept.
+  def test_protect
+    assert_equal(['cat \\ dog'], @am.flow('cat \\ dog'))
+
+    assert_equal(["cat <tt>dog</Tt>"], @am.flow("cat \\<tt>dog</Tt>"))
+
+    assert_equal(["cat ", @em_on, "and", @em_off, " <B>dog</b>"], 
+                  @am.flow("cat <i>and</i> \\<B>dog</b>"))
+    
+    assert_equal(["*word* or <b>text</b>"], @am.flow("\\*word* or \\<b>text</b>"))
+  end
+
+  # The word pair registered in setup behaves like the built-in ones.
+  def test_adding
+    assert_equal(["cat ", @wombat_on, "and", @wombat_off, " dog" ],
+                  @am.flow("cat {and} dog"))
+#    assert_equal(["cat {and} dog" ], @am.flow("cat \\{and} dog"))
+  end
+end
diff --git a/lib/rdoc/markup/test/TestParse.rb b/lib/rdoc/markup/test/TestParse.rb
new file mode 100644
index 0000000000..3ec541ce7a
--- /dev/null
+++ b/lib/rdoc/markup/test/TestParse.rb
@@ -0,0 +1,503 @@
+require 'test/unit'
+
+$:.unshift "../../.."
+
+require 'rdoc/markup/simple_markup'
+
+include SM
+
+class TestParse < Test::Unit::TestCase
+
+  # Stand-in for an output formatter. The tests pass an instance to
+  # SimpleMarkup#convert and then inspect what was collected: every
+  # accept_* callback stores the string form of the fragment it is
+  # given, and end_accepting returns those strings in arrival order.
+  #
+  #   mock = MockOutput.new
+  #   SimpleMarkup.new.convert(text, mock)
+  #
+  class MockOutput
+
+    # Begin a run with an empty list of recorded fragments.
+    def start_accepting
+      @res = []
+    end
+
+    # Finish a run; the value is the list of recorded fragment strings.
+    def end_accepting
+      @res
+    end
+
+    # Record fragment.to_s. The first argument (+am+) is accepted but
+    # never used.
+    def accept_paragraph(am, fragment)
+      @res << fragment.to_s
+    end
+
+    # Every other callback records its fragment in exactly the same way,
+    # so each is defined as an alias of accept_paragraph. An alias keeps
+    # the two-argument signature of the method it copies.
+
+    # Free-standing fragments.
+    alias_method :accept_verbatim,   :accept_paragraph
+    alias_method :accept_heading,    :accept_paragraph
+    alias_method :accept_rule,       :accept_paragraph
+    alias_method :accept_blank_line, :accept_paragraph
+
+    # List fragments.
+    alias_method :accept_list_start, :accept_paragraph
+    alias_method :accept_list_item,  :accept_paragraph
+    alias_method :accept_list_end,   :accept_paragraph
+
+  end
+
+  # Convert +str+ with a throwaway formatter and return the markup object's +content+.
+  def basic_conv(str)
+    markup = SimpleMarkup.new
+    markup.convert(str, MockOutput.new)
+    markup.content
+  end
+
+  # Convert +str+ and assert that the first letters of its line types, joined, equal +expected+.
+  def line_types(str, expected)
+    markup = SimpleMarkup.new
+    markup.convert(str, MockOutput.new)
+    assert_equal(expected, markup.get_line_types.collect { |kind| kind.to_s[0, 1] }.join(''))
+  end
+
+  # Convert +str+ with a MockOutput formatter and assert that the value
+  # returned by convert equals +expected+. When the two differ, a
+  # side-by-side "Expected"/"Got" table is printed before the assertion runs.
+  def line_groups(str, expected)
+    actual = SimpleMarkup.new.convert(str, MockOutput.new)
+
+    unless actual == expected
+      longest = [expected.size, actual.size].max
+      printf "\n\n%35s %35s\n", "Expected", "Got"
+      longest.times do |i|
+        printf "%35s %35s\n", (expected[i] || "nil").dump, (actual[i] || "nil").dump
+      end
+    end
+
+    assert_equal(expected, actual)
+  end
+
+  def test_tabs
+    # Text without tabs is expected to come back unchanged.
+    plain = "hello\n  dave"
+    assert_equal(plain, basic_conv(plain))
+
+    # A tab preceded by zero to seven spaces is expected to expand so that
+    # the text after it starts eight columns in.
+    assert_equal("hello\n        dave", basic_conv("hello\n\tdave"))
+    assert_equal("hello\n        dave", basic_conv("hello\n \tdave"))
+    assert_equal("hello\n        dave", basic_conv("hello\n  \tdave"))
+    assert_equal("hello\n        dave", basic_conv("hello\n   \tdave"))
+    assert_equal("hello\n        dave", basic_conv("hello\n    \tdave"))
+    assert_equal("hello\n        dave", basic_conv("hello\n     \tdave"))
+    assert_equal("hello\n        dave", basic_conv("hello\n      \tdave"))
+    assert_equal("hello\n        dave", basic_conv("hello\n       \tdave"))
+
+    # With eight spaces already in front of the tab, the text is expected
+    # to start sixteen columns in.
+    assert_equal("hello\n                dave", basic_conv("hello\n        \tdave"))
+
+    # Consecutive tabs are both expanded.
+    assert_equal(".               .", basic_conv(".\t\t."))
+  end
+
+  def test_whitespace
+    # Blanks and tabs around a lone word are stripped.
+    ["hello", " hello ", " \t \t hello\t\t"].each { |src| assert_equal("hello", basic_conv(src)) }
+
+    # A common left margin is removed; relative indentation survives.
+    ["1\n 2\n  3", "  1\n   2\n    3"].each { |src| assert_equal("1\n 2\n  3", basic_conv(src)) }
+    ["1\n 2\n  3\n1\n 2", "  1\n   2\n    3\n  1\n   2"].each do |src|
+      assert_equal("1\n 2\n  3\n1\n 2", basic_conv(src))
+    end
+
+    # A blank line inside the text is kept.
+    assert_equal("1\n 2\n\n  3", basic_conv("  1\n   2\n\n    3"))
+  end
+
+  def test_types # checks how each input line is classified; see the letter legend below
+    str = "now is the time"
+    line_types(str, 'P')
+    # Each letter is the first character of a line type's name (presumably P=paragraph, V=verbatim, R=rule, L=list, B=blank).
+    str = "now is the time\nfor all good men"
+    line_types(str, 'PP')
+    # An indented line between flush-left lines is expected to be typed V.
+    str = "now is the time\n  code\nfor all good men"
+    line_types(str, 'PVP')
+    # Indented lines with different indents are both typed V.
+    str = "now is the time\n  code\n more code\nfor all good men"
+    line_types(str, 'PVVP')
+    # A line consisting of "---" is expected to be typed R.
+    str = "now is\n---\nthe time"
+    line_types(str, 'PRP')
+    # "* " bullets: one L per item.
+    str = %{\
+       now is
+       * l1
+       * l2
+       the time}
+    line_types(str, 'PLLP')
+    # A continuation line aligned under the item text is typed P.
+    str = %{\
+       now is
+       * l1
+         l1+
+       * l2
+       the time}
+    line_types(str, 'PLPLP')
+    # A nested bullet is typed L like its parent.
+    str = %{\
+       now is
+       * l1
+         * l1.1
+       * l2
+       the time}
+    line_types(str, 'PLLLP')
+    # Nested item followed by text, deeper-indented code, a blank line and more text.
+    str = %{\
+       now is
+       * l1
+         * l1.1
+           text
+             code
+             code
+
+           text
+       * l2
+       the time}
+    line_types(str, 'PLLPVVBPLP')
+    # Numbered items ("1.", "2.") with a nested bullet.
+    str = %{\
+       now is
+       1. l1
+          * l1.1
+       2. l2
+       the time}
+    line_types(str, 'PLLLP')
+    # Labelled items ("[cat]", "[dog]") with a nested bullet.
+    str = %{\
+       now is
+       [cat] l1
+             * l1.1
+       [dog] l2
+       the time}
+    line_types(str, 'PLLLP')
+    # Labelled item with a continuation line aligned under its text.
+    str = %{\
+       now is
+       [cat] l1
+             continuation
+       [dog] l2
+       the time}
+    line_types(str, 'PLPLP')
+  end
+
+  def test_groups # checks how lines are grouped into fragments; each expected string is one fragment's to_s
+    str = "now is the time"
+    line_groups(str, ["L0: Paragraph\nnow is the time"] )
+    # Adjacent flush-left lines are joined with a space into one Paragraph.
+    str = "now is the time\nfor all good men"
+    line_groups(str, ["L0: Paragraph\nnow is the time for all good men"] )
+    # An indented line splits the text into Paragraph / Verbatim / Paragraph; the Verbatim keeps its relative indent.
+    str = %{\
+      now is the time
+        code _line_ here
+      for all good men}
+
+    line_groups(str,
+                [ "L0: Paragraph\nnow is the time",
+                  "L0: Verbatim\n  code _line_ here\n",
+                  "L0: Paragraph\nfor all good men"
+                ] )
+    # Verbatim lines keep their individual indents.
+    str = "now is the time\n  code\n more code\nfor all good men"
+    line_groups(str,
+                [ "L0: Paragraph\nnow is the time",
+                  "L0: Verbatim\n  code\n more code\n",
+                  "L0: Paragraph\nfor all good men"
+                ] )
+    # A bullet list is bracketed by ListStart/ListEnd; the "L1" prefix is presumably the nesting level.
+    str = %{\
+       now is
+       * l1
+       * l2
+       the time}
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl1",
+                  "L1: ListItem\nl2",
+                  "L1: ListEnd\n",
+                  "L0: Paragraph\nthe time"
+                ])
+    # A continuation line is folded into its ListItem ("l1 l1+").
+    str = %{\
+       now is
+       * l1
+         l1+
+       * l2
+       the time}
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl1 l1+",
+                  "L1: ListItem\nl2",
+                  "L1: ListEnd\n",
+                  "L0: Paragraph\nthe time"
+                ])
+    # A nested bullet opens its own L2 ListStart/ListEnd pair.
+    str = %{\
+       now is
+       * l1
+         * l1.1
+       * l2
+       the time}
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl1",
+                  "L2: ListStart\n",
+                  "L2: ListItem\nl1.1",
+                  "L2: ListEnd\n",
+                  "L1: ListItem\nl2",
+                  "L1: ListEnd\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+    # Text, code and a second paragraph inside a nested item are all reported at L2.
+    str = %{\
+       now is
+       * l1
+         * l1.1
+           text
+             code
+               code
+
+           text
+       * l2
+       the time}
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl1",
+                  "L2: ListStart\n",
+                  "L2: ListItem\nl1.1 text",
+                  "L2: Verbatim\n  code\n    code\n",
+                  "L2: Paragraph\ntext",
+                  "L2: ListEnd\n",
+                  "L1: ListItem\nl2",
+                  "L1: ListEnd\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+    # Numbered outer list with a nested bullet list.
+    str = %{\
+       now is
+       1. l1
+          * l1.1
+       2. l2
+       the time}
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl1",
+                  "L2: ListStart\n",
+                  "L2: ListItem\nl1.1",
+                  "L2: ListEnd\n",
+                  "L1: ListItem\nl2",
+                  "L1: ListEnd\n",
+                  "L0: Paragraph\nthe time"
+                ])
+    # Labelled outer list with a nested bullet list; the labels do not appear in the expected strings.
+    str = %{\
+       now is
+       [cat] l1
+             * l1.1
+       [dog] l2
+       the time}
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl1",
+                  "L2: ListStart\n",
+                  "L2: ListItem\nl1.1",
+                  "L2: ListEnd\n",
+                  "L1: ListItem\nl2",
+                  "L1: ListEnd\n",
+                  "L0: Paragraph\nthe time"
+                ])
+    # Labelled item with a continuation line folded into it.
+    str = %{\
+       now is
+       [cat] l1
+             continuation
+       [dog] l2
+       the time}
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl1 continuation",
+                  "L1: ListItem\nl2",
+                  "L1: ListEnd\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+    
+  end
+
+  def test_verbatim_merge # indented lines, and blank lines between them, are expected to merge into one Verbatim
+    str = %{\
+       now is
+          code
+       the time}
+
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L0: Verbatim\n   code\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+    # Two consecutive indented lines form one Verbatim.
+    str = %{\
+       now is
+          code
+          code1
+       the time}
+
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L0: Verbatim\n   code\n   code1\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+    # A blank line between indented lines is kept inside the Verbatim.
+    str = %{\
+       now is
+          code
+
+          code1
+       the time}
+
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L0: Verbatim\n   code\n\n   code1\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+    # A blank line after the last indented line does not show up in the Verbatim.
+    str = %{\
+       now is
+          code
+
+          code1
+
+       the time}
+
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L0: Verbatim\n   code\n\n   code1\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+    # Several blank-separated indented lines still merge into one Verbatim.
+    str = %{\
+       now is
+          code
+
+          code1
+
+          code2
+       the time}
+
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L0: Verbatim\n   code\n\n   code1\n\n   code2\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+
+    # Consecutive blank lines inside the Verbatim are folded into one.
+    str = %{\
+       now is
+          code
+
+
+          code1
+
+       the time}
+
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L0: Verbatim\n   code\n\n   code1\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+
+  end
+ 
+  def test_list_split # a switch between bullet and numbered items at the same indent ends one list and starts another
+    str = %{\
+       now is
+       * l1
+       1. n1
+       2. n2
+       * l2
+       the time}
+    line_groups(str,
+                [ "L0: Paragraph\nnow is",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl1",
+                  "L1: ListEnd\n",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nn1",
+                  "L1: ListItem\nn2",
+                  "L1: ListEnd\n",
+                  "L1: ListStart\n",
+                  "L1: ListItem\nl2",
+                  "L1: ListEnd\n",
+                  "L0: Paragraph\nthe time"
+                ])
+
+  end
+
+
+  def test_headings # a line of "=" markers plus text is a Heading only when it starts flush left
+    str = "= heading one"
+    line_groups(str, 
+                [ "L0: Heading\nheading one"
+                ])
+    # The number of "=" signs does not appear in the fragment's string form.
+    str = "=== heading three"
+    line_groups(str, 
+                [ "L0: Heading\nheading three"
+                ])
+    # An indented "===" line after text is Verbatim, not a Heading.
+    str = "text\n   === heading three"
+    line_groups(str, 
+                [ "L0: Paragraph\ntext",
+                  "L0: Verbatim\n   === heading three\n"
+                ])
+    # ...and it joins a preceding Verbatim line.
+    str = "text\n   code\n   === heading three"
+    line_groups(str, 
+                [ "L0: Paragraph\ntext",
+                  "L0: Verbatim\n   code\n   === heading three\n"
+                ])
+    # A flush-left "===" line ends the Verbatim and starts a Heading.
+    str = "text\n   code\n=== heading three"
+    line_groups(str, 
+                [ "L0: Paragraph\ntext",
+                  "L0: Verbatim\n   code\n",
+                  "L0: Heading\nheading three"
+                ])
+
+  end
+
+  
+end