From 87762adcb0d38d6c575448f67c2906964215f3a1 Mon Sep 17 00:00:00 2001 From: dave Date: Mon, 1 Dec 2003 07:12:49 +0000 Subject: Add RDoc git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5073 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rdoc/markup/sample/rdoc2latex.rb | 16 + lib/rdoc/markup/sample/sample.rb | 42 +++ lib/rdoc/markup/simple_markup.rb | 477 ++++++++++++++++++++++++++ lib/rdoc/markup/simple_markup/fragments.rb | 328 ++++++++++++++++++ lib/rdoc/markup/simple_markup/inline.rb | 348 +++++++++++++++++++ lib/rdoc/markup/simple_markup/lines.rb | 151 +++++++++ lib/rdoc/markup/simple_markup/preprocess.rb | 68 ++++ lib/rdoc/markup/simple_markup/to_html.rb | 289 ++++++++++++++++ lib/rdoc/markup/simple_markup/to_latex.rb | 333 ++++++++++++++++++ lib/rdoc/markup/test/AllTests.rb | 2 + lib/rdoc/markup/test/TestInline.rb | 151 +++++++++ lib/rdoc/markup/test/TestParse.rb | 503 ++++++++++++++++++++++++++++ 12 files changed, 2708 insertions(+) create mode 100644 lib/rdoc/markup/sample/rdoc2latex.rb create mode 100644 lib/rdoc/markup/sample/sample.rb create mode 100644 lib/rdoc/markup/simple_markup.rb create mode 100644 lib/rdoc/markup/simple_markup/fragments.rb create mode 100644 lib/rdoc/markup/simple_markup/inline.rb create mode 100644 lib/rdoc/markup/simple_markup/lines.rb create mode 100644 lib/rdoc/markup/simple_markup/preprocess.rb create mode 100644 lib/rdoc/markup/simple_markup/to_html.rb create mode 100644 lib/rdoc/markup/simple_markup/to_latex.rb create mode 100644 lib/rdoc/markup/test/AllTests.rb create mode 100644 lib/rdoc/markup/test/TestInline.rb create mode 100644 lib/rdoc/markup/test/TestParse.rb (limited to 'lib/rdoc/markup') diff --git a/lib/rdoc/markup/sample/rdoc2latex.rb b/lib/rdoc/markup/sample/rdoc2latex.rb new file mode 100644 index 0000000000..26563b75da --- /dev/null +++ b/lib/rdoc/markup/sample/rdoc2latex.rb @@ -0,0 +1,16 @@ +#!/usr/local/bin/ruby +# Illustration of a script to convert an RDoc-style file to a LaTeX +# document + +require 
'rdoc/markup/simple_markup' +require 'rdoc/markup/simple_markup/to_latex' + +p = SM::SimpleMarkup.new +h = SM::ToLaTeX.new + +#puts "\\documentclass{report}" +#puts "\\usepackage{tabularx}" +#puts "\\usepackage{parskip}" +#puts "\\begin{document}" +puts p.convert(ARGF.read, h) +#puts "\\end{document}" diff --git a/lib/rdoc/markup/sample/sample.rb b/lib/rdoc/markup/sample/sample.rb new file mode 100644 index 0000000000..a375b54564 --- /dev/null +++ b/lib/rdoc/markup/sample/sample.rb @@ -0,0 +1,42 @@ +# This program illustrates the basic use of the SimpleMarkup +# class. It extracts the first comment block from the +# simple_markup.rb file and converts it into HTML on +# standard output. Run it using +# +# % ruby sample.rb +# +# You should be in the sample/ directory when you do this, +# as it hardwires the path to the files it needs to require. +# This isn't necessary in the code you write once you've +# installed the package. +# +# For a better way of formatting code comment blocks (and more) +# see the rdoc package. +# + +$:.unshift "../../.." + +require 'rdoc/markup/simple_markup' +require 'rdoc/markup/simple_markup/to_html' + +# Extract the comment block from the source file + +input_string = "" + +File.foreach("../simple_markup.rb") do |line| + break unless line.gsub!(/^\# ?/, '') + input_string << line +end + +# Create a markup object +markup = SM::SimpleMarkup.new + +# Attach it to an HTML formatter +h = SM::ToHtml.new + +# And convert out comment block to html. Wrap it a body +# tag pair to let browsers view it + +puts "" +puts markup.convert(input_string, h) +puts "" diff --git a/lib/rdoc/markup/simple_markup.rb b/lib/rdoc/markup/simple_markup.rb new file mode 100644 index 0000000000..18971e23e1 --- /dev/null +++ b/lib/rdoc/markup/simple_markup.rb @@ -0,0 +1,477 @@ +# = Introduction +# +# SimpleMarkup parses plain text documents and attempts to decompose +# them into their constituent parts. 
Some of these parts are high-level: +# paragraphs, chunks of verbatim text, list entries and the like. Other +# parts happen at the character level: a piece of bold text, a word in +# code font. This markup is similar in spirit to that used on WikiWiki +# webs, where folks create web pages using a simple set of formatting +# rules. +# +# SimpleMarkup itself does no output formatting: this is left to a +# different set of classes. +# +# SimpleMarkup is extendable at runtime: you can add new markup +# elements to be recognised in the documents that SimpleMarkup parses. +# +# SimpleMarkup is intended to be the basis for a family of tools which +# share the common requirement that simple, plain-text should be +# rendered in a variety of different output formats and media. It is +# envisaged that SimpleMarkup could be the basis for formatting RDoc +# style comment blocks, Wiki entries, and online FAQs. +# +# = Basic Formatting +# +# * SimpleMarkup looks for a document's natural left margin. This is +# used as the initial margin for the document. +# +# * Consecutive lines starting at this margin are considered to be a +# paragraph. +# +# * If a paragraph starts with a "*", "-", or with "<digit>.", then it is +# taken to be the start of a list. The margin is increased to be the +# first non-space following the list start flag. Subsequent lines +# should be indented to this new margin until the list ends. For +# example: +# +# * this is a list with three paragraphs in +# the first item. This is the first paragraph. +# +# And this is the second paragraph. +# +# 1. This is an indented, numbered list. +# 2. This is the second item in that list +# +# This is the third conventional paragraph in the +# first list item. +# +# * This is the second item in the original list +# +# * You can also construct labeled lists, sometimes called description +# or definition lists.
Do this by putting the label in square brackets +# and indenting the list body: +# +# [cat] a small furry mammal +# that seems to sleep a lot +# +# [ant] a little insect that is known +# to enjoy picnics +# +# A minor variation on labeled lists uses two colons to separate the +# label from the list body: +# +# cat:: a small furry mammal +# that seems to sleep a lot +# +# ant:: a little insect that is known +# to enjoy picnics +# +# This latter style guarantees that the list bodies' left margins are +# aligned: think of them as a two column table. +# +# * Any line that starts to the right of the current margin is treated +# as verbatim text. This is useful for code listings. The example of a +# list above is also verbatim text. +# +# * A line starting with an equals sign (=) is treated as a +# heading. Level one headings have one equals sign, level two headings +# have two, and so on. +# +# * A line starting with three or more hyphens (at the current indent) +# generates a horizontal rule. The more hyphens, the thicker the rule +# (within reason, and if supported by the output device). +# +# * You can use markup within text (except verbatim) to change the +# appearance of parts of that text. Out of the box, SimpleMarkup +# supports word-based and general markup. +# +# Word-based markup uses flag characters around individual words: +# +# [\*word*] displays word in a *bold* font +# [\_word_] displays word in an _emphasized_ font +# [\+word+] displays word in a +code+ font +# +# General markup affects text between a start delimiter and an end +# delimiter. Not surprisingly, these delimiters look like HTML markup. +# +# [\<b>text...</b>] displays word in a *bold* font +# [\<em>text...</em>] displays word in an _emphasized_ font +# [\<i>text...</i>] displays word in an _emphasized_ font +# [\<tt>text...</tt>] displays word in a +code+ font +# +# Unlike conventional Wiki markup, general markup can cross line +# boundaries.
You can turn off the interpretation of markup by +# preceding the first character with a backslash, so \\\bold +# text and \\\*bold* produce \bold text and \*bold +# respectively. +# +# = Using SimpleMarkup +# +# For information on using SimpleMarkup programatically, +# see SM::SimpleMarkup. +# +# Author:: Dave Thomas, dave@pragmaticprogrammer.com +# Version:: 0.0 +# License:: Ruby license + + + +require 'rdoc/markup/simple_markup/fragments' +require 'rdoc/markup/simple_markup/lines.rb' + +module SM #:nodoc: + + # == Synopsis + # + # This code converts input_string, which is in the format + # described in markup/simple_markup.rb, to HTML. The conversion + # takes place in the +convert+ method, so you can use the same + # SimpleMarkup object to convert multiple input strings. + # + # require 'rdoc/markup/simple_markup' + # require 'rdoc/markup/simple_markup/to_html' + # + # p = SM::SimpleMarkup.new + # h = SM::ToHtml.new + # + # puts p.convert(input_string, h) + # + # You can extend the SimpleMarkup parser to recognise new markup + # sequences, and to add special processing for text that matches a + # regular epxression. Here we make WikiWords significant to the parser, + # and also make the sequences {word} and \text... signify + # strike-through text. When then subclass the HTML output class to deal + # with these: + # + # require 'rdoc/markup/simple_markup' + # require 'rdoc/markup/simple_markup/to_html' + # + # class WikiHtml < SM::ToHtml + # def handle_special_WIKIWORD(special) + # "" + special.text + "" + # end + # end + # + # p = SM::SimpleMarkup.new + # p.add_word_pair("{", "}", :STRIKE) + # p.add_html("no", :STRIKE) + # + # p.add_special(/\b([A-Z][a-z]+[A-Z]\w+)/, :WIKIWORD) + # + # h = WikiHtml.new + # h.add_tag(:STRIKE, "~~", "~~") + # + # puts "" + p.convert(ARGF.read, h) + "" + # + # == Output Formatters + # + # _missing_ + # + # + + class SimpleMarkup + + SPACE = ?\s + + # List entries look like: + # * text + # 1. 
text + # [label] text + # label:: text + # + # Flag it as a list entry, and + # work out the indent for subsequent lines + + SIMPLE_LIST_RE = /^( + ( \* (?# bullet) + |- (?# bullet) + |\d+\. (?# numbered ) + |[A-Za-z]\. (?# alphabetically numbered ) + ) + \s+ + )\S/x + + LABEL_LIST_RE = /^( + ( \[.*?\] (?# labeled ) + |\S.*:: (?# note ) + )(?=\s|$) + \s* + )/x + + + ## + # take a block of text and use various heuristics to determine + # it's structure (paragraphs, lists, and so on). Invoke an + # event handler as we identify significant chunks. + # + + def initialize + @am = AttributeManager.new + @output = nil + end + + ## + # Add to the sequences used to add formatting to an individual word + # (such as *bold*). Matching entries will generate attibutes + # that the output formatters can recognize by their +name+ + + def add_word_pair(start, stop, name) + @am.add_word_pair(start, stop, name) + end + + ## + # Add to the sequences recognized as general markup + # + + def add_html(tag, name) + @am.add_html(tag, name) + end + + ## + # Add to other inline sequences. For example, we could add + # WikiWords using something like: + # + # parser.add_special(/\b([A-Z][a-z]+[A-Z]\w+)/, :WIKIWORD) + # + # Each wiki word will be presented to the output formatter + # via the accept_special method + # + + def add_special(pattern, name) + @am.add_special(pattern, name) + end + + + # We take a string, split it into lines, work out the type of + # each line, and from there deduce groups of lines (for example + # all lines in a paragraph). We then invoke the output formatter + # using a Visitor to display the result + + def convert(str, op) + @lines = Lines.new(str.split(/\r?\n/).collect { |aLine| + Line.new(aLine) }) + return "" if @lines.empty? 
+ @lines.normalize + assign_types_to_lines + group = group_lines + # call the output formatter to handle the result + # group.to_a.each {|i| p i} + group.accept(@am, op) + end + + + ####### + private + ####### + + + ## + # Look through the text at line indentation. We flag each line as being + # Blank, a paragraph, a list element, or verbatim text + # + + def assign_types_to_lines(margin = 0, level = 0) + + while line = @lines.next + if line.isBlank? + line.stamp(Line::BLANK, level) + next + end + + # if a line contains non-blanks before the margin, then it must belong + # to an outer level + + text = line.text + + for i in 0...margin + if text[i] != SPACE + @lines.unget + return + end + end + + active_line = text[margin..-1] + + # Rules (horizontal lines) look like + # + # --- (three or more hyphens) + # + # The more hyphens, the thicker the rule + # + + if /^(---+)\s*$/ =~ active_line + line.stamp(Line::RULE, level, $1.length-2) + next + end + + # Then look for list entries. First the ones that have to have + # text following them (* xxx, - xxx, and dd. 
xxx) + + if SIMPLE_LIST_RE =~ active_line + + offset = margin + $1.length + prefix = $2 + prefix_length = prefix.length + + flag = case prefix + when "*","-" then ListBase::BULLET + when /^\d/ then ListBase::NUMBER + when /^[A-Z]/ then ListBase::UPPERALPHA + when /^[a-z]/ then ListBase::LOWERALPHA + else raise "Invalid List Type: #{self.inspect}" + end + + line.stamp(Line::LIST, level+1, prefix, flag) + text[margin, prefix_length] = " " * prefix_length + assign_types_to_lines(offset, level + 1) + next + end + + + if LABEL_LIST_RE =~ active_line + offset = margin + $1.length + prefix = $2 + prefix_length = prefix.length + + next if handled_labeled_list(line, level, margin, offset, prefix) + end + + # Headings look like + # = Main heading + # == Second level + # === Third + # + # Headings reset the level to 0 + + if active_line[0] == ?= and active_line =~ /^(=+)\s*(.*)/ + prefix_length = $1.length + prefix_length = 6 if prefix_length > 6 + line.stamp(Line::HEADING, 0, prefix_length) + line.strip_leading(margin + prefix_length) + next + end + + # If the character's a space, then we have verbatim text, + # otherwise + + if active_line[0] == SPACE + line.strip_leading(margin) if margin > 0 + line.stamp(Line::VERBATIM, level) + else + line.stamp(Line::PARAGRAPH, level) + end + end + end + + # Handle labeled list entries, We have a special case + # to deal with. Because the labels can be long, they force + # the remaining block of text over the to right: + # + # this is a long label that I wrote:: and here is the + # block of text with + # a silly margin + # + # So we allow the special case. If the label is followed + # by nothing, and if the following line is indented, then + # we take the indent of that line as the new margin + # + # this is a long label that I wrote:: + # here is a more reasonably indented block which + # will ab attached to the label. 
+ # + + def handled_labeled_list(line, level, margin, offset, prefix) + prefix_length = prefix.length + text = line.text + flag = nil + case prefix + when /^\[/ + flag = ListBase::LABELED + prefix = prefix[1, prefix.length-2] + when /:$/ + flag = ListBase::NOTE + prefix.chop! + else raise "Invalid List Type: #{self.inspect}" + end + + # body is on the next line + + if text.length <= offset + original_line = line + line = @lines.next + return(false) unless line + text = line.text + + for i in 0..margin + if text[i] != SPACE + @lines.unget + return false + end + end + i = margin + i += 1 while text[i] == SPACE + if i >= text.length + @lines.unget + return false + else + offset = i + prefix_length = 0 + @lines.delete(original_line) + end + end + + line.stamp(Line::LIST, level+1, prefix, flag) + text[margin, prefix_length] = " " * prefix_length + assign_types_to_lines(offset, level + 1) + return true + end + + # Return a block consisting of fragments which are + # paragraphs, list entries or verbatim text. We merge consecutive + # lines of the same type and level together. 
We are also slightly + # tricky with lists: the lines following a list introduction + # look like paragraph lines at the next level, and we remap them + # into list entries instead + + def group_lines + @lines.rewind + + inList = false + wantedType = wantedLevel = nil + + block = LineCollection.new + group = nil + + while line = @lines.next + if line.level == wantedLevel and line.type == wantedType + group.add_text(line.text) + else + group = block.fragment_for(line) + block.add(group) + if line.type == Line::LIST + wantedType = Line::PARAGRAPH + else + wantedType = line.type + end + wantedLevel = line.level + end + end + + block.normalize + block + end + + ## for debugging, we allow access to our line contents as text + def content + @lines.as_text + end + public :content + + ## for debugging, return the list of line types + def get_line_types + @lines.line_types + end + public :get_line_types + end + +end diff --git a/lib/rdoc/markup/simple_markup/fragments.rb b/lib/rdoc/markup/simple_markup/fragments.rb new file mode 100644 index 0000000000..83388fcc0b --- /dev/null +++ b/lib/rdoc/markup/simple_markup/fragments.rb @@ -0,0 +1,328 @@ +require 'rdoc/markup/simple_markup/lines.rb' +require 'rdoc/markup/simple_markup/inline.rb' + +module SM + + ## + # A Fragment is a chunk of text, subclassed as a paragraph, a list + # entry, or verbatim text + + class Fragment + attr_reader :level, :param, :txt + attr_accessor :type + + def initialize(level, param, type, txt) + @level = level + @param = param + @type = type + @txt = "" + add_text(txt) if txt + end + + def add_text(txt) + @txt << " " if @txt.length > 0 + @txt << txt.tr_s("\n ", " ").strip + end + + def to_s + "L#@level: #{self.class.name.split('::')[-1]}\n#@txt" + end + + ###### + # This is a simple factory system that lets us associate fragement + # types (a string) with a subclass of fragment + + TYPE_MAP = {} + + def Fragment.type_name(name) + TYPE_MAP[name] = self + end + + def Fragment.for(line) + klass = 
TYPE_MAP[line.type] || + raise("Unknown line type: '#{line.type.inspect}:' '#{line.text}'") + return klass.new(line.level, line.param, line.flag, line.text) + end + end + + ## + # A paragraph is a fragment which gets wrapped to fit. We remove all + # newlines when we're created, and have them put back on output + + class Paragraph < Fragment + type_name Line::PARAGRAPH + end + + class BlankLine < Paragraph + type_name Line::BLANK + end + + class Heading < Paragraph + type_name Line::HEADING + + def head_level + @param.to_i + end + end + + ## + # A List is a fragment with some kind of label + # + + class ListBase < Paragraph + # List types + BULLET = :BULLET + NUMBER = :NUMBER + UPPERALPHA = :UPPERALPHA + LOWERALPHA = :LOWERALPHA + LABELED = :LABELED + NOTE = :NOTE + end + + class ListItem < ListBase + type_name Line::LIST + + # def label + # am = AttributeManager.new(@param) + # am.flow + # end + end + + class ListStart < ListBase + def initialize(level, param, type) + super(level, param, type, nil) + end + end + + class ListEnd < ListBase + def initialize(level, type) + super(level, "", type, nil) + end + end + + ## + # Verbatim code contains lines that don't get wrapped. + + class Verbatim < Fragment + type_name Line::VERBATIM + + def add_text(txt) + @txt << txt.chomp << "\n" + end + + end + + ## + # A horizontal rule + class Rule < Fragment + type_name Line::RULE + end + + + # Collect groups of lines together. 
Each group + # will end up containing a flow of text + + class LineCollection + + def initialize + @fragments = [] + end + + def add(fragment) + @fragments << fragment + end + + def each(&b) + @fragments.each(&b) + end + + # For testing + def to_a + @fragments.map {|fragment| fragment.to_s} + end + + # Factory for different fragment types + def fragment_for(*args) + Fragment.for(*args) + end + + # tidy up at the end + def normalize + change_verbatim_blank_lines + add_list_start_and_ends + add_list_breaks + tidy_blank_lines + end + + def to_s + @fragments.join("\n----\n") + end + + def accept(am, visitor) + + visitor.start_accepting + + @fragments.each do |fragment| + case fragment + when Verbatim + visitor.accept_verbatim(am, fragment) + when Rule + visitor.accept_rule(am, fragment) + when ListStart + visitor.accept_list_start(am, fragment) + when ListEnd + visitor.accept_list_end(am, fragment) + when ListItem + visitor.accept_list_item(am, fragment) + when BlankLine + visitor.accept_blank_line(am, fragment) + when Heading + visitor.accept_heading(am, fragment) + when Paragraph + visitor.accept_paragraph(am, fragment) + end + end + + visitor.end_accepting + end + ####### + private + ####### + + # If you have: + # + # normal paragraph text. + # + # this is code + # + # and more code + # + # You'll end up with the fragments Paragraph, BlankLine, + # Verbatim, BlankLine, Verbatim, BlankLine, etc + # + # The BlankLine in the middle of the verbatim chunk needs to + # be changed to a real verbatim newline, and the two + # verbatim blocks merged + # + # + def change_verbatim_blank_lines + frag_block = nil + blank_count = 0 + @fragments.each_with_index do |frag, i| + if frag_block.nil? 
+ frag_block = frag if Verbatim === frag + else + case frag + when Verbatim + blank_count.times { frag_block.add_text("\n") } + blank_count = 0 + frag_block.add_text(frag.txt) + @fragments[i] = nil # remove out current fragment + when BlankLine + if frag_block + blank_count += 1 + @fragments[i] = nil + end + else + frag_block = nil + blank_count = 0 + end + end + end + @fragments.compact! + end + + # List nesting is implicit given the level of + # Make it explicit, just to make life a tad + # easier for the output processors + + def add_list_start_and_ends + level = 0 + res = [] + type_stack = [] + + @fragments.each do |fragment| + # $stderr.puts "#{level} : #{fragment.class.name} : #{fragment.level}" + new_level = fragment.level + while (level < new_level) + level += 1 + type = fragment.type + res << ListStart.new(level, fragment.param, type) if type + type_stack.push type + # $stderr.puts "Start: #{level}" + end + + while level > new_level + type = type_stack.pop + res << ListEnd.new(level, type) if type + level -= 1 + # $stderr.puts "End: #{level}, #{type}" + end + + res << fragment + level = fragment.level + end + level.downto(1) do |i| + type = type_stack.pop + res << ListEnd.new(i, type) if type + end + + @fragments = res + end + + # now insert start/ends between list entries at the + # same level that have different element types + + def add_list_breaks + res = @fragments + + @fragments = [] + list_stack = [] + + res.each do |fragment| + case fragment + when ListStart + list_stack.push fragment + when ListEnd + start = list_stack.pop + fragment.type = start.type + when ListItem + l = list_stack.last + if fragment.type != l.type + @fragments << ListEnd.new(l.level, l.type) + start = ListStart.new(l.level, fragment.param, fragment.type) + @fragments << start + list_stack.pop + list_stack.push start + end + else + ; + end + @fragments << fragment + end + end + + # Finally tidy up the blank lines: + # * change Blank/ListEnd into ListEnd/Blank + # * remove blank 
lines at the front + + def tidy_blank_lines + (@fragments.size - 1).times do |i| + if @fragments[i].kind_of?(BlankLine) and + @fragments[i+1].kind_of?(ListEnd) + @fragments[i], @fragments[i+1] = @fragments[i+1], @fragments[i] + end + end + + # remove leading blanks + @fragments.each_with_index do |f, i| + break unless f.kind_of? BlankLine + @fragments[i] = nil + end + + @fragments.compact! + end + + end + +end diff --git a/lib/rdoc/markup/simple_markup/inline.rb b/lib/rdoc/markup/simple_markup/inline.rb new file mode 100644 index 0000000000..684ff4b275 --- /dev/null +++ b/lib/rdoc/markup/simple_markup/inline.rb @@ -0,0 +1,348 @@ +module SM + + # We manage a set of attributes. Each attribute has a symbol name + # and a bit value + + class Attribute + SPECIAL = 1 + + @@name_to_bitmap = { :_SPECIAL_ => SPECIAL } + @@next_bitmap = 2 + + def Attribute.bitmap_for(name) + bitmap = @@name_to_bitmap[name] + if !bitmap + bitmap = @@next_bitmap + @@next_bitmap <<= 1 + @@name_to_bitmap[name] = bitmap + end + bitmap + end + + def Attribute.as_string(bitmap) + return "none" if bitmap.zero? + res = [] + @@name_to_bitmap.each do |name, bit| + res << name if (bitmap & bit) != 0 + end + res.join(",") + end + + def Attribute.each_name_of(bitmap) + @@name_to_bitmap.each do |name, bit| + next if bit == SPECIAL + yield name.to_s if (bitmap & bit) != 0 + end + end + end + + + # An AttrChanger records a change in attributes. It contains + # a bitmap of the attributes to turn on, and a bitmap of those to + # turn off + + AttrChanger = Struct.new(:turn_on, :turn_off) + class AttrChanger + def to_s + "Attr: +#{Attribute.as_string(@turn_on)}/-#{Attribute.as_string(@turn_on)}" + end + end + + # An array of attributes which parallels the characters in a string + class AttrSpan + def initialize(length) + @attrs = Array.new(length, 0) + end + + def set_attrs(start, length, bits) + for i in start ... 
(start+length) + @attrs[i] |= bits + end + end + + def [](n) + @attrs[n] + end + end + + ## + # Hold details of a special sequence + + class Special + attr_reader :type + attr_accessor :text + + def initialize(type, text) + @type, @text = type, text + end + + def ==(o) + self.text == o.text && self.type == o.type + end + + def to_s + "Special: type=#{type}, text=#{text.dump}" + end + end + + class AttributeManager + + NULL = "\000".freeze + + ## + # We work by substituting non-printing characters in to the + # text. For now I'm assuming that I can substitute + # a character in the range 0..8 for a 7 bit character + # without damaging the encoded string, but this might + # be optimistic + # + +=begin + ATTR_FLAG = 001 + A_START = 002 + A_END = 003 + A_SPECIAL_START = 005 + A_SPECIAL_END = 006 + + START_ATTR = ATTR_FLAG.chr + A_START.chr + END_ATTR = ATTR_FLAG.chr + A_END.chr + + START_SPECIAL = ATTR_FLAG.chr + A_SPECIAL_START.chr + END_SPECIAL = ATTR_FLAG.chr + A_SPECIAL_END.chr + +=end + A_PROTECT = 004 + PROTECT_ATTR = A_PROTECT.chr + + # This maps delimiters that occur around words (such as + # *bold* or +tt+) where the start and end delimiters + # and the same. This lets us optimize the regexp + MATCHING_WORD_PAIRS = {} + + # And this is used when the delimiters aren't the same. In this + # case the hash maps a pattern to the attribute character + WORD_PAIR_MAP = {} + + # This maps HTML tags to the corresponding attribute char + HTML_TAGS = {} + + # And this maps _special_ sequences to a name. 
A special sequence + # is something like a WikiWord + SPECIAL = {} + + # Return an attribute object with the given turn_on + # and turn_off bits set + + def attribute(turn_on, turn_off) + AttrChanger.new(turn_on, turn_off) + end + + + def change_attribute(current, new) + diff = current ^ new + attribute(new & diff, current & diff) + end + + def changed_attribute_by_name(current_set, new_set) + current = new = 0 + current_set.each {|name| current |= Attribute.bitmap_for(name) } + new_set.each {|name| new |= Attribute.bitmap_for(name) } + change_attribute(current, new) + end + + def copy_string(start_pos, end_pos) + res = @str[start_pos...end_pos] + res.gsub!(/\000/, '') + res + end + + # Map attributes like textto the sequence \001\002\001\003, + # where is a per-attribute specific character + + def convert_attrs(str, attrs) + # first do matching ones + tags = MATCHING_WORD_PAIRS.keys.join("") + re = "(^|\\W)([#{tags}])([A-Za-z_]+?)\\2(\\W|\$)" +# re = "(^|\\W)([#{tags}])(\\S+?)\\2(\\W|\$)" + 1 while str.gsub!(Regexp.new(re)) { + attr = MATCHING_WORD_PAIRS[$2]; + attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr) + $1 + NULL*$2.length + $3 + NULL*$2.length + $4 + } + + # then non-matching + unless WORD_PAIR_MAP.empty? + WORD_PAIR_MAP.each do |regexp, attr| + str.gsub!(regexp) { + attrs.set_attrs($`.length + $1.length, $2.length, attr) + NULL*$1.length + $2 + NULL*$3.length + } + end + end + end + + def convert_html(str, attrs) + tags = HTML_TAGS.keys.join("|") + re = "<(#{tags})>(.*?)" + 1 while str.gsub!(Regexp.new(re, Regexp::IGNORECASE)) { + attr = HTML_TAGS[$1.downcase] + html_length = $1.length + 2 + seq = NULL * html_length + attrs.set_attrs($`.length + html_length, $2.length, attr) + seq + $2 + seq + NULL + } + end + + def convert_specials(str, attrs) + unless SPECIAL.empty? 
+ SPECIAL.each do |regexp, attr| + str.scan(regexp) do + attrs.set_attrs($`.length, $1.length, attr | Attribute::SPECIAL) + end + end + end + end + + # A \ in front of a character that would normally be + # processed turns off processing. We do this by turning + # \< into <#{PROTECT} + + PROTECTABLE = [ "<" << "\\" ] #" + + + def mask_protected_sequences + protect_pattern = Regexp.new("\\\\([#{Regexp.escape(PROTECTABLE.join(''))}])") + @str.gsub!(protect_pattern, "\\1#{PROTECT_ATTR}") + end + + def unmask_protected_sequences + @str.gsub!(/(.)#{PROTECT_ATTR}/, '\1') + end + + def initialize + add_word_pair("*", "*", :BOLD) + add_word_pair("_", "_", :EM) + add_word_pair("+", "+", :TT) + + add_html("em", :EM) + add_html("i", :EM) + add_html("b", :BOLD) + add_html("tt", :TT) + end + + def add_word_pair(start, stop, name) + raise "Word flags may not start '<'" if start[0] == ?< + bitmap = Attribute.bitmap_for(name) + if start == stop + MATCHING_WORD_PAIRS[start] = bitmap + else + pattern = Regexp.new("(" + Regexp.escape(start) + ")" + +# "([A-Za-z]+)" + + "(\\S+)" + + "(" + Regexp.escape(stop) +")") + WORD_PAIR_MAP[pattern] = bitmap + end + PROTECTABLE << start[0,1] + PROTECTABLE.uniq! + end + + def add_html(tag, name) + HTML_TAGS[tag.downcase] = Attribute.bitmap_for(name) + end + + def add_special(pattern, name) + SPECIAL[pattern] = Attribute.bitmap_for(name) + end + + def flow(str) + @str = str + @attrs = AttrSpan.new(str.length) + + puts("Before flow, str='#{@str.dump}'") if $DEBUG + mask_protected_sequences + convert_attrs(@str, @attrs) + convert_html(@str, @attrs) + convert_specials(str, @attrs) + unmask_protected_sequences + puts("After flow, str='#{@str.dump}'") if $DEBUG + return split_into_flow + end + + def display_attributes + puts + puts @str.tr(NULL, "!") + bit = 1 + 16.times do |bno| + line = "" + @str.length.times do |i| + if (@attrs[i] & bit) == 0 + line << " " + else + if bno.zero? 
+ line << "S" + else + line << ("%d" % (bno+1)) + end + end + end + puts(line) unless line =~ /^ *$/ + bit <<= 1 + end + end + + def split_into_flow + + display_attributes if $DEBUG + + res = [] + current_attr = 0 + str = "" + + + str_len = @str.length + + # skip leading invisible text + i = 0 + i += 1 while i < str_len and @str[i].zero? + start_pos = i + + # then scan the string, chunking it on attribute changes + while i < str_len + new_attr = @attrs[i] + if new_attr != current_attr + if i > start_pos + res << copy_string(start_pos, i) + start_pos = i + end + + res << change_attribute(current_attr, new_attr) + current_attr = new_attr + + if (current_attr & Attribute::SPECIAL) != 0 + i += 1 while i < str_len and (@attrs[i] & Attribute::SPECIAL) != 0 + res << Special.new(current_attr, copy_string(start_pos, i)) + start_pos = i + next + end + end + + # move on, skipping any invisible characters + begin + i += 1 + end while i < str_len and @str[i].zero? + end + + # tidy up trailing text + if start_pos < str_len + res << copy_string(start_pos, str_len) + end + + # and reset to all attributes off + res << change_attribute(current_attr, 0) if current_attr != 0 + + return res + end + + end + +end diff --git a/lib/rdoc/markup/simple_markup/lines.rb b/lib/rdoc/markup/simple_markup/lines.rb new file mode 100644 index 0000000000..4e294f27dc --- /dev/null +++ b/lib/rdoc/markup/simple_markup/lines.rb @@ -0,0 +1,151 @@ +########################################################################## +# +# We store the lines we're working on as objects of class Line. 
+# These contain the text of the line, along with a flag indicating the +# line type, and an indentation level + +module SM + + class Line + INFINITY = 9999 + + BLANK = :BLANK + HEADING = :HEADING + LIST = :LIST + RULE = :RULE + PARAGRAPH = :PARAGRAPH + VERBATIM = :VERBATIM + + # line type + attr_accessor :type + + # The indentation nesting level + attr_accessor :level + + # The contents + attr_accessor :text + + # A prefix or parameter. For LIST lines, this is + # the text that introduced the list item (the label) + attr_accessor :param + + # A flag. For list lines, this is the type of the list + attr_accessor :flag + + # the number of leading spaces + attr_accessor :leading_spaces + + # true if this line has been deleted from the list of lines + attr_accessor :deleted + + + def initialize(text) + @text = text.dup + @deleted = false + + # expand tabs + 1 while @text.gsub!(/\t+/) { ' ' * (8*$&.length - $`.length % 8)} && $~ #` + + # Strip trailing whitespace + @text.sub!(/\s+$/, '') + + # and look for leading whitespace + if @text.length > 0 + @text =~ /^(\s*)/ + @leading_spaces = $1.length + else + @leading_spaces = INFINITY + end + end + + # Return true if this line is blank + def isBlank? + @text.length.zero? + end + + # stamp a line with a type, a level, a prefix, and a flag + def stamp(type, level, param="", flag=nil) + @type, @level, @param, @flag = type, level, param, flag + end + + ## + # Strip off the leading margin + # + + def strip_leading(size) + if @text.size > size + @text[0,size] = "" + else + @text = "" + end + end + + def to_s + "#@type#@level: #@text" + end + end + + ############################################################################### + # + # A container for all the lines + # + + class Lines + include Enumerable + + attr_reader :lines # for debugging + + def initialize(lines) + @lines = lines + rewind + end + + def empty? + @lines.size.zero? 
+ end + + def each + @lines.each do |line| + yield line unless line.deleted + end + end + +# def [](index) +# @lines[index] +# end + + def rewind + @nextline = 0 + end + + def next + begin + res = @lines[@nextline] + @nextline += 1 if @nextline < @lines.size + end while res and res.deleted and @nextline < @lines.size + res + end + + def unget + @nextline -= 1 + end + + def delete(a_line) + a_line.deleted = true + end + + def normalize + margin = @lines.collect{|l| l.leading_spaces}.min + margin = 0 if margin == Line::INFINITY + @lines.each {|line| line.strip_leading(margin) } if margin > 0 + end + + def as_text + @lines.map {|l| l.text}.join("\n") + end + + def line_types + @lines.map {|l| l.type } + end + end +end diff --git a/lib/rdoc/markup/simple_markup/preprocess.rb b/lib/rdoc/markup/simple_markup/preprocess.rb new file mode 100644 index 0000000000..09892c2b6c --- /dev/null +++ b/lib/rdoc/markup/simple_markup/preprocess.rb @@ -0,0 +1,68 @@ +module SM + + ## + # Handle common directives that can occur in a block of text: + # + # : include : filename + # + + class PreProcess + + def initialize(input_file_name, include_path) + @input_file_name = input_file_name + @include_path = include_path + end + + # Look for common options in a chunk of text. 
Options that + # we don't handle are passed back to our caller + # as |directive, param| + + def handle(text) + text.gsub!(/^([ \t#]*):(\w+):\s*(.+)?\n/) do + + directive = $2.downcase + param = $3 + + case directive + + when "include" + include_file($3, $1) + + else + yield(directive, param) + end + end + end + + ####### + private + ####### + + # Include a file, indenting it correctly + + def include_file(name, indent) + if (full_name = find_include_file(name)) + content = File.open(full_name) {|f| f.read} + res = content.gsub(/^#?/, indent) + else + $stderr.puts "Couldn't find file to include: '#{name}'" + '' + end + end + + # Look for the given file in the directory containing the current + # file, and then in each of the directories specified in the + # RDOC_INCLUDE path + + def find_include_file(name) + to_search = [ File.dirname(@input_file_name) ].concat @include_path + to_search.each do |dir| + full_name = File.join(dir, name) + stat = File.stat(full_name) rescue next + return full_name if stat.readable? + end + nil + end + + end +end diff --git a/lib/rdoc/markup/simple_markup/to_html.rb b/lib/rdoc/markup/simple_markup/to_html.rb new file mode 100644 index 0000000000..26b5f4ce70 --- /dev/null +++ b/lib/rdoc/markup/simple_markup/to_html.rb @@ -0,0 +1,289 @@ +require 'rdoc/markup/simple_markup/fragments' +require 'rdoc/markup/simple_markup/inline' + +require 'cgi' + +module SM + + class ToHtml + + LIST_TYPE_TO_HTML = { + ListBase::BULLET => [ "

", "" ], + ListBase::NUMBER => [ "

", "" ], + ListBase::UPPERALPHA => [ "

", "" ], + ListBase::LOWERALPHA => [ "

", "" ], + ListBase::LABELED => [ "

", "" ], + ListBase::NOTE => [ "", "

" ], + } + + InlineTag = Struct.new(:bit, :on, :off) + + def initialize + init_tags + end + + ## + # Set up the standard mapping of attributes to HTML tags + # + def init_tags + @attr_tags = [ + InlineTag.new(SM::Attribute.bitmap_for(:BOLD), "", ""), + InlineTag.new(SM::Attribute.bitmap_for(:TT), "", ""), + InlineTag.new(SM::Attribute.bitmap_for(:EM), "", ""), + ] + end + + ## + # Add a new set of HTML tags for an attribute. We allow + # separate start and end tags for flexibility + # + def add_tag(name, start, stop) + @attr_tags << InlineTag.new(SM::Attribute.bitmap_for(name), start, stop) + end + + ## + # Given an HTML tag, decorate it with class information + # and the like if required. This is a no-op in the base + # class, but is overridden in HTML output classes that + # implement style sheets + + def annotate(tag) + tag + end + + ## + # Here's the client side of the visitor pattern + + def start_accepting + @res = "" + @in_list_entry = [] + end + + def end_accepting + @res + end + + def accept_paragraph(am, fragment) + @res << annotate("

") + "\n" + @res << wrap(convert_flow(am.flow(fragment.txt))) + @res << annotate("

") + "\n" + end + + def accept_verbatim(am, fragment) + @res << annotate("

") + "\n"
+      @res << CGI.escapeHTML(fragment.txt)
+      @res << annotate("

") << "\n" + end + + def accept_rule(am, fragment) + size = fragment.param + size = 10 if size > 10 + @res << "

" + end + + def accept_list_start(am, fragment) + @res << html_list_name(fragment.type, true) <<"\n" + @in_list_entry.push false + end + + def accept_list_end(am, fragment) + if tag = @in_list_entry.pop + @res << annotate(tag) << "\n" + end + @res << html_list_name(fragment.type, false) <<"\n" + end + + def accept_list_item(am, fragment) + if tag = @in_list_entry.last + @res << annotate(tag) << "\n" + end + @res << list_item_start(am, fragment) + @res << wrap(convert_flow(am.flow(fragment.txt))) << "\n" + @in_list_entry[-1] = list_end_for(fragment.type) + end + + def accept_blank_line(am, fragment) + # @res << annotate("

") << "\n" + end + + def accept_heading(am, fragment) + @res << convert_heading(fragment.head_level, am.flow(fragment.txt)) + end + + # This is a higher speed (if messier) version of wrap + + def wrap(txt, line_len = 76) + res = "" + sp = 0 + ep = txt.length + while sp < ep + # scan back for a space + p = sp + line_len - 1 + if p >= ep + p = ep + else + while p > sp and txt[p] != ?\s + p -= 1 + end + if p <= sp + p = sp + line_len + while p < ep and txt[p] != ?\s + p += 1 + end + end + end + res << txt[sp...p] << "\n" + sp = p + sp += 1 while sp < ep and txt[sp] == ?\s + end + res + end + + ####################################################################### + + private + + ####################################################################### + + def on_tags(res, item) + attr_mask = item.turn_on + return if attr_mask.zero? + + @attr_tags.each do |tag| + if attr_mask & tag.bit != 0 + res << annotate(tag.on) + end + end + end + + def off_tags(res, item) + attr_mask = item.turn_off + return if attr_mask.zero? + + @attr_tags.reverse_each do |tag| + if attr_mask & tag.bit != 0 + res << annotate(tag.off) + end + end + end + + def convert_flow(flow) + res = "" + flow.each do |item| + case item + when String + res << convert_string(item) + when AttrChanger + off_tags(res, item) + on_tags(res, item) + when Special + res << convert_special(item) + else + raise "Unknown flow element: #{item.inspect}" + end + end + res + end + + # some of these patterns are taken from SmartyPants... + + def convert_string(item) + CGI.escapeHTML(item). + + + # convert -- to em-dash, (-- to en-dash) + gsub(/---?/, '—'). #gsub(/--/, '–'). + + # convert ... to elipsis (and make sure .... becomes .) + gsub(/\.\.\.\./, '.…').gsub(/\.\.\./, '…'). + + # convert single closing quote + gsub(%r{([^ \t\r\n\[\{$])\'}) { "#$1’" }. + gsub(%r{\'(?=\W|s\b)}) { "’" }. + + # convert single opening quote + gsub(/'/, '‘'). + + # convert double closing quote + gsub(%r{([^ \t\r\n\[\{\(])\'(?=\W)}) { "#$1”" }. 
+ + # convert double opening quote + gsub(/'/, '“'). + + # convert copyright + gsub(/\(c$/, '©'). + + # convert and registered trademark + gsub(/$r$/, '®') + + end + + def convert_special(special) + handled = false + Attribute.each_name_of(special.type) do |name| + method_name = "handle_special_#{name}" + if self.respond_to? method_name + special.text = send(method_name, special) + handled = true + end + end + raise "Unhandled special: #{special}" unless handled + special.text + end + + def convert_heading(level, flow) + res = + annotate("") + + convert_flow(flow) + + annotate("\n") + end + + def html_list_name(list_type, is_open_tag) + tags = LIST_TYPE_TO_HTML[list_type] || raise("Invalid list type: #{list_type.inspect}") + annotate(tags[ is_open_tag ? 0 : 1]) + end + + def list_item_start(am, fragment) + case fragment.type + when ListBase::BULLET, ListBase::NUMBER + annotate("

") + + when ListBase::UPPERALPHA + annotate("

") + + when ListBase::LOWERALPHA + annotate("

") + + when ListBase::LABELED + annotate("

") + + convert_flow(am.flow(fragment.param)) + + annotate("

") + + annotate("

") + + when ListBase::NOTE + annotate("") + + annotate("") + + convert_flow(am.flow(fragment.param)) + + annotate("") + + annotate("") + else + raise "Invalid list type" + end + end + + def list_end_for(fragment_type) + case fragment_type + when ListBase::BULLET, ListBase::NUMBER, ListBase::UPPERALPHA, ListBase::LOWERALPHA + "

" + when ListBase::LABELED + "" + when ListBase::NOTE + "" + else + raise "Invalid list type" + end + end + + end + +end diff --git a/lib/rdoc/markup/simple_markup/to_latex.rb b/lib/rdoc/markup/simple_markup/to_latex.rb new file mode 100644 index 0000000000..6c16278652 --- /dev/null +++ b/lib/rdoc/markup/simple_markup/to_latex.rb @@ -0,0 +1,333 @@ +require 'rdoc/markup/simple_markup/fragments' +require 'rdoc/markup/simple_markup/inline' + +require 'cgi' + +module SM + + # Convert SimpleMarkup to basic LaTeX report format + + class ToLaTeX + + BS = "\020" # \ + OB = "\021" # { + CB = "\022" # } + DL = "\023" # Dollar + + BACKSLASH = "#{BS}symbol#{OB}92#{CB}" + HAT = "#{BS}symbol#{OB}94#{CB}" + BACKQUOTE = "#{BS}symbol#{OB}0#{CB}" + TILDE = "#{DL}#{BS}sim#{DL}" + LESSTHAN = "#{DL}<#{DL}" + GREATERTHAN = "#{DL}>#{DL}" + + def self.l(str) + str.tr('\\', BS).tr('{', OB).tr('}', CB).tr('$', DL) + end + + def l(arg) + SM::ToLaTeX.l(arg) + end + + LIST_TYPE_TO_LATEX = { + ListBase::BULLET => [ l("\\begin{itemize}"), l("\\end{itemize}") ], + ListBase::NUMBER => [ l("\\begin{enumerate}"), l("\\end{enumerate}"), "\\arabic" ], + ListBase::UPPERALPHA => [ l("\\begin{enumerate}"), l("\\end{enumerate}"), "\\Alph" ], + ListBase::LOWERALPHA => [ l("\\begin{enumerate}"), l("\\end{enumerate}"), "\\alph" ], + ListBase::LABELED => [ l("\\begin{description}"), l("\\end{description}") ], + ListBase::NOTE => [ + l("\\begin{tabularx}{\\linewidth}{@{} l X @{}}"), + l("\\end{tabularx}") ], + } + + InlineTag = Struct.new(:bit, :on, :off) + + def initialize + init_tags + @list_depth = 0 + @prev_list_types = [] + end + + ## + # Set up the standard mapping of attributes to LaTeX + # + def init_tags + @attr_tags = [ + InlineTag.new(SM::Attribute.bitmap_for(:BOLD), l("\\textbf{"), l("}")), + InlineTag.new(SM::Attribute.bitmap_for(:TT), l("\\texttt{"), l("}")), + InlineTag.new(SM::Attribute.bitmap_for(:EM), l("\\emph{"), l("}")), + ] + end + + ## + # Escape a LaTeX string + def escape(str) +# 
$stderr.print "FE: ", str + s = str. +# sub(/\s+$/, ''). + gsub(/([_\${}&%#])/, "#{BS}\\1"). + gsub(/\\/, BACKSLASH). + gsub(/\^/, HAT). + gsub(/~/, TILDE). + gsub(//, GREATERTHAN). + gsub(/,,/, ",{},"). + gsub(/\`/, BACKQUOTE) +# $stderr.print "-> ", s, "\n" + s + end + + ## + # Add a new set of LaTeX tags for an attribute. We allow + # separate start and end tags for flexibility + # + def add_tag(name, start, stop) + @attr_tags << InlineTag.new(SM::Attribute.bitmap_for(name), start, stop) + end + + + ## + # Here's the client side of the visitor pattern + + def start_accepting + @res = "" + @in_list_entry = [] + end + + def end_accepting + @res.tr(BS, '\\').tr(OB, '{').tr(CB, '}').tr(DL, '$') + end + + def accept_paragraph(am, fragment) + @res << wrap(convert_flow(am.flow(fragment.txt))) + @res << "\n" + end + + def accept_verbatim(am, fragment) + @res << "\n\\begin{code}\n" + @res << fragment.txt.sub(/[\n\s]+\Z/, '') + @res << "\n\\end{code}\n\n" + end + + def accept_rule(am, fragment) + size = fragment.param + size = 10 if size > 10 + @res << "\n\n\\rule{\\linewidth}{#{size}pt}\n\n" + end + + def accept_list_start(am, fragment) + @res << list_name(fragment.type, true) <<"\n" + @in_list_entry.push false + end + + def accept_list_end(am, fragment) + if tag = @in_list_entry.pop + @res << tag << "\n" + end + @res << list_name(fragment.type, false) <<"\n" + end + + def accept_list_item(am, fragment) + if tag = @in_list_entry.last + @res << tag << "\n" + end + @res << list_item_start(am, fragment) + @res << wrap(convert_flow(am.flow(fragment.txt))) << "\n" + @in_list_entry[-1] = list_end_for(fragment.type) + end + + def accept_blank_line(am, fragment) + # @res << "\n" + end + + def accept_heading(am, fragment) + @res << convert_heading(fragment.head_level, am.flow(fragment.txt)) + end + + # This is a higher speed (if messier) version of wrap + + def wrap(txt, line_len = 76) + res = "" + sp = 0 + ep = txt.length + while sp < ep + # scan back for a space + p = sp + 
line_len - 1 + if p >= ep + p = ep + else + while p > sp and txt[p] != ?\s + p -= 1 + end + if p <= sp + p = sp + line_len + while p < ep and txt[p] != ?\s + p += 1 + end + end + end + res << txt[sp...p] << "\n" + sp = p + sp += 1 while sp < ep and txt[sp] == ?\s + end + res + end + + ####################################################################### + + private + + ####################################################################### + + def on_tags(res, item) + attr_mask = item.turn_on + return if attr_mask.zero? + + @attr_tags.each do |tag| + if attr_mask & tag.bit != 0 + res << tag.on + end + end + end + + def off_tags(res, item) + attr_mask = item.turn_off + return if attr_mask.zero? + + @attr_tags.reverse_each do |tag| + if attr_mask & tag.bit != 0 + res << tag.off + end + end + end + + def convert_flow(flow) + res = "" + flow.each do |item| + case item + when String +# $stderr.puts "Converting '#{item}'" + res << convert_string(item) + when AttrChanger + off_tags(res, item) + on_tags(res, item) + when Special + res << convert_special(item) + else + raise "Unknown flow element: #{item.inspect}" + end + end + res + end + + # some of these patterns are taken from SmartyPants... + + def convert_string(item) + + escape(item). + + + # convert ... to elipsis (and make sure .... becomes .) + gsub(/\.\.\.\./, '.\ldots{}').gsub(/\.\.\./, '\ldots{}'). + + # convert single closing quote + gsub(%r{([^ \t\r\n\[\{$])\'}) { "#$1'" }. + gsub(%r{\'(?=\W|s\b)}) { "'" }. + + # convert single opening quote + gsub(/'/, '`'). + + # convert double closing quote + gsub(%r{([^ \t\r\n\[\{\(])\"(?=\W)}) { "#$1''" }. + + # convert double opening quote + gsub(/"/, "``"). + + # convert copyright + gsub(/\(c$/, '\copyright{}') + + end + + def convert_special(special) + handled = false + Attribute.each_name_of(special.type) do |name| + method_name = "handle_special_#{name}" + if self.respond_to? 
method_name + special.text = send(method_name, special) + handled = true + end + end + raise "Unhandled special: #{special}" unless handled + special.text + end + + def convert_heading(level, flow) + res = + case level + when 1 then "\\chapter{" + when 2 then "\\section{" + when 3 then "\\subsection{" + when 4 then "\\subsubsection{" + else "\\paragraph{" + end + + convert_flow(flow) + + "}\n" + end + + def list_name(list_type, is_open_tag) + tags = LIST_TYPE_TO_LATEX[list_type] || raise("Invalid list type: #{list_type.inspect}") + if tags[2] # enumerate + if is_open_tag + @list_depth += 1 + if @prev_list_types[@list_depth] != tags[2] + case @list_depth + when 1 + roman = "i" + when 2 + roman = "ii" + when 3 + roman = "iii" + when 4 + roman = "iv" + else + raise("Too deep list: level #{@list_depth}") + end + @prev_list_types[@list_depth] = tags[2] + return l("\\renewcommand{\\labelenum#{roman}}{#{tags[2]}{enum#{roman}}}") + "\n" + tags[0] + end + else + @list_depth -= 1 + end + end + tags[ is_open_tag ? 
0 : 1] + end + + def list_item_start(am, fragment) + case fragment.type + when ListBase::BULLET, ListBase::NUMBER, ListBase::UPPERALPHA, ListBase::LOWERALPHA + "\\item " + + when ListBase::LABELED + "\\item[" + convert_flow(am.flow(fragment.param)) + "] " + + when ListBase::NOTE + convert_flow(am.flow(fragment.param)) + " & " + else + raise "Invalid list type" + end + end + + def list_end_for(fragment_type) + case fragment_type + when ListBase::BULLET, ListBase::NUMBER, ListBase::UPPERALPHA, ListBase::LOWERALPHA, ListBase::LABELED + "" + when ListBase::NOTE + "\\\\\n" + else + raise "Invalid list type" + end + end + + end + +end diff --git a/lib/rdoc/markup/test/AllTests.rb b/lib/rdoc/markup/test/AllTests.rb new file mode 100644 index 0000000000..b9c8c9dfcc --- /dev/null +++ b/lib/rdoc/markup/test/AllTests.rb @@ -0,0 +1,2 @@ +require 'TestParse.rb' +require 'TestInline.rb' diff --git a/lib/rdoc/markup/test/TestInline.rb b/lib/rdoc/markup/test/TestInline.rb new file mode 100644 index 0000000000..c76f21dce5 --- /dev/null +++ b/lib/rdoc/markup/test/TestInline.rb @@ -0,0 +1,151 @@ +require "test/unit" + +$:.unshift "../../.." 
+ +require "rdoc/markup/simple_markup/inline" + +class TestInline < Test::Unit::TestCase + + + def setup + @am = SM::AttributeManager.new + + @bold_on = @am.changed_attribute_by_name([], [:BOLD]) + @bold_off = @am.changed_attribute_by_name([:BOLD], []) + + @tt_on = @am.changed_attribute_by_name([], [:TT]) + @tt_off = @am.changed_attribute_by_name([:TT], []) + + @em_on = @am.changed_attribute_by_name([], [:EM]) + @em_off = @am.changed_attribute_by_name([:EM], []) + + @bold_em_on = @am.changed_attribute_by_name([], [:BOLD] | [:EM]) + @bold_em_off = @am.changed_attribute_by_name([:BOLD] | [:EM], []) + + @em_then_bold = @am.changed_attribute_by_name([:EM], [:EM] | [:BOLD]) + + @em_to_bold = @am.changed_attribute_by_name([:EM], [:BOLD]) + + @am.add_word_pair("{", "}", :WOMBAT) + @wombat_on = @am.changed_attribute_by_name([], [:WOMBAT]) + @wombat_off = @am.changed_attribute_by_name([:WOMBAT], []) + end + + def crossref(text) + [ @am.changed_attribute_by_name([], [:CROSSREF] | [:_SPECIAL_]), + SM::Special.new(33, text), + @am.changed_attribute_by_name([:CROSSREF] | [:_SPECIAL_], []) + ] + end + + def test_special + # class names, variable names, file names, or instance variables + @am.add_special(/( + \b([A-Z]\w+(::\w+)*) + | \#\w+[!?=]? + | \b\w+([_\/\.]+\w+)+[!?=]? 
+ )/x, + :CROSSREF) + + assert_equal(["cat"], @am.flow("cat")) + + assert_equal(["cat ", crossref("#fred"), " dog"].flatten, + @am.flow("cat #fred dog")) + + assert_equal([crossref("#fred"), " dog"].flatten, + @am.flow("#fred dog")) + + assert_equal(["cat ", crossref("#fred")].flatten, @am.flow("cat #fred")) + end + + def test_basic + assert_equal(["cat"], @am.flow("cat")) + + assert_equal(["cat ", @bold_on, "and", @bold_off, " dog"], + @am.flow("cat *and* dog")) + + assert_equal(["cat ", @bold_on, "AND", @bold_off, " dog"], + @am.flow("cat *AND* dog")) + + assert_equal(["cat ", @em_on, "And", @em_off, " dog"], + @am.flow("cat _And_ dog")) + + assert_equal(["cat *and dog*"], @am.flow("cat *and dog*")) + + assert_equal(["*cat and* dog"], @am.flow("*cat and* dog")) + + assert_equal(["cat *and ", @bold_on, "dog", @bold_off], + @am.flow("cat *and *dog*")) + + assert_equal(["cat ", @em_on, "and", @em_off, " dog"], + @am.flow("cat _and_ dog")) + + assert_equal(["cat_and_dog"], + @am.flow("cat_and_dog")) + + assert_equal(["cat ", @tt_on, "and", @tt_off, " dog"], + @am.flow("cat +and+ dog")) + + assert_equal(["cat ", @bold_on, "a_b_c", @bold_off, " dog"], + @am.flow("cat *a_b_c* dog")) + + assert_equal(["cat __ dog"], + @am.flow("cat __ dog")) + + assert_equal(["cat ", @em_on, "_", @em_off, " dog"], + @am.flow("cat ___ dog")) + + end + + def test_combined + assert_equal(["cat ", @em_on, "and", @em_off, " ", @bold_on, "dog", @bold_off], + @am.flow("cat _and_ *dog*")) + + assert_equal(["cat ", @em_on, "a__nd", @em_off, " ", @bold_on, "dog", @bold_off], + @am.flow("cat _a__nd_ *dog*")) + end + + def test_html_like + assert_equal(["cat ", @tt_on, "dog", @tt_off], @am.flow("cat dog")) + + assert_equal(["cat ", @em_on, "and", @em_off, " ", @bold_on, "dog", @bold_off], + @am.flow("cat and dog")) + + assert_equal(["cat ", @em_on, "and ", @em_then_bold, "dog", @bold_em_off], + @am.flow("cat and dog")) + + assert_equal(["cat ", @em_on, "and ", @em_to_bold, "dog", @bold_off], + 
@am.flow("cat and dog")) + + assert_equal(["cat ", @em_on, "and ", @em_to_bold, "dog", @bold_off], + @am.flow("cat and dog")) + + assert_equal([@tt_on, "cat", @tt_off, " ", @em_on, "and ", @em_to_bold, "dog", @bold_off], + @am.flow("cat and dog")) + + assert_equal(["cat ", @em_on, "and ", @em_then_bold, "dog", @bold_em_off], + @am.flow("cat and dog")) + + assert_equal(["cat ", @bold_em_on, "and", @bold_em_off, " dog"], + @am.flow("cat and dog")) + + + end + + def test_protect + assert_equal(['cat \\ dog'], @am.flow('cat \\ dog')) + + assert_equal(["cat dog"], @am.flow("cat \\dog")) + + assert_equal(["cat ", @em_on, "and", @em_off, " dog"], + @am.flow("cat and \\dog")) + + assert_equal(["*word* or text"], @am.flow("\\*word* or \\text")) + end + + def test_adding + assert_equal(["cat ", @wombat_on, "and", @wombat_off, " dog" ], + @am.flow("cat {and} dog")) +# assert_equal(["cat {and} dog" ], @am.flow("cat \\{and} dog")) + end +end diff --git a/lib/rdoc/markup/test/TestParse.rb b/lib/rdoc/markup/test/TestParse.rb new file mode 100644 index 0000000000..3ec541ce7a --- /dev/null +++ b/lib/rdoc/markup/test/TestParse.rb @@ -0,0 +1,503 @@ +require 'test/unit' + +$:.unshift "../../.." 
+ +require 'rdoc/markup/simple_markup' + +include SM + +class TestParse < Test::Unit::TestCase + + class MockOutput + def start_accepting + @res = [] + end + + def end_accepting + @res + end + + def accept_paragraph(am, fragment) + @res << fragment.to_s + end + + def accept_verbatim(am, fragment) + @res << fragment.to_s + end + + def accept_list_start(am, fragment) + @res << fragment.to_s + end + + def accept_list_end(am, fragment) + @res << fragment.to_s + end + + def accept_list_item(am, fragment) + @res << fragment.to_s + end + + def accept_blank_line(am, fragment) + @res << fragment.to_s + end + + def accept_heading(am, fragment) + @res << fragment.to_s + end + + def accept_rule(am, fragment) + @res << fragment.to_s + end + + end + + def basic_conv(str) + sm = SimpleMarkup.new + mock = MockOutput.new + sm.convert(str, mock) + sm.content + end + + def line_types(str, expected) + p = SimpleMarkup.new + mock = MockOutput.new + p.convert(str, mock) + assert_equal(expected, p.get_line_types.map{|type| type.to_s[0,1]}.join('')) + end + + def line_groups(str, expected) + p = SimpleMarkup.new + mock = MockOutput.new + + block = p.convert(str, mock) + + if block != expected + rows = (0...([expected.size, block.size].max)).collect{|i| + [expected[i]||"nil", block[i]||"nil"] + } + printf "\n\n%35s %35s\n", "Expected", "Got" + rows.each {|e,g| printf "%35s %35s\n", e.dump, g.dump } + end + + assert_equal(expected, block) + end + + def test_tabs + str = "hello\n dave" + assert_equal(str, basic_conv(str)) + str = "hello\n\tdave" + assert_equal("hello\n dave", basic_conv(str)) + str = "hello\n \tdave" + assert_equal("hello\n dave", basic_conv(str)) + str = "hello\n \tdave" + assert_equal("hello\n dave", basic_conv(str)) + str = "hello\n \tdave" + assert_equal("hello\n dave", basic_conv(str)) + str = "hello\n \tdave" + assert_equal("hello\n dave", basic_conv(str)) + str = "hello\n \tdave" + assert_equal("hello\n dave", basic_conv(str)) + str = "hello\n \tdave" + 
assert_equal("hello\n dave", basic_conv(str)) + str = "hello\n \tdave" + assert_equal("hello\n dave", basic_conv(str)) + str = "hello\n \tdave" + assert_equal("hello\n dave", basic_conv(str)) + str = ".\t\t." + assert_equal(". .", basic_conv(str)) + end + + def test_whitespace + assert_equal("hello", basic_conv("hello")) + assert_equal("hello", basic_conv(" hello ")) + assert_equal("hello", basic_conv(" \t \t hello\t\t")) + + assert_equal("1\n 2\n 3", basic_conv("1\n 2\n 3")) + assert_equal("1\n 2\n 3", basic_conv(" 1\n 2\n 3")) + + assert_equal("1\n 2\n 3\n1\n 2", basic_conv("1\n 2\n 3\n1\n 2")) + assert_equal("1\n 2\n 3\n1\n 2", basic_conv(" 1\n 2\n 3\n 1\n 2")) + + assert_equal("1\n 2\n\n 3", basic_conv(" 1\n 2\n\n 3")) + end + + def test_types + str = "now is the time" + line_types(str, 'P') + + str = "now is the time\nfor all good men" + line_types(str, 'PP') + + str = "now is the time\n code\nfor all good men" + line_types(str, 'PVP') + + str = "now is the time\n code\n more code\nfor all good men" + line_types(str, 'PVVP') + + str = "now is\n---\nthe time" + line_types(str, 'PRP') + + str = %{\ + now is + * l1 + * l2 + the time} + line_types(str, 'PLLP') + + str = %{\ + now is + * l1 + l1+ + * l2 + the time} + line_types(str, 'PLPLP') + + str = %{\ + now is + * l1 + * l1.1 + * l2 + the time} + line_types(str, 'PLLLP') + + str = %{\ + now is + * l1 + * l1.1 + text + code + code + + text + * l2 + the time} + line_types(str, 'PLLPVVBPLP') + + str = %{\ + now is + 1. l1 + * l1.1 + 2. 
l2 + the time} + line_types(str, 'PLLLP') + + str = %{\ + now is + [cat] l1 + * l1.1 + [dog] l2 + the time} + line_types(str, 'PLLLP') + + str = %{\ + now is + [cat] l1 + continuation + [dog] l2 + the time} + line_types(str, 'PLPLP') + end + + def test_groups + str = "now is the time" + line_groups(str, ["L0: Paragraph\nnow is the time"] ) + + str = "now is the time\nfor all good men" + line_groups(str, ["L0: Paragraph\nnow is the time for all good men"] ) + + str = %{\ + now is the time + code _line_ here + for all good men} + + line_groups(str, + [ "L0: Paragraph\nnow is the time", + "L0: Verbatim\n code _line_ here\n", + "L0: Paragraph\nfor all good men" + ] ) + + str = "now is the time\n code\n more code\nfor all good men" + line_groups(str, + [ "L0: Paragraph\nnow is the time", + "L0: Verbatim\n code\n more code\n", + "L0: Paragraph\nfor all good men" + ] ) + + str = %{\ + now is + * l1 + * l2 + the time} + line_groups(str, + [ "L0: Paragraph\nnow is", + "L1: ListStart\n", + "L1: ListItem\nl1", + "L1: ListItem\nl2", + "L1: ListEnd\n", + "L0: Paragraph\nthe time" + ]) + + str = %{\ + now is + * l1 + l1+ + * l2 + the time} + line_groups(str, + [ "L0: Paragraph\nnow is", + "L1: ListStart\n", + "L1: ListItem\nl1 l1+", + "L1: ListItem\nl2", + "L1: ListEnd\n", + "L0: Paragraph\nthe time" + ]) + + str = %{\ + now is + * l1 + * l1.1 + * l2 + the time} + line_groups(str, + [ "L0: Paragraph\nnow is", + "L1: ListStart\n", + "L1: ListItem\nl1", + "L2: ListStart\n", + "L2: ListItem\nl1.1", + "L2: ListEnd\n", + "L1: ListItem\nl2", + "L1: ListEnd\n", + "L0: Paragraph\nthe time" + ]) + + + str = %{\ + now is + * l1 + * l1.1 + text + code + code + + text + * l2 + the time} + line_groups(str, + [ "L0: Paragraph\nnow is", + "L1: ListStart\n", + "L1: ListItem\nl1", + "L2: ListStart\n", + "L2: ListItem\nl1.1 text", + "L2: Verbatim\n code\n code\n", + "L2: Paragraph\ntext", + "L2: ListEnd\n", + "L1: ListItem\nl2", + "L1: ListEnd\n", + "L0: Paragraph\nthe time" + ]) + + + str = %{\ 
+ now is + 1. l1 + * l1.1 + 2. l2 + the time} + line_groups(str, + [ "L0: Paragraph\nnow is", + "L1: ListStart\n", + "L1: ListItem\nl1", + "L2: ListStart\n", + "L2: ListItem\nl1.1", + "L2: ListEnd\n", + "L1: ListItem\nl2", + "L1: ListEnd\n", + "L0: Paragraph\nthe time" + ]) + + str = %{\ + now is + [cat] l1 + * l1.1 + [dog] l2 + the time} + line_groups(str, + [ "L0: Paragraph\nnow is", + "L1: ListStart\n", + "L1: ListItem\nl1", + "L2: ListStart\n", + "L2: ListItem\nl1.1", + "L2: ListEnd\n", + "L1: ListItem\nl2", + "L1: ListEnd\n", + "L0: Paragraph\nthe time" + ]) + + str = %{\ + now is + [cat] l1 + continuation + [dog] l2 + the time} + line_groups(str, + [ "L0: Paragraph\nnow is", + "L1: ListStart\n", + "L1: ListItem\nl1 continuation", + "L1: ListItem\nl2", + "L1: ListEnd\n", + "L0: Paragraph\nthe time" + ]) + + + end + + def test_verbatim_merge + str = %{\ + now is + code + the time} + + line_groups(str, + [ "L0: Paragraph\nnow is", + "L0: Verbatim\n code\n", + "L0: Paragraph\nthe time" + ]) + + + str = %{\ + now is + code + code1 + the time} + + line_groups(str, + [ "L0: Paragraph\nnow is", + "L0: Verbatim\n code\n code1\n", + "L0: Paragraph\nthe time" + ]) + + + str = %{\ + now is + code + + code1 + the time} + + line_groups(str, + [ "L0: Paragraph\nnow is", + "L0: Verbatim\n code\n\n code1\n", + "L0: Paragraph\nthe time" + ]) + + + str = %{\ + now is + code + + code1 + + the time} + + line_groups(str, + [ "L0: Paragraph\nnow is", + "L0: Verbatim\n code\n\n code1\n", + "L0: Paragraph\nthe time" + ]) + + + str = %{\ + now is + code + + code1 + + code2 + the time} + + line_groups(str, + [ "L0: Paragraph\nnow is", + "L0: Verbatim\n code\n\n code1\n\n code2\n", + "L0: Paragraph\nthe time" + ]) + + + # Folds multiple blank lines + str = %{\ + now is + code + + + code1 + + the time} + + line_groups(str, + [ "L0: Paragraph\nnow is", + "L0: Verbatim\n code\n\n code1\n", + "L0: Paragraph\nthe time" + ]) + + + end + + def test_list_split + str = %{\ + now is + * l1 + 1. 
n1 + 2. n2 + * l2 + the time} + line_groups(str, + [ "L0: Paragraph\nnow is", + "L1: ListStart\n", + "L1: ListItem\nl1", + "L1: ListEnd\n", + "L1: ListStart\n", + "L1: ListItem\nn1", + "L1: ListItem\nn2", + "L1: ListEnd\n", + "L1: ListStart\n", + "L1: ListItem\nl2", + "L1: ListEnd\n", + "L0: Paragraph\nthe time" + ]) + + end + + + def test_headings + str = "= heading one" + line_groups(str, + [ "L0: Heading\nheading one" + ]) + + str = "=== heading three" + line_groups(str, + [ "L0: Heading\nheading three" + ]) + + str = "text\n === heading three" + line_groups(str, + [ "L0: Paragraph\ntext", + "L0: Verbatim\n === heading three\n" + ]) + + str = "text\n code\n === heading three" + line_groups(str, + [ "L0: Paragraph\ntext", + "L0: Verbatim\n code\n === heading three\n" + ]) + + str = "text\n code\n=== heading three" + line_groups(str, + [ "L0: Paragraph\ntext", + "L0: Verbatim\n code\n", + "L0: Heading\nheading three" + ]) + + end + + +end -- cgit v1.2.3