summaryrefslogtreecommitdiff
path: root/lib/rexml/formatters/pretty.rb
blob: 296e6076871752eca8c4874568785ae05b46e19b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
require 'rexml/formatters/default'

module REXML
  module Formatters
    # Pretty-prints an XML document.  This destroys whitespace in text nodes
    # and will insert carriage returns and indentations.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page.  Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.
      #
      # output::
      #   An object implementing '<<(String)', to which the output will be written.
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's feeble XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        @compact = false
      end

      protected
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          if @ie_hack
            output << " "
          end
          output << "/" 
        else
          output << ">"
          # If compact and all children are text, and if the formatted output
          # is less than the specified width, then try to print everything on
          # one line
          skip = false
          if compact
            if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
              string = ""
              old_level = @level
              @level = 0
              node.children.each { |child| write( child, string ) }
              @level = old_level
              if string.length < @width
                output << string
                skip = true
              end
            end
          end
          unless skip
            output << "\n"
            @level += @indentation
            node.children.each { |child|
              next if child.kind_of?(Text) and child.to_s.strip.length == 0
              write( child, output )
              output << "\n"
            }
            @level -= @indentation
            output << ' '*@level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      def write_text( node, output )
        s = node.to_s()
        s.gsub!(/\s/,' ')
        s.squeeze!(" ")
        s = wrap(s, 80-@level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      def write_document( node, output )
        # Ok, this is a bit odd.  All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document.  If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        node.children.each { |child|
          next if child == node.children[-1] and child.instance_of?(Text)
          unless child == node.children[0] or child.instance_of?(Text) or
            (child == node.children[1] and !node.children[0].writethis)
            output << "\n"
          end
          write( child, output )
        }
      end

      private
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      def wrap(string, width)
        # Recursively wrap string at width.
        return string if string.length <= width
        place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
        return string[0,place] + "\n" + wrap(string[place+1..-1], width)
      end

    end
  end
end