summaryrefslogtreecommitdiff
path: root/lib/rexml/doctype.rb
blob: 4a1ffb4336412f19be2625d49c2e6ded09327c33 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
require "rexml/parent"
require "rexml/parseexception"
require "rexml/namespace"
require 'rexml/entity'
require 'rexml/attlistdecl'
require 'rexml/xmltokens'

module REXML
  # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
  # ... >.  DOCTYPES can be used to declare the DTD of a document, as well as
  # being used to declare entities used in the document.
  class DocType < Parent
    include XMLTokens
    START = "<!DOCTYPE"
    STOP = ">"
    SYSTEM = "SYSTEM"
    PUBLIC = "PUBLIC"
    DEFAULT_ENTITIES = { 
      'gt'=>EntityConst::GT, 
      'lt'=>EntityConst::LT, 
      'quot'=>EntityConst::QUOT, 
      "apos"=>EntityConst::APOS 
    }

    # name is the name of the doctype
    # external_id is the referenced DTD, if given
    attr_reader :name, :external_id, :entities, :namespaces

    # Constructor
    #
    #   dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
    #   # <!DOCTYPE foo '-//I/Hate/External/IDs'>
    #   dt = DocType.new( doctype_to_clone )
    #   # Incomplete.  Shallow clone of doctype
    #
    # +Note+ that the constructor: 
    #
    #  Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
    #
    # is _deprecated_.  Do not use it.  It will probably disappear.
    def initialize( first, parent=nil )
      @entities = DEFAULT_ENTITIES
      @long_name = @uri = nil
      if first.kind_of? String
        super()
        @name = first
        @external_id = parent
      elsif first.kind_of? DocType
        super( parent )
        @name = first.name
        @external_id = first.external_id
      elsif first.kind_of? Array
        super( parent )
        @name = first[0]
        @external_id = first[1]
        @long_name = first[2]
        @uri = first[3]
      elsif first.kind_of? Source
        super( parent )
        parser = Parsers::BaseParser.new( first )
        event = parser.pull
        if event[0] == :start_doctype
          @name, @external_id, @long_name, @uri, = event[1..-1]
        end
      else
        super()
      end
    end

    def node_type
      :doctype
    end

    def attributes_of element
      rv = []
      each do |child|
        child.each do |key,val|
          rv << Attribute.new(key,val)
        end if child.kind_of? AttlistDecl and child.element_name == element
      end
      rv
    end

    def attribute_of element, attribute
      att_decl = find do |child|
        child.kind_of? AttlistDecl and
        child.element_name == element and
        child.include? attribute
      end
      return nil unless att_decl
      att_decl[attribute]
    end

    def clone
      DocType.new self
    end

    # output::
    #   Where to write the string
    # indent::
    #   An integer.  If -1, no indenting will be used; otherwise, the
    #   indentation will be this number of spaces, and children will be
    #   indented an additional amount.
    # transitive::
    #   If transitive is true and indent is >= 0, then the output will be
    #   pretty-printed in such a way that the added whitespace does not affect
    #   the absolute *value* of the document -- that is, it leaves the value
    #   and number of Text nodes in the document unchanged.
    # ie_hack::
    #   Internet Explorer is the worst piece of crap to have ever been
    #   written, with the possible exception of Windows itself.  Since IE is
    #   unable to parse proper XML, we have to provide a hack to generate XML
    #   that IE's limited abilities can handle.  This hack inserts a space 
    #   before the /> on empty tags.
    #
    def write( output, indent=0, transitive=false, ie_hack=false )
      indent( output, indent )
      output << START
      output << ' '
      output << @name
      output << " #@external_id" if @external_id
      output << " #@long_name" if @long_name
      output << " #@uri" if @uri
      unless @children.empty?
        next_indent = indent + 1
        output << ' ['
        child = nil    # speed
        @children.each { |child|
          output << "\n"
          child.write( output, next_indent )
        }
        #output << '   '*next_indent
        output << "\n]"
      end
      output << STOP
    end

    def context
      @parent.context
    end

    def entity( name )
      @entities[name].unnormalized if @entities[name]
    end

    def add child
      super(child)
      @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
      @entities[ child.name ] = child if child.kind_of? Entity
    end
    
    # This method retrieves the public identifier identifying the document's 
    # DTD.
    #
    # Method contributed by Henrik Martensson
    def public
      case @external_id
      when "SYSTEM"
        nil
      when "PUBLIC"
        strip_quotes(@long_name)
      end
    end
    
    # This method retrieves the system identifier identifying the document's DTD
    #
    # Method contributed by Henrik Martensson
    def system
      case @external_id
      when "SYSTEM"
        strip_quotes(@long_name)
      when "PUBLIC"
        @uri.kind_of?(String) ? strip_quotes(@uri) : nil
      end
    end
    
    # This method returns a list of notations that have been declared in the
    # _internal_ DTD subset. Notations in the external DTD subset are not 
    # listed.
    #
    # Method contributed by Henrik Martensson
    def notations
      children().select {|node| node.kind_of?(REXML::NotationDecl)}
    end
    
    # Retrieves a named notation. Only notations declared in the internal
    # DTD subset can be retrieved.
    #
    # Method contributed by Henrik Martensson
    def notation(name)
      notations.find { |notation_decl|
        notation_decl.name == name
      }
    end
    
    private
    
    # Method contributed by Henrik Martensson
    def strip_quotes(quoted_string)
      quoted_string =~ /^[\'\"].*[\´\"]$/ ?
        quoted_string[1, quoted_string.length-2] :
        quoted_string
    end
  end

  # We don't really handle any of these since we're not a validating
  # parser, so we can be pretty dumb about them.  All we need to be able
  # to do is spew them back out on a write()

  # This is an abstract class.  You never use this directly; it serves as a
  # parent class for the specific declarations.
  class Declaration < Child
    def initialize src
      super()
      @string = src
    end

    def to_s
      @string+'>'
    end

    def write( output, indent )
      output << ('   '*indent) if indent > 0
      output << to_s
    end
  end
  
  public
  class ElementDecl < Declaration
    def initialize( src )
      super
    end
  end

  class ExternalEntity < Child
    def initialize( src )
      super()
      @entity = src
    end
    def to_s
      @entity
    end
    def write( output, indent )
      output << @entity
    end
  end

  class NotationDecl < Child
    attr_accessor :public, :system
    def initialize name, middle, pub, sys
      super(nil)
      @name = name
      @middle = middle
      @public = pub
      @system = sys
    end

    def to_s
      "<!NOTATION #@name #@middle#{
        @public ? ' ' + public.inspect : '' 
      }#{
        @system ? ' ' +@system.inspect : ''
      }>"
    end

    def write( output, indent=-1 )
      output << ('   '*indent) if indent > 0
      output << to_s
    end
    
    # This method retrieves the name of the notation.
    #
    # Method contributed by Henrik Martensson
    def name
      @name
    end
  end
end