summaryrefslogtreecommitdiff
path: root/lib/rexml/parsers/sax2parser.rb
blob: d5ee1bcfcd098dd4f2d6db8b9064b26fca0e409c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'

module REXML
	module Parsers
		class SAX2Parser
			def initialize source
				@parser = BaseParser.new(source)
				@listeners = []
				@procs = []
				@namespace_stack = []
				@has_listeners = false
				@tag_stack = []
        @entities = {}
			end
			
      def add_listener( listener )
        @parser.add_listener( listener )
      end

			# Listen arguments:
			#
			# Symbol, Array, Block
			# 	Listen to Symbol events on Array elements
			# Symbol, Block
			#   Listen to Symbol events
			# Array, Listener
			# 	Listen to all events on Array elements
			# Array, Block
			# 	Listen to :start_element events on Array elements
			# Listener
			# 	Listen to All events
			#
			# Symbol can be one of: :start_element, :end_element,
			# :start_prefix_mapping, :end_prefix_mapping, :characters,
			# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
			# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
			#
			# Array contains regular expressions or strings which will be matched
			# against fully qualified element names.
			#
			# Listener must implement the methods in SAX2Listener
			#
			# Block will be passed the same arguments as a SAX2Listener method would
			# be, where the method name is the same as the matched Symbol.
			# See the SAX2Listener for more information.
			def listen( *args, &blok )
				if args[0].kind_of? Symbol
					if args.size == 2
						args[1].each { |match| @procs << [args[0], match, blok] }
					else
						add( [args[0], nil, blok] )
					end
				elsif args[0].kind_of? Array
					if args.size == 2
						args[0].each { |match| add( [nil, match, args[1]] ) }
					else
						args[0].each { |match| add( [ :start_element, match, blok ] ) }
					end
				else
					add([nil, nil, args[0]])
				end
			end
			
			def deafen( listener=nil, &blok )
				if listener
					@listeners.delete_if {|item| item[-1] == listener }
					@has_listeners = false if @listeners.size == 0
				else
					@procs.delete_if {|item| item[-1] == blok }
				end
			end
			
			def parse
				@procs.each { |sym,match,block| block.call if sym == :start_document }
				@listeners.each { |sym,match,block| 
					block.start_document if sym == :start_document or sym.nil?
				}
				root = context = []
				while true
					event = @parser.pull
					case event[0]
					when :end_document
						handle( :end_document )
						break
					when :end_doctype
						context = context[1]
					when :start_element
						@tag_stack.push(event[1])
						# find the observers for namespaces
						procs = get_procs( :start_prefix_mapping, event[1] )
						listeners = get_listeners( :start_prefix_mapping, event[1] )
						if procs or listeners
							# break out the namespace declarations
							# The attributes live in event[2]
							event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
							nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
							nsdecl.collect! { |n, value| [ n[6..-1], value ] }
							@namespace_stack.push({})
							nsdecl.each do |n,v|
								@namespace_stack[-1][n] = v
								# notify observers of namespaces
								procs.each { |ob| ob.call( n, v ) } if procs
								listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
							end
						end
						event[1] =~ Namespace::NAMESPLIT
						prefix = $1
						local = $2
						uri = get_namespace(prefix)
						# find the observers for start_element
						procs = get_procs( :start_element, event[1] )
						listeners = get_listeners( :start_element, event[1] )
						# notify observers
						procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
						listeners.each { |ob| 
							ob.start_element( uri, local, event[1], event[2] ) 
						} if listeners
					when :end_element
						@tag_stack.pop
						event[1] =~ Namespace::NAMESPLIT
						prefix = $1
						local = $2
						uri = get_namespace(prefix)
						# find the observers for start_element
						procs = get_procs( :end_element, event[1] )
						listeners = get_listeners( :end_element, event[1] )
						# notify observers
						procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
						listeners.each { |ob| 
							ob.end_element( uri, local, event[1] ) 
						} if listeners

						namespace_mapping = @namespace_stack.pop
						# find the observers for namespaces
						procs = get_procs( :end_prefix_mapping, event[1] )
						listeners = get_listeners( :end_prefix_mapping, event[1] )
						if procs or listeners
							namespace_mapping.each do |prefix, uri|
								# notify observers of namespaces
								procs.each { |ob| ob.call( prefix ) } if procs
								listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners
							end
						end
					when :text
            #normalized = @parser.normalize( event[1] )
            #handle( :characters, normalized )
            copy = event[1].clone
            @entities.each { |key, value| copy = copy.gsub("&#{key};", value) }
            copy.gsub!( Text::NUMERICENTITY ) {|m|
              m=$1
              m = "0#{m}" if m[0] == ?x
              [Integer(m)].pack('U*')
            }
            handle( :characters, copy )
          when :entitydecl
            @entities[ event[1] ] = event[2] if event.size == 3
						handle( *event )
					when :processing_instruction, :comment, :doctype, :attlistdecl, 
						:elementdecl, :cdata, :notationdecl, :xmldecl
						handle( *event )
					end
				end
			end

			private
			def handle( symbol, *arguments )
				tag = @tag_stack[-1]
				procs = get_procs( symbol, tag )
				listeners = get_listeners( symbol, tag )
				# notify observers
				procs.each { |ob| ob.call( *arguments ) } if procs
				listeners.each { |l| 
					l.send( symbol.to_s, *arguments ) 
				} if listeners
			end

			# The following methods are duplicates, but it is faster than using
			# a helper
			def get_procs( symbol, name )
				return nil if @procs.size == 0
				@procs.find_all do |sym, match, block|
          #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
					(
						(sym.nil? or symbol == sym) and 
						((name.nil? and match.nil?) or match.nil? or (
							(name == match) or
							(match.kind_of? Regexp and name =~ match)
							)
						)
					)
				end.collect{|x| x[-1]}
			end
			def get_listeners( symbol, name )
				return nil if @listeners.size == 0
				@listeners.find_all do |sym, match, block|
					(
						(sym.nil? or symbol == sym) and 
						((name.nil? and match.nil?) or match.nil? or (
							(name == match) or
							(match.kind_of? Regexp and name =~ match)
							)
						)
					)
				end.collect{|x| x[-1]}
			end

			def add( pair )
				if pair[-1].respond_to? :call
					@procs << pair unless @procs.include? pair
				else
					@listeners << pair unless @listeners.include? pair
					@has_listeners = true
				end
			end

			def get_namespace( prefix ) 
        uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
					(@namespace_stack.find { |ns| not ns[nil].nil? })
				uris[-1][prefix] unless uris.nil? or 0 == uris.size
			end
		end
	end
end