summaryrefslogtreecommitdiff
path: root/lib/rexml/parsers/pullparser.rb
blob: fe4d41c959ae9cbc4131ca98bda1f713210cd765 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
require 'rexml/parseexception'
require 'rexml/parsers/baseparser'
require 'rexml/xmltokens'

module REXML
	module Parsers
		# = Using the Pull Parser
		# <em>This API is experimental, and subject to change.</em>
		#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
		#  while parser.has_next?
		#    res = parser.pull
		#    puts res[1]['att'] if res.start_element? and res[0] == 'b'
		#  end
		# See the PullEvent class for information on the content of the results.
		# The data is identical to the arguments passed for the various events to
		# the StreamListener API.
		#
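		# Notice that a problem with the document can show up as an event that
		# answers true to error?, so check for it while iterating:
		#  parser = PullParser.new( "<a>BAD DOCUMENT" )
		#  while parser.has_next?
		#    res = parser.pull
		#    raise res[1] if res.error?
		#  end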
		#
		# Nat Price gave me some good ideas for the API.
		class PullParser < BaseParser
			include XMLTokens

			# Builds a parser over +stream+ (passed straight through to the
			# BaseParser constructor), starting with an empty entity table and no
			# registered listeners.
			def initialize(stream)
				super(stream)
				@entities = {}
				@listeners = nil
			end

			# Appends +listener+ to the listener list, creating the list the first
			# time a listener is added.  Returns the list.
			# NOTE(review): nothing in this class reads @listeners; presumably the
			# superclass or a caller does -- confirm.
			def add_listener(listener)
				@listeners ||= []
				@listeners << listener
			end

			# Yields the result of #pull repeatedly for as long as has_next?
			# answers true.
			def each
				yield pull while has_next?
			end

			# Returns the superclass's peek result for +depth+, wrapped in a
			# PullEvent.
			def peek(depth = 0)
				upcoming = super(depth)
				PullEvent.new(upcoming)
			end

			# Fetches the next raw event from the superclass, post-processes it and
			# returns it wrapped in a PullEvent:
			#
			# * :entitydecl -- the declaration is remembered in the entity table
			#   (keyed by element 1, valued by element 2) unless element 2 matches
			#   PUBLIC or SYSTEM.
			# * :text -- the text run through unnormalize with the remembered
			#   entities is appended to the event as an extra trailing element.
			def pull
				raw = super
				case raw[0]
				when :text
					raw << unnormalize(raw[1], @entities)
				when :entitydecl
					name, value = raw[1], raw[2]
					@entities[name] = value unless value =~ /PUBLIC|SYSTEM/
				end
				PullEvent.new(raw)
			end
		end

		# A parsing event.  The contents of the event are accessed like an +Array+
		# (index 0 is the first content item, not the event type), and the type is
		# given either by the ...? methods, or by calling +event_type+.  The
		# contents of this object vary from event to event, but are identical to
		# the arguments passed to +StreamListener+s for each event.
		class PullEvent
			# Wraps +arg+, the raw event array: the event type Symbol at index 0
			# followed by the event's content items.  The array is stored as given,
			# not copied.
			def initialize(arg)
				@contents = arg
			end

			# Returns the content item at +index+, where index 0 is the first item
			# after the event type.  Negative indices count back from the last
			# content item (-1 is the last one).  Returns nil when +index+ falls
			# outside the content, so the event type is never returned from here;
			# use event_type for that.
			def [](index)
				# Slot 0 of @contents holds the type, so content starts at slot 1.
				return @contents[index + 1] if index >= 0

				# A negative index already addresses the tail of @contents directly;
				# only accept it while it stays clear of the type slot.
				index > -@contents.size ? @contents[index] : nil
			end

			# The type of this event.  The predicates below test for
			# :start_element, :end_element, :text, :processing_instruction,
			# :comment, :start_doctype, :attlistdecl, :elementdecl, :entitydecl,
			# :notationdecl, :entity, :cdata, :xmldecl, and :error.
			def event_type
				@contents[0]
			end

			# Content: [ String tag_name, Hash attributes ]
			def start_element?
				event_type == :start_element
			end

			# Content: [ String tag_name ]
			def end_element?
				event_type == :end_element
			end

			# Content: [ String raw_text, String unnormalized_text ]
			def text?
				event_type == :text
			end

			# Content: [ String text ]
			def instruction?
				event_type == :processing_instruction
			end

			# Content: [ String text ]
			def comment?
				event_type == :comment
			end

			# True for a :start_doctype event.
			# Content: [ String name, String pub_sys, String long_name, String uri ]
			def doctype?
				event_type == :start_doctype
			end

			# Content: [ String text ]
			def attlistdecl?
				event_type == :attlistdecl
			end

			# Content: [ String text ]
			def elementdecl?
				event_type == :elementdecl
			end

			# Due to the wonders of DTDs, an entity declaration can be just about
			# anything.  There's no way to normalize it; you'll have to interpret the
			# content yourself.  However, the following is true:
			#
			# * If the entity declaration is an internal entity:
			#   [ String name, String value ]
			#
			# Content otherwise: [ String text ]
			# NOTE(review): the general shape above is carried over from the
			# original comment -- confirm against the parser.
			def entitydecl?
				event_type == :entitydecl
			end

			# Content: [ String text ]
			def notationdecl?
				event_type == :notationdecl
			end

			# Content: [ String text ]
			def entity?
				event_type == :entity
			end

			# Content: [ String text ]
			def cdata?
				event_type == :cdata
			end

			# Content: [ String version, String encoding, String standalone ]
			def xmldecl?
				event_type == :xmldecl
			end

			# True when the event type is :error.
			def error?
				event_type == :error
			end

			# Renders the event as "<type>: <content array inspect>".
			def inspect
				"#{@contents[0]}: #{@contents[1..-1].inspect}"
			end
		end
	end
end