diff options
Diffstat (limited to 'lib/rexml/parsers/pullparser.rb')
-rw-r--r-- | lib/rexml/parsers/pullparser.rb | 318 |
1 files changed, 161 insertions, 157 deletions
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb index 0a328ea8fc..36dc7160c3 100644 --- a/lib/rexml/parsers/pullparser.rb +++ b/lib/rexml/parsers/pullparser.rb @@ -1,95 +1,99 @@ +require 'forwardable' + require 'rexml/parseexception' require 'rexml/parsers/baseparser' require 'rexml/xmltokens' module REXML - module Parsers - # = Using the Pull Parser - # <em>This API is experimental, and subject to change.</em> - # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" ) - # while parser.has_next? - # res = parser.next - # puts res[1]['att'] if res.start_tag? and res[0] == 'b' - # end - # See the PullEvent class for information on the content of the results. - # The data is identical to the arguments passed for the various events to - # the StreamListener API. - # - # Notice that: - # parser = PullParser.new( "<a>BAD DOCUMENT" ) - # while parser.has_next? - # res = parser.next - # raise res[1] if res.error? - # end - # - # Nat Price gave me some good ideas for the API. - class PullParser - include XMLTokens - - def initialize stream - @entities = {} + module Parsers + # = Using the Pull Parser + # <em>This API is experimental, and subject to change.</em> + # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" ) + # while parser.has_next? + # res = parser.next + # puts res[1]['att'] if res.start_tag? and res[0] == 'b' + # end + # See the PullEvent class for information on the content of the results. + # The data is identical to the arguments passed for the various events to + # the StreamListener API. + # + # Notice that: + # parser = PullParser.new( "<a>BAD DOCUMENT" ) + # while parser.has_next? + # res = parser.next + # raise res[1] if res.error? + # end + # + # Nat Price gave me some good ideas for the API. + class PullParser + include XMLTokens + extend Forwardable + + def_delegators( :@parser, :has_next? ) + def_delegators( :@parser, :entity ) + def_delegators( :@parser, :empty? ) + def_delegators( :@parser, :source ) + + def initialize stream + @entities = {} @listeners = nil @parser = BaseParser.new( stream ) - end + @my_stack = [] + end def add_listener( listener ) @listeners = [] unless @listeners @listeners << listener end - def each - while has_next? - yield self.pull - end - end - - def peek depth=0 - PullEvent.new(@parser.peek(depth)) - end - - def has_next? - @parser.has_next? + def each + while has_next? + yield self.pull + end end - def pull - event = @parser.pull - case event[0] - when :entitydecl - @entities[ event[1] ] = - event[2] unless event[2] =~ /PUBLIC|SYSTEM/ - when :text - unnormalized = @parser.unnormalize( event[1], @entities ) - event << unnormalized - end - PullEvent.new( event ) - end - - def unshift token - @parser.unshift token + def peek depth=0 + if @my_stack.length <= depth + (depth - @my_stack.length + 1).times { + e = PullEvent.new(@parser.pull) + @my_stack.push(e) + } + end + @my_stack[depth] end - def entity reference - @parser.entity( reference ) + def pull + return @my_stack.shift if @my_stack.length > 0 + + event = @parser.pull + case event[0] + when :entitydecl + @entities[ event[1] ] = + event[2] unless event[2] =~ /PUBLIC|SYSTEM/ + when :text + unnormalized = @parser.unnormalize( event[1], @entities ) + event << unnormalized + end + PullEvent.new( event ) end - def empty? - @parser.empty? + def unshift token + @my_stack.unshift token + end + end + + # A parsing event. The contents of the event are accessed as an +Array?, + # and the type is given either by the ...? methods, or by accessing the + # +type+ accessor. The contents of this object vary from event to event, + # but are identical to the arguments passed to +StreamListener+s for each + # event. + class PullEvent + # The type of this event. Will be one of :tag_start, :tag_end, :text, + # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl, + # :notationdecl, :entity, :cdata, :xmldecl, or :error. + def initialize(arg) + @contents = arg end - - end - - # A parsing event. The contents of the event are accessed as an +Array?, - # and the type is given either by the ...? methods, or by accessing the - # +type+ accessor. The contents of this object vary from event to event, - # but are identical to the arguments passed to +StreamListener+s for each - # event. - class PullEvent - # The type of this event. Will be one of :tag_start, :tag_end, :text, - # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl, - # :notationdecl, :entity, :cdata, :xmldecl, or :error. - def initialize(arg) - @contents = arg - end def []( start, endd=nil) if start.kind_of? Range @@ -103,90 +107,90 @@ module REXML else raise "Illegal argument #{start.inspect} (#{start.class})" end - end - - def event_type - @contents[0] - end - - # Content: [ String tag_name, Hash attributes ] - def start_element? - @contents[0] == :start_element - end - - # Content: [ String tag_name ] - def end_element? - @contents[0] == :end_element - end - - # Content: [ String raw_text, String unnormalized_text ] - def text? - @contents[0] == :text - end - - # Content: [ String text ] - def instruction? - @contents[0] == :processing_instruction - end - - # Content: [ String text ] - def comment? - @contents[0] == :comment - end - - # Content: [ String name, String pub_sys, String long_name, String uri ] - def doctype? - @contents[0] == :start_doctype - end - - # Content: [ String text ] - def attlistdecl? - @contents[0] == :attlistdecl - end - - # Content: [ String text ] - def elementdecl? - @contents[0] == :elementdecl - end - - # Due to the wonders of DTDs, an entity declaration can be just about - # anything. There's no way to normalize it; you'll have to interpret the - # content yourself. However, the following is true: - # - # * If the entity declaration is an internal entity: - # [ String name, String value ] - # Content: [ String text ] - def entitydecl? - @contents[0] == :entitydecl - end - - # Content: [ String text ] - def notationdecl? - @contents[0] == :notationdecl - end - - # Content: [ String text ] - def entity? - @contents[0] == :entity - end - - # Content: [ String text ] - def cdata? - @contents[0] == :cdata - end - - # Content: [ String version, String encoding, String standalone ] - def xmldecl? - @contents[0] == :xmldecl - end - - def error? - @contents[0] == :error - end - - def inspect + end + + def event_type + @contents[0] + end + + # Content: [ String tag_name, Hash attributes ] + def start_element? + @contents[0] == :start_element + end + + # Content: [ String tag_name ] + def end_element? + @contents[0] == :end_element + end + + # Content: [ String raw_text, String unnormalized_text ] + def text? + @contents[0] == :text + end + + # Content: [ String text ] + def instruction? + @contents[0] == :processing_instruction + end + + # Content: [ String text ] + def comment? + @contents[0] == :comment + end + + # Content: [ String name, String pub_sys, String long_name, String uri ] + def doctype? + @contents[0] == :start_doctype + end + + # Content: [ String text ] + def attlistdecl? + @contents[0] == :attlistdecl + end + + # Content: [ String text ] + def elementdecl? + @contents[0] == :elementdecl + end + + # Due to the wonders of DTDs, an entity declaration can be just about + # anything. There's no way to normalize it; you'll have to interpret the + # content yourself. However, the following is true: + # + # * If the entity declaration is an internal entity: + # [ String name, String value ] + # Content: [ String text ] + def entitydecl? + @contents[0] == :entitydecl + end + + # Content: [ String text ] + def notationdecl? + @contents[0] == :notationdecl + end + + # Content: [ String text ] + def entity? + @contents[0] == :entity + end + + # Content: [ String text ] + def cdata? + @contents[0] == :cdata + end + + # Content: [ String version, String encoding, String standalone ] + def xmldecl? + @contents[0] == :xmldecl + end + + def error? + @contents[0] == :error + end + + def inspect @contents[0].to_s + ": " + @contents[1..-1].inspect - end - end - end + end + end + end end |