From d464704f111d211c1f1ff9ef23ef1d755054be00 Mon Sep 17 00:00:00 2001 From: shyouhei Date: Wed, 15 Aug 2007 19:08:43 +0000 Subject: add tag v1_8_5_54 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/tags/v1_8_5_54@12952 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ruby_1_8_5/lib/rexml/parsers/sax2parser.rb | 232 +++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 ruby_1_8_5/lib/rexml/parsers/sax2parser.rb (limited to 'ruby_1_8_5/lib/rexml/parsers/sax2parser.rb') diff --git a/ruby_1_8_5/lib/rexml/parsers/sax2parser.rb b/ruby_1_8_5/lib/rexml/parsers/sax2parser.rb new file mode 100644 index 0000000000..61a216cec1 --- /dev/null +++ b/ruby_1_8_5/lib/rexml/parsers/sax2parser.rb @@ -0,0 +1,232 @@ +require 'rexml/parsers/baseparser' +require 'rexml/parseexception' +require 'rexml/namespace' +require 'rexml/text' + +module REXML + module Parsers + # SAX2Parser + class SAX2Parser + def initialize source + @parser = BaseParser.new(source) + @listeners = [] + @procs = [] + @namespace_stack = [] + @has_listeners = false + @tag_stack = [] + @entities = {} + end + + def add_listener( listener ) + @parser.add_listener( listener ) + end + + # Listen arguments: + # + # Symbol, Array, Block + # Listen to Symbol events on Array elements + # Symbol, Block + # Listen to Symbol events + # Array, Listener + # Listen to all events on Array elements + # Array, Block + # Listen to :start_element events on Array elements + # Listener + # Listen to All events + # + # Symbol can be one of: :start_element, :end_element, + # :start_prefix_mapping, :end_prefix_mapping, :characters, + # :processing_instruction, :doctype, :attlistdecl, :elementdecl, + # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment + # + # There is an additional symbol that can be listened for: :progress. + # This will be called for every event generated, passing in the current + # stream position. + # + # Array contains regular expressions or strings which will be matched + # against fully qualified element names. + # + # Listener must implement the methods in SAX2Listener + # + # Block will be passed the same arguments as a SAX2Listener method would + # be, where the method name is the same as the matched Symbol. + # See the SAX2Listener for more information. + def listen( *args, &blok ) + if args[0].kind_of? Symbol + if args.size == 2 + args[1].each { |match| @procs << [args[0], match, blok] } + else + add( [args[0], nil, blok] ) + end + elsif args[0].kind_of? Array + if args.size == 2 + args[0].each { |match| add( [nil, match, args[1]] ) } + else + args[0].each { |match| add( [ :start_element, match, blok ] ) } + end + else + add([nil, nil, args[0]]) + end + end + + def deafen( listener=nil, &blok ) + if listener + @listeners.delete_if {|item| item[-1] == listener } + @has_listeners = false if @listeners.size == 0 + else + @procs.delete_if {|item| item[-1] == blok } + end + end + + def parse + @procs.each { |sym,match,block| block.call if sym == :start_document } + @listeners.each { |sym,match,block| + block.start_document if sym == :start_document or sym.nil? + } + root = context = [] + while true + event = @parser.pull + case event[0] + when :end_document + handle( :end_document ) + break + when :end_doctype + context = context[1] + when :start_element + @tag_stack.push(event[1]) + # find the observers for namespaces + procs = get_procs( :start_prefix_mapping, event[1] ) + listeners = get_listeners( :start_prefix_mapping, event[1] ) + if procs or listeners + # break out the namespace declarations + # The attributes live in event[2] + event[2].each {|n, v| event[2][n] = @parser.normalize(v)} + nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ } + nsdecl.collect! { |n, value| [ n[6..-1], value ] } + @namespace_stack.push({}) + nsdecl.each do |n,v| + @namespace_stack[-1][n] = v + # notify observers of namespaces + procs.each { |ob| ob.call( n, v ) } if procs + listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners + end + end + event[1] =~ Namespace::NAMESPLIT + prefix = $1 + local = $2 + uri = get_namespace(prefix) + # find the observers for start_element + procs = get_procs( :start_element, event[1] ) + listeners = get_listeners( :start_element, event[1] ) + # notify observers + procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs + listeners.each { |ob| + ob.start_element( uri, local, event[1], event[2] ) + } if listeners + when :end_element + @tag_stack.pop + event[1] =~ Namespace::NAMESPLIT + prefix = $1 + local = $2 + uri = get_namespace(prefix) + # find the observers for start_element + procs = get_procs( :end_element, event[1] ) + listeners = get_listeners( :end_element, event[1] ) + # notify observers + procs.each { |ob| ob.call( uri, local, event[1] ) } if procs + listeners.each { |ob| + ob.end_element( uri, local, event[1] ) + } if listeners + + namespace_mapping = @namespace_stack.pop + # find the observers for namespaces + procs = get_procs( :end_prefix_mapping, event[1] ) + listeners = get_listeners( :end_prefix_mapping, event[1] ) + if procs or listeners + namespace_mapping.each do |prefix, uri| + # notify observers of namespaces + procs.each { |ob| ob.call( prefix ) } if procs + listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners + end + end + when :text + #normalized = @parser.normalize( event[1] ) + #handle( :characters, normalized ) + copy = event[1].clone + @entities.each { |key, value| copy = copy.gsub("&#{key};", value) } + copy.gsub!( Text::NUMERICENTITY ) {|m| + m=$1 + m = "0#{m}" if m[0] == ?x + [Integer(m)].pack('U*') + } + handle( :characters, copy ) + when :entitydecl + @entities[ event[1] ] = event[2] if event.size == 3 + handle( *event ) + when :processing_instruction, :comment, :doctype, :attlistdecl, + :elementdecl, :cdata, :notationdecl, :xmldecl + handle( *event ) + end + handle( :progress, @parser.position ) + end + end + + private + def handle( symbol, *arguments ) + tag = @tag_stack[-1] + procs = get_procs( symbol, tag ) + listeners = get_listeners( symbol, tag ) + # notify observers + procs.each { |ob| ob.call( *arguments ) } if procs + listeners.each { |l| + l.send( symbol.to_s, *arguments ) + } if listeners + end + + # The following methods are duplicates, but it is faster than using + # a helper + def get_procs( symbol, name ) + return nil if @procs.size == 0 + @procs.find_all do |sym, match, block| + #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s + ( + (sym.nil? or symbol == sym) and + ((name.nil? and match.nil?) or match.nil? or ( + (name == match) or + (match.kind_of? Regexp and name =~ match) + ) + ) + ) + end.collect{|x| x[-1]} + end + def get_listeners( symbol, name ) + return nil if @listeners.size == 0 + @listeners.find_all do |sym, match, block| + ( + (sym.nil? or symbol == sym) and + ((name.nil? and match.nil?) or match.nil? or ( + (name == match) or + (match.kind_of? Regexp and name =~ match) + ) + ) + ) + end.collect{|x| x[-1]} + end + + def add( pair ) + if pair[-1].respond_to? :call + @procs << pair unless @procs.include? pair + else + @listeners << pair unless @listeners.include? pair + @has_listeners = true + end + end + + def get_namespace( prefix ) + uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) || + (@namespace_stack.find { |ns| not ns[nil].nil? }) + uris[-1][prefix] unless uris.nil? or 0 == uris.size + end + end + end +end -- cgit v1.2.3