5 files changed, 597 insertions, 573 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index e95cba8904..bce4ba4c20 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -2,103 +2,103 @@ require 'rexml/parseexception'
 require 'rexml/source'
 
 module REXML
-	module Parsers
-		# = Using the Pull Parser
-		# <em>This API is experimental, and subject to change.</em>
-		#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
-		#  while parser.has_next?
-		#    res = parser.next
-		#    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
-		#  end
-		# See the PullEvent class for information on the content of the results.
-		# The data is identical to the arguments passed for the various events to
-		# the StreamListener API.
-		#
-		# Notice that:
-		#  parser = PullParser.new( "<a>BAD DOCUMENT" )
-		#  while parser.has_next?
-		#    res = parser.next
-		#    raise res[1] if res.error?
-		#  end
-		#
-		# Nat Price gave me some good ideas for the API.
-		class BaseParser
-			NCNAME_STR= '[\w:][\-\w\d.]*'
-			NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+  module Parsers
+    # = Using the Pull Parser
+    # <em>This API is experimental, and subject to change.</em>
+    #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+    #  end
+    # See the PullEvent class for information on the content of the results.
+    # The data is identical to the arguments passed for the various events to
+    # the StreamListener API.
+    #
+    # Notice that:
+    #  parser = PullParser.new( "<a>BAD DOCUMENT" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    raise res[1] if res.error?
+    #  end
+    #
+    # Nat Price gave me some good ideas for the API.
+    class BaseParser
+      NCNAME_STR= '[\w:][\-\w\d.]*'
+      NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
 
-			NAMECHAR = '[\-\w\d\.:]'
-			NAME = "([\\w:]#{NAMECHAR}*)"
-			NMTOKEN = "(?:#{NAMECHAR})+"
-			NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
-			REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
-			REFERENCE_RE = /#{REFERENCE}/
+      NAMECHAR = '[\-\w\d\.:]'
+      NAME = "([\\w:]#{NAMECHAR}*)"
+      NMTOKEN = "(?:#{NAMECHAR})+"
+      NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+      REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+      REFERENCE_RE = /#{REFERENCE}/
 
-			DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
-			DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
-			ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
-			COMMENT_START = /\A<!--/u
-			COMMENT_PATTERN = /<!--(.*?)-->/um
-			CDATA_START = /\A<!\[CDATA\[/u
-			CDATA_END = /^\s*\]\s*>/um
-			CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
-			XMLDECL_START = /\A<\?xml\s/u;
-			XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
-			INSTRUCTION_START = /\A<\?/u
-			INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
-			TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
-			CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+      DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
+      DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
+      ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+      COMMENT_START = /\A<!--/u
+      COMMENT_PATTERN = /<!--(.*?)-->/um
+      CDATA_START = /\A<!\[CDATA\[/u
+      CDATA_END = /^\s*\]\s*>/um
+      CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+      XMLDECL_START = /\A<\?xml\s/u;
+      XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
+      INSTRUCTION_START = /\A<\?/u
+      INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+      TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+      CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
 
-			VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
-			ENCODING = /\bencoding=["'](.*?)['"]/um
-			STANDALONE = /\bstandalone=["'](.*?)['"]/um
+      VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
+      ENCODING = /\bencoding=["'](.*?)['"]/um
+      STANDALONE = /\bstandalone=["'](.*?)['"]/um
 
-			ENTITY_START = /^\s*<!ENTITY/
-			IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
-			ELEMENTDECL_START = /^\s*<!ELEMENT/um
-			ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
-			SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
-			ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
-			NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
-			ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
-			ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
-			ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
-			DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
-			ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
-			ATTDEF_RE = /#{ATTDEF}/
-			ATTLISTDECL_START = /^\s*<!ATTLIST/um
-			ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
-			NOTATIONDECL_START = /^\s*<!NOTATION/um
-			PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
-			SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+      ENTITY_START = /^\s*<!ENTITY/
+      IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+      ELEMENTDECL_START = /^\s*<!ELEMENT/um
+      ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
+      SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
+      ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+      NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+      ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+      ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+      ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+      DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+      ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+      ATTDEF_RE = /#{ATTDEF}/
+      ATTLISTDECL_START = /^\s*<!ATTLIST/um
+      ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+      NOTATIONDECL_START = /^\s*<!NOTATION/um
+      PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
+      SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
 
-			TEXT_PATTERN = /\A([^<]*)/um
+      TEXT_PATTERN = /\A([^<]*)/um
 
-			# Entity constants
-			PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
-			SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
-			PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
-			EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
-			NDATADECL = "\\s+NDATA\\s+#{NAME}"
-			PEREFERENCE = "%#{NAME};"
-			ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
-			PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
-			ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
-			PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
-			GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
-			ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+      # Entity constants
+      PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+      SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+      PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+      EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+      NDATADECL = "\\s+NDATA\\s+#{NAME}"
+      PEREFERENCE = "%#{NAME};"
+      ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+      PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+      ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+      PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+      GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+      ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
 
-			EREFERENCE = /&(?!#{NAME};)/
+      EREFERENCE = /&(?!#{NAME};)/
 
-			DEFAULT_ENTITIES = { 
-				'gt' => [/&gt;/, '&gt;', '>', />/], 
-				'lt' => [/&lt;/, '&lt;', '<', /</], 
-				'quot' => [/&quot;/, '&quot;', '"', /"/], 
-				"apos" => [/&apos;/, "&apos;", "'", /'/] 
-			}
+      DEFAULT_ENTITIES = { 
+        'gt' => [/&gt;/, '&gt;', '>', />/], 
+        'lt' => [/&lt;/, '&lt;', '<', /</], 
+        'quot' => [/&quot;/, '&quot;', '"', /"/], 
+        "apos" => [/&apos;/, "&apos;", "'", /'/] 
+      }
 
-			def initialize( source )
-				self.stream = source
-			end
+      def initialize( source )  # source: anything the stream= writer accepts
+        self.stream = source    # all setup is delegated to stream=
+      end
 
       def add_listener( listener )
         if !defined?(@listeners) or !@listeners
@@ -119,315 +119,320 @@ module REXML
 
       attr_reader :source
 
-			def stream=( source )
-				if source.kind_of? String
-					@source = Source.new(source)
-				elsif source.kind_of? IO
-					@source = IOSource.new(source)
-				elsif source.kind_of? Source
-					@source = source
-				elsif defined? StringIO and source.kind_of? StringIO
-					@source = IOSource.new(source)
-				else
-					raise "#{source.class} is not a valid input stream.  It must be \n"+
-					"either a String, IO, StringIO or Source."
-				end
-				@closed = nil
-				@document_status = nil
-				@tags = []
-				@stack = []
-				@entities = []
-			end
+      # Swaps in a new input (wrapped by SourceFactory) and resets parse state.
+      def stream=( source )
+        @source = SourceFactory.create_from( source )
+        # Nothing is pending from an empty-element tag; no prolog seen yet.
+        @closed = @document_status = nil
+        # Three distinct arrays: open tag names, queued events, entities.
+        @tags, @stack, @entities = [], [], []
+      end
+
+      # Current position as reported by the underlying source.
+      #
+      # FIXME
+      # Sources that do not respond to :position are always reported as 0.
+      def position
+        return 0 unless @source.respond_to? :position
+        @source.position
+      end
 
-			# Returns true if there are no more events
-			def empty?
-        #puts "@source.empty? = #{@source.empty?}"
-        #puts "@stack.empty? = #{@stack.empty?}"
+      # Returns true if there are no more events
+      def empty?
+        #STDERR.puts "@source.empty? = #{@source.empty?}"
+        #STDERR.puts "@stack.empty? = #{@stack.empty?}"
         return (@source.empty? and @stack.empty?)
-			end
+      end
 
-			# Returns true if there are more events.  Synonymous with !empty?
-			def has_next?
+      # Returns true if there are more events.  Synonymous with !empty?
+      def has_next?
         return !(@source.empty? and @stack.empty?)
-			end
+      end
 
-			# Push an event back on the head of the stream.  This method
-			# has (theoretically) infinite depth.
-			def unshift token
-				@stack.unshift(token)
-			end
+      # Puts +token+ back at the front of the queued events, so pull hands it
+      # out before reading more input.  The depth is (theoretically) unlimited.
+      def unshift( token )
+        @stack.insert( 0, token )
+      end
 
-			# Peek at the +depth+ event in the stack.  The first element on the stack
-			# is at depth 0.  If +depth+ is -1, will parse to the end of the input
-			# stream and return the last event, which is always :end_document.
-			# Be aware that this causes the stream to be parsed up to the +depth+ 
-			# event, so you can effectively pre-parse the entire document (pull the 
-			# entire thing into memory) using this method.  
-			def peek depth=0
-				raise %Q[Illegal argument "#{depth}"] if depth < -1
-				temp = []
-				if depth == -1
-					temp.push(pull()) until empty?
-				else
-					while @stack.size+temp.size < depth+1
-						temp.push(pull())
-					end
-				end
-				@stack += temp if temp.size > 0
-				@stack[depth]
-			end
+      # Looks at the event +depth+ places into the pending stack without
+      # consuming it; depth 0 is the head of the stack.  With a +depth+ of -1
+      # the rest of the input is parsed and the last queued event returned
+      # (described upstream as always :end_document -- NOTE(review): confirm).
+      # Everything up to the +depth+ event gets parsed and kept in memory, so
+      # a deep peek can effectively pre-parse the entire document.
+      def peek depth=0
+        raise %Q[Illegal argument "#{depth}"] if depth < -1
+        pulled = []
+        if depth == -1
+          pulled << pull() until empty?
+        else
+          # Pull only until depth+1 events are available in total.
+          pulled << pull() while @stack.size + pulled.size < depth + 1
+        end
+        # Freshly pulled events queue up behind those already on the stack.
+        @stack += pulled unless pulled.empty?
+        @stack[depth]
+      end
 
-			# Returns the next event.  This is a +PullEvent+ object.
-			def pull
-				if @closed
-					x, @closed = @closed, nil
-					return [ :end_element, x ]
-				end
-				return [ :end_document ] if empty?
-				return @stack.shift if @stack.size > 0
-				@source.read if @source.buffer.size<2
-				if @document_status == nil
-					@source.consume( /^\s*/um )
-					word = @source.match( /(<[^>]*)>/um )
-					word = word[1] unless word.nil?
-					case word
-					when COMMENT_START
-						return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
-					when XMLDECL_START
-						results = @source.match( XMLDECL_PATTERN, true )[1]
-						version = VERSION.match( results )
-						version = version[1] unless version.nil?
-						encoding = ENCODING.match(results)
-						encoding = encoding[1] unless encoding.nil?
-						@source.encoding = encoding
-						standalone = STANDALONE.match(results)
-						standalone = standalone[1] unless standalone.nil?
-						return [ :xmldecl, version, encoding, standalone]
-					when INSTRUCTION_START
-						return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
-					when DOCTYPE_START
-						md = @source.match( DOCTYPE_PATTERN, true )
-						identity = md[1]
-						close = md[2]
-						identity =~ IDENTITY
-						name = $1
-						raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
-						pub_sys = $2.nil? ? nil : $2.strip
-						long_name = $3.nil? ? nil : $3.strip
-						uri = $4.nil? ? nil : $4.strip
-						args = [ :start_doctype, name, pub_sys, long_name, uri ]
-						if close == ">"
-							@document_status = :after_doctype
-							@source.read if @source.buffer.size<2
-							md = @source.match(/^\s*/um, true)
-							@stack << [ :end_doctype ]
-						else
-							@document_status = :in_doctype
-						end
-						return args
-					else
-						@document_status = :after_doctype
-						@source.read if @source.buffer.size<2
-						md = @source.match(/\s*/um, true)
-					end
-				end
-				if @document_status == :in_doctype
-					md = @source.match(/\s*(.*?>)/um)
-					case md[1]
-					when SYSTEMENTITY 
-						match = @source.match( SYSTEMENTITY, true )[1]
-						return [ :externalentity, match ]
+      # Returns the next event.  This is a +PullEvent+ object.
+      def pull
+        if @closed
+          x, @closed = @closed, nil
+          return [ :end_element, x ]
+        end
+        return [ :end_document ] if empty?
+        return @stack.shift if @stack.size > 0
+        @source.read if @source.buffer.size<2
+        #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
+        if @document_status == nil
+          #@source.consume( /^\s*/um )
+          word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
+          word = word[1] unless word.nil?
+          #STDERR.puts "WORD = #{word.inspect}"
+          case word
+          when COMMENT_START
+            return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+          when XMLDECL_START
+            #STDERR.puts "XMLDECL"
+            results = @source.match( XMLDECL_PATTERN, true )[1]
+            version = VERSION.match( results )
+            version = version[1] unless version.nil?
+            encoding = ENCODING.match(results)
+            encoding = encoding[1] unless encoding.nil?
+            @source.encoding = encoding
+            standalone = STANDALONE.match(results)
+            standalone = standalone[1] unless standalone.nil?
+            return [ :xmldecl, version, encoding, standalone ]
+          when INSTRUCTION_START
+            return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+          when DOCTYPE_START
+            md = @source.match( DOCTYPE_PATTERN, true )
+            identity, close = md[1], md[2]   # declaration text, then "[" or ">"
+            identity =~ IDENTITY
+            name = $1
+            # Use .new: calling the class like a method raised NoMethodError instead.
+            raise REXML::ParseException.new("DOCTYPE is missing a name", @source) if name.nil?
+            pub_sys = $2.nil? ? nil : $2.strip
+            long_name = $3.nil? ? nil : $3.strip
+            uri = $4.nil? ? nil : $4.strip
+            args = [ :start_doctype, name, pub_sys, long_name, uri ]
+            if close == ">"
+              @document_status = :after_doctype
+              @source.read if @source.buffer.size<2
+              md = @source.match(/^\s*/um, true)
+              @stack << [ :end_doctype ]   # no internal subset: queue the end event now
+            else
+              @document_status = :in_doctype   # "[" seen: declarations follow
+            end
+            return args
+          when /^\s+/
+          else
+            @document_status = :after_doctype
+            @source.read if @source.buffer.size<2
+            md = @source.match(/\s*/um, true)
+          end
+        end
+        if @document_status == :in_doctype
+          md = @source.match(/\s*(.*?>)/um)
+          case md[1]
+          when SYSTEMENTITY 
+            match = @source.match( SYSTEMENTITY, true )[1]
+            return [ :externalentity, match ]
 
-					when ELEMENTDECL_START
-						return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+          when ELEMENTDECL_START
+            return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
 
-					when ENTITY_START
-						match = @source.match( ENTITYDECL, true ).to_a.compact
-						match[0] = :entitydecl
-						ref = false
-						if match[1] == '%'
-							ref = true
-							match.delete_at 1
-						end
-						# Now we have to sort out what kind of entity reference this is
-						if match[2] == 'SYSTEM'
-							# External reference
-							match[3] = match[3][1..-2] # PUBID
-							match.delete_at(4) if match.size > 4 # Chop out NDATA decl
-							# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
-						elsif match[2] == 'PUBLIC'
-							# External reference
-							match[3] = match[3][1..-2] # PUBID
-							match[4] = match[4][1..-2] # HREF
-							# match is [ :entity, name, PUBLIC, pubid, href ]
-						else
-							match[2] = match[2][1..-2]
-							match.pop if match.size == 4
-							# match is [ :entity, name, value ]
-						end
-						match << '%' if ref
-						return match
-					when ATTLISTDECL_START
-						md = @source.match( ATTLISTDECL_PATTERN, true )
-						raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
-						element = md[1]
-						contents = md[0]
+          when ENTITY_START
+            match = @source.match( ENTITYDECL, true ).to_a.compact
+            match[0] = :entitydecl
+            ref = false
+            if match[1] == '%'
+              ref = true
+              match.delete_at 1
+            end
+            # Now we have to sort out what kind of entity reference this is
+            if match[2] == 'SYSTEM'
+              # External reference
+              match[3] = match[3][1..-2] # PUBID
+              match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+              # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+            elsif match[2] == 'PUBLIC'
+              # External reference
+              match[3] = match[3][1..-2] # PUBID
+              match[4] = match[4][1..-2] # HREF
+              # match is [ :entity, name, PUBLIC, pubid, href ]
+            else
+              match[2] = match[2][1..-2]
+              match.pop if match.size == 4
+              # match is [ :entity, name, value ]
+            end
+            match << '%' if ref
+            return match
+          when ATTLISTDECL_START
+            md = @source.match( ATTLISTDECL_PATTERN, true )
+            raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+            element = md[1]
+            contents = md[0]
 
-						pairs = {}
-						values = md[0].scan( ATTDEF_RE )
-						values.each do |attdef|
-							unless attdef[3] == "#IMPLIED"
-								attdef.compact!
-								val = attdef[3]
-								val = attdef[4] if val == "#FIXED "
-								pairs[attdef[0]] = val
-							end
-						end
-						return [ :attlistdecl, element, pairs, contents ]
-					when NOTATIONDECL_START
-						md = nil
-						if @source.match( PUBLIC )
-							md = @source.match( PUBLIC, true )
-						elsif @source.match( SYSTEM )
-							md = @source.match( SYSTEM, true )
-						else
-							raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
-						end
-						return [ :notationdecl, md[1], md[2], md[3] ]
-					when CDATA_END
-						@document_status = :after_doctype
-						@source.match( CDATA_END, true )
-						return [ :end_doctype ]
-					end
-				end
-				begin
-					if @source.buffer[0] == ?<
-						if @source.buffer[1] == ?/
-							last_tag = @tags.pop
-							#md = @source.match_to_consume( '>', CLOSE_MATCH)
-							md = @source.match( CLOSE_MATCH, true )
-							raise REXML::ParseException.new( "Missing end tag for "+
+            pairs = {}
+            values = md[0].scan( ATTDEF_RE )
+            values.each do |attdef|
+              unless attdef[3] == "#IMPLIED"
+                attdef.compact!
+                val = attdef[3]
+                val = attdef[4] if val == "#FIXED "
+                pairs[attdef[0]] = val
+              end
+            end
+            return [ :attlistdecl, element, pairs, contents ]
+          when NOTATIONDECL_START
+            md = nil
+            if @source.match( PUBLIC )
+              md = @source.match( PUBLIC, true )
+              vals = [md[1],md[2],md[4],md[6]]
+            elsif @source.match( SYSTEM )
+              md = @source.match( SYSTEM, true )
+              vals = [md[1],md[2],nil,md[4]]
+            else
+              raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+            end
+            return [ :notationdecl, *vals ]
+          when CDATA_END
+            @document_status = :after_doctype
+            @source.match( CDATA_END, true )
+            return [ :end_doctype ]
+          end
+        end
+        begin
+          if @source.buffer[0] == ?<
+            if @source.buffer[1] == ?/
+              last_tag = @tags.pop
+              #md = @source.match_to_consume( '>', CLOSE_MATCH)
+              md = @source.match( CLOSE_MATCH, true )
+              raise REXML::ParseException.new( "Missing end tag for "+
                 "'#{last_tag}' (got \"#{md[1]}\")", 
                 @source) unless last_tag == md[1]
-							return [ :end_element, last_tag ]
-						elsif @source.buffer[1] == ?!
-							md = @source.match(/\A(\s*[^>]*>)/um)
-							#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
-							raise REXML::ParseException.new("Malformed node", @source) unless md
-							if md[0][2] == ?-
-								md = @source.match( COMMENT_PATTERN, true )
-								return [ :comment, md[1] ] if md
-							else
-								md = @source.match( CDATA_PATTERN, true )
-								return [ :cdata, md[1] ] if md
-							end
-							raise REXML::ParseException.new( "Declarations can only occur "+
-								"in the doctype declaration.", @source)
-						elsif @source.buffer[1] == ??
-							md = @source.match( INSTRUCTION_PATTERN, true )
-							return [ :processing_instruction, md[1], md[2] ] if md
-							raise REXML::ParseException.new( "Bad instruction declaration",
-								@source)
-						else
-							# Get the next tag
-							md = @source.match(TAG_MATCH, true)
-							raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
-							attrs = []
-							if md[2].size > 0
-								attrs = md[2].scan( ATTRIBUTE_PATTERN )
-								raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
-							end
-				
-							if md[4]
-								@closed = md[1]
-							else
-								@tags.push( md[1] )
-							end
-							attributes = {}
-							attrs.each { |a,b,c| attributes[a] = c }
-							return [ :start_element, md[1], attributes ]
-						end
-					else
-						md = @source.match( TEXT_PATTERN, true )
+              return [ :end_element, last_tag ]
+            elsif @source.buffer[1] == ?!
+              md = @source.match(/\A(\s*[^>]*>)/um)
+              #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+              raise REXML::ParseException.new("Malformed node", @source) unless md
+              if md[0][2] == ?-
+                md = @source.match( COMMENT_PATTERN, true )
+                return [ :comment, md[1] ] if md
+              else
+                md = @source.match( CDATA_PATTERN, true )
+                return [ :cdata, md[1] ] if md
+              end
+              raise REXML::ParseException.new( "Declarations can only occur "+
+                "in the doctype declaration.", @source)
+            elsif @source.buffer[1] == ??
+              md = @source.match( INSTRUCTION_PATTERN, true )
+              return [ :processing_instruction, md[1], md[2] ] if md
+              raise REXML::ParseException.new( "Bad instruction declaration",
+                @source)
+            else
+              # Get the next tag
+              md = @source.match(TAG_MATCH, true)
+              raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
+              attrs = []
+              if md[2].size > 0
+                attrs = md[2].scan( ATTRIBUTE_PATTERN )
+                raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+              end
+        
+              if md[4]
+                @closed = md[1]
+              else
+                @tags.push( md[1] )
+              end
+              attributes = {}
+              attrs.each { |a,b,c| attributes[a] = c }
+              return [ :start_element, md[1], attributes ]
+            end
+          else
+            md = @source.match( TEXT_PATTERN, true )
             if md[0].length == 0
-              #puts "EMPTY = #{empty?}"
-              #puts "BUFFER = \"#{@source.buffer}\""
+              # Debug tracing, kept disabled so parsing never writes to stdout:
+              #STDERR.puts "EMPTY = #{empty?}; BUFFER = \"#{@source.buffer}\""
               @source.match( /(\s+)/, true )
             end
+            #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
             #return [ :text, "" ] if md[0].length == 0
-						# unnormalized = Text::unnormalize( md[1], self )
-						# return PullEvent.new( :text, md[1], unnormalized )
-						return [ :text, md[1] ]
-					end
-				rescue REXML::ParseException
-					raise
-				rescue Exception, NameError => error
-					raise REXML::ParseException.new( "Exception parsing",
-						@source, self, (error ? error : $!) )
-				end
-				return [ :dummy ]
-			end
+            # unnormalized = Text::unnormalize( md[1], self )
+            # return PullEvent.new( :text, md[1], unnormalized )
+            return [ :text, md[1] ]
+          end
+        rescue REXML::ParseException
+          raise
+        rescue Exception, NameError => error
+          raise REXML::ParseException.new( "Exception parsing",
+            @source, self, (error ? error : $!) )
+        end
+        return [ :dummy ]
+      end
 
-			def entity( reference, entities )
-				value = nil
-				value = entities[ reference ] if entities
-				if not value
-					value = DEFAULT_ENTITIES[ reference ]
-					value = value[2] if value
-				end
-				unnormalize( value, entities ) if value
-			end
+      # Resolves the entity named +reference+: +entities+ is consulted first,
+      # then the replacement text in DEFAULT_ENTITIES.  The value found is run
+      # through unnormalize; nil is returned when the name is unknown.
+      def entity( reference, entities )
+        value = entities && entities[ reference ]
+        value ||= ( DEFAULT_ENTITIES[ reference ] || [] )[2]
+        return nil unless value
+        unnormalize( value, entities )
+      end
 
-			# Escapes all possible entities
-			def normalize( input, entities=nil, entity_filter=nil )
-				copy = input.clone
-				# Doing it like this rather than in a loop improves the speed
-				copy.gsub!( EREFERENCE, '&amp;' )
-				entities.each do |key, value|
-					copy.gsub!( value, "&#{key};" ) unless entity_filter and 
-																			entity_filter.include?(entity)
-				end if entities
-				copy.gsub!( EREFERENCE, '&amp;' )
-				DEFAULT_ENTITIES.each do |key, value|
-					copy.gsub!( value[3], value[1] )
-				end
-				copy
-			end
+      # Escapes all possible entities in a copy of +input+ and returns the copy
+      def normalize( input, entities=nil, entity_filter=nil )
+        copy = input.clone
+        # Doing it like this rather than in a loop improves the speed
+        copy.gsub!( EREFERENCE, '&amp;' )
+        entities.each do |key, value|
+          # Filter on +key+; "entity" here invoked #entity without arguments.
+          copy.gsub!( value, "&#{key};" ) unless entity_filter and entity_filter.include?(key)
+        end if entities   # entities may be nil: then only the defaults apply
+        copy.gsub!( EREFERENCE, '&amp;' )
+        DEFAULT_ENTITIES.each do |key, value|
+          copy.gsub!( value[3], value[1] )   # raw character pattern => "&name;"
+        end
+        copy
+      end
 
-			# Unescapes all possible entities
-			def unnormalize( string, entities=nil, filter=nil )
-				rv = string.clone
-				rv.gsub!( /\r\n?/, "\n" )
-				matches = rv.scan( REFERENCE_RE )
-				return rv if matches.size == 0
-				rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
-					m=$1
-					m = "0#{m}" if m[0] == ?x
-					[Integer(m)].pack('U*')
-				}
-				matches.collect!{|x|x[0]}.compact!
-				if matches.size > 0
-					matches.each do |entity_reference|
-						unless filter and filter.include?(entity_reference)
-							entity_value = entity( entity_reference, entities )
-							if entity_value
-								re = /&#{entity_reference};/
-								rv.gsub!( re, entity_value )
-							end
-						end
-					end
-					matches.each do |entity_reference|
-						unless filter and filter.include?(entity_reference)
-							er = DEFAULT_ENTITIES[entity_reference]
-							rv.gsub!( er[0], er[2] ) if er
-						end
-					end
-					rv.gsub!( /&amp;/, '&' )
-				end
-				rv
-			end
-		end
-	end
+      # Unescapes all possible entities in a copy of +string+; returns the copy
+      def unnormalize( string, entities=nil, filter=nil )
+        rv = string.clone
+        rv.gsub!( /\r\n?/, "\n" )   # CRLF and lone CR both become LF
+        matches = rv.scan( REFERENCE_RE )   # one-element arrays: [name], or [nil] for numeric refs
+        return rv if matches.size == 0   # no references at all: nothing to expand
+        rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|   # numeric refs; leading zeros are not captured
+          m=$1
+          m = "0#{m}" if m[0] == ?x   # "x41" => "0x41" so Integer() reads it as hex
+          [Integer(m)].pack('U*')   # code point => UTF-8 character
+        }
+        matches.collect!{|x|x[0]}.compact!   # keep only the named references
+        if matches.size > 0
+          matches.each do |entity_reference|   # pass 1: values resolved through #entity
+            unless filter and filter.include?(entity_reference)
+              entity_value = entity( entity_reference, entities )
+              if entity_value
+                re = /&#{entity_reference};/
+                rv.gsub!( re, entity_value )
+              end
+            end
+          end
+          matches.each do |entity_reference|   # pass 2: built-in entities straight from DEFAULT_ENTITIES
+            unless filter and filter.include?(entity_reference)
+              er = DEFAULT_ENTITIES[entity_reference]
+              rv.gsub!( er[0], er[2] ) if er
+            end
+          end
+          rv.gsub!( /&amp;/, '&' )   # done last, after the named references above
+        end
+        rv
+      end
+    end
+  end
 end
 
 =begin
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
index 0a328ea8fc..36dc7160c3 100644
--- a/lib/rexml/parsers/pullparser.rb
+++ b/lib/rexml/parsers/pullparser.rb
@@ -1,95 +1,99 @@
+require 'forwardable'
+
 require 'rexml/parseexception'
 require 'rexml/parsers/baseparser'
 require 'rexml/xmltokens'
 
 module REXML
-	module Parsers
-		# = Using the Pull Parser
-		# <em>This API is experimental, and subject to change.</em>
-		#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
-		#  while parser.has_next?
-		#    res = parser.next
-		#    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
-		#  end
-		# See the PullEvent class for information on the content of the results.
-		# The data is identical to the arguments passed for the various events to
-		# the StreamListener API.
-		#
-		# Notice that:
-		#  parser = PullParser.new( "<a>BAD DOCUMENT" )
-		#  while parser.has_next?
-		#    res = parser.next
-		#    raise res[1] if res.error?
-		#  end
-		#
-		# Nat Price gave me some good ideas for the API.
-		class PullParser
-			include XMLTokens
-
-			def initialize stream
-				@entities = {}
+  module Parsers
+    # = Using the Pull Parser
+    # <em>This API is experimental, and subject to change.</em>
+    #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+    #  end
+    # See the PullEvent class for information on the content of the results.
+    # The data is identical to the arguments passed for the various events to
+    # the StreamListener API.
+    #
+    # Notice that:
+    #  parser = PullParser.new( "<a>BAD DOCUMENT" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    raise res[1] if res.error?
+    #  end
+    #
+    # Nat Price gave me some good ideas for the API.
+    class PullParser
+      include XMLTokens
+      extend Forwardable
+
+      def_delegators( :@parser, :has_next? )
+      def_delegators( :@parser, :entity )
+      def_delegators( :@parser, :empty? )
+      def_delegators( :@parser, :source )
+
+      def initialize stream
+        @entities = {}
         @listeners = nil
         @parser = BaseParser.new( stream )
-			end
+        @my_stack = []
+      end
 
       def add_listener( listener )
         @listeners = [] unless @listeners
         @listeners << listener
       end
 
-			def each
-				while has_next?
-					yield self.pull
-				end
-			end
-
-			def peek depth=0
-				PullEvent.new(@parser.peek(depth))
-			end
-
-      def has_next?
-        @parser.has_next?
+      def each
+        while has_next?
+          yield self.pull
+        end
       end
 
-			def pull
-				event = @parser.pull
-				case event[0]
-				when :entitydecl
-					@entities[ event[1] ] = 
-						event[2] unless event[2] =~ /PUBLIC|SYSTEM/
-				when :text
-					unnormalized = @parser.unnormalize( event[1], @entities )
-					event << unnormalized
-				end
-				PullEvent.new( event )
-			end
-
-      def unshift token
-        @parser.unshift token
+      def peek depth=0
+        if @my_stack.length <= depth
+          (depth - @my_stack.length + 1).times {
+            e = PullEvent.new(@parser.pull)
+            @my_stack.push(e)
+          }
+        end
+        @my_stack[depth]
       end
 
-      def entity reference
-        @parser.entity( reference )
+      def pull
+        return @my_stack.shift if @my_stack.length > 0
+
+        event = @parser.pull
+        case event[0]
+        when :entitydecl
+          @entities[ event[1] ] = 
+            event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+        when :text
+          unnormalized = @parser.unnormalize( event[1], @entities )
+          event << unnormalized
+        end
+        PullEvent.new( event )
       end
 
-      def empty?
-        @parser.empty?
+      def unshift token
+        @my_stack.unshift token
+      end
+    end
+
+    # A parsing event.  The contents of the event are accessed as an +Array+,
+    # and the type is given either by the ...? methods, or by accessing the
+    # +type+ accessor.  The contents of this object vary from event to event,
+    # but are identical to the arguments passed to +StreamListener+s for each
+    # event.
+    class PullEvent
+      # The type of this event.  Will be one of :start_element, :end_element,
+      # :text, :processing_instruction, :comment, :start_doctype, :attlistdecl,
+      # :elementdecl, :entitydecl, :notationdecl, :entity, :cdata, :xmldecl, or :error.
+      def initialize(arg)
+        @contents = arg
       end
-
-		end
-
-		# A parsing event.  The contents of the event are accessed as an +Array?,
-		# and the type is given either by the ...? methods, or by accessing the
-		# +type+ accessor.  The contents of this object vary from event to event,
-		# but are identical to the arguments passed to +StreamListener+s for each
-		# event.
-		class PullEvent
-			# The type of this event.  Will be one of :tag_start, :tag_end, :text,
-			# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
-			# :notationdecl, :entity, :cdata, :xmldecl, or :error.
-			def initialize(arg)
-				@contents = arg
-			end
 
       def []( start, endd=nil)
         if start.kind_of? Range
@@ -103,90 +107,90 @@ module REXML
         else
           raise "Illegal argument #{start.inspect} (#{start.class})"
         end
-			end
-
-			def event_type
-				@contents[0]
-			end
-
-			# Content: [ String tag_name, Hash attributes ]
-			def start_element?
-				@contents[0] == :start_element
-			end
-
-			# Content: [ String tag_name ]
-			def end_element?
-				@contents[0] == :end_element
-			end
-
-			# Content: [ String raw_text, String unnormalized_text ]
-			def text?
-				@contents[0] == :text
-			end
-
-			# Content: [ String text ]
-			def instruction?
-				@contents[0] == :processing_instruction
-			end
-
-			# Content: [ String text ]
-			def comment?
-				@contents[0] == :comment
-			end
-
-			# Content: [ String name, String pub_sys, String long_name, String uri ]
-			def doctype?
-				@contents[0] == :start_doctype
-			end
-
-			# Content: [ String text ]
-			def attlistdecl?
-				@contents[0] == :attlistdecl
-			end
-
-			# Content: [ String text ]
-			def elementdecl?
-				@contents[0] == :elementdecl
-			end
-
-			# Due to the wonders of DTDs, an entity declaration can be just about
-			# anything.  There's no way to normalize it; you'll have to interpret the
-			# content yourself.  However, the following is true:
-			#
-			# * If the entity declaration is an internal entity:
-			#   [ String name, String value ]
-			# Content: [ String text ]
-			def entitydecl?
-				@contents[0] == :entitydecl
-			end
-
-			# Content: [ String text ]
-			def notationdecl?
-				@contents[0] == :notationdecl
-			end
-
-			# Content: [ String text ]
-			def entity?
-				@contents[0] == :entity
-			end
-
-			# Content: [ String text ]
-			def cdata?
-				@contents[0] == :cdata
-			end
-
-			# Content: [ String version, String encoding, String standalone ]
-			def xmldecl?
-				@contents[0] == :xmldecl
-			end
-
-			def error?
-				@contents[0] == :error
-			end
-
-			def inspect
+      end
+
+      def event_type
+        @contents[0]
+      end
+
+      # Content: [ String tag_name, Hash attributes ]
+      def start_element?
+        @contents[0] == :start_element
+      end
+
+      # Content: [ String tag_name ]
+      def end_element?
+        @contents[0] == :end_element
+      end
+
+      # Content: [ String raw_text, String unnormalized_text ]
+      def text?
+        @contents[0] == :text
+      end
+
+      # Content: [ String text ]
+      def instruction?
+        @contents[0] == :processing_instruction
+      end
+
+      # Content: [ String text ]
+      def comment?
+        @contents[0] == :comment
+      end
+
+      # Content: [ String name, String pub_sys, String long_name, String uri ]
+      def doctype?
+        @contents[0] == :start_doctype
+      end
+
+      # Content: [ String text ]
+      def attlistdecl?
+        @contents[0] == :attlistdecl
+      end
+
+      # Content: [ String text ]
+      def elementdecl?
+        @contents[0] == :elementdecl
+      end
+
+      # Due to the wonders of DTDs, an entity declaration can be just about
+      # anything.  There's no way to normalize it; you'll have to interpret the
+      # content yourself.  However, the following is true:
+      #
+      # * If the entity declaration is an internal entity:
+      #   [ String name, String value ]
+      # Content: [ String text ]
+      def entitydecl?
+        @contents[0] == :entitydecl
+      end
+
+      # Content: [ String text ]
+      def notationdecl?
+        @contents[0] == :notationdecl
+      end
+
+      # Content: [ String text ]
+      def entity?
+        @contents[0] == :entity
+      end
+
+      # Content: [ String text ]
+      def cdata?
+        @contents[0] == :cdata
+      end
+
+      # Content: [ String version, String encoding, String standalone ]
+      def xmldecl?
+        @contents[0] == :xmldecl
+      end
+
+      def error?
+        @contents[0] == :error
+      end
+
+      def inspect
         @contents[0].to_s + ": " + @contents[1..-1].inspect
-			end
-		end
-	end
+      end
+    end
+  end
 end
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
index d5ee1bcfcd..61a216cec1 100644
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -1,9 +1,11 @@
 require 'rexml/parsers/baseparser'
 require 'rexml/parseexception'
 require 'rexml/namespace'
+require 'rexml/text'
 
 module REXML
 	module Parsers
+    # SAX2Parser
 		class SAX2Parser
 			def initialize source
 				@parser = BaseParser.new(source)
@@ -36,6 +38,10 @@ module REXML
 			# :start_prefix_mapping, :end_prefix_mapping, :characters,
 			# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
 			# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+      #
+      # There is an additional symbol that can be listened for: :progress.
+      # This will be called for every event generated, passing in the current 
+      # stream position.
 			#
 			# Array contains regular expressions or strings which will be matched
 			# against fully qualified element names.
@@ -161,6 +167,7 @@ module REXML
 						:elementdecl, :cdata, :notationdecl, :xmldecl
 						handle( *event )
 					end
+          handle( :progress, @parser.position )
 				end
 			end
 
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
index 996d613e15..256d0f611c 100644
--- a/lib/rexml/parsers/streamparser.rb
+++ b/lib/rexml/parsers/streamparser.rb
@@ -1,42 +1,46 @@
 module REXML
-	module Parsers
-		class StreamParser
-			def initialize source, listener
-				@listener = listener
-				@parser = BaseParser.new( source )
-			end
-
+  module Parsers
+    class StreamParser
+      def initialize source, listener
+        @listener = listener
+        @parser = BaseParser.new( source )
+      end
+      
       def add_listener( listener )
         @parser.add_listener( listener )
       end
-
-			def parse
-				# entity string
-				while true
-					event = @parser.pull
-					case event[0]
-					when :end_document
-						return
-					when :start_element
-						attrs = event[2].each do |n, v|
-							event[2][n] = @parser.unnormalize( v )
-						end
-						@listener.tag_start( event[1], attrs )
-					when :end_element
-						@listener.tag_end( event[1] )
-					when :text
-						normalized = @parser.unnormalize( event[1] )
-						@listener.text( normalized )
-					when :processing_instruction
-						@listener.instruction( *event[1,2] )
+      
+      def parse
+        # entity string
+        while true
+          event = @parser.pull
+          case event[0]
+          when :end_document
+            return
+          when :start_element
+            attrs = event[2].each do |n, v|
+              event[2][n] = @parser.unnormalize( v )
+            end
+            @listener.tag_start( event[1], attrs )
+          when :end_element
+            @listener.tag_end( event[1] )
+          when :text
+            normalized = @parser.unnormalize( event[1] )
+            @listener.text( normalized )
+          when :processing_instruction
+            @listener.instruction( *event[1,2] )
           when :start_doctype
             @listener.doctype( *event[1..-1] )
-					when :comment, :attlistdecl, :notationdecl, :elementdecl, 
-            :entitydecl, :cdata, :xmldecl, :attlistdecl
-						@listener.send( event[0].to_s, *event[1..-1] )
-					end
-				end
-			end
-		end
-	end
+          when :end_doctype
+            # FIXME: remove this condition for milestone:3.2
+            @listener.doctype_end if @listener.respond_to? :doctype_end
+          when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
+            @listener.send( event[0].to_s, *event[1..-1] )
+          when :entitydecl, :notationdecl
+            @listener.send( event[0].to_s, event[1..-1] )
+          end
+        end
+      end
+    end
+  end
 end
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index 57d11f7e23..500a53f426 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -19,8 +19,12 @@ module REXML
         begin
           while true
             event = @parser.pull
+            #STDERR.puts "TREEPARSER GOT #{event.inspect}"
             case event[0]
             when :end_document
+              unless tag_stack.empty?
+                raise ParseException.new("No close tag for #{tag_stack.inspect}")
+              end
               return
             when :start_element
               tag_stack.push(event[1])
@@ -35,10 +39,10 @@ module REXML
                   @build_context[-1] << event[1]
                 else
                   @build_context.add( 
-                    Text.new( event[1], @build_context.whitespace, nil, true ) 
+                    Text.new(event[1], @build_context.whitespace, nil, true) 
                   ) unless (
-                    event[1].strip.size==0 and 
-                    @build_context.ignore_whitespace_nodes
+                    @build_context.ignore_whitespace_nodes and
+                    event[1].strip.size==0
                   )
                 end
               end