From 07d9346ded3dccc825c320316fe23e088d782f5e Mon Sep 17 00:00:00 2001 From: ser Date: Tue, 2 Oct 2007 12:18:07 +0000 Subject: * Fixes a bug in the pretty printer related to an incomplete refactoring. * Fixes another bug related to the formatting code refactoring in 3.1.7 * Fixes ticket:99, and adds Henrik's unit test (with minor modifications) * Fixes ticket:102. Fix provided by kevinj -- thanks! Great job. * Implements namespace validation in the baseparser. This means that, as per the XML namespace spec, unbound prefixes generate UndefinedNamespaceException. Also, as per the namespace spec, the 'xml' prefix must be bound to http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared in the XML. * Eggbeater missed one. * Fix for ticket:115 * Fixes a bug in the pretty printer related to an incomplete refactoring. * Merge code cleanups * Fixes another bug related to the formatting code refactoring in 3.1.7 * Indentation fix. * r1342@bean (orig r12846): nobu | 2007-07-25 17:18:55 -0400 * lib/rexml/formatters/*.rb: set properties. * r1358@bean (orig r12883): nobu | 2007-08-06 04:36:31 -0400 * lib/rexml/encodings/{ISO-8859-15,CP-1252}.rb: fixed invalid syntax. * r1359@bean (orig r13096): nobu | 2007-08-18 02:03:45 -0400 * lib/rexml/rexml.rb: removed doubled constant. * r1360@bean (orig r13097): nobu | 2007-08-18 02:12:48 -0400 * lib/rexml/rexml.rb: added encoding. 
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8_6@13600 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/attribute.rb | 2 +- lib/rexml/document.rb | 3 +- lib/rexml/element.rb | 11 ++++--- lib/rexml/parsers/baseparser.rb | 56 +++++++++++++++++++++++++++----- lib/rexml/parsers/treeparser.rb | 5 +-- lib/rexml/rexml.rb | 14 ++++---- lib/rexml/source.rb | 4 +-- lib/rexml/undefinednamespaceexception.rb | 8 +++++ 8 files changed, 77 insertions(+), 26 deletions(-) create mode 100644 lib/rexml/undefinednamespaceexception.rb diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index 029035d675..89c1ada36c 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -50,7 +50,7 @@ module REXML @element = first.element end elsif first.kind_of? String - @element = parent if parent.kind_of? Element + @element = parent self.name = first @normalized = second.to_s else diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 558b5d731a..81e63c60f1 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -66,6 +66,7 @@ module REXML def add( child ) if child.kind_of? XMLDecl @children.unshift child + child.parent = self elsif child.kind_of? DocType # Find first Element or DocType node and insert the decl right # before it. If there is no such node, just insert the child at the @@ -183,7 +184,7 @@ module REXML output = Output.new( output, xml_decl.encoding ) end formatter = if indent > -1 - if transitive + if trans REXML::Formatters::Transitive.new( indent, ie_hack ) else REXML::Formatters::Pretty.new( indent, ie_hack ) diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index f757cfffa8..92612036a1 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -553,6 +553,7 @@ module REXML def attribute( name, namespace=nil ) prefix = nil prefix = namespaces.index(namespace) if namespace + prefix = nil if prefix == 'xmlns' attributes.get_attribute( "#{prefix ? 
prefix + ':' : ''}#{name}" ) end @@ -854,15 +855,15 @@ module REXML # Source (see Element.initialize). If not supplied or nil, a # new, default Element will be constructed # Returns:: the added Element - # a = Element.new 'a' - # a.elements.add Element.new 'b' #-> - # a.elements.add 'c' #-> + # a = Element.new('a') + # a.elements.add(Element.new('b')) #-> + # a.elements.add('c') #-> def add element=nil rv = nil if element.nil? - Element.new "", self, @element.context + Element.new("", self, @element.context) elsif not element.kind_of?(Element) - Element.new element, self, @element.context + Element.new(element, self, @element.context) else @element << element element.context = @element.context diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 3782d61b2c..5f7a5ec43b 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,5 +1,6 @@ require 'rexml/parseexception' require 'rexml/source' +require 'set' module REXML module Parsers @@ -24,7 +25,8 @@ module REXML # Nat Price gave me some good ideas for the API. 
class BaseParser NCNAME_STR= '[\w:][\-\w\d.]*' - NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAMECHAR = '[\-\w\d\.:]' NAME = "([\\w:]#{NAMECHAR}*)" @@ -35,7 +37,7 @@ module REXML DOCTYPE_START = /\A\s*)/um - ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um + ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um COMMENT_START = /\A/um CDATA_START = /\A/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um - TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um + TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um @@ -133,6 +135,7 @@ module REXML @tags = [] @stack = [] @entities = [] + @nsstack = [] end def position @@ -213,6 +216,7 @@ module REXML return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START md = @source.match( DOCTYPE_PATTERN, true ) + @nsstack.unshift(curr_ns=Set.new) identity = md[1] close = md[2] identity =~ IDENTITY @@ -288,6 +292,9 @@ module REXML val = attdef[3] val = attdef[4] if val == "#FIXED " pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @nsstack[0] << $1 + end end end return [ :attlistdecl, element, pairs, contents ] @@ -312,6 +319,7 @@ module REXML begin if @source.buffer[0] == ?< if @source.buffer[1] == ?/ + @nsstack.shift last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) @@ -345,19 +353,47 @@ module REXML raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("malformed XML: missing tag start", @source) end - attrs = [] - if md[2].size > 0 - attrs = md[2].scan( ATTRIBUTE_PATTERN ) + attributes = {} + prefixes = Set.new + prefixes << md[2] if 
md[2] + @nsstack.unshift(curr_ns=Set.new) + if md[4].size > 0 + attrs = md[4].scan( ATTRIBUTE_PATTERN ) raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 + attrs.each { |a,b,c,d,e| + if b == "xmlns" + if c == "xml" + if d != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif c == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self) + end + curr_ns << c + elsif b + prefixes << b unless b == "xml" + end + attributes[a] = e + } end - if md[4] + # Verify that all of the prefixes have been defined + for prefix in prefixes + unless @nsstack.find{|k| k.member?(prefix)} + raise UndefinedNamespaceException.new(prefix,@source,self) + end + end + + if md[6] @closed = md[1] + @nsstack.shift else @tags.push( md[1] ) end - attributes = {} - attrs.each { |a,b,c| attributes[a] = c } return [ :start_element, md[1], attributes ] end else @@ -371,6 +407,8 @@ module REXML # return PullEvent.new( :text, md[1], unnormalized ) return [ :text, md[1] ] end + rescue REXML::UndefinedNamespaceException + raise rescue REXML::ParseException raise rescue Exception, NameError => error diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index a53fa41925..ff8261cedf 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -29,8 +29,7 @@ module REXML return when :start_element tag_stack.push(event[1]) - # find the observers for namespaces - @build_context = @build_context.add_element( event[1], event[2] ) + el = @build_context = @build_context.add_element( event[1], event[2] ) when :end_element tag_stack.pop @build_context = @build_context.parent @@ -86,6 +85,8 @@ module 
REXML end rescue REXML::Validation::ValidationException raise + rescue REXML::UndefinedNamespaceException + raise rescue raise ParseException.new( $!.message, @parser.source, @parser, $! ) end diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 19805d61e1..8af1697e51 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -1,3 +1,4 @@ +# -*- encoding: utf-8 -*- # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby. # # REXML is a _pure_ Ruby, XML 1.0 conforming, @@ -10,8 +11,9 @@ # # Main page:: http://www.germane-software.com/software/rexml # Author:: Sean Russell -# Version:: 3.1.7.1 -# Date:: 2007/209 +# Version:: 3.1.7.2 +# Date:: 2007/275 +# Revision:: $Revision$ # # This API documentation can be downloaded from the REXML home page, or can # be accessed online[http://www.germane-software.com/software/rexml_doc] @@ -20,10 +22,10 @@ # or can be accessed # online[http://www.germane-software.com/software/rexml/docs/tutorial.html] module REXML - COPYRIGHT = "Copyright © 2001-2007 Sean Russell " - DATE = "2007/209" - VERSION = "3.1.7.1" - REVISION = "$Revision: 1270$".gsub(/\$Revision:|\$/,'').strip + COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell " + VERSION = "3.1.7.2" + DATE = "2007/275" + REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip Copyright = COPYRIGHT Version = VERSION diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index ada876cde5..bc99c0ed6a 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -17,8 +17,8 @@ module REXML elsif arg.kind_of? Source arg else - raise "#{source.class} is not a valid input stream. It must walk \n"+ - "like either a String, IO, or Source." + raise "#{arg.class} is not a valid input stream. It must walk \n"+ + "like either a String, an IO, or a Source." 
end end end diff --git a/lib/rexml/undefinednamespaceexception.rb b/lib/rexml/undefinednamespaceexception.rb new file mode 100644 index 0000000000..8ebfdfd0a9 --- /dev/null +++ b/lib/rexml/undefinednamespaceexception.rb @@ -0,0 +1,8 @@ +require 'rexml/parseexception' +module REXML + class UndefinedNamespaceException < ParseException + def initialize( prefix, source, parser ) + super( "Undefined prefix #{prefix} found" ) + end + end +end -- cgit v1.2.3