diff options
author | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-11-04 04:50:15 +0000 |
---|---|---|
committer | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-11-04 04:50:15 +0000 |
commit | 37886ed9a8205673ce073153f762ab05eca13ac5 (patch) | |
tree | d3489b735a95f2870cc14e0fbb01b5933a173951 /lib/rexml/parsers/baseparser.rb | |
parent | 80018be1fb538641742ccf8f1edfe6ba975a2b95 (diff) |
Fixes a bug in the pretty printer related to an incomplete refactoring.
Fixes another bug related to the formatting code refactoring in 3.1.7
Fixes ticket:99, and adds Henrik's unit test (with minor modifications)
Fixes ticket:102.
Fix provided by kevinj -- thanks! Great job.
Implements namespace validation in the baseparser. This means that, as per
the XML namespace spec, unbound prefixes generate UndefinedNamespaceException.
Also, as per the namespace spec, the 'xml' prefix must be bound to
http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared.
in the XML.
Fix for ticket:115
Fixes a bug in the pretty printer related to an incomplete refactoring.
Merge code cleanups
Fixes another bug related to the formatting code refactoring in 3.1.7
Indentation fix.
lib/rexml/formatters/*.rb: set properties.
lib/rexml/encodings/{ISO-8859-15,CP-1252}.rb: fixed invalid syntax.
lib/rexml/rexml.rb: removed doubled constant.
lib/rexml/rexml.rb: added encoding.
Fixes ticket:110
Missing include for UndefinedNamespaceException was causing errors in some
cases.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8_6@13815 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers/baseparser.rb')
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 58 |
1 files changed, 49 insertions, 9 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 3782d61b2c..fc2354a67f 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,5 +1,7 @@ require 'rexml/parseexception' +require 'rexml/undefinednamespaceexception' require 'rexml/source' +require 'set' module REXML module Parsers @@ -24,7 +26,8 @@ module REXML # Nat Price gave me some good ideas for the API. class BaseParser NCNAME_STR= '[\w:][\-\w\d.]*' - NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAMECHAR = '[\-\w\d\.:]' NAME = "([\\w:]#{NAMECHAR}*)" @@ -35,7 +38,7 @@ module REXML DOCTYPE_START = /\A\s*<!DOCTYPE\s/um DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um - ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um + ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um COMMENT_START = /\A<!--/u COMMENT_PATTERN = /<!--(.*?)-->/um CDATA_START = /\A<!\[CDATA\[/u @@ -45,7 +48,7 @@ module REXML XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um - TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um + TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um @@ -133,6 +136,7 @@ module REXML @tags = [] @stack = [] @entities = [] + @nsstack = [] end def position @@ -188,6 +192,7 @@ module REXML end return [ :end_document ] if empty? return @stack.shift if @stack.size > 0 + #STDERR.puts @source.encoding @source.read if @source.buffer.size<2 #STDERR.puts "BUFFER = #{@source.buffer.inspect}" if @document_status == nil @@ -213,6 +218,7 @@ module REXML return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START md = @source.match( DOCTYPE_PATTERN, true ) + @nsstack.unshift(curr_ns=Set.new) identity = md[1] close = md[2] identity =~ IDENTITY @@ -288,6 +294,9 @@ module REXML val = attdef[3] val = attdef[4] if val == "#FIXED " pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @nsstack[0] << $1 + end end end return [ :attlistdecl, element, pairs, contents ] @@ -312,6 +321,7 @@ module REXML begin if @source.buffer[0] == ?< if @source.buffer[1] == ?/ + @nsstack.shift last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) @@ -345,19 +355,47 @@ module REXML raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("malformed XML: missing tag start", @source) end - attrs = [] - if md[2].size > 0 - attrs = md[2].scan( ATTRIBUTE_PATTERN ) + attributes = {} + prefixes = Set.new + prefixes << md[2] if md[2] + @nsstack.unshift(curr_ns=Set.new) + if md[4].size > 0 + attrs = md[4].scan( ATTRIBUTE_PATTERN ) raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 + attrs.each { |a,b,c,d,e| + if b == "xmlns" + if c == "xml" + if d != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif c == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self) + end + curr_ns << c + elsif b + prefixes << b unless b == "xml" + end + attributes[a] = e + } end - if md[4] + # Verify that all of the prefixes have been defined + for prefix in prefixes + unless @nsstack.find{|k| k.member?(prefix)} + raise UndefinedNamespaceException.new(prefix,@source,self) + end + end + + if md[6] @closed = md[1] + @nsstack.shift else @tags.push( md[1] ) end - attributes = {} - attrs.each { |a,b,c| attributes[a] = c } return [ :start_element, md[1], attributes ] end else @@ -371,6 +409,8 @@ module REXML # return PullEvent.new( :text, md[1], unnormalized ) return [ :text, md[1] ] end + rescue REXML::UndefinedNamespaceException + raise rescue REXML::ParseException raise rescue Exception, NameError => error |