diff options
author | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-10-02 01:46:32 +0000 |
---|---|---|
committer | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-10-02 01:46:32 +0000 |
commit | 9acf1749fb72fead34f9d5a7bf07b77891199680 (patch) | |
tree | d60e76c74e7bdbbd08bb27699dbe4c55af1e5944 /lib/rexml/parsers | |
parent | c1b7ca8feccf9432d0c8de5f75b43ab889cc1292 (diff) |
r1366@bean: ser | 2007-10-01 21:24:33 -0400
r1352@bean: ser | 2007-07-29 11:33:07 -0400
Implements namespace validation in the baseparser. This means that, as per
the XML namespace spec, unbound prefixes generate UndefinedNamespaceException.
Also, as per the namespace spec, the 'xml' prefix must be bound to
http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared.
in the XML.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13595 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers')
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 56 | ||||
-rw-r--r-- | lib/rexml/parsers/treeparser.rb | 5 |
2 files changed, 50 insertions, 11 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 3782d61b2c..5f7a5ec43b 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,5 +1,6 @@ require 'rexml/parseexception' require 'rexml/source' +require 'set' module REXML module Parsers @@ -24,7 +25,8 @@ module REXML # Nat Price gave me some good ideas for the API. class BaseParser NCNAME_STR= '[\w:][\-\w\d.]*' - NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAMECHAR = '[\-\w\d\.:]' NAME = "([\\w:]#{NAMECHAR}*)" @@ -35,7 +37,7 @@ module REXML DOCTYPE_START = /\A\s*<!DOCTYPE\s/um DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um - ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um + ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um COMMENT_START = /\A<!--/u COMMENT_PATTERN = /<!--(.*?)-->/um CDATA_START = /\A<!\[CDATA\[/u @@ -45,7 +47,7 @@ module REXML XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um - TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um + TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um @@ -133,6 +135,7 @@ module REXML @tags = [] @stack = [] @entities = [] + @nsstack = [] end def position @@ -213,6 +216,7 @@ module REXML return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START md = @source.match( DOCTYPE_PATTERN, true ) + @nsstack.unshift(curr_ns=Set.new) identity = md[1] close = md[2] identity =~ IDENTITY @@ -288,6 +292,9 @@ module REXML val = attdef[3] val = attdef[4] if val == "#FIXED " pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @nsstack[0] << $1 + end end end return [ :attlistdecl, element, pairs, contents ] @@ -312,6 +319,7 @@ module REXML begin if @source.buffer[0] == ?< if @source.buffer[1] == ?/ + @nsstack.shift last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) @@ -345,19 +353,47 @@ module REXML raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("malformed XML: missing tag start", @source) end - attrs = [] - if md[2].size > 0 - attrs = md[2].scan( ATTRIBUTE_PATTERN ) + attributes = {} + prefixes = Set.new + prefixes << md[2] if md[2] + @nsstack.unshift(curr_ns=Set.new) + if md[4].size > 0 + attrs = md[4].scan( ATTRIBUTE_PATTERN ) raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 + attrs.each { |a,b,c,d,e| + if b == "xmlns" + if c == "xml" + if d != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif c == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self) + end + curr_ns << c + elsif b + prefixes << b unless b == "xml" + end + attributes[a] = e + } end - if md[4] + # Verify that all of the prefixes have been defined + for prefix in prefixes + unless @nsstack.find{|k| k.member?(prefix)} + raise UndefinedNamespaceException.new(prefix,@source,self) + end + end + + if md[6] @closed = md[1] + @nsstack.shift else @tags.push( md[1] ) end - attributes = {} - attrs.each { |a,b,c| attributes[a] = c } return [ :start_element, md[1], attributes ] end else @@ -371,6 +407,8 @@ module REXML # return PullEvent.new( :text, md[1], unnormalized ) return [ :text, md[1] ] end + rescue REXML::UndefinedNamespaceException + raise rescue REXML::ParseException raise rescue Exception, NameError => error diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index a53fa41925..ff8261cedf 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -29,8 +29,7 @@ module REXML return when :start_element tag_stack.push(event[1]) - # find the observers for namespaces - @build_context = @build_context.add_element( event[1], event[2] ) + el = @build_context = @build_context.add_element( event[1], event[2] ) when :end_element tag_stack.pop @build_context = @build_context.parent @@ -86,6 +85,8 @@ module REXML end rescue REXML::Validation::ValidationException raise + rescue REXML::UndefinedNamespaceException + raise rescue raise ParseException.new( $!.message, @parser.source, @parser, $! ) end |