summary | refs | log | tree | commit | diff
path: root/lib/rexml/parsers/baseparser.rb
diff options
context:
space:
mode:
author ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-10-02 12:18:07 +0000
committer ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-10-02 12:18:07 +0000
commit 07d9346ded3dccc825c320316fe23e088d782f5e (patch)
tree 61facf427f0288270730a064ac30b8e59852af1a /lib/rexml/parsers/baseparser.rb
parent df067e31c0cc3ced138b0f53767cfd458c7a5fc0 (diff)
* Fixes a bug in the pretty printer related to an incomplete refactoring.
* Fixes another bug related to the formatting code refactoring in 3.1.7 * Fixes ticket:99, and adds Henrik's unit test (with minor modifications) * Fixes ticket:102. Fix provided by kevinj -- thanks! Great job. * Implements namespace validation in the baseparser. This means that, as per the XML namespace spec, unbound prefixes generate UndefinedNamespaceException. Also, as per the namespace spec, the 'xml' prefix must be bound to http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared in the XML. * Eggbeater missed one. * Fix for ticket:115 * Fixes a bug in the pretty printer related to an incomplete refactoring. * Merge code cleanups * Fixes another bug related to the formatting code refactoring in 3.1.7 * Indentation fix. * r1342@bean (orig r12846): nobu | 2007-07-25 17:18:55 -0400 * lib/rexml/formatters/*.rb: set properties. * r1358@bean (orig r12883): nobu | 2007-08-06 04:36:31 -0400 * lib/rexml/encodings/{ISO-8859-15,CP-1252}.rb: fixed invalid syntax. * r1359@bean (orig r13096): nobu | 2007-08-18 02:03:45 -0400 * lib/rexml/rexml.rb: removed doubled constant. * r1360@bean (orig r13097): nobu | 2007-08-18 02:12:48 -0400 * lib/rexml/rexml.rb: added encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8_6@13600 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers/baseparser.rb')
-rw-r--r-- lib/rexml/parsers/baseparser.rb 56
1 files changed, 47 insertions, 9 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 3782d61b2c..5f7a5ec43b 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,5 +1,6 @@
require 'rexml/parseexception'
require 'rexml/source'
+require 'set'
module REXML
module Parsers
@@ -24,7 +25,8 @@ module REXML
# Nat Price gave me some good ideas for the API.
class BaseParser
NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+ NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
@@ -35,7 +37,7 @@ module REXML
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
@@ -45,7 +47,7 @@ module REXML
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
@@ -133,6 +135,7 @@ module REXML
@tags = []
@stack = []
@entities = []
+ @nsstack = []
end
def position
@@ -213,6 +216,7 @@ module REXML
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true )
+ @nsstack.unshift(curr_ns=Set.new)
identity = md[1]
close = md[2]
identity =~ IDENTITY
@@ -288,6 +292,9 @@ module REXML
val = attdef[3]
val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val
+ if attdef[0] =~ /^xmlns:(.*)/
+ @nsstack[0] << $1
+ end
end
end
return [ :attlistdecl, element, pairs, contents ]
@@ -312,6 +319,7 @@ module REXML
begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
+ @nsstack.shift
last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
@@ -345,19 +353,47 @@ module REXML
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
- attrs = []
- if md[2].size > 0
- attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ attributes = {}
+ prefixes = Set.new
+ prefixes << md[2] if md[2]
+ @nsstack.unshift(curr_ns=Set.new)
+ if md[4].size > 0
+ attrs = md[4].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ attrs.each { |a,b,c,d,e|
+ if b == "xmlns"
+ if c == "xml"
+ if d != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif c == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self)
+ end
+ curr_ns << c
+ elsif b
+ prefixes << b unless b == "xml"
+ end
+ attributes[a] = e
+ }
end
- if md[4]
+ # Verify that all of the prefixes have been defined
+ for prefix in prefixes
+ unless @nsstack.find{|k| k.member?(prefix)}
+ raise UndefinedNamespaceException.new(prefix,@source,self)
+ end
+ end
+
+ if md[6]
@closed = md[1]
+ @nsstack.shift
else
@tags.push( md[1] )
end
- attributes = {}
- attrs.each { |a,b,c| attributes[a] = c }
return [ :start_element, md[1], attributes ]
end
else
@@ -371,6 +407,8 @@ module REXML
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
end
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue REXML::ParseException
raise
rescue Exception, NameError => error