summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2007-11-04 04:50:15 +0000
committerser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2007-11-04 04:50:15 +0000
commit37886ed9a8205673ce073153f762ab05eca13ac5 (patch)
treed3489b735a95f2870cc14e0fbb01b5933a173951
parent80018be1fb538641742ccf8f1edfe6ba975a2b95 (diff)
Fixes a bug in the pretty printer related to an incomplete refactoring.
Fixes another bug related to the formatting code refactoring in 3.1.7 Fixes ticket:99, and adds Henrik's unit test (with minor modifications) Fixes ticket:102. Fix provided by kevinj -- thanks! Great job. Implements namespace validation in the baseparser. This means that, as per the XML namespace spec, unbound prefixes generate UndefinedNamespaceException. Also, as per the namespace spec, the 'xml' prefix must be bound to http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared. in the XML. Fix for ticket:115 Fixes a bug in the pretty printer related to an incomplete refactoring. Merge code cleanups Fixes another bug related to the formatting code refactoring in 3.1.7 Indentation fix. lib/rexml/formatters/*.rb: set properties. lib/rexml/encodings/{ISO-8859-15,CP-1252}.rb: fixed invalid syntax. lib/rexml/rexml.rb: removed doubled constant. lib/rexml/rexml.rb: added encoding. Fixes ticket:110 Missing include for UndefinedNamespaceException was causing errors in some cases. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8_6@13815 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--lib/rexml/attribute.rb2
-rw-r--r--lib/rexml/document.rb3
-rw-r--r--lib/rexml/element.rb11
-rw-r--r--lib/rexml/encoding.rb9
-rw-r--r--lib/rexml/parsers/baseparser.rb58
-rw-r--r--lib/rexml/parsers/treeparser.rb6
-rw-r--r--lib/rexml/rexml.rb14
-rw-r--r--lib/rexml/source.rb16
-rw-r--r--lib/rexml/undefinednamespaceexception.rb8
9 files changed, 95 insertions, 32 deletions
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index 029035d675..89c1ada36c 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -50,7 +50,7 @@ module REXML
@element = first.element
end
elsif first.kind_of? String
- @element = parent if parent.kind_of? Element
+ @element = parent
self.name = first
@normalized = second.to_s
else
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 558b5d731a..81e63c60f1 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -66,6 +66,7 @@ module REXML
def add( child )
if child.kind_of? XMLDecl
@children.unshift child
+ child.parent = self
elsif child.kind_of? DocType
# Find first Element or DocType node and insert the decl right
# before it. If there is no such node, just insert the child at the
@@ -183,7 +184,7 @@ module REXML
output = Output.new( output, xml_decl.encoding )
end
formatter = if indent > -1
- if transitive
+ if trans
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index f757cfffa8..92612036a1 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -553,6 +553,7 @@ module REXML
def attribute( name, namespace=nil )
prefix = nil
prefix = namespaces.index(namespace) if namespace
+ prefix = nil if prefix == 'xmlns'
attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
end
@@ -854,15 +855,15 @@ module REXML
# Source (see Element.initialize). If not supplied or nil, a
# new, default Element will be constructed
# Returns:: the added Element
- # a = Element.new 'a'
- # a.elements.add Element.new 'b' #-> <a><b/></a>
- # a.elements.add 'c' #-> <a><b/><c/></a>
+ # a = Element.new('a')
+ # a.elements.add(Element.new('b')) #-> <a><b/></a>
+ # a.elements.add('c') #-> <a><b/><c/></a>
def add element=nil
rv = nil
if element.nil?
- Element.new "", self, @element.context
+ Element.new("", self, @element.context)
elsif not element.kind_of?(Element)
- Element.new element, self, @element.context
+ Element.new(element, self, @element.context)
else
@element << element
element.context = @element.context
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 6cae6b644d..a01763be99 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -56,8 +56,13 @@ module REXML
def check_encoding str
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
- return UTF_16 if /\A\xfe\xff/n =~ str
- return UNILE if /\A\xff\xfe/n =~ str
+ if str[0] == 0xfe && str[1] == 0xff
+ str[0,2] = ""
+ return UTF_16
+ elsif str[0] == 0xff && str[1] == 0xfe
+ str[0,2] = ""
+ return UNILE
+ end
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
return $3.upcase if $3
return UTF_8
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 3782d61b2c..fc2354a67f 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,5 +1,7 @@
require 'rexml/parseexception'
+require 'rexml/undefinednamespaceexception'
require 'rexml/source'
+require 'set'
module REXML
module Parsers
@@ -24,7 +26,8 @@ module REXML
# Nat Price gave me some good ideas for the API.
class BaseParser
NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+ NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
@@ -35,7 +38,7 @@ module REXML
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
@@ -45,7 +48,7 @@ module REXML
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
@@ -133,6 +136,7 @@ module REXML
@tags = []
@stack = []
@entities = []
+ @nsstack = []
end
def position
@@ -188,6 +192,7 @@ module REXML
end
return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0
+ #STDERR.puts @source.encoding
@source.read if @source.buffer.size<2
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil
@@ -213,6 +218,7 @@ module REXML
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true )
+ @nsstack.unshift(curr_ns=Set.new)
identity = md[1]
close = md[2]
identity =~ IDENTITY
@@ -288,6 +294,9 @@ module REXML
val = attdef[3]
val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val
+ if attdef[0] =~ /^xmlns:(.*)/
+ @nsstack[0] << $1
+ end
end
end
return [ :attlistdecl, element, pairs, contents ]
@@ -312,6 +321,7 @@ module REXML
begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
+ @nsstack.shift
last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
@@ -345,19 +355,47 @@ module REXML
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
- attrs = []
- if md[2].size > 0
- attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ attributes = {}
+ prefixes = Set.new
+ prefixes << md[2] if md[2]
+ @nsstack.unshift(curr_ns=Set.new)
+ if md[4].size > 0
+ attrs = md[4].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ attrs.each { |a,b,c,d,e|
+ if b == "xmlns"
+ if c == "xml"
+ if d != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif c == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self)
+ end
+ curr_ns << c
+ elsif b
+ prefixes << b unless b == "xml"
+ end
+ attributes[a] = e
+ }
end
- if md[4]
+ # Verify that all of the prefixes have been defined
+ for prefix in prefixes
+ unless @nsstack.find{|k| k.member?(prefix)}
+ raise UndefinedNamespaceException.new(prefix,@source,self)
+ end
+ end
+
+ if md[6]
@closed = md[1]
+ @nsstack.shift
else
@tags.push( md[1] )
end
- attributes = {}
- attrs.each { |a,b,c| attributes[a] = c }
return [ :start_element, md[1], attributes ]
end
else
@@ -371,6 +409,8 @@ module REXML
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
end
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue REXML::ParseException
raise
rescue Exception, NameError => error
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index a53fa41925..5c3e142ea7 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -1,4 +1,5 @@
require 'rexml/validation/validationexception'
+require 'rexml/undefinednamespaceexception'
module REXML
module Parsers
@@ -29,8 +30,7 @@ module REXML
return
when :start_element
tag_stack.push(event[1])
- # find the observers for namespaces
- @build_context = @build_context.add_element( event[1], event[2] )
+ el = @build_context = @build_context.add_element( event[1], event[2] )
when :end_element
tag_stack.pop
@build_context = @build_context.parent
@@ -86,6 +86,8 @@ module REXML
end
rescue REXML::Validation::ValidationException
raise
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! )
end
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index 19805d61e1..8af1697e51 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -1,3 +1,4 @@
+# -*- encoding: utf-8 -*-
# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
#
# REXML is a _pure_ Ruby, XML 1.0 conforming,
@@ -10,8 +11,9 @@
#
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
-# Version:: 3.1.7.1
-# Date:: 2007/209
+# Version:: 3.1.7.2
+# Date:: 2007/275
+# Revision:: $Revision$
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
@@ -20,10 +22,10 @@
# or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
- COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>"
- DATE = "2007/209"
- VERSION = "3.1.7.1"
- REVISION = "$Revision: 1270$".gsub(/\$Revision:|\$/,'').strip
+ COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"
+ VERSION = "3.1.7.2"
+ DATE = "2007/275"
+ REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
Copyright = COPYRIGHT
Version = VERSION
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index ada876cde5..ce7a2c98b0 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -17,8 +17,8 @@ module REXML
elsif arg.kind_of? Source
arg
else
- raise "#{source.class} is not a valid input stream. It must walk \n"+
- "like either a String, IO, or Source."
+ raise "#{arg.class} is not a valid input stream. It must walk \n"+
+ "like either a String, an IO, or a Source."
end
end
end
@@ -134,6 +134,7 @@ module REXML
def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg
@to_utf = false
+
# Determining the encoding is a deceptively difficult issue to resolve.
# First, we check the first two bytes for UTF-16. Then we
# assume that the encoding is at least ASCII enough for the '>', and
@@ -145,13 +146,16 @@ module REXML
str = @source.read( 2 )
if encoding
self.encoding = encoding
- elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
- self.encoding = check_encoding( str )
- elsif (0xef == str[0] && 0xbb == str[1])
+ elsif 0xfe == str[0] && 0xff == str[1]
+ @line_break = "\000>"
+ elsif 0xff == str[0] && 0xfe == str[1]
+ @line_break = ">\000"
+ elsif 0xef == str[0] && 0xbb == str[1]
str += @source.read(1)
str = '' if (0xbf == str[2])
+ @line_break = ">"
else
- @line_break = '>'
+ @line_break = ">"
end
super str+@source.readline( @line_break )
end
diff --git a/lib/rexml/undefinednamespaceexception.rb b/lib/rexml/undefinednamespaceexception.rb
new file mode 100644
index 0000000000..8ebfdfd0a9
--- /dev/null
+++ b/lib/rexml/undefinednamespaceexception.rb
@@ -0,0 +1,8 @@
+require 'rexml/parseexception'
+module REXML
+ class UndefinedNamespaceException < ParseException
+ def initialize( prefix, source, parser )
+ super( "Undefined prefix #{prefix} found" )
+ end
+ end
+end