summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/rexml/encoding.rb74
-rw-r--r--lib/rexml/encodings/EUC-JP.rb24
-rw-r--r--lib/rexml/encodings/EUC-JP_decl.rb6
-rw-r--r--lib/rexml/encodings/ICONV.rb14
-rw-r--r--lib/rexml/encodings/ISO-8859-1.rb4
-rw-r--r--lib/rexml/encodings/ISO-8859-1_decl.rb6
-rw-r--r--lib/rexml/encodings/Shift-JIS_decl.rb6
-rw-r--r--lib/rexml/encodings/UNILE.rb4
-rw-r--r--lib/rexml/encodings/UNILE_decl.rb6
-rw-r--r--lib/rexml/encodings/US-ASCII.rb4
-rw-r--r--lib/rexml/encodings/US-ASCII_decl.rb6
-rw-r--r--lib/rexml/encodings/UTF-16.rb4
-rw-r--r--lib/rexml/encodings/UTF-16_decl.rb6
-rw-r--r--lib/rexml/encodings/UTF-8.rb11
-rw-r--r--lib/rexml/light/node.rb77
-rw-r--r--lib/rexml/output.rb4
-rw-r--r--lib/rexml/parseexception.rb21
-rw-r--r--lib/rexml/parsers/baseparser.rb53
-rw-r--r--lib/rexml/parsers/lightparser.rb14
-rw-r--r--lib/rexml/quickpath.rb2
-rw-r--r--lib/rexml/rexml.rb4
-rw-r--r--lib/rexml/source.rb30
-rw-r--r--lib/rexml/xpath_parser.rb25
23 files changed, 183 insertions, 222 deletions
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 06385d8d52..ad8ba7e342 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -2,61 +2,49 @@ module REXML
module Encoding
@@uconv_available = false
- ENCODING_CLAIMS = { }
-
- def Encoding.claim( encoding_str, match=nil )
- if match
- ENCODING_CLAIMS[ match ] = encoding_str
- else
- ENCODING_CLAIMS[ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])#{encoding_str}\2/i ] = encoding_str
- end
- end
-
# Native, default format is UTF-8, so it is declared here rather than in
# an encodings/ definition.
UTF_8 = 'UTF-8'
- claim( UTF_8 )
+ UTF_16 = 'UTF-16'
+ UNILE = 'UNILE'
# ID ---> Encoding name
attr_reader :encoding
def encoding=( enc )
- enc = UTF_8 unless enc
- @encoding = enc.upcase
- require "rexml/encodings/#@encoding" unless @encoding == UTF_8
- end
-
- def check_encoding str
- rv = ENCODING_CLAIMS.find{|k,v| str =~ k }
- # Raise an exception if there is a declared encoding and we don't
- # recognize it
- unless rv
- if str =~ /^\s*<?xml\s*version=(['"]).*?\1\s*encoding=(["'])(.*?)\2/
- raise "A matching encoding handler was not found for encoding '#{$3}', or the encoding handler failed to load due to a missing support library (such as uconv)."
+ old_verbosity = $VERBOSE
+ begin
+ $VERBOSE = false
+ return if defined? @encoding and enc == @encoding
+ if enc and enc != UTF_8
+ @encoding = enc.upcase
+ begin
+ load 'rexml/encodings/ICONV.rb'
+ Iconv::iconv( UTF_8, @encoding, "" )
+ rescue LoadError, Exception => err
+ enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
+ begin
+ load enc_file
+ rescue LoadError
+ raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
+ end
+ end
else
- return UTF_8
+ enc = UTF_8
+ @encoding = enc.upcase
+ load 'rexml/encodings/UTF-8.rb'
end
+ ensure
+ $VERBOSE = old_verbosity
end
- return rv[1]
end
- def to_utf_8(str)
- return str
- end
-
- def from_utf_8 content
- return content
- end
- end
-
- module Encodingses
- encodings = []
- $:.each do |incl_dir|
- if Dir[ File.join(incl_dir, 'rexml', 'encodings') ].size > 0
- encodings |= Dir[ File.join(incl_dir, 'rexml', 'encodings', '*_decl.rb') ]
- end
- encodings.collect!{ |f| File.basename(f) }
- encodings.uniq!
+ def check_encoding str
+ # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+ return UTF_16 if str[0] == 254 && str[1] == 255
+ return UNILE if str[0] == 255 && str[1] == 254
+ str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
+ return $1.upcase if $1
+ return UTF_8
end
- encodings.each { |enc| require "rexml/encodings/#{enc}" }
end
end
diff --git a/lib/rexml/encodings/EUC-JP.rb b/lib/rexml/encodings/EUC-JP.rb
index def760b303..8b146e5f0a 100644
--- a/lib/rexml/encodings/EUC-JP.rb
+++ b/lib/rexml/encodings/EUC-JP.rb
@@ -3,30 +3,30 @@ begin
module REXML
module Encoding
- def from_euc_jp(str)
+ def decode(str)
return Uconv::euctou8(str)
end
- def to_euc_jp content
+ def encode content
return Uconv::u8toeuc(content)
end
end
end
rescue LoadError
begin
- require 'iconv'
- module REXML
- module Encoding
- def from_euc_jp(str)
- return Iconv::iconv("utf-8", "euc-jp", str).join('')
- end
+ require 'iconv'
+ module REXML
+ module Encoding
+ def decode(str)
+ return Iconv::iconv("utf-8", "euc-jp", str)[0]
+ end
- def to_euc_jp content
- return Iconv::iconv("euc-jp", "utf-8", content).join('')
+ def encode content
+ return Iconv::iconv("euc-jp", "utf-8", content)[0]
+ end
end
end
- end
rescue LoadError
- raise "uconv or iconv is required for Japanese encoding support."
+ raise "uconv or iconv is required for Japanese encoding support."
end
end
diff --git a/lib/rexml/encodings/EUC-JP_decl.rb b/lib/rexml/encodings/EUC-JP_decl.rb
deleted file mode 100644
index 4c7cd828a6..0000000000
--- a/lib/rexml/encodings/EUC-JP_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- EUC_JP = 'EUC-JP'
- claim( EUC_JP )
- end
-end
diff --git a/lib/rexml/encodings/ICONV.rb b/lib/rexml/encodings/ICONV.rb
new file mode 100644
index 0000000000..f1b5c80b87
--- /dev/null
+++ b/lib/rexml/encodings/ICONV.rb
@@ -0,0 +1,14 @@
+require "iconv"
+raise LoadError unless defined? Iconv
+
+module REXML
+ module Encoding
+ def decode( str )
+ return Iconv::iconv(UTF_8, @encoding, str)[0]
+ end
+
+ def encode( content )
+ return Iconv::iconv(@encoding, UTF_8, content)[0]
+ end
+ end
+end
diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb
index 98c5aff3b2..0cb9afd147 100644
--- a/lib/rexml/encodings/ISO-8859-1.rb
+++ b/lib/rexml/encodings/ISO-8859-1.rb
@@ -1,7 +1,7 @@
module REXML
module Encoding
# Convert from UTF-8
- def to_iso_8859_1 content
+ def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
@@ -16,7 +16,7 @@ module REXML
end
# Convert to UTF-8
- def from_iso_8859_1(str)
+ def decode(str)
str.unpack('C*').pack('U*')
end
end
diff --git a/lib/rexml/encodings/ISO-8859-1_decl.rb b/lib/rexml/encodings/ISO-8859-1_decl.rb
deleted file mode 100644
index a738d30472..0000000000
--- a/lib/rexml/encodings/ISO-8859-1_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- ISO_8859_1 = 'ISO-8859-1'
- claim( ISO_8859_1 )
- end
-end
diff --git a/lib/rexml/encodings/Shift-JIS_decl.rb b/lib/rexml/encodings/Shift-JIS_decl.rb
deleted file mode 100644
index 66f650144a..0000000000
--- a/lib/rexml/encodings/Shift-JIS_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- claim( 'Shift-JIS' )
- claim( 'Shift_JIS' )
- end
-end
diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb
index 74bed14340..e57a784061 100644
--- a/lib/rexml/encodings/UNILE.rb
+++ b/lib/rexml/encodings/UNILE.rb
@@ -1,6 +1,6 @@
module REXML
module Encoding
- def to_unile content
+ def encode content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
@@ -15,7 +15,7 @@ module REXML
array_enc.pack('C*')
end
- def from_unile(str)
+ def decode(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
diff --git a/lib/rexml/encodings/UNILE_decl.rb b/lib/rexml/encodings/UNILE_decl.rb
deleted file mode 100644
index 9e1c11dc03..0000000000
--- a/lib/rexml/encodings/UNILE_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- UNILE = 'UNILE'
- claim( UNILE, /^\377\376/ )
- end
-end
diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb
index 4ca2c82a83..0cb9afd147 100644
--- a/lib/rexml/encodings/US-ASCII.rb
+++ b/lib/rexml/encodings/US-ASCII.rb
@@ -1,7 +1,7 @@
module REXML
module Encoding
# Convert from UTF-8
- def to_us_ascii content
+ def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
@@ -16,7 +16,7 @@ module REXML
end
# Convert to UTF-8
- def from_us_ascii(str)
+ def decode(str)
str.unpack('C*').pack('U*')
end
end
diff --git a/lib/rexml/encodings/US-ASCII_decl.rb b/lib/rexml/encodings/US-ASCII_decl.rb
deleted file mode 100644
index 1e69234fff..0000000000
--- a/lib/rexml/encodings/US-ASCII_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- US_ASCII = 'US-ASCII'
- claim( US_ASCII )
- end
-end
diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb
index 2aeef76a0c..31f2d81a5b 100644
--- a/lib/rexml/encodings/UTF-16.rb
+++ b/lib/rexml/encodings/UTF-16.rb
@@ -1,6 +1,6 @@
module REXML
module Encoding
- def to_utf_16 content
+ def encode content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
@@ -15,7 +15,7 @@ module REXML
array_enc.pack('C*')
end
- def from_utf_16(str)
+ def decode(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(arrayEnc.size-1, 2){|i|
diff --git a/lib/rexml/encodings/UTF-16_decl.rb b/lib/rexml/encodings/UTF-16_decl.rb
deleted file mode 100644
index f405a9f259..0000000000
--- a/lib/rexml/encodings/UTF-16_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- UTF_16 = 'UTF-16'
- claim( UTF_16, /^\376\377/ )
- end
-end
diff --git a/lib/rexml/encodings/UTF-8.rb b/lib/rexml/encodings/UTF-8.rb
new file mode 100644
index 0000000000..96fee4c4c0
--- /dev/null
+++ b/lib/rexml/encodings/UTF-8.rb
@@ -0,0 +1,11 @@
+module REXML
+ module Encoding
+ def encode content
+ content
+ end
+
+ def decode(str)
+ str
+ end
+ end
+end
diff --git a/lib/rexml/light/node.rb b/lib/rexml/light/node.rb
index 680f2c23fe..9dafd687dd 100644
--- a/lib/rexml/light/node.rb
+++ b/lib/rexml/light/node.rb
@@ -1,76 +1,58 @@
require 'rexml/xmltokens'
require 'rexml/light/node'
-# Development model
-# document = Node.new
-
-# Add an element "foo" to the document
-# foo = document << "foo"
-# # Set attribute "attr" on foo
-# foo["attr"] = "la"
-# # Set another attribute in a different namespace
-# foo["attr", "namespace"] = "too"
-# # Swap foo into another namespace
-# foo.namespace = "blah"
-# # Add a couple of element nodes to foo
-# foo << "a"
-# foo << "b"
-# # Access the children of foo in various ways
-# a = foo[0]
-# foo.each { |child|
-# #...
-# }
-# # Add text to foo
-# # Add instruction
-# # Add comment
-# # Get the root of the document
-# document == a.root
-# # Write the document out
-# puts document.to_s
+# [ :element, parent, name, attributes, children* ]
+ # a = Node.new
+ # a << "B" # => <a>B</a>
+ # a.b # => <a>B<b/></a>
+ # a.b[1] # => <a>B<b/><b/><a>
+ # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
+ # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
+ # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
module REXML
module Light
# Represents a tagged XML element. Elements are characterized by
# having children, attributes, and names, and can themselves be
# children.
- class Node < Array
- alias :_old_get :[]
- alias :_old_put :[]=
-
+ class Node
NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
+ PARENTS = [ :element, :document, :doctype ]
# Create a new element.
def initialize node=nil
+ @node = node
if node.kind_of? String
node = [ :text, node ]
elsif node.nil?
node = [ :document, nil, nil ]
elsif node[0] == :start_element
node[0] = :element
+ elsif node[0] == :start_doctype
+ node[0] = :doctype
+ elsif node[0] == :start_document
+ node[0] = :document
end
- replace( node )
- _old_put( 1, 0, 1 )
- _old_put( 1, nil )
end
def size
- el!()
- super-4
+ if PARENTS.include? @node[0]
+ @node[-1].size
+ else
+ 0
+ end
end
def each( &block )
- el!()
size.times { |x| yield( at(x+4) ) }
end
def name
- el!()
at(2)
end
def name=( name_str, ns=nil )
- el!()
pfx = ''
pfx = "#{prefix(ns)}:" if ns
- _old_put(1, "#{pfx}#{name_str}")
+ _old_put(2, "#{pfx}#{name_str}")
end
def parent=( node )
@@ -78,28 +60,23 @@ module REXML
end
def local_name
- el!()
namesplit
@name
end
def local_name=( name_str )
- el!()
_old_put( 1, "#@prefix:#{name_str}" )
end
def prefix( namespace=nil )
- el!()
prefix_of( self, namespace )
end
def namespace( prefix=prefix() )
- el!()
namespace_of( self, prefix )
end
def namespace=( namespace )
- el!()
@prefix = prefix( namespace )
pfx = ''
pfx = "#@prefix:" if @prefix.size > 0
@@ -107,7 +84,6 @@ module REXML
end
def []( reference, ns=nil )
- el!()
if reference.kind_of? String
pfx = ''
pfx = "#{prefix(ns)}:" if ns
@@ -125,7 +101,6 @@ module REXML
# Doesn't handle namespaces yet
def []=( reference, ns, value=nil )
- el!()
if reference.kind_of? String
value = ns unless value
at( 3 )[reference] = value
@@ -170,12 +145,10 @@ module REXML
end
def has_name?( name, namespace = '' )
- el!()
at(3) == name and namespace() == namespace
end
def children
- el!()
self
end
@@ -187,14 +160,6 @@ module REXML
end
- def el!
- if node_type() != :element and node_type() != :document
- _old_put( 0, :element )
- push({})
- end
- self
- end
-
private
def namesplit
diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb
index 7d4ab2e13b..c4a7473bfb 100644
--- a/lib/rexml/output.rb
+++ b/lib/rexml/output.rb
@@ -8,10 +8,6 @@ module REXML
@output = real_IO
self.encoding = encd
- eval <<-EOL
- alias :encode :to_#{encoding.tr('-', '_').downcase}
- alias :decode :from_#{encoding.tr('-', '_').downcase}
- EOL
@to_utf = encd == UTF_8 ? false : true
end
diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb
index 04928d9175..0fee3ae620 100644
--- a/lib/rexml/parseexception.rb
+++ b/lib/rexml/parseexception.rb
@@ -1,5 +1,5 @@
module REXML
- class ParseException < Exception
+ class ParseException < RuntimeError
attr_accessor :source, :parser, :continued_exception
def initialize( message, source=nil, parser=nil, exception=nil )
@@ -12,9 +12,9 @@ module REXML
def to_s
# Quote the original exception, if there was one
if @continued_exception
- err = @continued_exception.message
+ err = @continued_exception.inspect
err << "\n"
- err << @continued_exception.backtrace[0..3].join("\n")
+ err << @continued_exception.backtrace.join("\n")
err << "\n...\n"
else
err = ""
@@ -24,17 +24,24 @@ module REXML
err << super
# Add contextual information
- err << "\n#{@source.current_line}\nLast 80 unconsumed characters:\n#{@source.buffer[0..80].gsub(/\n/, ' ')}\n" if @source
- err << "\nContext:\n#{@parser.context}" if @parser
+ if @source
+ err << "\nLine: #{line}\n"
+ err << "Position: #{position}\n"
+ err << "Last 80 unconsumed characters:\n"
+ err << @source.buffer[0..80].gsub(/\n/, ' ')
+ err << "\n"
+ err << @source.buffer[0..80].unpack("U*").inspect
+ end
+
err
end
def position
- @source.current_line[0] if @source
+ @source.current_line[0] if @source and @source.current_line
end
def line
- @source.current_line[2] if @source
+ @source.current_line[2] if @source and @source.current_line
end
def context
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index d6e04c7817..27c9642a68 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -89,10 +89,10 @@ module REXML
EREFERENCE = /&(?!#{NAME};)/
DEFAULT_ENTITIES = {
- 'gt' => [/&gt;/, '&gt;', '>'],
- 'lt' => [/&lt;/, '&lt;', '<'],
- 'quot' => [/&quot;/, '&quot;', '"'],
- "apos" => [/&apos;/, "&apos;", "'"]
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
}
def initialize( source )
@@ -126,6 +126,7 @@ module REXML
# Returns true if there are more events. Synonymous with !empty?
def has_next?
+ return true if @closed
@source.read if @source.buffer.size==0 and !@source.empty?
(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
end
@@ -143,7 +144,7 @@ module REXML
# event, so you can effectively pre-parse the entire document (pull the
# entire thing into memory) using this method.
def peek depth=0
- raise 'Illegal argument "#{depth}"' if depth < -1
+ raise %Q[Illegal argument "#{depth}"] if depth < -1
temp = []
if depth == -1
temp.push(pull()) until empty?
@@ -166,8 +167,9 @@ module REXML
return @stack.shift if @stack.size > 0
@source.read if @source.buffer.size==0
if @document_status == nil
- @source.match( /^\s*/um, true )
- word = @source.match( /^\s*(<.*?)>/um )
+ @source.consume( /^\s*/um )
+ word = @source.match( /(<.*?)>/um )
+ #word = @source.match_to( '>', /(<.*?)>/um )
word = word[1] unless word.nil?
case word
when COMMENT_START
@@ -190,7 +192,7 @@ module REXML
close = md[2]
identity =~ IDENTITY
name = $1
- raise "DOCTYPE is missing a name" if name.nil?
+ raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip
long_name = $3.nil? ? nil : $3.strip
uri = $4.nil? ? nil : $4.strip
@@ -274,10 +276,11 @@ module REXML
return [ :end_doctype ]
end
end
- begin
+ begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
last_tag = @tags.pop
+ #md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
"(got \"#{md[1]}\")", @source) unless last_tag == md[1]
@@ -286,18 +289,20 @@ module REXML
md = @source.match(/\A(\s*[^>]*>)/um)
#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
- case md[1]
- when CDATA_START
- return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ]
- when COMMENT_START
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+ if md[0][2] == ?-
+ md = @source.match( COMMENT_PATTERN, true )
+ return [ :comment, md[1] ] if md
else
- raise REXML::ParseException.new( "Declarations can only occur "+
- "in the doctype declaration.", @source)
+ md = @source.match( CDATA_PATTERN, true )
+ return [ :cdata, md[1] ] if md
end
+ raise REXML::ParseException.new( "Declarations can only occur "+
+ "in the doctype declaration.", @source)
elsif @source.buffer[1] == ??
md = @source.match( INSTRUCTION_PATTERN, true )
- return [ :processing_instruction, md[1], md[2] ]
+ return [ :processing_instruction, md[1], md[2] ] if md
+ raise REXML::ParseException.new( "Bad instruction declaration",
+ @source)
else
# Get the next tag
md = @source.match(TAG_MATCH, true)
@@ -318,17 +323,19 @@ module REXML
return [ :start_element, md[1], attributes ]
end
else
- md = @source.match(TEXT_PATTERN, true)
- raise "no text to add" if md[0].length == 0
+ md = @source.match( TEXT_PATTERN, true )
+ #md = @source.match_to_consume( '<', TEXT_PATTERN )
+ #@source.read
+ raise REXML::ParseException("no text to add") if md[0].length == 0
# unnormalized = Text::unnormalize( md[1], self )
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
end
- rescue REXML::ParseException
- raise $!
+ rescue REXML::ParseException
+ raise
rescue Exception, NameError => error
raise REXML::ParseException.new( "Exception parsing",
- @source, self, error )
+ @source, self, (error ? error : $!) )
end
return [ :dummy ]
end
@@ -354,7 +361,7 @@ module REXML
end if entities
copy.gsub!( EREFERENCE, '&amp;' )
DEFAULT_ENTITIES.each do |key, value|
- copy.gsub!( value[2], value[1] )
+ copy.gsub!( value[3], value[1] )
end
copy
end
diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb
index e2f083bc8e..8c555f7960 100644
--- a/lib/rexml/parsers/lightparser.rb
+++ b/lib/rexml/parsers/lightparser.rb
@@ -16,25 +16,25 @@ module REXML
end
def parse
- root = context = REXML::Light::Node.new([ :document ])
+ root = context = [ :document ]
while true
event = @parser.pull
case event[0]
when :end_document
break
when :end_doctype
- context = context.parent
+ context = context[1]
when :start_element, :start_doctype
- new_node = REXML::Light::Node.new(event)
+ new_node = event
context << new_node
- new_node.parent = context
+ new_node[1,0] = [context]
context = new_node
when :end_element, :end_doctype
- context = context.parent
+ context = context[1]
else
- new_node = REXML::Light::Node.new(event)
+ new_node = event
context << new_node
- new_node.parent = context
+ new_node[1,0] = [context]
end
end
root
diff --git a/lib/rexml/quickpath.rb b/lib/rexml/quickpath.rb
index c099db8579..2c54ac1999 100644
--- a/lib/rexml/quickpath.rb
+++ b/lib/rexml/quickpath.rb
@@ -31,7 +31,7 @@ module REXML
results = filter([element], path)
when /^\*/u
results = filter(element.to_a, path)
- when /^[\[!\w:]/u
+ when /^[[!\w:]/u
# match on child
matches = []
children = element.to_a
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index b7de03f3f9..8c402dd23f 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -21,6 +21,6 @@
# A tutorial is available in docs/tutorial.html
module REXML
Copyright = "Copyright #{Time.now.year} Sean Russell <ser@germane-software.com>"
- Date = "+2003/110"
- Version = "2.7.1"
+ Date = "+2003/283"
+ Version = "2.7.2"
end
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 8c175785b7..915b6efc27 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -39,10 +39,6 @@ module REXML
# Overridden to support optimized en/decoding
def encoding=(enc)
super
- eval <<-EOL
- alias :encode :to_#{encoding.tr('-', '_').downcase}
- alias :decode :from_#{encoding.tr('-', '_').downcase}
- EOL
@line_break = encode( '>' )
if enc != UTF_8
@buffer = decode(@buffer)
@@ -78,8 +74,22 @@ module REXML
def read
end
+ def consume( pattern )
+ @buffer = $' if pattern.match( @buffer )
+ end
+
+ def match_to( char, pattern )
+ return pattern.match(@buffer)
+ end
+
+ def match_to_consume( char, pattern )
+ md = pattern.match(@buffer)
+ @buffer = $'
+ return md
+ end
+
def match pattern, consume=false
- md = pattern.match @buffer
+ md = pattern.match(@buffer)
@buffer = $' if consume and md
return md
end
@@ -112,7 +122,9 @@ module REXML
#@block_size = block_size
#super @source.read(@block_size)
@line_break = '>'
- super @source.readline( @line_break )
+ #super @source.readline( "\n" )
+ super @source.readline( @line_break )+@source.read
+ @line_break = encode( '>' )
end
def scan pattern, consume=false
@@ -145,11 +157,15 @@ module REXML
str = @source.readline('>')
str = decode(str) if @to_utf and str
@buffer << str
- rescue
+ rescue Exception, NameError
@source = nil
end
end
+ def consume( pattern )
+ match( pattern, true )
+ end
+
def match pattern, consume=false
rv = pattern.match(@buffer)
@buffer = $' if consume and rv
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index 215078b766..9cd1e5d64c 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -2,16 +2,6 @@ require 'rexml/namespace'
require 'rexml/xmltokens'
require 'rexml/parsers/xpathparser'
-# Ignore this class. It adds a __ne__ method, because Ruby doesn't seem to
-# understand object.send( "!=", foo ), whereas it *does* understand "<", "==",
-# and all of the other comparison methods. Stupid, and annoying, and not at
-# all POLS.
-class Object
- def __ne__(b)
- self != b
- end
-end
-
module REXML
# You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here.
@@ -132,11 +122,10 @@ module REXML
when :child
#puts "CHILD"
new_nodeset = []
- ps_clone = nil
+ nt = nil
for node in nodeset
- #ps_clone = path_stack.clone
- #new_nodeset += internal_parse( ps_clone, node.children ) if node.parent?
- new_nodeset += node.children if node.parent?
+ nt = node.node_type
+ new_nodeset += node.children if nt == :element or nt == :document
end
#path_stack[0,(path_stack.size-ps_clone.size)] = []
return new_nodeset
@@ -238,9 +227,11 @@ module REXML
when :descendant
#puts ":DESCENDANT"
results = []
+ nt = nil
for node in nodeset
+ nt = node.node_type
results += internal_parse( path_stack.clone.unshift( :descendant_or_self ),
- node.children ) if node.parent?
+ node.children ) if nt == :element or nt == :document
end
return results
@@ -310,11 +301,13 @@ module REXML
def d_o_s( p, ns, r )
#puts r.collect{|n|n.to_s}.inspect
#puts ns.collect{|n|n.to_s}.inspect
+ nt = nil
ns.each_index do |i|
n = ns[i]
x = match( p.clone, [ n ] )
#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
- d_o_s( p, n.children, x ) if n.parent?
+ nt = n.node_type
+ d_o_s( p, n.children, x ) if nt == :element or nt == :document
r[i,0] = [x] if x.size > 0
end
end