summary | refs | log | tree | commit | diff
path: root/lib/rexml
diff options
context:
space:
mode:
author ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-11-04 04:52:08 +0000
committer ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-11-04 04:52:08 +0000
commit 06f2b5b1d890253cdc4de78a326369a10d22595b (patch)
tree d72a3afeb172bb2c4318c3b6d9e499b5a3e5e7ce /lib/rexml
parent b3ab1dbf34f9ab5b69462c1b7aa8575c27cf8f65 (diff)
Fixes ticket:110 (more UTF-16 problems)
Missing include for UndefinedNamespaceException was causing errors in some cases.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13816 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml')
-rw-r--r-- lib/rexml/encoding.rb 9
-rw-r--r-- lib/rexml/parsers/baseparser.rb 2
-rw-r--r-- lib/rexml/parsers/treeparser.rb 1
-rw-r--r-- lib/rexml/source.rb 12
4 files changed, 18 insertions, 6 deletions
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 6cae6b644d..a01763be99 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -56,8 +56,13 @@ module REXML
def check_encoding str
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
- return UTF_16 if /\A\xfe\xff/n =~ str
- return UNILE if /\A\xff\xfe/n =~ str
+ if str[0] == 0xfe && str[1] == 0xff
+ str[0,2] = ""
+ return UTF_16
+ elsif str[0] == 0xff && str[1] == 0xfe
+ str[0,2] = ""
+ return UNILE
+ end
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
return $3.upcase if $3
return UTF_8
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 5f7a5ec43b..fc2354a67f 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,4 +1,5 @@
require 'rexml/parseexception'
+require 'rexml/undefinednamespaceexception'
require 'rexml/source'
require 'set'
@@ -191,6 +192,7 @@ module REXML
end
return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0
+ #STDERR.puts @source.encoding
@source.read if @source.buffer.size<2
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index ff8261cedf..5c3e142ea7 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -1,4 +1,5 @@
require 'rexml/validation/validationexception'
+require 'rexml/undefinednamespaceexception'
module REXML
module Parsers
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 659bcdc195..e05460fea1 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -135,6 +135,7 @@ module REXML
def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg
@to_utf = false
+
# Determining the encoding is a deceptively difficult issue to resolve.
# First, we check the first two bytes for UTF-16. Then we
# assume that the encoding is at least ASCII enough for the '>', and
@@ -146,13 +147,16 @@ module REXML
str = @source.read( 2 )
if encoding
self.encoding = encoding
- elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
- self.encoding = check_encoding( str )
- elsif (0xef == str[0] && 0xbb == str[1])
+ elsif 0xfe == str[0] && 0xff == str[1]
+ @line_break = "\000>"
+ elsif 0xff == str[0] && 0xfe == str[1]
+ @line_break = ">\000"
+ elsif 0xef == str[0] && 0xbb == str[1]
str += @source.read(1)
str = '' if (0xbf == str[2])
+ @line_break = ">"
else
- @line_break = '>'
+ @line_break = ">"
end
super str+@source.readline( @line_break )
end