summaryrefslogtreecommitdiff
path: root/lib/rexml/encoding.rb
blob: 3206adcc8b2091572745c0a660381a8766be9e39 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
	# Mixin that lets an object switch its character encoding at run time.
	#
	# Encoding-specific behaviour is supplied by blocks stored in a module-level
	# registry (see Encoding.register); #encoding= loads the file that is
	# expected to register such a block and then calls it with the receiver.
	module Encoding
		# Encoding name (String) => block that is called with the object to set up.
		@encoding_methods = {}

		# Stores +block+ in the registry under the name +enc+, replacing any
		# block previously registered under that name.
		def self.register(enc, &block)
			@encoding_methods[enc] = block
		end

		# Calls the block registered under +enc+ with +obj+ and returns its
		# result. Raises NoMethodError when nothing is registered under +enc+.
		def self.apply(obj, enc)
			@encoding_methods[enc][obj]
		end

		# Returns the block registered under +enc+, or nil when there is none.
		def self.encoding_method(enc)
			@encoding_methods[enc]
		end

		# Native, default format is UTF-8, so it is declared here rather than in
		# an encodings/ definition.
		UTF_8 = 'UTF-8'
		UTF_16 = 'UTF-16'
		# Name returned by #check_encoding for input starting with the bytes FF FE.
		UNILE = 'UNILE'

		# Name of the encoding currently in effect (upper-cased by #encoding=).
		attr_reader :encoding

		# Switches the receiver to the encoding named +enc+.
		#
		# +enc+ is compared case-insensitively; nil or "UTF-8" selects the native
		# UTF-8 handling. For any other name the ICONV-based converter is tried
		# first and, if that cannot be loaded or applied, the file
		# rexml/encodings/<NAME>.rb is required instead.
		#
		# Does nothing when +enc+ already is the current encoding.
		#
		# Raises ArgumentError when the fallback is needed and the name contains
		# anything other than word characters and '-', or when no decoder file
		# exists for it.
		def encoding=( enc )
			old_verbosity = $VERBOSE
			begin
				# Silence warnings emitted while the encoding files are loaded.
				$VERBOSE = false
				# Normalise first so that "utf-8" and "UTF-8" are the same encoding,
				# both for the early return and for the UTF-8 test below.
				enc = enc.upcase if enc
				return if defined? @encoding and enc == @encoding
				if enc and enc != UTF_8
					@encoding = enc
					begin
						require 'rexml/encodings/ICONV.rb'
						Encoding.apply(self, "ICONV")
					rescue LoadError, StandardError
						# LoadError is a ScriptError, hence listed explicitly. Signals,
						# SystemExit and NoMemoryError are deliberately not swallowed.
						#
						# The name ends up in a require path, so the WHOLE string must
						# be safe: \A and \z, not the per-line anchors ^ and $.
						raise ArgumentError, "Bad encoding name #{@encoding}" unless @encoding =~ /\A[\w-]+\z/
						# Object#untaint no longer exists on recent Ruby versions.
						@encoding.untaint if @encoding.respond_to?(:untaint)
						enc_file = File.join( "rexml", "encodings", "#{@encoding}.rb" )
						begin
							require enc_file
							Encoding.apply(self, @encoding)
						rescue LoadError => load_error
							# Diagnostics go to stderr so they cannot corrupt program output.
							warn load_error.message
							raise ArgumentError, "No decoder found for encoding #{@encoding}.  Please install iconv."
						end
					end
				else
					@encoding = UTF_8
					require 'rexml/encodings/UTF-8.rb'
					Encoding.apply(self, @encoding)
				end
			ensure
				$VERBOSE = old_verbosity
			end
		end

		# Guesses the encoding of the XML text +str+ from its first bytes.
		#
		# Returns UTF_16 for a leading FE FF byte pair, UNILE for FF FE, the
		# upper-cased value of the encoding pseudo-attribute when +str+ starts
		# with an XML declaration that has a non-empty one, and UTF_8 otherwise.
		def check_encoding(str)
			# We have to recognize UTF-16, LSB UTF-16, and UTF-8.
			# unpack yields integer byte values on every Ruby version, whereas
			# String#[] returns a String on 1.9+ and would never equal 254/255.
			first_byte, second_byte = str.unpack('C2')
			return UTF_16 if first_byte == 0xFE && second_byte == 0xFF
			return UNILE if first_byte == 0xFF && second_byte == 0xFE
			# Group 1 is the quote around the version value, group 2 the quote
			# around the encoding value and group 3 the encoding name itself.
			if str =~ /\A\s*<\?xml\s+version\s*=\s*(['"])[^'"]*\1\s+encoding\s*=\s*(["'])([^'"]+)\2/
				return $3.upcase
			end
			UTF_8
		end
	end
end