summaryrefslogtreecommitdiff
path: root/lib/yaml/encoding.rb
blob: 59491b53b7fb500a35e9a698cfefa84958a142b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#
# Handle Unicode-to-Internal conversion
#

module YAML

	#
	# Encodings ( $-K to ICONV )
	#
	# Maps the interpreter's $-K ($KCODE) value to the charset name that is
	# handed to Iconv.  Ruby 1.8 reports UTF-8 mode as "UTF8" (no hyphen),
	# so that spelling is listed next to the hyphenated key, which is kept
	# for callers that look the table up directly.
	#
	CHARSETS = {
		'NONE' => 'LATIN1',
		'ASCII' => 'US-ASCII',
		'UTF8' => 'UTF-8',
		'UTF-8' => 'UTF-8',
		'EUC' => 'EUC-JP',
		'SJIS' => 'SHIFT-JIS'
	}

	#
	# YAML documents can be in UTF-8, UTF-16 or UTF-32
	# So let's read and write in Unicode
	#

	# Unicode sniffing is off until a caller enables it via YAML.unicode=.
	@@unicode = false

	# Iconv is optional: when it cannot be loaded the default encoding is
	# simply left as it was.  DEFAULTS is not defined in this file; it is
	# expected to exist before this file is loaded.
	begin
		require 'iconv'
		DEFAULTS[:Encoding] = :Utf8
	rescue LoadError
	end

	#
	# Whether YAML.sniff_encoding looks for Unicode byte-order marks.
	#
	def YAML.unicode
		@@unicode
	end

	#
	# Turn Unicode sniffing on (true) or off (false).
	#
	def YAML.unicode=( bool )
		@@unicode = bool
	end

	#
	# Unicode conversion
	#

	#
	# Convert +str+ from the Unicode encoding named by +from_enc+
	# (:Utf32, :Utf16 or :Utf8) into the charset selected by $-K.
	# Returns nil for a nil/false +str+ and returns +str+ untouched for
	# :None.  Raises YAML::Error for any other encoding symbol.
	#
	def YAML.utf_to_internal( str, from_enc )
		return unless str
		return str if :None == from_enc
		Iconv.iconv( CHARSETS[$-K], unicode_charset( from_enc ), str )[0]
	end

	#
	# Convert +str+ from the charset selected by $-K into the Unicode
	# encoding named by +to_enc+ (:Utf32, :Utf16 or :Utf8).
	# Returns nil for a nil/false +str+ and returns +str+ untouched for
	# :None.  Raises YAML::Error for any other encoding symbol.
	#
	def YAML.internal_to_utf( str, to_enc )
		return unless str
		return str if :None == to_enc
		Iconv.iconv( unicode_charset( to_enc ), CHARSETS[$-K], str )[0]
	end

	#
	# Internal helper: the Iconv charset name for a Unicode encoding
	# symbol.  Raises YAML::Error when the symbol is not supported.
	#
	def YAML.unicode_charset( enc )
		case enc
			when :Utf32
				'UTF-32'
			when :Utf16
				'UTF-16'
			when :Utf8
				'UTF-8'
			else
				raise YAML::Error, ERROR_UNSUPPORTED_ENCODING % enc.inspect
		end
	end
	private_class_method :unicode_charset

	#
	# Guess the encoding of +str+ from its leading bytes.
	# Returns :None while Unicode sniffing is disabled; otherwise :Utf32
	# for a 00 00 FE FF prefix, :Utf16 for an FE FF prefix and :Utf8 for
	# everything else (including values that are not strings).
	#
	def YAML.sniff_encoding( str )
		return :None unless YAML::unicode
		return :Utf8 unless str.respond_to?( :unpack )

		# Compare raw bytes rather than using a regexp: a BOM only counts
		# at the very start of the data, and byte values are not affected
		# by the interpreter's current character-set mode.
		head = str.unpack( 'C4' )
		if head == [0x00, 0x00, 0xFE, 0xFF]	# UTF-32 BOM
			:Utf32
		elsif head[0, 2] == [0xFE, 0xFF]	# UTF-16 BOM
			:Utf16
		else
			:Utf8
		end
	end

	#
	# Line separator for the given encoding: a line feed preceded by NUL
	# bytes up to the code-unit width (4 bytes for :Utf32, 2 for :Utf16,
	# 1 for :Utf8 and :None).  Raises YAML::Error for any other symbol.
	#
	def YAML.enc_separator( enc )
		case enc
			when :Utf32
				"\000\000\000\n"
			when :Utf16
				"\000\n"
			when :Utf8, :None
				"\n"
			else
				raise YAML::Error, ERROR_UNSUPPORTED_ENCODING % enc.inspect
		end
	end

	#
	# Escape the string, condensing common escapes
	#
	# Backslashes are doubled, double quotes are backslash-escaped, and
	# each control byte (0x00-0x1f) is replaced by the ESCAPES entry at
	# that byte's index (ESCAPES is defined outside this file).
	#
	def YAML.escape( value )
		quoted = value.gsub( /\\/, "\\\\\\" ).gsub( /"/, "\\\"" )
		quoted.gsub( /([\x00-\x1f])/ ) { |ctrl| ESCAPES[ ctrl.unpack("C")[0] ] }
	end

	#
	# Expand condensed escape sequences back into the characters they
	# stand for.
	#
	# Handles single-letter escapes (looked up in UNESCAPES, defined
	# outside this file), \xHH / \0xHH byte escapes and \uHHHH code
	# point escapes.
	#
	def YAML.unescape( value )
		value.gsub( /\\(?:([nevbr\\fartz])|0?x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/ ) {
			if $3
				# \uHHHH: code point packed as UTF-8
				[ $3.hex ].pack( 'U*' )
			elsif $2
				# \xHH: a single raw byte
				[ $2 ].pack( 'H2' )
			else
				UNESCAPES[$1]
			end
		}
	end

end