blob: ce565e7dd5f189776e880eae612aa6a35ee8c8e6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
#
# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
#
module REXML
module Encoding
@@__REXML_encoding_methods = %q~
# Convert from UTF-8
def to_iso_8859_15 content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
case num
# shortcut first bunch basic characters
when 0..0xA3: array_enc << num
# characters removed compared to iso-8859-1
when 0xA4: array_enc << '¤'
when 0xA6: array_enc << '¦'
when 0xA8: array_enc << '¨'
when 0xB4: array_enc << '´'
when 0xB8: array_enc << '¸'
when 0xBC: array_enc << '¼'
when 0xBD: array_enc << '½'
when 0xBE: array_enc << '¾'
# characters added compared to iso-8859-1
when 0x20AC: array_enc << 0xA4 # 0xe2 0x82 0xac
when 0x0160: array_enc << 0xA6 # 0xc5 0xa0
when 0x0161: array_enc << 0xA8 # 0xc5 0xa1
when 0x017D: array_enc << 0xB4 # 0xc5 0xbd
when 0x017E: array_enc << 0xB8 # 0xc5 0xbe
when 0x0152: array_enc << 0xBC # 0xc5 0x92
when 0x0153: array_enc << 0xBD # 0xc5 0x93
when 0x0178: array_enc << 0xBE # 0xc5 0xb8
else
# all remaining basic characters can be used directly
if num <= 0xFF
array_enc << num
else
# Numeric entity (&#nnnn;); shard by Stefan Scholl
array_enc.concat "&\##{num};".unpack('C*')
end
end
end
array_enc.pack('C*')
end
# Convert to UTF-8
def from_iso_8859_15(str)
array_latin9 = str.unpack('C*')
array_enc = []
array_latin9.each do |num|
case num
# characters that differ compared to iso-8859-1
when 0xA4: array_enc << 0x20AC
when 0xA6: array_enc << 0x0160
when 0xA8: array_enc << 0x0161
when 0xB4: array_enc << 0x017D
when 0xB8: array_enc << 0x017E
when 0xBC: array_enc << 0x0152
when 0xBD: array_enc << 0x0153
when 0xBE: array_enc << 0x0178
else
array_enc << num
end
end
array_enc.pack('U*')
end
~
end
end
|