summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-01-13 20:46:00 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-01-13 20:46:00 +0000
commit 5b46f99ce1111eec5fc45ea0b9d9631b09aa02d9 (patch)
tree 7703aa64fb18ec9f329857ee7c023031711298b2
parent 50bbc4e6ae9229907ad08730ab7330024361918a (diff)
* enc/*.c: add replicas and aliases.
* enc/make_encdb.rb: add duplicate and undefined check.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15028 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 6
-rw-r--r-- enc/ascii.c 44
-rw-r--r-- enc/big5.c 1
-rw-r--r-- enc/iso_8859_11.c 3
-rwxr-xr-x enc/make_encdb.rb 36
-rw-r--r-- enc/shift_jis.c 2
-rw-r--r-- enc/windows_1251.c 2
7 files changed, 84 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 195fd77..9cd2282 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Mon Jan 14 05:44:44 2008 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * enc/*.c: add replicas and aliases.
+
+ * enc/make_encdb.rb: add duplicate and undefined check.
+
Mon Jan 14 02:03:05 2008 NARUSE, Yui <naruse@ruby-lang.org>
* include/ruby/oniguruma.h: remove ONIG_ENCODING_* and OnigEncoding*
diff --git a/enc/ascii.c b/enc/ascii.c
index e79d930..8caf4cf 100644
--- a/enc/ascii.c
+++ b/enc/ascii.c
@@ -48,3 +48,47 @@ OnigEncodingDefine(ascii, ASCII) = {
onigenc_always_true_is_allowed_reverse_match
};
ENC_ALIAS("BINARY", "ASCII-8BIT");
+ENC_REPLICATE("IBM437", "ASCII-8BIT");
+ENC_ALIAS("CP437", "IBM437");
+ENC_REPLICATE("IBM737", "ASCII-8BIT");
+ENC_ALIAS("CP737", "IBM737");
+ENC_REPLICATE("IBM775", "ASCII-8BIT");
+ENC_ALIAS("CP775", "IBM775");
+ENC_REPLICATE("CP850", "ASCII-8BIT");
+ENC_ALIAS("IBM850", "CP850"); /* ENC_ALIAS(alias, original): the original must already be defined above */
+ENC_REPLICATE("IBM852", "ASCII-8BIT");
+ENC_REPLICATE("CP852", "IBM852");
+ENC_REPLICATE("IBM855", "ASCII-8BIT");
+ENC_REPLICATE("CP855", "IBM855");
+ENC_REPLICATE("IBM857", "ASCII-8BIT");
+ENC_ALIAS("CP857", "IBM857");
+ENC_REPLICATE("IBM860", "ASCII-8BIT");
+ENC_ALIAS("CP860", "IBM860");
+ENC_REPLICATE("IBM861", "ASCII-8BIT");
+ENC_ALIAS("CP861", "IBM861");
+ENC_REPLICATE("IBM862", "ASCII-8BIT");
+ENC_ALIAS("CP862", "IBM862");
+ENC_REPLICATE("IBM863", "ASCII-8BIT");
+ENC_ALIAS("CP863", "IBM863");
+ENC_REPLICATE("IBM864", "ASCII-8BIT");
+ENC_ALIAS("CP864", "IBM864");
+ENC_REPLICATE("IBM865", "ASCII-8BIT");
+ENC_ALIAS("CP865", "IBM865");
+ENC_REPLICATE("IBM866", "ASCII-8BIT");
+ENC_ALIAS("CP866", "IBM866");
+ENC_REPLICATE("IBM869", "ASCII-8BIT");
+ENC_ALIAS("CP869", "IBM869");
+ENC_REPLICATE("Windows-1258", "ASCII-8BIT");
+ENC_ALIAS("CP1258", "Windows-1258");
+ENC_REPLICATE("gb1988", "ASCII-8BIT");
+ENC_REPLICATE("koi8-u", "ASCII-8BIT");
+ENC_REPLICATE("macCentEuro", "ASCII-8BIT");
+ENC_REPLICATE("macCroatian", "ASCII-8BIT");
+ENC_REPLICATE("macCyrillic", "ASCII-8BIT");
+ENC_REPLICATE("macGreek", "ASCII-8BIT");
+ENC_REPLICATE("macIceland", "ASCII-8BIT");
+ENC_REPLICATE("macRoman", "ASCII-8BIT");
+ENC_REPLICATE("macRomania", "ASCII-8BIT");
+ENC_REPLICATE("macThai", "ASCII-8BIT");
+ENC_REPLICATE("macTurkish", "ASCII-8BIT");
+ENC_REPLICATE("macUkraine", "ASCII-8BIT");
diff --git a/enc/big5.c b/enc/big5.c
index f1456d9..44503fe 100644
--- a/enc/big5.c
+++ b/enc/big5.c
@@ -162,3 +162,4 @@ OnigEncodingDefine(big5, BIG5) = {
big5_left_adjust_char_head,
big5_is_allowed_reverse_match
};
+ENC_ALIAS("CP950", "BIG5");
diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c
index c95de80..edba3cb 100644
--- a/enc/iso_8859_11.c
+++ b/enc/iso_8859_11.c
@@ -95,5 +95,6 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = {
onigenc_always_true_is_allowed_reverse_match
};
ENC_ALIAS("ISO8859-11", "ISO-8859-11");
+ENC_REPLICATE("TIS-620", "ISO-8859-11");
ENC_REPLICATE("Windows-874", "ISO-8859-11");
-ENC_ALIAS("CP874", "ISO-8859-11");
+ENC_ALIAS("CP874", "Windows-874");
diff --git a/enc/make_encdb.rb b/enc/make_encdb.rb
index f260a2c..c140007 100755
--- a/enc/make_encdb.rb
+++ b/enc/make_encdb.rb
@@ -9,6 +9,12 @@
# ENC_ALIAS("CP932", "Windows-31J")
#
+def check_duplication(encs, name, fn, line) # raises ArgumentError when name is already in encs
+ if encs.include?(name.upcase) # encs stores upcased names, so compare the upcased form
+ raise ArgumentError, "%s:%d: encoding %s is already registered" % [fn, line, name]
+ end
+end
+
encodings = []
replicas = {}
aliases = {}
@@ -17,20 +23,34 @@ Dir.open(encdir) {|d| d.grep(/.+\.c\z/)}.sort.each do |fn|
open(File.join(encdir,fn)) do |f|
orig = nil
name = nil
+ encs = []
f.each_line do |line|
break if /^OnigEncodingDefine/o =~ line
end
f.each_line do |line|
break if /"(.*?)"/ =~ line
end
- encodings << $1 if $1
- f.each_line do |line|
- if /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line
- encodings << $1
- replicas[$1] = $2
- elsif /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line
- encodings << $1
- aliases[$1] = $2
+ if $1
+ check_duplication(encs, $1, fn, $.)
+ encs << $1.upcase
+ encodings << $1
+ f.each_line do |line|
+ if /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line
+ raise ArgumentError,
+ '%s:%d: ENC_REPLICATE: %s is not defined yet. (replica %s)' %
+ [fn, $., $2, $1] unless encs.include?($2.upcase)
+ check_duplication(encs, $1, fn, $.)
+ encs << $1.upcase
+ encodings << $1
+ replicas[$1] = $2
+ elsif /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line
+ raise ArgumentError,
+ '%s:%d: ENC_ALIAS: %s is not defined yet. (alias %s)' %
+ [fn, $., $2, $1] unless encs.include?($2.upcase)
+ check_duplication(encs, $1, fn, $.)
+ encodings << $1
+ aliases[$1] = $2
+ end
end
end
end
diff --git a/enc/shift_jis.c b/enc/shift_jis.c
index ebfc9af..876a0ff 100644
--- a/enc/shift_jis.c
+++ b/enc/shift_jis.c
@@ -376,3 +376,5 @@ ENC_ALIAS("SJIS", "Shift_JIS");
ENC_REPLICATE("Windows-31J", "Shift_JIS");
ENC_ALIAS("CP932", "Windows-31J");
ENC_ALIAS("csWindows31J", "Windows-31J"); /* IANA. IE6 don't accept Windows-31J but csWindows31J. */
+ENC_REPLICATE("MacJapanese", "Shift_JIS");
+ENC_ALIAS("MacJapan", "MacJapanese");
\ No newline at end of file
diff --git a/enc/windows_1251.c b/enc/windows_1251.c
index 173fb66..faba0c9 100644
--- a/enc/windows_1251.c
+++ b/enc/windows_1251.c
@@ -180,7 +180,7 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
flag, p, end, items);
}
-OnigEncodingType OnigEncodingCP1251 = {
+OnigEncodingDefine(windows_1251, Windows_1251) = {
onigenc_single_byte_mbc_enc_len,
"Windows-1251", /* name */
1, /* max enc length */