From 21671b558cfd8d90647f40a8594fd9e6db038768 Mon Sep 17 00:00:00 2001 From: naruse Date: Sun, 13 Jan 2008 14:29:12 +0000 Subject: * enc/make_encdb.rb: sort encoding names by original name. * encoding.c, enc/*.c: define replicas and aliases. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15025 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 ++++++ enc/euc_jp.c | 3 +++ enc/iso_8859_1.c | 3 +++ enc/iso_8859_10.c | 1 + enc/iso_8859_11.c | 3 +++ enc/iso_8859_13.c | 1 + enc/iso_8859_14.c | 1 + enc/iso_8859_15.c | 1 + enc/iso_8859_16.c | 1 + enc/iso_8859_2.c | 3 +++ enc/iso_8859_3.c | 1 + enc/iso_8859_4.c | 3 +++ enc/iso_8859_5.c | 3 +++ enc/iso_8859_6.c | 3 +++ enc/iso_8859_7.c | 3 +++ enc/iso_8859_8.c | 3 +++ enc/iso_8859_9.c | 3 +++ enc/make_encdb.rb | 8 ++++---- enc/us_ascii.c | 2 +- encoding.c | 7 +++++-- 20 files changed, 52 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 706cb3ee97..b7007fa364 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Sun Jan 13 22:47:28 2008 NARUSE, Yui + + * enc/make_encdb.rb: sort encoding names by original name. + + * encoding.c, enc/*.c: define replicas and aliases. + Sun Jan 13 20:24:03 2008 NARUSE, Yui * encoding.c: add documents. 
diff --git a/enc/euc_jp.c b/enc/euc_jp.c index f62627ed84..fbf3848f47 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -364,3 +364,6 @@ OnigEncodingDefine(euc_jp, EUC_JP) = { 0 }; ENC_ALIAS("eucJP", "EUC-JP"); /* UI-OSF Application Platform Profile for Japanese Environment Version 1.1 */ +ENC_REPLICATE("eucJP-ms", "EUC-JP"); /* TOG/JVC CDE/Motif Technical WG */ +ENC_ALIAS("euc-jp-ms", "EUC-JP"); +ENC_REPLICATE("CP51932", "EUC-JP"); /* Windows CodePage 51932 */ diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c index 0c85c8befb..10c68f5300 100644 --- a/enc/iso_8859_1.c +++ b/enc/iso_8859_1.c @@ -272,3 +272,6 @@ OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-1", "ISO-8859-1"); +ENC_REPLICATE("Windows-1252", "ISO-8859-1"); +ENC_ALIAS("CP1252", "Windows-1252"); diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c index 092941e873..64ec45a3b8 100644 --- a/enc/iso_8859_10.c +++ b/enc/iso_8859_10.c @@ -241,3 +241,4 @@ OnigEncodingDefine(iso_8859_10, ISO_8859_10) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-10", "ISO-8859-10"); diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c index ce0fee7b21..c95de807c2 100644 --- a/enc/iso_8859_11.c +++ b/enc/iso_8859_11.c @@ -94,3 +94,6 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-11", "ISO-8859-11"); +ENC_REPLICATE("Windows-874", "ISO-8859-11"); +ENC_ALIAS("CP874", "ISO-8859-11"); diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c index 0922566169..9f7f12122d 100644 --- a/enc/iso_8859_13.c +++ b/enc/iso_8859_13.c @@ -230,3 +230,4 @@ OnigEncodingDefine(iso_8859_13, ISO_8859_13) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-13", "ISO-8859-13"); diff --git a/enc/iso_8859_14.c 
b/enc/iso_8859_14.c index 0d879169b2..0970145dc1 100644 --- a/enc/iso_8859_14.c +++ b/enc/iso_8859_14.c @@ -243,3 +243,4 @@ OnigEncodingDefine(iso_8859_14, ISO_8859_14) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-14", "ISO-8859-14"); diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c index c189252a67..a842ab4f3d 100644 --- a/enc/iso_8859_15.c +++ b/enc/iso_8859_15.c @@ -237,3 +237,4 @@ OnigEncodingDefine(iso_8859_15, ISO_8859_15) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-15", "ISO-8859-15"); diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c index 1fdaa50da5..86e626c5fb 100644 --- a/enc/iso_8859_16.c +++ b/enc/iso_8859_16.c @@ -239,3 +239,4 @@ OnigEncodingDefine(iso_8859_16, ISO_8859_16) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-16", "ISO-8859-16"); diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c index 57340b3e15..4013ed9aa5 100644 --- a/enc/iso_8859_2.c +++ b/enc/iso_8859_2.c @@ -237,3 +237,6 @@ OnigEncodingDefine(iso_8859_2, ISO_8859_2) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-2", "ISO-8859-2"); +ENC_REPLICATE("Windows-1250", "ISO-8859-2"); +ENC_ALIAS("CP1250", "Windows-1250"); diff --git a/enc/iso_8859_3.c b/enc/iso_8859_3.c index 90c2636d5c..5b0ba04c73 100644 --- a/enc/iso_8859_3.c +++ b/enc/iso_8859_3.c @@ -237,3 +237,4 @@ OnigEncodingDefine(iso_8859_3, ISO_8859_3) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-3", "ISO-8859-3"); diff --git a/enc/iso_8859_4.c b/enc/iso_8859_4.c index cc103cd924..d9d41f4e53 100644 --- a/enc/iso_8859_4.c +++ b/enc/iso_8859_4.c @@ -239,3 +239,6 @@ OnigEncodingDefine(iso_8859_4, ISO_8859_4) = { onigenc_single_byte_left_adjust_char_head, 
onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-4", "ISO-8859-4"); +ENC_REPLICATE("Windows-1257", "ISO-8859-4"); +ENC_ALIAS("CP1257", "Windows-1257"); diff --git a/enc/iso_8859_5.c b/enc/iso_8859_5.c index de7a0fd8bb..06b91ee643 100644 --- a/enc/iso_8859_5.c +++ b/enc/iso_8859_5.c @@ -227,3 +227,6 @@ OnigEncodingDefine(iso_8859_5, ISO_8859_5) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-5", "ISO-8859-5"); +ENC_REPLICATE("Windows-1251", "ISO-8859-5"); +ENC_ALIAS("CP1251", "Windows-1251"); diff --git a/enc/iso_8859_6.c b/enc/iso_8859_6.c index c0f3027904..8a91280c1c 100644 --- a/enc/iso_8859_6.c +++ b/enc/iso_8859_6.c @@ -94,3 +94,6 @@ OnigEncodingDefine(iso_8859_6, ISO_8859_6) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-6", "ISO-8859-6"); +ENC_REPLICATE("Windows-1256", "ISO-8859-6"); +ENC_ALIAS("CP1256", "Windows-1256"); diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c index 2ff42ed53f..68d590dfa1 100644 --- a/enc/iso_8859_7.c +++ b/enc/iso_8859_7.c @@ -224,3 +224,6 @@ OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-7", "ISO-8859-7"); +ENC_REPLICATE("Windows-1253", "ISO-8859-7"); +ENC_ALIAS("CP1253", "Windows-1253"); diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c index e0e600cd14..72cfea95d7 100644 --- a/enc/iso_8859_8.c +++ b/enc/iso_8859_8.c @@ -94,3 +94,6 @@ OnigEncodingDefine(iso_8859_8, ISO_8859_8) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-8", "ISO-8859-8"); +ENC_REPLICATE("Windows-1255", "ISO-8859-8"); +ENC_ALIAS("CP1255", "Windows-1255"); diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c index f811bac247..0af43fb733 100644 --- a/enc/iso_8859_9.c +++ b/enc/iso_8859_9.c @@ -230,3 +230,6 @@ OnigEncodingDefine(iso_8859_9, 
ISO_8859_9) = { onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match }; +ENC_ALIAS("ISO8859-9", "ISO-8859-9"); +ENC_REPLICATE("Windows-1254", "ISO-8859-9"); +ENC_ALIAS("CP1254", "Windows-1254"); diff --git a/enc/make_encdb.rb b/enc/make_encdb.rb index fe596769dc..f260a2caa0 100755 --- a/enc/make_encdb.rb +++ b/enc/make_encdb.rb @@ -13,7 +13,7 @@ encodings = [] replicas = {} aliases = {} encdir = ARGV[0] -Dir.open(encdir) {|d| d.grep(/.+\.c\z/)}.each do |fn| +Dir.open(encdir) {|d| d.grep(/.+\.c\z/)}.sort.each do |fn| open(File.join(encdir,fn)) do |f| orig = nil name = nil @@ -26,19 +26,19 @@ Dir.open(encdir) {|d| d.grep(/.+\.c\z/)}.each do |fn| encodings << $1 if $1 f.each_line do |line| if /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line + encodings << $1 replicas[$1] = $2 elsif /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line + encodings << $1 aliases[$1] = $2 end end end end -p aliases + open('encdb.h', 'wb') do |f| f.puts 'static const char *const enc_name_list[] = {' encodings.each {|name| f.puts' "%s",' % name} - replicas.each_key {|name| f.puts' "%s",' % name} - aliases.each_key {|name| f.puts' "%s",' % name} f.puts('};', '', 'static void', 'enc_init_db(void)', '{') replicas.each_pair {|name, orig| f.puts ' ENC_REPLICATE("%s", "%s");' % [name, orig] diff --git a/enc/us_ascii.c b/enc/us_ascii.c index b3ac093425..165932d1ae 100644 --- a/enc/us_ascii.c +++ b/enc/us_ascii.c @@ -28,4 +28,4 @@ OnigEncodingDefine(us_ascii, US_ASCII) = { }; ENC_ALIAS("ASCII", "US-ASCII"); ENC_ALIAS("ANSI_X3.4-1986", "US-ASCII"); - +ENC_ALIAS("646", "US-ASCII"); \ No newline at end of file diff --git a/encoding.c b/encoding.c index d892c8ff51..a7a62e3bb3 100644 --- a/encoding.c +++ b/encoding.c @@ -1087,10 +1087,13 @@ Init_Encoding(void) rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0); rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0); + enc_init_db(); + /* dummy for 
unsupported, statefull encoding */ rb_define_dummy_encoding("ISO-2022-JP"); - - enc_init_db(); + rb_enc_alias("ISO2022-JP", "ISO-2022-JP"); + rb_define_dummy_encoding("ISO-2022-JP-2"); + rb_enc_alias("ISO2022-JP-2", "ISO-2022-JP-2"); } /* locale insensitive functions */ -- cgit v1.2.3