summary | refs | log | tree | commit | diff
path: root/enc
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-28 09:26:55 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-28 09:26:55 +0000
commit: 793e9423cd4c4c679959b7dd011040b5aba2d169 (patch)
tree: 034b079d1e8a3b856e8cc5cc96dc3fd185f5d4a8 /enc
parent: 48af602e38293f53e7c9afa7b41ba778714220f4 (diff)
Fri Dec 28 01:55:04 2007 Martin Duerst <duerst@it.aoyama.ac.jp>
* transcode.c (transcode_dispatch): reverted some of the changes in r14746.

* transcode.c, enc/trans/single_byte.c: Added conversions to/from US-ASCII and ASCII-8BIT (using data tables).

* enc/trans/single_byte.c: Some spacing/ordering changes due to automatic data file generation.

* transcode_data.h, transcode.c: Preliminary code for using micro-conversion functions.

* test/ruby/test_transcode.rb: Added some tests for US-ASCII and ASCII-8BIT conversions.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14766 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc')
-rw-r--r-- enc/trans/single_byte.c 117
1 files changed, 103 insertions, 14 deletions
diff --git a/enc/trans/single_byte.c b/enc/trans/single_byte.c
index bbfb3f5f9a..baa523447e 100644
--- a/enc/trans/single_byte.c
+++ b/enc/trans/single_byte.c
@@ -1,6 +1,63 @@
#include "transcode_data.h"
static const unsigned char
+from_US_ASCII_offsets[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+static const struct byte_lookup* const
+from_US_ASCII_infos[2] = {
+ NOMAP, UNDEF,
+};
+static const BYTE_LOOKUP
+from_US_ASCII = {
+ /* used from from_US_ASCII */
+ /* used from to_US_ASCII */
+ /* used from to_ASCII_8BIT */
+ /* used from from_ASCII_8BIT */
+ from_US_ASCII_offsets,
+ from_US_ASCII_infos
+};
+
+static rb_transcoder
+rb_from_US_ASCII = {
+ "US-ASCII", "UTF-8", &from_US_ASCII, 1, 0,
+ NULL, NULL,
+};
+
+static rb_transcoder
+rb_to_US_ASCII = {
+ "UTF-8", "US-ASCII", &from_US_ASCII, 1, 1,
+ NULL, NULL,
+};
+
+static rb_transcoder
+rb_from_ASCII_8BIT = {
+ "ASCII-8BIT", "UTF-8", &from_US_ASCII, 1, 0,
+ NULL, NULL,
+};
+
+static rb_transcoder
+rb_to_ASCII_8BIT = {
+ "UTF-8", "ASCII-8BIT", &from_US_ASCII, 1, 1,
+ NULL, NULL,
+};
+
+static const unsigned char
from_ISO_8859_1_offsets[256] = {
/* used from from_ISO_8859_1 */
/* used from from_ISO_8859_2 */
@@ -69,6 +126,7 @@ from_ISO_8859_1 = {
from_ISO_8859_1_offsets,
from_ISO_8859_1_infos
};
+
static rb_transcoder
rb_from_ISO_8859_1 = {
"ISO-8859-1", "UTF-8", &from_ISO_8859_1, 2, 0,
@@ -167,6 +225,7 @@ to_ISO_8859_1 = {
to_ISO_8859_1_offsets,
to_ISO_8859_1_infos
};
+
static rb_transcoder
rb_to_ISO_8859_1 = {
"UTF-8", "ISO-8859-1", &to_ISO_8859_1, 1, 1,
@@ -214,6 +273,7 @@ from_ISO_8859_2 = {
from_ISO_8859_1_offsets,
from_ISO_8859_2_infos
};
+
static rb_transcoder
rb_from_ISO_8859_2 = {
"ISO-8859-2", "UTF-8", &from_ISO_8859_2, 2, 0,
@@ -370,6 +430,7 @@ to_ISO_8859_2 = {
to_ISO_8859_2_offsets,
to_ISO_8859_2_infos
};
+
static rb_transcoder
rb_to_ISO_8859_2 = {
"UTF-8", "ISO-8859-2", &to_ISO_8859_2, 1, 1,
@@ -434,6 +495,7 @@ from_ISO_8859_3 = {
from_ISO_8859_3_offsets,
from_ISO_8859_3_infos
};
+
static rb_transcoder
rb_from_ISO_8859_3 = {
"ISO-8859-3", "UTF-8", &from_ISO_8859_3, 2, 0,
@@ -565,6 +627,7 @@ to_ISO_8859_3 = {
to_ISO_8859_2_offsets,
to_ISO_8859_3_infos
};
+
static rb_transcoder
rb_to_ISO_8859_3 = {
"UTF-8", "ISO-8859-3", &to_ISO_8859_3, 1, 1,
@@ -612,6 +675,7 @@ from_ISO_8859_4 = {
from_ISO_8859_1_offsets,
from_ISO_8859_4_infos
};
+
static rb_transcoder
rb_from_ISO_8859_4 = {
"ISO-8859-4", "UTF-8", &from_ISO_8859_4, 2, 0,
@@ -747,6 +811,7 @@ to_ISO_8859_4 = {
to_ISO_8859_2_offsets,
to_ISO_8859_4_infos
};
+
static rb_transcoder
rb_to_ISO_8859_4 = {
"UTF-8", "ISO-8859-4", &to_ISO_8859_4, 1, 1,
@@ -826,6 +891,7 @@ from_ISO_8859_5 = {
from_ISO_8859_1_offsets,
from_ISO_8859_5_infos
};
+
static rb_transcoder
rb_from_ISO_8859_5 = {
"ISO-8859-5", "UTF-8", &from_ISO_8859_5, 3, 0,
@@ -977,6 +1043,7 @@ to_ISO_8859_5 = {
to_ISO_8859_5_offsets,
to_ISO_8859_5_infos
};
+
static rb_transcoder
rb_to_ISO_8859_5 = {
"UTF-8", "ISO-8859-5", &to_ISO_8859_5, 1, 1,
@@ -1032,6 +1099,7 @@ from_ISO_8859_6 = {
from_ISO_8859_6_offsets,
from_ISO_8859_6_infos
};
+
static rb_transcoder
rb_from_ISO_8859_6 = {
"ISO-8859-6", "UTF-8", &from_ISO_8859_6, 2, 0,
@@ -1138,6 +1206,7 @@ to_ISO_8859_6 = {
to_ISO_8859_6_offsets,
to_ISO_8859_6_infos
};
+
static rb_transcoder
rb_to_ISO_8859_6 = {
"UTF-8", "ISO-8859-6", &to_ISO_8859_6, 1, 1,
@@ -1235,6 +1304,7 @@ from_ISO_8859_7 = {
from_ISO_8859_7_offsets,
from_ISO_8859_7_infos
};
+
static rb_transcoder
rb_from_ISO_8859_7 = {
"ISO-8859-7", "UTF-8", &from_ISO_8859_7, 3, 0,
@@ -1421,6 +1491,7 @@ to_ISO_8859_7 = {
to_ISO_8859_7_offsets,
to_ISO_8859_7_infos
};
+
static rb_transcoder
rb_to_ISO_8859_7 = {
"UTF-8", "ISO-8859-7", &to_ISO_8859_7, 1, 1,
@@ -1501,6 +1572,7 @@ from_ISO_8859_8 = {
from_ISO_8859_8_offsets,
from_ISO_8859_8_infos
};
+
static rb_transcoder
rb_from_ISO_8859_8 = {
"ISO-8859-8", "UTF-8", &from_ISO_8859_8, 3, 0,
@@ -1646,6 +1718,7 @@ to_ISO_8859_8 = {
to_ISO_8859_8_offsets,
to_ISO_8859_8_infos
};
+
static rb_transcoder
rb_to_ISO_8859_8 = {
"UTF-8", "ISO-8859-8", &to_ISO_8859_8, 1, 1,
@@ -1693,6 +1766,7 @@ from_ISO_8859_9 = {
from_ISO_8859_1_offsets,
from_ISO_8859_9_infos
};
+
static rb_transcoder
rb_from_ISO_8859_9 = {
"ISO-8859-9", "UTF-8", &from_ISO_8859_9, 2, 0,
@@ -1795,6 +1869,7 @@ to_ISO_8859_9 = {
to_ISO_8859_9_offsets,
to_ISO_8859_9_infos
};
+
static rb_transcoder
rb_to_ISO_8859_9 = {
"UTF-8", "ISO-8859-9", &to_ISO_8859_9, 1, 1,
@@ -1874,6 +1949,7 @@ from_ISO_8859_10 = {
from_ISO_8859_1_offsets,
from_ISO_8859_10_infos
};
+
static rb_transcoder
rb_from_ISO_8859_10 = {
"ISO-8859-10", "UTF-8", &from_ISO_8859_10, 3, 0,
@@ -2031,6 +2107,7 @@ to_ISO_8859_10 = {
to_ISO_8859_10_offsets,
to_ISO_8859_10_infos
};
+
static rb_transcoder
rb_to_ISO_8859_10 = {
"UTF-8", "ISO-8859-10", &to_ISO_8859_10, 1, 1,
@@ -2125,6 +2202,7 @@ from_ISO_8859_11 = {
from_ISO_8859_11_offsets,
from_ISO_8859_11_infos
};
+
static rb_transcoder
rb_from_ISO_8859_11 = {
"ISO-8859-11", "UTF-8", &from_ISO_8859_11, 3, 0,
@@ -2258,6 +2336,7 @@ to_ISO_8859_11 = {
to_ISO_8859_11_offsets,
to_ISO_8859_11_infos
};
+
static rb_transcoder
rb_to_ISO_8859_11 = {
"UTF-8", "ISO-8859-11", &to_ISO_8859_11, 1, 1,
@@ -2337,6 +2416,7 @@ from_ISO_8859_13 = {
from_ISO_8859_1_offsets,
from_ISO_8859_13_infos
};
+
static rb_transcoder
rb_from_ISO_8859_13 = {
"ISO-8859-13", "UTF-8", &from_ISO_8859_13, 3, 0,
@@ -2481,6 +2561,7 @@ to_ISO_8859_13 = {
to_ISO_8859_10_offsets,
to_ISO_8859_13_infos
};
+
static rb_transcoder
rb_to_ISO_8859_13 = {
"UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1,
@@ -2560,6 +2641,7 @@ from_ISO_8859_14 = {
from_ISO_8859_1_offsets,
from_ISO_8859_14_infos
};
+
static rb_transcoder
rb_from_ISO_8859_14 = {
"ISO-8859-14", "UTF-8", &from_ISO_8859_14, 3, 0,
@@ -2781,6 +2863,7 @@ to_ISO_8859_14 = {
to_ISO_8859_14_offsets,
to_ISO_8859_14_infos
};
+
static rb_transcoder
rb_to_ISO_8859_14 = {
"UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1,
@@ -2860,6 +2943,7 @@ from_ISO_8859_15 = {
from_ISO_8859_1_offsets,
from_ISO_8859_15_infos
};
+
static rb_transcoder
rb_from_ISO_8859_15 = {
"ISO-8859-15", "UTF-8", &from_ISO_8859_15, 3, 0,
@@ -2979,6 +3063,7 @@ to_ISO_8859_15 = {
to_ISO_8859_15_offsets,
to_ISO_8859_15_infos
};
+
static rb_transcoder
rb_to_ISO_8859_15 = {
"UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1,
@@ -2988,33 +3073,37 @@ rb_to_ISO_8859_15 = {
void
Init_single_byte(void)
{
+ rb_register_transcoder(&rb_from_US_ASCII);
+ rb_register_transcoder(&rb_to_US_ASCII);
+ rb_register_transcoder(&rb_from_ASCII_8BIT);
+ rb_register_transcoder(&rb_to_ASCII_8BIT);
rb_register_transcoder(&rb_from_ISO_8859_1);
- rb_register_transcoder(&rb_from_ISO_8859_2);
- rb_register_transcoder(&rb_from_ISO_8859_3);
- rb_register_transcoder(&rb_from_ISO_8859_4);
- rb_register_transcoder(&rb_from_ISO_8859_5);
- rb_register_transcoder(&rb_from_ISO_8859_6);
- rb_register_transcoder(&rb_from_ISO_8859_7);
- rb_register_transcoder(&rb_from_ISO_8859_8);
- rb_register_transcoder(&rb_from_ISO_8859_9);
- rb_register_transcoder(&rb_from_ISO_8859_10);
- rb_register_transcoder(&rb_from_ISO_8859_11);
- rb_register_transcoder(&rb_from_ISO_8859_13);
- rb_register_transcoder(&rb_from_ISO_8859_14);
- rb_register_transcoder(&rb_from_ISO_8859_15);
rb_register_transcoder(&rb_to_ISO_8859_1);
+ rb_register_transcoder(&rb_from_ISO_8859_2);
rb_register_transcoder(&rb_to_ISO_8859_2);
+ rb_register_transcoder(&rb_from_ISO_8859_3);
rb_register_transcoder(&rb_to_ISO_8859_3);
+ rb_register_transcoder(&rb_from_ISO_8859_4);
rb_register_transcoder(&rb_to_ISO_8859_4);
+ rb_register_transcoder(&rb_from_ISO_8859_5);
rb_register_transcoder(&rb_to_ISO_8859_5);
+ rb_register_transcoder(&rb_from_ISO_8859_6);
rb_register_transcoder(&rb_to_ISO_8859_6);
+ rb_register_transcoder(&rb_from_ISO_8859_7);
rb_register_transcoder(&rb_to_ISO_8859_7);
+ rb_register_transcoder(&rb_from_ISO_8859_8);
rb_register_transcoder(&rb_to_ISO_8859_8);
+ rb_register_transcoder(&rb_from_ISO_8859_9);
rb_register_transcoder(&rb_to_ISO_8859_9);
+ rb_register_transcoder(&rb_from_ISO_8859_10);
rb_register_transcoder(&rb_to_ISO_8859_10);
+ rb_register_transcoder(&rb_from_ISO_8859_11);
rb_register_transcoder(&rb_to_ISO_8859_11);
+ rb_register_transcoder(&rb_from_ISO_8859_13);
rb_register_transcoder(&rb_to_ISO_8859_13);
+ rb_register_transcoder(&rb_from_ISO_8859_14);
rb_register_transcoder(&rb_to_ISO_8859_14);
+ rb_register_transcoder(&rb_from_ISO_8859_15);
rb_register_transcoder(&rb_to_ISO_8859_15);
}
-/* Footprint (bytes): gross: 26788, saved: 3728, net: 23060 */
+/* Footprint (bytes): gross: 27876, saved: 4544, net: 23332 */