summary | refs | log | tree | commit | diff
path: root/enc
diff options
context:
space:
mode:
author: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-09-03 14:12:06 +0000
committer: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-09-03 14:12:06 +0000
commit: f6441bf61cd4244aca5f465d262baf31b8872ac2 (patch)
tree: 57787744890d4ac6db62bbcf44b12e738f34eb62 /enc
parent: fce093432eadc191b3647f116a9c2f6748efda3e (diff)
* transcode_data.h (rb_transcoding): remove stateful field.
  add state field.
  (TRANSCODING_STATE): defined.
  (rb_transcoder): add fields: state_size, state_init_func,
  state_fini_func.
  change rb_transcoding* argument to void*.

* transcode.c (transcode_restartable0): use TRANSCODING_STATE for
  first arguments of transcoder functions.
  (rb_transcoding_open_by_transcoder): initialize state field.
  (rb_transcoding_close): finalize state field.

* tool/transcode-tblgen.rb: provide state size/init/fini.

* enc/trans/newline.trans (universal_newline_init): defined.
  (fun_so_universal_newline): take void* as a state pointer.
  (rb_universal_newline): provide state size/init/fini.
  (rb_crlf_newline): ditto.
  (rb_cr_newline): ditto.

* enc/trans/iso2022.trans (iso2022jp_init): defined.
  (fun_si_iso2022jp_to_eucjp): take void* as a state pointer.
  (fun_so_iso2022jp_to_eucjp): ditto.
  (fun_so_eucjp_to_iso2022jp): ditto.
  (iso2022jp_reset_sequence_size): ditto.
  (finish_eucjp_to_iso2022jp): ditto.
  (rb_ISO_2022_JP_to_EUC_JP): provide state size/init/fini.
  (rb_EUC_JP_to_ISO_2022_JP): ditto.

* enc/trans/utf_16_32.trans (fun_so_from_utf_16be): take void* as a
  state pointer.
  (fun_so_to_utf_16be): ditto.
  (fun_so_from_utf_16le): ditto.
  (fun_so_to_utf_16le): ditto.
  (fun_so_from_utf_32be): ditto.
  (fun_so_to_utf_32be): ditto.
  (fun_so_from_utf_32le): ditto.
  (fun_so_to_utf_32le): ditto.
  (rb_from_UTF_16BE): provide state size/init/fini.
  (rb_to_UTF_16BE): ditto.
  (rb_from_UTF_16LE): ditto.
  (rb_to_UTF_16LE): ditto.
  (rb_from_UTF_32BE): ditto.
  (rb_to_UTF_32BE): ditto.
  (rb_from_UTF_32LE): ditto.
  (rb_to_UTF_32LE): ditto.

* enc/trans/japanese.trans (fun_so_eucjp2sjis): take void* as a state
  pointer.
  (fun_so_sjis2eucjp): ditto.
  (rb_eucjp2sjis): provide state size/init/fini.
  (rb_sjis2eucjp): provide state size/init/fini.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19096 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc')
-rw-r--r-- enc/trans/iso2022.trans | 59
-rw-r--r-- enc/trans/japanese.trans | 8
-rw-r--r-- enc/trans/newline.trans | 31
-rw-r--r-- enc/trans/utf_16_32.trans | 24
4 files changed, 77 insertions, 45 deletions
diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans
index 49da2c3f6c..067611ebd0 100644
--- a/enc/trans/iso2022.trans
+++ b/enc/trans/iso2022.trans
@@ -27,10 +27,22 @@
<%= transcode_generated_code %>
+#define G0_ASCII 0
+#define G0_JISX0208 1
+
+static int
+iso2022jp_init(void *statep)
+{
+ unsigned char *sp = statep;
+ *sp = G0_ASCII;
+ return 0;
+}
+
static VALUE
-fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l)
+fun_si_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l)
{
- if (t->stateful[0] == 0)
+ unsigned char *sp = statep;
+ if (*sp == G0_ASCII)
return (VALUE)NOMAP;
else if (0x21 <= s[0] && s[0] <= 0x7e)
return (VALUE)iso2022jp_to_eucjp_jisx0208_rest;
@@ -39,14 +51,15 @@ fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l)
}
static int
-fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l, unsigned char* o)
{
+ unsigned char *sp = statep;
if (s[0] == 0x1b) {
if (s[1] == '(') {
switch (s[l-1]) {
case 'B':
case 'J':
- t->stateful[0] = 0;
+ *sp = G0_ASCII;
break;
}
}
@@ -54,7 +67,7 @@ fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, u
switch (s[l-1]) {
case '@':
case 'B':
- t->stateful[0] = 1;
+ *sp = G0_JISX0208;
break;
}
}
@@ -75,31 +88,28 @@ rb_ISO_2022_JP_to_EUC_JP = {
3, /* max_input */
3, /* max_output */
stateful_decoder, /* stateful_type */
+ 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp
};
static int
-fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o)
+fun_so_eucjp_to_iso2022jp(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
+ unsigned char *sp = statep;
unsigned char *output0 = o;
- if (t->stateful[0] == 0) {
- t->stateful[0] = 1; /* initialized flag */
- t->stateful[1] = 1; /* G0 = ASCII */
- }
-
- if (l != t->stateful[1]) {
+ if (*sp != (l == 1 ? G0_ASCII : G0_JISX0208)) {
if (l == 1) {
*o++ = 0x1b;
*o++ = '(';
*o++ = 'B';
- t->stateful[1] = 1; /* G0 = ASCII */
+ *sp = G0_ASCII;
}
else {
*o++ = 0x1b;
*o++ = '$';
*o++ = 'B';
- t->stateful[1] = 2; /* G0 = JIS X 0208 1983 */
+ *sp = G0_JISX0208; /* JIS X 0208 1983 */
}
}
@@ -115,27 +125,27 @@ fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, u
}
static int
-iso2022jp_reset_sequence_size(rb_transcoding *t)
+iso2022jp_reset_sequence_size(void *statep)
{
- if (t->stateful[1] == 2)
+ unsigned char *sp = statep;
+ if (*sp == G0_JISX0208)
return 3;
return 0;
}
static int
-finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o)
+finish_eucjp_to_iso2022jp(void *statep, unsigned char *o)
{
+ unsigned char *sp = statep;
unsigned char *output0 = o;
- if (t->stateful[0] == 0)
+ if (*sp == G0_ASCII)
return 0;
- if (t->stateful[1] != 1) {
- *o++ = 0x1b;
- *o++ = '(';
- *o++ = 'B';
- t->stateful[1] = 1;
- }
+ *o++ = 0x1b;
+ *o++ = '(';
+ *o++ = 'B';
+ *sp = G0_ASCII;
return o - output0;
}
@@ -148,6 +158,7 @@ rb_EUC_JP_to_ISO_2022_JP = {
3, /* max_input */
5, /* max_output */
stateful_encoder, /* stateful_type */
+ 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp,
finish_eucjp_to_iso2022jp,
iso2022jp_reset_sequence_size, finish_eucjp_to_iso2022jp
diff --git a/enc/trans/japanese.trans b/enc/trans/japanese.trans
index 279957b972..64f38fbfc6 100644
--- a/enc/trans/japanese.trans
+++ b/enc/trans/japanese.trans
@@ -20,7 +20,7 @@
<%= transcode_generated_code %>
static int
-fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o)
+fun_so_eucjp2sjis(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
if (s[0] == 0x8e) {
o[0] = s[1];
@@ -41,7 +41,7 @@ fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned
}
static int
-fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o)
+fun_so_sjis2eucjp(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
if (l == 1) {
o[0] = '\x8e';
@@ -49,7 +49,7 @@ fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned
return 2;
}
else {
- int h, m, l;
+ int h, l;
h = s[0];
l = s[1];
if (0xe0 <= h)
@@ -74,6 +74,7 @@ rb_eucjp2sjis = {
3, /* max_input */
2, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_eucjp2sjis
};
@@ -85,6 +86,7 @@ rb_sjis2eucjp = {
2, /* max_input */
2, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_sjis2eucjp
};
diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans
index da8202eea4..7b10d368b1 100644
--- a/enc/trans/newline.trans
+++ b/enc/trans/newline.trans
@@ -21,33 +21,41 @@
<%= transcode_generated_code %>
+#define NORMAL 0
+#define JUST_AFTER_CR 1
+
+static int
+universal_newline_init(void *statep)
+{
+ unsigned char *sp = statep;
+ *sp = NORMAL;
+ return 0;
+}
+
static int
-fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_universal_newline(void *statep, const unsigned char* s, size_t l, unsigned char* o)
{
+ unsigned char *sp = statep;
int len;
- /*
- t->stateful[0] == 0 : normal
- t->stateful[0] == 1 : just after '\r'
- */
if (s[0] == '\n') {
- if (t->stateful[0] == 0) {
+ if (*sp == NORMAL) {
o[0] = '\n';
len = 1;
}
- else {
+ else { /* JUST_AFTER_CR */
len = 0;
}
- t->stateful[0] = 0;
+ *sp = NORMAL;
}
else if (s[0] == '\r') {
o[0] = '\n';
len = 1;
- t->stateful[0] = 1;
+ *sp = JUST_AFTER_CR;
}
else {
o[0] = s[0];
len = 1;
- t->stateful[0] = 0;
+ *sp = NORMAL;
}
return len;
}
@@ -60,6 +68,7 @@ rb_universal_newline = {
1, /* max_input */
1, /* max_output */
stateful_decoder, /* stateful_type */
+ 1, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_universal_newline
};
@@ -71,6 +80,7 @@ rb_crlf_newline = {
1, /* max_input */
2, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL
};
@@ -82,6 +92,7 @@ rb_cr_newline = {
1, /* max_input */
1, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL
};
diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans
index 57cd77dd77..9ffff341a9 100644
--- a/enc/trans/utf_16_32.trans
+++ b/enc/trans/utf_16_32.trans
@@ -38,7 +38,7 @@
<%= transcode_generated_code %>
static int
-fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
if (!s[0] && s[1]<0x80) {
o[0] = s[1];
@@ -66,7 +66,7 @@ fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsign
}
static int
-fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
if (!(s[0]&0x80)) {
o[0] = 0x00;
@@ -94,7 +94,7 @@ fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned
}
static int
-fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
if (!s[1] && s[0]<0x80) {
o[0] = s[0];
@@ -122,7 +122,7 @@ fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsign
}
static int
-fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
if (!(s[0]&0x80)) {
o[1] = 0x00;
@@ -150,7 +150,7 @@ fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned
}
static int
-fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
if (!s[1]) {
if (s[2]==0 && s[3]<0x80) {
@@ -179,7 +179,7 @@ fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsign
}
static int
-fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
o[0] = 0;
if (!(s[0]&0x80)) {
@@ -205,7 +205,7 @@ fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned
}
static int
-fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
if (!s[2]) {
if (s[1]==0 && s[0]<0x80) {
@@ -234,7 +234,7 @@ fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsign
}
static int
-fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
+fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
o[3] = 0;
if (!(s[0]&0x80)) {
@@ -267,6 +267,7 @@ rb_from_UTF_16BE = {
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_16be
};
@@ -278,6 +279,7 @@ rb_to_UTF_16BE = {
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_16be
};
@@ -289,6 +291,7 @@ rb_from_UTF_16LE = {
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_16le
};
@@ -300,6 +303,7 @@ rb_to_UTF_16LE = {
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_16le
};
@@ -311,6 +315,7 @@ rb_from_UTF_32BE = {
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_32be
};
@@ -322,6 +327,7 @@ rb_to_UTF_32BE = {
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_32be
};
@@ -333,6 +339,7 @@ rb_from_UTF_32LE = {
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_32le
};
@@ -344,6 +351,7 @@ rb_to_UTF_32LE = {
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
+ 0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_32le
};