#include "transcode_data.h"

<%
  # Action map for the stateful ISO-2022-JP decoder.
  # The four escape sequences go to the "so" callback, which records the
  # newly designated G0 character set.  Every other accepted byte goes to
  # the "si" callback, which decides by the current state how it is handled.
  map = {
    "1b2842" => :func_so, # designate US-ASCII to G0. "ESC ( B"
    "1b284a" => :func_so, # designate JIS X 0201 latin to G0. "ESC ( J"
    "1b2440" => :func_so, # designate JIS X 0208 1978 to G0. "ESC $ @"
    "1b2442" => :func_so, # designate JIS X 0208 1983 to G0. "ESC $ B"
    "{00-0d,10-1a,1c-7f}" => :func_si,
  }
  transcode_generate_node(ActionMap.parse(map), "iso2022jp_decoder")

  # Node returned by the "si" callback for the first byte of a JIS X 0208
  # character: accepts the second byte and then calls the "so" callback.
  map_jisx0208_rest = {
    "{21-7e}" => :func_so
  }
  transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_decoder_jisx0208_rest")

  # Action map for the stateful ISO-2022-JP encoder: single bytes and the
  # 3-byte forms introduced by 0x90 / 0x92 all go through the "so" callback.
  map = {
    "{00-0d,10-1a,1c-7f}" => :func_so,
    "90{a1-fe}{a1-fe}" => :func_so,
    "92{a1-fe}{a1-fe}" => :func_so,
  }
  transcode_generate_node(ActionMap.parse(map), "iso2022jp_encoder")

  # stateless-ISO-2022-JP to EUC-JP.
  map = {
    "{00-0d,10-1a,1c-7f}" => :nomap,
    "90{a1-fe}{a1-fe}" => :func_so,
    "92{a1-fe}{a1-fe}" => :func_so,
  }
  transcode_generate_node(ActionMap.parse(map), "stateless_iso2022jp_to_eucjp")

  # EUC-JP to stateless-ISO-2022-JP.  Only the two-byte {a1-fe}{a1-fe} form
  # is converted; the 8e and 8f prefixed forms are mapped to :undef.
  map = {
    "{0e,0f,1b}" => :undef,
    "{00-0d,10-1a,1c-7f}" => :nomap,
    "{a1-fe}{a1-fe}" => :func_so,
    "8e{a1-fe}" => :undef,
    "8f{a1-fe}{a1-fe}" => :undef,
  }
  transcode_generate_node(ActionMap.parse(map), "eucjp_to_stateless_iso2022jp")
%>

<%= transcode_generated_code %>

/*
 * Values of the one-byte conversion state: the character set currently
 * designated to G0.  JIS X 0201 latin is ignored, i.e. "ESC ( J" selects
 * G0_ASCII just like "ESC ( B" does.
 */
#define G0_ASCII 0
#define G0_JISX0208_1978 1
#define G0_JISX0208_1983 2

/*
 * First byte of a 3-byte character in the stateless form.  The values are
 * octal (0220 == 0x90, 0222 == 0x92) and match the "90..." / "92..."
 * entries of the action maps above.
 */
#define EMACS_MULE_LEADING_CODE_JISX0208_1978 0220
#define EMACS_MULE_LEADING_CODE_JISX0208_1983 0222

/*
 * Set the one-byte state at statep to G0_ASCII.
 * Used below both as state_init and as state_fini.  Always returns 0.
 */
static int
iso2022jp_init(void *statep)
{
    unsigned char *g0 = statep;

    g0[0] = G0_ASCII;
    return 0;
}

/*
 * Decoder callback for every non-escape input byte.
 *
 * statep: one-byte G0 state.
 * s:      input bytes; only s[0] is examined, and only outside ASCII state.
 * l:      input length (unused).
 *
 * Returns NOMAP while G0 is ASCII.  Otherwise returns the
 * iso2022jp_decoder_jisx0208_rest node when s[0] is in 0x21..0x7e, and
 * INVALID for any other byte.
 */
static VALUE
fun_si_iso2022jp_decoder(void *statep, const unsigned char *s, size_t l)
{
    const unsigned char *g0 = statep;

    if (*g0 == G0_ASCII) {
        return (VALUE)NOMAP;
    }
    if (s[0] < 0x21 || 0x7e < s[0]) {
        return (VALUE)INVALID;
    }
    return (VALUE)iso2022jp_decoder_jisx0208_rest;
}

/*
 * Decoder output callback.
 *
 * When s starts with ESC (0x1b) the sequence only updates the G0 state and
 * produces no output (returns 0).  Otherwise s[0] and s[1] are the two bytes
 * of a JIS X 0208 character: three bytes are stored in o (the leading code
 * for the current state, then both input bytes with bit 7 set) and 3 is
 * returned.
 */
static int
fun_so_iso2022jp_decoder(void *statep, const unsigned char *s, size_t l, unsigned char* o)
{
    unsigned char *g0 = statep;
    unsigned char last;

    if (s[0] != 0x1b) {
        o[0] = (*g0 == G0_JISX0208_1978)
            ? EMACS_MULE_LEADING_CODE_JISX0208_1978
            : EMACS_MULE_LEADING_CODE_JISX0208_1983;
        o[1] = s[0] | 0x80;
        o[2] = s[1] | 0x80;
        return 3;
    }

    /* Escape sequence: the final byte selects the character set. */
    last = s[l - 1];
    if (s[1] == '(') {
        if (last == 'B' || last == 'J') {
            *g0 = G0_ASCII;
        }
    }
    else if (last == '@') {
        *g0 = G0_JISX0208_1978;
    }
    else if (last == 'B') {
        *g0 = G0_JISX0208_1983;
    }
    return 0;
}

static const rb_transcoder
rb_iso2022jp_decoder = {
    "ISO-2022-JP", "stateless-ISO-2022-JP", iso2022jp_decoder,
    TRANSCODE_TABLE_INFO,
    1, /* input_unit_length */
    3, /* max_input */
    3, /* max_output */
    asciicompat_decoder, /* asciicompat_type */
    1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
    NULL, fun_si_iso2022jp_decoder,
    NULL, fun_so_iso2022jp_decoder
};

/*
 * Encoder output callback.
 *
 * A one-byte input (l == 1) belongs to ASCII; a longer input is a 3-byte
 * stateless character whose first byte selects JIS X 0208 1978 or 1983.
 * If the character set differs from the current G0 state, the matching
 * 3-byte escape sequence is written first and the state is updated.
 * Then the character itself is written with bit 7 cleared.
 *
 * Returns the number of bytes stored in o (at most 5).
 */
static int
fun_so_iso2022jp_encoder(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
    unsigned char *g0 = statep;
    int written = 0;
    int target;

    if (l == 1) {
        target = G0_ASCII;
    }
    else {
        target = (s[0] == EMACS_MULE_LEADING_CODE_JISX0208_1978)
            ? G0_JISX0208_1978
            : G0_JISX0208_1983;
    }

    if (*g0 != target) {
        o[written++] = 0x1b;
        switch (target) {
          case G0_ASCII:
            o[written++] = '(';
            o[written++] = 'B';
            break;
          case G0_JISX0208_1978:
            o[written++] = '$';
            o[written++] = '@';
            break;
          default: /* G0_JISX0208_1983 */
            o[written++] = '$';
            o[written++] = 'B';
            break;
        }
        *g0 = target;
    }

    if (l == 1) {
        o[written++] = s[0] & 0x7f;
    }
    else {
        /* s[0] is the leading code; only the two trailing bytes are emitted. */
        o[written++] = s[1] & 0x7f;
        o[written++] = s[2] & 0x7f;
    }

    return written;
}

/*
 * Number of bytes finish_iso2022jp_encoder would write for the current
 * state: 3 unless G0 is already ASCII, in which case 0.
 */
static int
iso2022jp_encoder_reset_sequence_size(void *statep)
{
    const unsigned char *g0 = statep;

    return (*g0 != G0_ASCII) ? 3 : 0;
}

/*
 * Return the encoder to ASCII.  Writes "ESC ( B" to o and sets the state to
 * G0_ASCII when another character set is designated; writes nothing when
 * G0 is already ASCII.  Returns the number of bytes written (3 or 0).
 */
static int
finish_iso2022jp_encoder(void *statep, unsigned char *o)
{
    unsigned char *g0 = statep;

    if (*g0 != G0_ASCII) {
        o[0] = 0x1b;
        o[1] = '(';
        o[2] = 'B';
        *g0 = G0_ASCII;
        return 3;
    }
    return 0;
}

static const rb_transcoder
rb_iso2022jp_encoder = {
    "stateless-ISO-2022-JP", "ISO-2022-JP", iso2022jp_encoder,
    TRANSCODE_TABLE_INFO,
    1, /* input_unit_length */
    3, /* max_input */
    5, /* max_output */
    asciicompat_encoder, /* asciicompat_type */
    1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
    NULL, NULL,
    NULL, fun_so_iso2022jp_encoder,
    finish_iso2022jp_encoder,
    iso2022jp_encoder_reset_sequence_size, finish_iso2022jp_encoder
};

/*
 * stateless-ISO-2022-JP to EUC-JP: drop the leading code s[0] and copy the
 * two following bytes unchanged.  Returns 2.
 */
static int
fun_so_stateless_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
    const unsigned char *trail = s + 1;

    o[0] = trail[0];
    o[1] = trail[1];
    return 2;
}

static const rb_transcoder
rb_stateless_iso2022jp_to_eucjp = {
    "stateless-ISO-2022-JP", "EUC-JP", stateless_iso2022jp_to_eucjp,
    TRANSCODE_TABLE_INFO,
    1, /* input_unit_length */
    3, /* max_input */
    2, /* max_output */
    asciicompat_converter, /* asciicompat_type */
    0, NULL, NULL, /* state_size, state_init, state_fini */
    NULL, NULL,
    NULL, fun_so_stateless_iso2022jp_to_eucjp,
};

/*
 * EUC-JP to stateless-ISO-2022-JP: prefix the two input bytes with the
 * JIS X 0208 1983 leading code.  Returns 3.
 */
static int
fun_so_eucjp_to_stateless_iso2022jp(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
    unsigned char *dst = o;

    *dst++ = EMACS_MULE_LEADING_CODE_JISX0208_1983;
    *dst++ = s[0];
    *dst++ = s[1];
    return 3;
}

static const rb_transcoder
rb_eucjp_to_stateless_iso2022jp = {
    "EUC-JP", "stateless-ISO-2022-JP", eucjp_to_stateless_iso2022jp,
    TRANSCODE_TABLE_INFO,
    1, /* input_unit_length */
    3, /* max_input */
    3, /* max_output */
    asciicompat_converter, /* asciicompat_type */
    0, NULL, NULL, /* state_size, state_init, state_fini */
    NULL, NULL,
    NULL, fun_so_eucjp_to_stateless_iso2022jp,
};

/* Register the four transcoders defined in this file. */
void
Init_iso2022(void)
{
    rb_register_transcoder(&rb_iso2022jp_decoder);
    rb_register_transcoder(&rb_iso2022jp_encoder);
    rb_register_transcoder(&rb_stateless_iso2022jp_to_eucjp);
    rb_register_transcoder(&rb_eucjp_to_stateless_iso2022jp);
}