summaryrefslogtreecommitdiff
path: root/transcode_data.h
diff options
context:
space:
mode:
Diffstat (limited to 'transcode_data.h')
-rw-r--r--transcode_data.h129
1 files changed, 91 insertions, 38 deletions
diff --git a/transcode_data.h b/transcode_data.h
index f025b4e6fe..2007eb06d3 100644
--- a/transcode_data.h
+++ b/transcode_data.h
@@ -14,19 +14,17 @@
#ifndef RUBY_TRANSCODE_DATA_H
#define RUBY_TRANSCODE_DATA_H 1
-typedef unsigned char base_element;
+RUBY_SYMBOL_EXPORT_BEGIN
-typedef struct byte_lookup {
- const base_element *base;
- const struct byte_lookup *const *info;
-} BYTE_LOOKUP;
+#define WORDINDEX_SHIFT_BITS 2
+#define WORDINDEX2INFO(widx) ((widx) << WORDINDEX_SHIFT_BITS)
+#define INFO2WORDINDEX(info) ((info) >> WORDINDEX_SHIFT_BITS)
+#define BYTE_LOOKUP_BASE(bl) ((bl)[0])
+#define BYTE_LOOKUP_INFO(bl) ((bl)[1])
-#ifndef PType
-/* data file needs to treat this as a pointer, to remove warnings */
-#define PType (const BYTE_LOOKUP *)
-#endif
+#define PType (unsigned int)
-#define NOMAP (PType 0x01) /* single byte direct map */
+#define NOMAP (PType 0x01) /* direct map */
#define ONEbt (0x02) /* one byte payload */
#define TWObt (0x03) /* two bytes payload */
#define THREEbt (0x05) /* three bytes payload */
@@ -38,16 +36,47 @@ typedef struct byte_lookup {
#define FUNsi (PType 0x0D) /* function from start to info */
#define FUNio (PType 0x0E) /* function from info to output */
#define FUNso (PType 0x0F) /* function from start to output */
+#define STR1 (PType 0x11) /* string 4 <= len <= 259 bytes: 1byte length + content */
+#define GB4bt (PType 0x12) /* GB18030 four bytes payload */
+#define FUNsio (PType 0x13) /* function from start and info to output */
-#define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt))
-#define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
-#define o3(b1,b2,b3) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|THREEbt))
-#define o4(b0,b1,b2,b3) (PType((((unsigned char)(b1))<< 8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt))
+#define STR1_LENGTH(byte_addr) (unsigned int)(*(byte_addr) + 4)
+#define STR1_BYTEINDEX(w) ((w) >> 6)
+#define makeSTR1(bi) (((bi) << 6) | STR1)
+#define makeSTR1LEN(len) ((len)-4)
-#define getBT1(a) (((a)>> 8)&0xFF)
-#define getBT2(a) (((a)>>16)&0xFF)
-#define getBT3(a) (((a)>>24)&0xFF)
-#define getBT0(a) ((((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */
+#define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt))
+#define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|\
+ (((unsigned char)(b2))<<16)|\
+ TWObt))
+#define o3(b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|\
+ (((unsigned char)(b2))<<16)|\
+ (((unsigned int)(unsigned char)(b3))<<24)|\
+ THREEbt)&\
+ 0xffffffffU))
+#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|\
+ (((unsigned char)(b2))<<16)|\
+ (((unsigned int)(unsigned char)(b3))<<24)|\
+ ((((unsigned char)(b0))&0x07)<<5)|\
+ FOURbt)&\
+ 0xffffffffU))
+#define g4(b0,b1,b2,b3) (PType(((((unsigned char)(b0))<<8)|\
+ (((unsigned char)(b2))<<16)|\
+ ((((unsigned char)(b1))&0x0f)<<24)|\
+ ((((unsigned int)(unsigned char)(b3))&0x0f)<<28)|\
+ GB4bt)&\
+ 0xffffffffU))
+#define funsio(diff) (PType((((unsigned int)(diff))<<8)|FUNsio))
+
+#define getBT1(a) ((unsigned char)((a)>> 8))
+#define getBT2(a) ((unsigned char)((a)>>16))
+#define getBT3(a) ((unsigned char)((a)>>24))
+#define getBT0(a) (((unsigned char)((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */
+
+#define getGB4bt0(a) ((unsigned char)((a)>> 8))
+#define getGB4bt1(a) (((unsigned char)((a)>>24)&0x0F)|0x30)
+#define getGB4bt2(a) ((unsigned char)((a)>>16))
+#define getGB4bt3(a) (((unsigned char)((a)>>28)&0x0F)|0x30)
#define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii))
@@ -56,31 +85,55 @@ typedef struct byte_lookup {
#define TWOTRAIL /* legal but undefined if two more trailing UTF-8 */
#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */
-/* dynamic structure, one per conversion (similar to iconv_t) */
-/* may carry conversion state (e.g. for iso-2022-jp) */
-typedef struct rb_transcoding {
- const struct rb_transcoder *transcoder;
- VALUE ruby_string_dest; /* the String used as the conversion destination,
- or NULL if something else is being converted */
- unsigned char *(*flush_func)(struct rb_transcoding*, int, int);
-} rb_transcoding;
+typedef enum {
+ asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */
+ asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */
+ asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */
+ /* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. */
+} rb_transcoder_asciicompat_type_t;
+
+typedef struct rb_transcoder rb_transcoder;
/* static structure, one per supported encoding pair */
-typedef struct rb_transcoder {
- const char *from_encoding;
- const char *to_encoding;
- const BYTE_LOOKUP *conv_tree_start;
+struct rb_transcoder {
+ const char *src_encoding;
+ const char *dst_encoding;
+ unsigned int conv_tree_start;
+ const unsigned char *byte_array;
+ unsigned int byte_array_length;
+ const unsigned int *word_array;
+ unsigned int word_array_length;
+ int word_size;
+ int input_unit_length;
+ int max_input;
int max_output;
- int from_utf8;
- void (*preprocessor)(unsigned char**, unsigned char**, unsigned char*, unsigned char*, struct rb_transcoding *);
- void (*postprocessor)(unsigned char**, unsigned char**, unsigned char*, unsigned char*, struct rb_transcoding *);
- VALUE (*func_ii)(VALUE); /* info -> info */
- VALUE (*func_si)(const unsigned char *); /* start -> info */
- int (*func_io)(VALUE, const unsigned char*); /* info -> output */
- int (*func_so)(const unsigned char*, unsigned char*); /* start -> output */
-} rb_transcoder;
+ rb_transcoder_asciicompat_type_t asciicompat_type;
+ size_t state_size;
+ int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */
+ int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */
+ VALUE (*func_ii)(void*, VALUE); /* info -> info */
+ VALUE (*func_si)(void*, const unsigned char*, size_t); /* start -> info */
+ ssize_t (*func_io)(void*, VALUE, const unsigned char*, size_t); /* info -> output */
+ ssize_t (*func_so)(void*, const unsigned char*, size_t, unsigned char*, size_t); /* start -> output */
+ ssize_t (*finish_func)(void*, unsigned char*, size_t); /* -> output */
+ ssize_t (*resetsize_func)(void*); /* -> len */
+ ssize_t (*resetstate_func)(void*, unsigned char*, size_t); /* -> output */
+ ssize_t (*func_sio)(void*, const unsigned char*, size_t, VALUE, unsigned char*, size_t); /* start -> output */
+};
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
void rb_register_transcoder(const rb_transcoder *);
+/*
+ * To get rid of collision of initializer symbols in statically-linked encodings
+ * and transcoders
+ */
+#if defined(EXTSTATIC) && EXTSTATIC
+# define TRANS_INIT(name) void Init_trans_ ## name(void)
+#else
+# define TRANS_INIT(name) void Init_ ## name(void)
+#endif
+
+RUBY_SYMBOL_EXPORT_END
+
#endif /* RUBY_TRANSCODE_DATA_H */