From 82c673d3a1cab4a9f8a1e9ac30b28f108d726606 Mon Sep 17 00:00:00 2001 From: duerst Date: Wed, 14 Jan 2009 11:12:30 +0000 Subject: * enc/trans/gb18030.trans, gb18030-tbl.rb: new Chinese GB18030 transcoding (from Yoshihiro Kambayashi) * test/ruby/test_transcode.rb: added tests for the above (from Yoshihiro Kambayashi) * transcode_data.h, transcode.c, tool/transcode_tblgen.rb: added support for GB18030-specific 4-byte sequences (with Yoshihiro Kambayashi) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@21509 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- transcode_data.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'transcode_data.h') diff --git a/transcode_data.h b/transcode_data.h index 497eb3c51b..a26fb42034 100644 --- a/transcode_data.h +++ b/transcode_data.h @@ -35,6 +35,7 @@ #define FUNio (PType 0x0E) /* function from info to output */ #define FUNso (PType 0x0F) /* function from start to output */ #define STR1 (PType 0x11) /* string 4 <= len <= 259 bytes: 1byte length + content */ +#define GB4bt (PType 0x12) /* GB18030 four bytes payload */ #define STR1_LENGTH(byte_addr) (*(byte_addr) + 4) #define STR1_BYTEINDEX(w) ((w) >> 6) @@ -44,13 +45,19 @@ #define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt)) #define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt)) #define o3(b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|THREEbt)&0xffffffffU)) -#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<< 8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU)) +#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU)) /* b1..b3 occupy bits 8-15, 16-23, 24-31; only the low 3 bits of b0 are kept, in bits 5-7; the result is tagged with FOURbt */ +#define g4(b0,b1,b2,b3) (PType(((((unsigned char)(b0))<<8)|(((unsigned char)(b2))<<16)|((((unsigned char)(b1))&0x0f)<<24)|((((unsigned char)(b3))&0x0f)<<28)|GB4bt)&0xffffffffU)) /* b0 goes to bits 8-15 and b2 to bits 16-23 in full; only the low nibbles of b1 and b3 are kept, in bits 24-27 and 28-31; the result is tagged with GB4bt. NOTE(review): the b3 nibble is shifted by 28 as a promoted int, so nibble values 8 and 9 reach the sign bit where int is 32 bits - consider casting to unsigned int before the shift */ 
#define getBT1(a) (((a)>> 8)&0xFF) #define getBT2(a) (((a)>>16)&0xFF) #define getBT3(a) (((a)>>24)&0xFF) #define getBT0(a) ((((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */ +#define getGB4bt0(a) (((a)>> 8)&0xFF) /* first byte: read back whole from bits 8-15, where g4 stores b0 */ +#define getGB4bt1(a) (((a)>>24)&0x0F|0x30) /* second byte: low nibble from bits 24-27, then 0x30 is ORed back in; & binds tighter than |, so this groups as (((a)>>24)&0x0F)|0x30. Presumably relies on that byte always being 0x30-0x39 in GB18030 four-byte sequences - confirm */ +#define getGB4bt2(a) (((a)>>16)&0xFF) /* third byte: read back whole from bits 16-23, where g4 stores b2 */ +#define getGB4bt3(a) (((a)>>28)&0x0F|0x30) /* fourth byte: low nibble from bits 28-31, then 0x30 is ORed back in; groups as (((a)>>28)&0x0F)|0x30 */ + #define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii)) /* do we need these??? maybe not, can be done with simple tables */ -- cgit v1.2.3