diff options
Diffstat (limited to 'pack.c')
| -rw-r--r-- | pack.c | 2168 |
1 files changed, 1032 insertions, 1136 deletions
@@ -2,380 +2,135 @@ pack.c - - $Author: nobu $ - $Date: 2006/08/04 04:58:25 $ + $Author$ created at: Thu Feb 10 15:17:05 JST 1994 - Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 1993-2007 Yukihiro Matsumoto **********************************************************************/ -#include "ruby.h" +#include "internal.h" #include <sys/types.h> #include <ctype.h> +#include <errno.h> -#define SIZE16 2 -#define SIZE32 4 - -#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 -# define NATINT_PACK -#endif - -#ifdef NATINT_PACK -# define OFF16B(p) ((char*)(p) + (natint?0:(sizeof(short) - SIZE16))) -# define OFF32B(p) ((char*)(p) + (natint?0:(sizeof(long) - SIZE32))) -# define NATINT_LEN(type,len) (natint?sizeof(type):(len)) -# ifdef WORDS_BIGENDIAN -# define OFF16(p) OFF16B(p) -# define OFF32(p) OFF32B(p) -# endif -# define NATINT_HTOVS(x) (natint?htovs(x):htov16(x)) -# define NATINT_HTOVL(x) (natint?htovl(x):htov32(x)) -# define NATINT_HTONS(x) (natint?htons(x):hton16(x)) -# define NATINT_HTONL(x) (natint?htonl(x):hton32(x)) +/* + * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG + * instead of HAVE_LONG_LONG or LONG_LONG. + * This means q! and Q! means always the standard long long type and + * causes ArgumentError for platforms which has no long long type, + * even if the platform has an implementation specific 64bit type. + * This behavior is consistent with the document of pack/unpack. + */ +#ifdef HAVE_TRUE_LONG_LONG +static const char natstr[] = "sSiIlLqQjJ"; #else -# define NATINT_LEN(type,len) sizeof(type) -# define NATINT_HTOVS(x) htovs(x) -# define NATINT_HTOVL(x) htovl(x) -# define NATINT_HTONS(x) htons(x) -# define NATINT_HTONL(x) htonl(x) -#endif - -#ifndef OFF16 -# define OFF16(p) (char*)(p) -# define OFF32(p) (char*)(p) +static const char natstr[] = "sSiIlLjJ"; #endif -#ifndef OFF16B -# define OFF16B(p) (char*)(p) -# define OFF32B(p) (char*)(p) -#endif - -#define define_swapx(x, xtype) \ -static xtype \ -TOKEN_PASTE(swap,x)(z) \ - xtype z; \ -{ \ - xtype r; \ - xtype *zp; \ - unsigned char *s, *t; \ - int i; \ - \ - zp = xmalloc(sizeof(xtype)); \ - *zp = z; \ - s = (unsigned char*)zp; \ - t = xmalloc(sizeof(xtype)); \ - for (i=0; i<sizeof(xtype); i++) { \ - t[sizeof(xtype)-i-1] = s[i]; \ - } \ - r = *(xtype *)t; \ - free(t); \ - free(zp); \ - return r; \ -} +static const char endstr[] = "sSiIlLqQjJ"; -#ifndef swap16 -#define swap16(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF)) -#endif -#if SIZEOF_SHORT == 2 -#define swaps(x) swap16(x) -#else -#if SIZEOF_SHORT == 4 -#define swaps(x) ((((x)&0xFF)<<24) \ - |(((x)>>24)&0xFF) \ - |(((x)&0x0000FF00)<<8) \ - |(((x)&0x00FF0000)>>8) ) +#ifdef HAVE_TRUE_LONG_LONG +/* It is intentional to use long long instead of LONG_LONG. */ +# define NATINT_LEN_Q NATINT_LEN(long long, 8) #else -define_swapx(s,short) -#endif +# define NATINT_LEN_Q 8 #endif -#ifndef swap32 -#define swap32(x) ((((x)&0xFF)<<24) \ - |(((x)>>24)&0xFF) \ - |(((x)&0x0000FF00)<<8) \ - |(((x)&0x00FF0000)>>8) ) -#endif -#if SIZEOF_LONG == 4 -#define swapl(x) swap32(x) -#else -#if SIZEOF_LONG == 8 -#define swapl(x) ((((x)&0x00000000000000FF)<<56) \ - |(((x)&0xFF00000000000000)>>56) \ - |(((x)&0x000000000000FF00)<<40) \ - |(((x)&0x00FF000000000000)>>40) \ - |(((x)&0x0000000000FF0000)<<24) \ - |(((x)&0x0000FF0000000000)>>24) \ - |(((x)&0x00000000FF000000)<<8) \ - |(((x)&0x000000FF00000000)>>8)) -#else -define_swapx(l,long) -#endif +#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8) +# define NATINT_PACK #endif -#if SIZEOF_FLOAT == 4 -#if SIZEOF_LONG == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_LONG */ -#define swapf(x) swapl(x) -#define FLOAT_SWAPPER unsigned long -#else -#if SIZEOF_SHORT == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_SHORT */ -#define swapf(x) swaps(x) -#define FLOAT_SWAPPER unsigned short -#else /* SIZEOF_FLOAT == 4 but undivide by known size of int */ -define_swapx(f,float) -#endif /* #if SIZEOF_SHORT == 4 */ -#endif /* #if SIZEOF_LONG == 4 */ -#else /* SIZEOF_FLOAT != 4 */ -define_swapx(f,float) -#endif /* #if SIZEOF_FLOAT == 4 */ - -#if SIZEOF_DOUBLE == 8 -#if SIZEOF_LONG == 8 /* SIZEOF_DOUBLE == 8 == SIZEOF_LONG */ -#define swapd(x) swapl(x) -#define DOUBLE_SWAPPER unsigned long -#else -#if SIZEOF_LONG == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_LONG */ -static double -swapd(d) - const double d; -{ - double dtmp = d; - unsigned long utmp[2]; - unsigned long utmp0; - - utmp[0] = 0; utmp[1] = 0; - memcpy(utmp,&dtmp,sizeof(double)); - utmp0 = utmp[0]; - utmp[0] = swapl(utmp[1]); - utmp[1] = swapl(utmp0); - memcpy(&dtmp,utmp,sizeof(double)); - return dtmp; -} -#else -#if SIZEOF_SHORT == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_SHORT */ -static double -swapd(d) - const double d; -{ - double dtmp = d; - unsigned short utmp[2]; - unsigned short utmp0; - - utmp[0] = 0; utmp[1] = 0; - memcpy(utmp,&dtmp,sizeof(double)); - utmp0 = utmp[0]; - utmp[0] = swaps(utmp[1]); - utmp[1] = swaps(utmp0); - memcpy(&dtmp,utmp,sizeof(double)); - return dtmp; -} -#else /* SIZEOF_DOUBLE == 8 but undivied by known size of int */ -define_swapx(d, double) -#endif /* #if SIZEOF_SHORT == 4 */ -#endif /* #if SIZEOF_LONG == 4 */ -#endif /* #if SIZEOF_LONG == 8 */ -#else /* SIZEOF_DOUBLE != 8 */ -define_swapx(d, double) -#endif /* #if SIZEOF_DOUBLE == 8 */ - -#undef define_swapx - #ifdef DYNAMIC_ENDIAN -#ifdef ntohs -#undef ntohs -#undef ntohl -#undef htons -#undef htonl -#endif -static int -endian() -{ - static int init = 0; - static int endian_value; - char *p; - - if (init) return endian_value; - init = 1; - p = (char*)&init; - return endian_value = p[0]?0:1; -} - -#define ntohs(x) (endian()?(x):swaps(x)) -#define ntohl(x) (endian()?(x):swapl(x)) -#define ntohf(x) (endian()?(x):swapf(x)) -#define ntohd(x) (endian()?(x):swapd(x)) -#define htons(x) (endian()?(x):swaps(x)) -#define htonl(x) (endian()?(x):swapl(x)) -#define htonf(x) (endian()?(x):swapf(x)) -#define htond(x) (endian()?(x):swapd(x)) -#define htovs(x) (endian()?swaps(x):(x)) -#define htovl(x) (endian()?swapl(x):(x)) -#define htovf(x) (endian()?swapf(x):(x)) -#define htovd(x) (endian()?swapd(x):(x)) -#define vtohs(x) (endian()?swaps(x):(x)) -#define vtohl(x) (endian()?swapl(x):(x)) -#define vtohf(x) (endian()?swapf(x):(x)) -#define vtohd(x) (endian()?swapd(x):(x)) -# ifdef NATINT_PACK -#define htov16(x) (endian()?swap16(x):(x)) -#define htov32(x) (endian()?swap32(x):(x)) -#define hton16(x) (endian()?(x):swap16(x)) -#define hton32(x) (endian()?(x):swap32(x)) -# endif + /* for universal binary of NEXTSTEP and MacOS X */ + /* useless since autoconf 2.63? */ + static int + is_bigendian(void) + { + static int init = 0; + static int endian_value; + char *p; + + if (init) return endian_value; + init = 1; + p = (char*)&init; + return endian_value = p[0]?0:1; + } +# define BIGENDIAN_P() (is_bigendian()) +#elif defined(WORDS_BIGENDIAN) +# define BIGENDIAN_P() 1 #else -#ifdef WORDS_BIGENDIAN -#ifndef ntohs -#define ntohs(x) (x) -#define ntohl(x) (x) -#define htons(x) (x) -#define htonl(x) (x) -#endif -#define ntohf(x) (x) -#define ntohd(x) (x) -#define htonf(x) (x) -#define htond(x) (x) -#define htovs(x) swaps(x) -#define htovl(x) swapl(x) -#define htovf(x) swapf(x) -#define htovd(x) swapd(x) -#define vtohs(x) swaps(x) -#define vtohl(x) swapl(x) -#define vtohf(x) swapf(x) -#define vtohd(x) swapd(x) -# ifdef NATINT_PACK -#define htov16(x) swap16(x) -#define htov32(x) swap32(x) -#define hton16(x) (x) -#define hton32(x) (x) -# endif -#else /* LITTLE ENDIAN */ -#ifdef ntohs -#undef ntohs -#undef ntohl -#undef htons -#undef htonl -#endif -#define ntohs(x) swaps(x) -#define ntohl(x) swapl(x) -#define htons(x) swaps(x) -#define htonl(x) swapl(x) -#define ntohf(x) swapf(x) -#define ntohd(x) swapd(x) -#define htonf(x) swapf(x) -#define htond(x) swapd(x) -#define htovs(x) (x) -#define htovl(x) (x) -#define htovf(x) (x) -#define htovd(x) (x) -#define vtohs(x) (x) -#define vtohl(x) (x) -#define vtohf(x) (x) -#define vtohd(x) (x) -# ifdef NATINT_PACK -#define htov16(x) (x) -#define htov32(x) (x) -#define hton16(x) swap16(x) -#define hton32(x) swap32(x) -# endif -#endif +# define BIGENDIAN_P() 0 #endif -#ifdef FLOAT_SWAPPER -#define FLOAT_CONVWITH(y) FLOAT_SWAPPER y; -#define HTONF(x,y) (memcpy(&y,&x,sizeof(float)), \ - y = htonf((FLOAT_SWAPPER)y), \ - memcpy(&x,&y,sizeof(float)), \ - x) -#define HTOVF(x,y) (memcpy(&y,&x,sizeof(float)), \ - y = htovf((FLOAT_SWAPPER)y), \ - memcpy(&x,&y,sizeof(float)), \ - x) -#define NTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \ - y = ntohf((FLOAT_SWAPPER)y), \ - memcpy(&x,&y,sizeof(float)), \ - x) -#define VTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \ - y = vtohf((FLOAT_SWAPPER)y), \ - memcpy(&x,&y,sizeof(float)), \ - x) -#else -#define FLOAT_CONVWITH(y) -#define HTONF(x,y) htonf(x) -#define HTOVF(x,y) htovf(x) -#define NTOHF(x,y) ntohf(x) -#define VTOHF(x,y) vtohf(x) -#endif - -#ifdef DOUBLE_SWAPPER -#define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y; -#define HTOND(x,y) (memcpy(&y,&x,sizeof(double)), \ - y = htond((DOUBLE_SWAPPER)y), \ - memcpy(&x,&y,sizeof(double)), \ - x) -#define HTOVD(x,y) (memcpy(&y,&x,sizeof(double)), \ - y = htovd((DOUBLE_SWAPPER)y), \ - memcpy(&x,&y,sizeof(double)), \ - x) -#define NTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \ - y = ntohd((DOUBLE_SWAPPER)y), \ - memcpy(&x,&y,sizeof(double)), \ - x) -#define VTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \ - y = vtohd((DOUBLE_SWAPPER)y), \ - memcpy(&x,&y,sizeof(double)), \ - x) +#ifdef NATINT_PACK +# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len)) #else -#define DOUBLE_CONVWITH(y) -#define HTOND(x,y) htond(x) -#define HTOVD(x,y) htovd(x) -#define NTOHD(x,y) ntohd(x) -#define VTOHD(x,y) vtohd(x) +# define NATINT_LEN(type,len) ((int)sizeof(type)) #endif -unsigned long rb_big2ulong_pack _((VALUE x)); +typedef union { + float f; + uint32_t u; + char buf[4]; +} FLOAT_SWAPPER; +typedef union { + double d; + uint64_t u; + char buf[8]; +} DOUBLE_SWAPPER; +#define swapf(x) swap32(x) +#define swapd(x) swap64(x) + +#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x)) +#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x)) +#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x)) +#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x)) +#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x)) +#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x)) +#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x)) +#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x)) + +#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x; +#define HTONF(x) ((x).u = rb_htonf((x).u)) +#define HTOVF(x) ((x).u = rb_htovf((x).u)) +#define NTOHF(x) ((x).u = rb_ntohf((x).u)) +#define VTOHF(x) ((x).u = rb_vtohf((x).u)) + +#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x; +#define HTOND(x) ((x).u = rb_htond((x).u)) +#define HTOVD(x) ((x).u = rb_htovd((x).u)) +#define NTOHD(x) ((x).u = rb_ntohd((x).u)) +#define VTOHD(x) ((x).u = rb_vtohd((x).u)) + +#define MAX_INTEGER_PACK_SIZE 8 -static unsigned long -num2i32(x) - VALUE x; -{ - x = rb_to_int(x); /* is nil OK? (should not) */ +static const char toofew[] = "too few arguments"; - if (FIXNUM_P(x)) return FIX2LONG(x); - if (TYPE(x) == T_BIGNUM) { - return rb_big2ulong_pack(x); - } - rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x)); - return 0; /* not reached */ -} +static void encodes(VALUE,const char*,long,int,int); +static void qpencode(VALUE,VALUE,long); -#if SIZEOF_LONG == SIZE32 -# define EXTEND32(x) -#else -/* invariant in modulo 1<<31 */ -# define EXTEND32(x) do { if (!natint) {(x) = (((1L<<31)-1-(x))^~(~0L<<31));}} while(0) -#endif -#if SIZEOF_SHORT == SIZE16 -# define EXTEND16(x) -#else -# define EXTEND16(x) do { if (!natint) {(x) = (short)(((1<<15)-1-(x))^~(~0<<15));}} while(0) -#endif +static unsigned long utf8_to_uv(const char*,long*); -#ifdef HAVE_LONG_LONG -# define QUAD_SIZE sizeof(LONG_LONG) -#else -# define QUAD_SIZE 8 -#endif -static const char toofew[] = "too few arguments"; +static ID id_associated; -static void encodes _((VALUE,char*,long,int)); -static void qpencode _((VALUE,VALUE,long)); +static void +str_associate(VALUE str, VALUE add) +{ + /* assert(NIL_P(rb_attr_get(str, id_associated))); */ + rb_ivar_set(str, id_associated, add); +} -static int uv_to_utf8 _((char*,unsigned long)); -static unsigned long utf8_to_uv _((char*,long*)); +static VALUE +str_associated(VALUE str) +{ + return rb_ivar_lookup(str, id_associated, Qfalse); +} /* * call-seq: - * arr.pack ( aTemplateString ) -> aBinaryString - * + * arr.pack( aTemplateString ) -> aBinaryString + * arr.pack( aTemplateString, buffer: aBufferString ) -> aBufferString + * * Packs the contents of <i>arr</i> into a binary sequence according to * the directives in <i>aTemplateString</i> (see the table below) * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, @@ -384,88 +139,168 @@ static unsigned long utf8_to_uv _((char*,long*)); * elements to convert. If the count is an asterisk * (``<code>*</code>''), all remaining array elements will be * converted. Any of the directives ``<code>sSiIlL</code>'' may be - * followed by an underscore (``<code>_</code>'') to use the underlying + * followed by an underscore (``<code>_</code>'') or + * exclamation mark (``<code>!</code>'') to use the underlying * platform's native size for the specified type; otherwise, they use a * platform-independent size. Spaces are ignored in the template * string. See also <code>String#unpack</code>. - * + * * a = [ "a", "b", "c" ] * n = [ 65, 66, 67 ] * a.pack("A3A3A3") #=> "a b c " * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000" * n.pack("ccc") #=> "ABC" - * + * + * If <i>aBufferString</i> is specified and its capacity is enough, + * +pack+ uses it as the buffer and returns it. + * When the offset is specified by the beginning of <i>aTemplateString</i>, + * the result is filled after the offset. + * If original contents of <i>aBufferString</i> exists and it's longer than + * the offset, the rest of <i>offsetOfBuffer</i> are overwritten by the result. + * If it's shorter, the gap is filled with ``<code>\0</code>''. + * + * Note that ``buffer:'' option does not guarantee not to allocate memory + * in +pack+. If the capacity of <i>aBufferString</i> is not enough, + * +pack+ allocates memory. + * * Directives for +pack+. * - * Directive Meaning - * --------------------------------------------------------------- - * @ | Moves to absolute position - * A | ASCII string (space padded, count is width) - * a | ASCII string (null padded, count is width) - * B | Bit string (descending bit order) - * b | Bit string (ascending bit order) - * C | Unsigned char - * c | Char - * D, d | Double-precision float, native format - * E | Double-precision float, little-endian byte order - * e | Single-precision float, little-endian byte order - * F, f | Single-precision float, native format - * G | Double-precision float, network (big-endian) byte order - * g | Single-precision float, network (big-endian) byte order - * H | Hex string (high nibble first) - * h | Hex string (low nibble first) - * I | Unsigned integer - * i | Integer - * L | Unsigned long - * l | Long - * M | Quoted printable, MIME encoding (see RFC2045) - * m | Base64 encoded string - * N | Long, network (big-endian) byte order - * n | Short, network (big-endian) byte-order - * P | Pointer to a structure (fixed-length string) - * p | Pointer to a null-terminated string - * Q, q | 64-bit number - * S | Unsigned short - * s | Short - * U | UTF-8 - * u | UU-encoded string - * V | Long, little-endian byte order - * v | Short, little-endian byte order - * w | BER-compressed integer\fnm - * X | Back up a byte - * x | Null byte - * Z | Same as ``a'', except that null is added with * + * Integer | Array | + * Directive | Element | Meaning + * ---------------------------------------------------------------------------- + * C | Integer | 8-bit unsigned (unsigned char) + * S | Integer | 16-bit unsigned, native endian (uint16_t) + * L | Integer | 32-bit unsigned, native endian (uint32_t) + * Q | Integer | 64-bit unsigned, native endian (uint64_t) + * J | Integer | pointer width unsigned, native endian (uintptr_t) + * | | (J is available since Ruby 2.3.) + * | | + * c | Integer | 8-bit signed (signed char) + * s | Integer | 16-bit signed, native endian (int16_t) + * l | Integer | 32-bit signed, native endian (int32_t) + * q | Integer | 64-bit signed, native endian (int64_t) + * j | Integer | pointer width signed, native endian (intptr_t) + * | | (j is available since Ruby 2.3.) + * | | + * S_ S! | Integer | unsigned short, native endian + * I I_ I! | Integer | unsigned int, native endian + * L_ L! | Integer | unsigned long, native endian + * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError + * | | if the platform has no long long type.) + * | | (Q_ and Q! is available since Ruby 2.1.) + * J! | Integer | uintptr_t, native endian (same with J) + * | | (J! is available since Ruby 2.3.) + * | | + * s_ s! | Integer | signed short, native endian + * i i_ i! | Integer | signed int, native endian + * l_ l! | Integer | signed long, native endian + * q_ q! | Integer | signed long long, native endian (ArgumentError + * | | if the platform has no long long type.) + * | | (q_ and q! is available since Ruby 2.1.) + * j! | Integer | intptr_t, native endian (same with j) + * | | (j! is available since Ruby 2.3.) + * | | + * S> s> S!> s!> | Integer | same as the directives without ">" except + * L> l> L!> l!> | | big endian + * I!> i!> | | (available since Ruby 1.9.3) + * Q> q> Q!> q!> | | "S>" is same as "n" + * J> j> J!> j!> | | "L>" is same as "N" + * | | + * S< s< S!< s!< | Integer | same as the directives without "<" except + * L< l< L!< l!< | | little endian + * I!< i!< | | (available since Ruby 1.9.3) + * Q< q< Q!< q!< | | "S<" is same as "v" + * J< j< J!< j!< | | "L<" is same as "V" + * | | + * n | Integer | 16-bit unsigned, network (big-endian) byte order + * N | Integer | 32-bit unsigned, network (big-endian) byte order + * v | Integer | 16-bit unsigned, VAX (little-endian) byte order + * V | Integer | 32-bit unsigned, VAX (little-endian) byte order + * | | + * U | Integer | UTF-8 character + * w | Integer | BER-compressed integer + * + * Float | Array | + * Directive | Element | Meaning + * --------------------------------------------------------------------------- + * D d | Float | double-precision, native format + * F f | Float | single-precision, native format + * E | Float | double-precision, little-endian byte order + * e | Float | single-precision, little-endian byte order + * G | Float | double-precision, network (big-endian) byte order + * g | Float | single-precision, network (big-endian) byte order + * + * String | Array | + * Directive | Element | Meaning + * --------------------------------------------------------------------------- + * A | String | arbitrary binary string (space padded, count is width) + * a | String | arbitrary binary string (null padded, count is width) + * Z | String | same as ``a'', except that null is added with * + * B | String | bit string (MSB first) + * b | String | bit string (LSB first) + * H | String | hex string (high nibble first) + * h | String | hex string (low nibble first) + * u | String | UU-encoded string + * M | String | quoted printable, MIME encoding (see RFC2045) + * m | String | base64 encoded string (see RFC 2045, count is width) + * | | (if count is 0, no line feed are added, see RFC 4648) + * P | String | pointer to a structure (fixed-length string) + * p | String | pointer to a null-terminated string + * + * Misc. | Array | + * Directive | Element | Meaning + * --------------------------------------------------------------------------- + * @ | --- | moves to absolute position + * X | --- | back up a byte + * x | --- | null byte */ static VALUE -pack_pack(ary, fmt) - VALUE ary, fmt; +pack_pack(int argc, VALUE *argv, VALUE ary) { - static char *nul10 = "\0\0\0\0\0\0\0\0\0\0"; - static char *spc10 = " "; - char *p, *pend; - VALUE res, from, associates = 0; + static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0"; + static const char spc10[] = " "; + const char *p, *pend; + VALUE fmt, opt = Qnil, res, from, associates = 0, buffer = 0; char type; - long items, len, idx, plen; - char *ptr; + long len, idx, plen; + const char *ptr; + int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ #ifdef NATINT_PACK int natint; /* native integer */ #endif + int integer_size, bigendian_p; + + rb_scan_args(argc, argv, "10:", &fmt, &opt); StringValue(fmt); - p = RSTRING(fmt)->ptr; - pend = p + RSTRING(fmt)->len; - res = rb_str_buf_new(0); + p = RSTRING_PTR(fmt); + pend = p + RSTRING_LEN(fmt); + if (!NIL_P(opt)) { + static ID keyword_ids[1]; + if (!keyword_ids[0]) + CONST_ID(keyword_ids[0], "buffer"); + + rb_get_kwargs(opt, keyword_ids, 0, 1, &buffer); + + if (buffer != Qundef && !RB_TYPE_P(buffer, T_STRING)) + rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer)); + } + if (buffer) + res = buffer; + else + res = rb_str_buf_new(0); - items = RARRAY(ary)->len; idx = 0; #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0) -#define THISFROM (items > 0 ? RARRAY(ary)->ptr[idx] : TOO_FEW) -#define NEXTFROM (items-- > 0 ? RARRAY(ary)->ptr[idx++] : TOO_FEW) +#define MORE_ITEM (idx < RARRAY_LEN(ary)) +#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW) +#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW) while (p < pend) { - if (RSTRING(fmt)->ptr + RSTRING(fmt)->len != pend) { + int explicit_endian = 0; + if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { rb_raise(rb_eRuntimeError, "format string modified"); } type = *p++; /* get data type */ @@ -480,31 +315,67 @@ pack_pack(ary, fmt) } continue; } - if (*p == '_' || *p == '!') { - const char *natstr = "sSiIlL"; - if (strchr(natstr, type)) { + { + modifiers: + switch (*p) { + case '_': + case '!': + if (strchr(natstr, type)) { #ifdef NATINT_PACK - natint = 1; + natint = 1; #endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + goto modifiers; + + case '<': + case '>': + if (!strchr(endstr, type)) { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); + } + if (explicit_endian) { + rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); + } + explicit_endian = *p++; + goto modifiers; } } + if (*p == '*') { /* set data length */ - len = strchr("@Xxu", type) ? 0 : items; + len = strchr("@Xxu", type) ? 0 + : strchr("PMm", type) ? 1 + : RARRAY_LEN(ary) - idx; p++; } else if (ISDIGIT(*p)) { - len = strtoul(p, (char**)&p, 10); + errno = 0; + len = STRTOUL(p, (char**)&p, 10); + if (errno) { + rb_raise(rb_eRangeError, "pack length too big"); + } } else { len = 1; } switch (type) { + case 'U': + /* if encoding is US-ASCII, upgrade to UTF-8 */ + if (enc_info == 1) enc_info = 2; + break; + case 'm': case 'M': case 'u': + /* keep US-ASCII (do nothing) */ + break; + default: + /* fall back to BINARY */ + enc_info = 0; + break; + } + switch (type) { case 'A': case 'a': case 'Z': case 'B': case 'b': case 'H': case 'h': @@ -515,8 +386,8 @@ pack_pack(ary, fmt) } else { StringValue(from); - ptr = RSTRING(from)->ptr; - plen = RSTRING(from)->len; + ptr = RSTRING_PTR(from); + plen = RSTRING_LEN(from); OBJ_INFECT(res, from); } @@ -525,8 +396,8 @@ pack_pack(ary, fmt) switch (type) { case 'a': /* arbitrary binary string (null padded) */ - case 'A': /* ASCII string (space padded) */ - case 'Z': /* null terminated ASCII string */ + case 'A': /* arbitrary binary string (ASCII space padded) */ + case 'Z': /* null terminated string */ if (plen >= len) { rb_str_buf_cat(res, ptr, len); if (p[-1] == '*' && type == 'Z') @@ -543,6 +414,8 @@ pack_pack(ary, fmt) } break; +#define castchar(from) (char)((from) & 0xff) + case 'b': /* bit string (ascending) */ { int byte = 0; @@ -558,7 +431,7 @@ pack_pack(ary, fmt) if (i & 7) byte >>= 1; else { - char c = byte & 0xff; + char c = castchar(byte); rb_str_buf_cat(res, &c, 1); byte = 0; } @@ -566,7 +439,7 @@ pack_pack(ary, fmt) if (len & 7) { char c; byte >>= 7 - (len & 7); - c = byte & 0xff; + c = castchar(byte); rb_str_buf_cat(res, &c, 1); } len = j; @@ -588,7 +461,7 @@ pack_pack(ary, fmt) if (i & 7) byte <<= 1; else { - char c = byte & 0xff; + char c = castchar(byte); rb_str_buf_cat(res, &c, 1); byte = 0; } @@ -596,7 +469,7 @@ pack_pack(ary, fmt) if (len & 7) { char c; byte <<= 7 - (len & 7); - c = byte & 0xff; + c = castchar(byte); rb_str_buf_cat(res, &c, 1); } len = j; @@ -610,7 +483,7 @@ pack_pack(ary, fmt) long i, j = 0; if (len > plen) { - j = (len - plen + 1)/2; + j = (len + 1) / 2 - (plen + 1) / 2; len = plen; } for (i=0; i++ < len; ptr++) { @@ -621,13 +494,13 @@ pack_pack(ary, fmt) if (i & 1) byte >>= 4; else { - char c = byte & 0xff; + char c = castchar(byte); rb_str_buf_cat(res, &c, 1); byte = 0; } } if (len & 1) { - char c = byte & 0xff; + char c = castchar(byte); rb_str_buf_cat(res, &c, 1); } len = j; @@ -641,7 +514,7 @@ pack_pack(ary, fmt) long i, j = 0; if (len > plen) { - j = (len - plen + 1)/2; + j = (len + 1) / 2 - (plen + 1) / 2; len = plen; } for (i=0; i++ < len; ptr++) { @@ -652,13 +525,13 @@ pack_pack(ary, fmt) if (i & 1) byte <<= 4; else { - char c = byte & 0xff; + char c = castchar(byte); rb_str_buf_cat(res, &c, 1); byte = 0; } } if (len & 1) { - char c = byte & 0xff; + char c = castchar(byte); rb_str_buf_cat(res, &c, 1); } len = j; @@ -670,101 +543,95 @@ pack_pack(ary, fmt) case 'c': /* signed char */ case 'C': /* unsigned char */ - while (len-- > 0) { - char c; - - from = NEXTFROM; - c = num2i32(from); - rb_str_buf_cat(res, &c, sizeof(char)); - } - break; - - case 's': /* signed short */ - case 'S': /* unsigned short */ - while (len-- > 0) { - short s; - - from = NEXTFROM; - s = num2i32(from); - rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); - } - break; - - case 'i': /* signed int */ - case 'I': /* unsigned int */ - while (len-- > 0) { - long i; - - from = NEXTFROM; - i = num2i32(from); - rb_str_buf_cat(res, OFF32(&i), NATINT_LEN(int,4)); - } - break; - - case 'l': /* signed long */ - case 'L': /* unsigned long */ - while (len-- > 0) { - long l; - - from = NEXTFROM; - l = num2i32(from); - rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); - } - break; - - case 'q': /* signed quad (64bit) int */ - case 'Q': /* unsigned quad (64bit) int */ - while (len-- > 0) { - char tmp[QUAD_SIZE]; - - from = NEXTFROM; - rb_quad_pack(tmp, from); - rb_str_buf_cat(res, (char*)&tmp, QUAD_SIZE); - } - break; - - case 'n': /* unsigned short (network byte-order) */ - while (len-- > 0) { - unsigned short s; - - from = NEXTFROM; - s = num2i32(from); - s = NATINT_HTONS(s); - rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); - } - break; - - case 'N': /* unsigned long (network byte-order) */ - while (len-- > 0) { - unsigned long l; - - from = NEXTFROM; - l = num2i32(from); - l = NATINT_HTONL(l); - rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); - } - break; - - case 'v': /* unsigned short (VAX byte-order) */ - while (len-- > 0) { - unsigned short s; - - from = NEXTFROM; - s = num2i32(from); - s = NATINT_HTOVS(s); - rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); - } - break; - - case 'V': /* unsigned long (VAX byte-order) */ - while (len-- > 0) { - unsigned long l; - - from = NEXTFROM; - l = num2i32(from); - l = NATINT_HTOVL(l); - rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); - } + integer_size = 1; + bigendian_p = BIGENDIAN_P(); /* not effective */ + goto pack_integer; + + case 's': /* s for int16_t, s! for signed short */ + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'S': /* S for uint16_t, S! for unsigned short */ + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'i': /* i and i! for signed int */ + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'I': /* I and I! for unsigned int */ + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'l': /* l for int32_t, l! for signed long */ + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'L': /* L for uint32_t, L! for unsigned long */ + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'q': /* q for int64_t, q! for signed long long */ + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'Q': /* Q for uint64_t, Q! for unsigned long long */ + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'j': /* j for intptr_t */ + integer_size = sizeof(intptr_t); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'J': /* J for uintptr_t */ + integer_size = sizeof(uintptr_t); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'n': /* 16 bit (2 bytes) integer (network byte-order) */ + integer_size = 2; + bigendian_p = 1; + goto pack_integer; + + case 'N': /* 32 bit (4 bytes) integer (network byte-order) */ + integer_size = 4; + bigendian_p = 1; + goto pack_integer; + + case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */ + integer_size = 2; + bigendian_p = 0; + goto pack_integer; + + case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */ + integer_size = 4; + bigendian_p = 0; + goto pack_integer; + + pack_integer: + if (explicit_endian) { + bigendian_p = explicit_endian == '>'; + } + if (integer_size > MAX_INTEGER_PACK_SIZE) + rb_bug("unexpected intger size for pack: %d", integer_size); + while (len-- > 0) { + char intbuf[MAX_INTEGER_PACK_SIZE]; + + from = NEXTFROM; + rb_integer_pack(from, intbuf, integer_size, 1, 0, + INTEGER_PACK_2COMP | + (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN)); + rb_str_buf_cat(res, intbuf, integer_size); + } break; case 'f': /* single precision float in native format */ @@ -773,32 +640,29 @@ pack_pack(ary, fmt) float f; from = NEXTFROM; - f = RFLOAT(rb_Float(from))->value; + f = (float)RFLOAT_VALUE(rb_to_float(from)); rb_str_buf_cat(res, (char*)&f, sizeof(float)); } break; case 'e': /* single precision float in VAX byte-order */ while (len-- > 0) { - float f; - FLOAT_CONVWITH(ftmp); + FLOAT_CONVWITH(tmp); from = NEXTFROM; - f = RFLOAT(rb_Float(from))->value; - f = HTOVF(f,ftmp); - rb_str_buf_cat(res, (char*)&f, sizeof(float)); + tmp.f = (float)RFLOAT_VALUE(rb_to_float(from)); + HTOVF(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(float)); } break; case 'E': /* double precision float in VAX byte-order */ while (len-- > 0) { - double d; - DOUBLE_CONVWITH(dtmp); - + DOUBLE_CONVWITH(tmp); from = NEXTFROM; - d = RFLOAT(rb_Float(from))->value; - d = HTOVD(d,dtmp); - rb_str_buf_cat(res, (char*)&d, sizeof(double)); + tmp.d = RFLOAT_VALUE(rb_to_float(from)); + HTOVD(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(double)); } break; @@ -808,32 +672,29 @@ pack_pack(ary, fmt) double d; from = NEXTFROM; - d = RFLOAT(rb_Float(from))->value; + d = RFLOAT_VALUE(rb_to_float(from)); rb_str_buf_cat(res, (char*)&d, sizeof(double)); } break; case 'g': /* single precision float in network byte-order */ while (len-- > 0) { - float f; - FLOAT_CONVWITH(ftmp); - + FLOAT_CONVWITH(tmp); from = NEXTFROM; - f = RFLOAT(rb_Float(from))->value; - f = HTONF(f,ftmp); - rb_str_buf_cat(res, (char*)&f, sizeof(float)); + tmp.f = (float)RFLOAT_VALUE(rb_to_float(from)); + HTONF(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(float)); } break; case 'G': /* double precision float in network byte-order */ while (len-- > 0) { - double d; - DOUBLE_CONVWITH(dtmp); + DOUBLE_CONVWITH(tmp); from = NEXTFROM; - d = RFLOAT(rb_Float(from))->value; - d = HTOND(d,dtmp); - rb_str_buf_cat(res, (char*)&d, sizeof(double)); + tmp.d = RFLOAT_VALUE(rb_to_float(from)); + HTOND(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(double)); } break; @@ -848,15 +709,14 @@ pack_pack(ary, fmt) case 'X': /* back up byte */ shrink: - plen = RSTRING(res)->len; + plen = RSTRING_LEN(res); if (plen < len) rb_raise(rb_eArgError, "X outside of string"); - RSTRING(res)->len = plen - len; - RSTRING(res)->ptr[plen - len] = '\0'; + rb_str_set_len(res, plen - len); break; case '@': /* null fill to absolute position */ - len -= RSTRING(res)->len; + len -= RSTRING_LEN(res); if (len > 0) goto grow; len = -len; if (len > 0) goto shrink; @@ -868,17 +728,17 @@ pack_pack(ary, fmt) case 'U': /* Unicode character */ while (len-- > 0) { - long l; + SIGNED_VALUE l; char buf[8]; int le; from = NEXTFROM; from = rb_to_int(from); - l = NUM2INT(from); + l = NUM2LONG(from); if (l < 0) { rb_raise(rb_eRangeError, "pack(U): value out of range"); } - le = uv_to_utf8(buf, l); + le = rb_uv_to_utf8(buf, l); rb_str_buf_cat(res, (char*)buf, le); } break; @@ -887,11 +747,19 @@ pack_pack(ary, fmt) case 'm': /* base64 encoded string */ from = NEXTFROM; StringValue(from); - ptr = RSTRING(from)->ptr; - plen = RSTRING(from)->len; + ptr = RSTRING_PTR(from); + plen = RSTRING_LEN(from); + OBJ_INFECT(res, from); + if (len == 0 && type == 'm') { + encodes(res, ptr, plen, type, 0); + ptr += plen; + break; + } if (len <= 2) len = 45; + else if (len > 63 && type == 'u') + len = 63; else len = len / 3 * 3; while (plen > 0) { @@ -901,7 +769,7 @@ pack_pack(ary, fmt) todo = len; else todo = plen; - encodes(res, ptr, todo, type); + encodes(res, ptr, todo, type, 1); plen -= todo; ptr += todo; } @@ -909,6 +777,7 @@ pack_pack(ary, fmt) case 'M': /* quoted-printable encoded string */ from = rb_obj_as_string(NEXTFROM); + OBJ_INFECT(res, from); if (len <= 1) len = 72; qpencode(res, from, len); @@ -918,9 +787,9 @@ pack_pack(ary, fmt) from = THISFROM; if (!NIL_P(from)) { StringValue(from); - if (RSTRING(from)->len < len) { + if (RSTRING_LEN(from) < len) { rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", - RSTRING(from)->len, len); + RSTRING_LEN(from), len); } } len = 1; @@ -934,108 +803,120 @@ pack_pack(ary, fmt) } else { t = StringValuePtr(from); + OBJ_INFECT(res, from); + rb_obj_taint(from); } if (!associates) { associates = rb_ary_new(); } rb_ary_push(associates, from); - rb_obj_taint(from); rb_str_buf_cat(res, (char*)&t, sizeof(char*)); } break; case 'w': /* BER compressed integer */ while (len-- > 0) { - unsigned long ul; VALUE buf = rb_str_new(0, 0); - char c, *bufs, *bufe; + size_t numbytes; + int sign; + char *cp; from = NEXTFROM; - if (TYPE(from) == T_BIGNUM) { - VALUE big128 = rb_uint2big(128); - while (TYPE(from) == T_BIGNUM) { - from = rb_big_divmod(from, big128); - c = NUM2INT(RARRAY(from)->ptr[1]) | 0x80; /* mod */ - rb_str_buf_cat(buf, &c, sizeof(char)); - from = RARRAY(from)->ptr[0]; /* div */ - } - } + from = rb_to_int(from); + numbytes = rb_absint_numwords(from, 7, NULL); + if (numbytes == 0) + numbytes = 1; + buf = rb_str_new(NULL, numbytes); - { - long l = NUM2LONG(from); - if (l < 0) { - rb_raise(rb_eArgError, "can't compress negative numbers"); - } - ul = l; - } + sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN); - while (ul) { - c = ((ul & 0x7f) | 0x80); - rb_str_buf_cat(buf, &c, sizeof(char)); - ul >>= 7; - } + if (sign < 0) + rb_raise(rb_eArgError, "can't compress negative numbers"); + if (sign == 2) + rb_bug("buffer size problem?"); - if (RSTRING(buf)->len) { - bufs = RSTRING(buf)->ptr; - bufe = bufs + RSTRING(buf)->len - 1; - *bufs &= 0x7f; /* clear continue bit */ - while (bufs < bufe) { /* reverse */ - c = *bufs; - *bufs++ = *bufe; - *bufe-- = c; - } - rb_str_buf_cat(res, RSTRING(buf)->ptr, RSTRING(buf)->len); - } - else { - c = 0; - rb_str_buf_cat(res, &c, sizeof(char)); - } + cp = RSTRING_PTR(buf); + while (1 < numbytes) { + *cp |= 0x80; + cp++; + numbytes--; + } + + rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); } break; - default: + default: { + char unknown[5]; + if (ISPRINT(type)) { + unknown[0] = type; + unknown[1] = '\0'; + } + else { + snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff); + } + rb_warning("unknown pack directive '%s' in '% "PRIsVALUE"'", + unknown, fmt); break; + } } } if (associates) { - rb_str_associate(res, associates); + str_associate(res, associates); + } + OBJ_INFECT(res, fmt); + switch (enc_info) { + case 1: + ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + break; + case 2: + rb_enc_set_index(res, rb_utf8_encindex()); + break; + default: + /* do nothing, keep ASCII-8BIT */ + break; } return res; } -static char uu_table[] = +static const char uu_table[] = "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; -static char b64_table[] = +static const char b64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static void -encodes(str, s, len, type) - VALUE str; - char *s; - long len; - int type; +encodes(VALUE str, const char *s0, long len, int type, int tail_lf) { - char *buff = ALLOCA_N(char, len * 4 / 3 + 6); + enum {buff_size = 4096, encoded_unit = 4, input_unit = 3}; + char buff[buff_size + 1]; /* +1 for tail_lf */ long i = 0; - char *trans = type == 'u' ? uu_table : b64_table; - int padding; + const char *const trans = type == 'u' ? uu_table : b64_table; + char padding; + const unsigned char *s = (const unsigned char *)s0; if (type == 'u') { - buff[i++] = len + ' '; + buff[i++] = (char)len + ' '; padding = '`'; } else { padding = '='; } - while (len >= 3) { - buff[i++] = trans[077 & (*s >> 2)]; - buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; - buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; - buff[i++] = trans[077 & s[2]]; - s += 3; - len -= 3; + while (len >= input_unit) { + while (len >= input_unit && buff_size-i >= encoded_unit) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; + buff[i++] = trans[077 & s[2]]; + s += input_unit; + len -= input_unit; + } + if (buff_size-i < encoded_unit) { + rb_str_buf_cat(str, buff, i); + i = 0; + } } + if (len == 2) { buff[i++] = trans[077 & (*s >> 2)]; buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; @@ -1048,21 +929,20 @@ encodes(str, s, len, type) buff[i++] = padding; buff[i++] = padding; } - buff[i++] = '\n'; + if (tail_lf) buff[i++] = '\n'; rb_str_buf_cat(str, buff, i); + if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun"); } -static char hex_table[] = "0123456789ABCDEF"; +static const char hex_table[] = "0123456789ABCDEF"; static void -qpencode(str, from, len) - VALUE str, from; - long len; +qpencode(VALUE str, VALUE from, long len) { char buff[1024]; long i = 0, n = 0, prev = EOF; - unsigned char *s = (unsigned char*)RSTRING(from)->ptr; - unsigned char *send = s + RSTRING(from)->len; + unsigned char *s = (unsigned char*)RSTRING_PTR(from); + unsigned char *send = s + RSTRING_LEN(from); while (s < send) { if ((*s > 126) || @@ -1110,220 +990,92 @@ qpencode(str, from, len) } static inline int -hex2num(c) - char c; +hex2num(char c) { - switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return c - '0'; - case 'a': case 'b': case 'c': - case 'd': case 'e': case 'f': - return c - 'a' + 10; - case 'A': case 'B': case 'C': - case 'D': case 'E': case 'F': - return c - 'A' + 10; - default: - return -1; - } + int n; + n = ruby_digit36_to_number_table[(unsigned char)c]; + if (16 <= n) + n = -1; + return n; } #define PACK_LENGTH_ADJUST_SIZE(sz) do { \ - tmp = 0; \ - if (len > (send-s)/sz) { \ + tmp_len = 0; \ + if (len > (long)((send-s)/(sz))) { \ if (!star) { \ - tmp = len-(send-s)/sz; \ + tmp_len = len-(send-s)/(sz); \ } \ - len = (send-s)/sz; \ + len = (send-s)/(sz); \ } \ } while (0) -#ifdef NATINT_PACK -#define PACK_LENGTH_ADJUST(type,sz) do { \ - int t__len = NATINT_LEN(type,(sz)); \ - PACK_LENGTH_ADJUST_SIZE(t__len); \ +#define PACK_ITEM_ADJUST() do { \ + if (tmp_len > 0 && mode == UNPACK_ARRAY) \ + rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ } while (0) + +/* Workaround for Oracle Developer Studio (Oracle Solaris Studio) + * 12.4/12.5/12.6 C compiler optimization bug + * with "-xO4" optimization option. + */ +#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150 +# define AVOID_CC_BUG volatile #else -#define PACK_LENGTH_ADJUST(type,sz) \ - PACK_LENGTH_ADJUST_SIZE(sizeof(type)) +# define AVOID_CC_BUG #endif -#define PACK_ITEM_ADJUST() while (tmp--) rb_ary_push(ary, Qnil) - static VALUE -infected_str_new(ptr, len, str) - const char *ptr; - long len; - VALUE str; +infected_str_new(const char *ptr, long len, VALUE str) { VALUE s = rb_str_new(ptr, len); OBJ_INFECT(s, str); return s; } - -/* - * call-seq: - * str.unpack(format) => anArray - * - * Decodes <i>str</i> (which may contain binary data) according to the - * format string, returning an array of each value extracted. The - * format string consists of a sequence of single-character directives, - * summarized in the table at the end of this entry. - * Each directive may be followed - * by a number, indicating the number of times to repeat with this - * directive. An asterisk (``<code>*</code>'') will use up all - * remaining elements. The directives <code>sSiIlL</code> may each be - * followed by an underscore (``<code>_</code>'') to use the underlying - * platform's native size for the specified type; otherwise, it uses a - * platform-independent consistent size. Spaces are ignored in the - * format string. See also <code>Array#pack</code>. - * - * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] - * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] - * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] - * "aa".unpack('b8B8') #=> ["10000110", "01100001"] - * "aaa".unpack('h2H2c') #=> ["16", "61", 97] - * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] - * "now=20is".unpack('M*') #=> ["now is"] - * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] - * - * This table summarizes the various formats and the Ruby classes - * returned by each. - * - * Format | Returns | Function - * -------+---------+----------------------------------------- - * A | String | with trailing nulls and spaces removed - * -------+---------+----------------------------------------- - * a | String | string - * -------+---------+----------------------------------------- - * B | String | extract bits from each character (msb first) - * -------+---------+----------------------------------------- - * b | String | extract bits from each character (lsb first) - * -------+---------+----------------------------------------- - * C | Fixnum | extract a character as an unsigned integer - * -------+---------+----------------------------------------- - * c | Fixnum | extract a character as an integer - * -------+---------+----------------------------------------- - * d,D | Float | treat sizeof(double) characters as - * | | a native double - * -------+---------+----------------------------------------- - * E | Float | treat sizeof(double) characters as - * | | a double in little-endian byte order - * -------+---------+----------------------------------------- - * e | Float | treat sizeof(float) characters as - * | | a float in little-endian byte order - * -------+---------+----------------------------------------- - * f,F | Float | treat sizeof(float) characters as - * | | a native float - * -------+---------+----------------------------------------- - * G | Float | treat sizeof(double) characters as - * | | a double in network byte order - * -------+---------+----------------------------------------- - * g | Float | treat sizeof(float) characters as a - * | | float in network byte order - * -------+---------+----------------------------------------- - * H | String | extract hex nibbles from each character - * | | (most significant first) - * -------+---------+----------------------------------------- - * h | String | extract hex nibbles from each character - * | | (least significant first) - * -------+---------+----------------------------------------- - * I | Integer | treat sizeof(int) (modified by _) - * | | successive characters as an unsigned - * | | native integer - * -------+---------+----------------------------------------- - * i | Integer | treat sizeof(int) (modified by _) - * | | successive characters as a signed - * | | native integer - * -------+---------+----------------------------------------- - * L | Integer | treat four (modified by _) successive - * | | characters as an unsigned native - * | | long integer - * -------+---------+----------------------------------------- - * l | Integer | treat four (modified by _) successive - * | | characters as a signed native - * | | long integer - * -------+---------+----------------------------------------- - * M | String | quoted-printable - * -------+---------+----------------------------------------- - * m | String | base64-encoded - * -------+---------+----------------------------------------- - * N | Integer | treat four characters as an unsigned - * | | long in network byte order - * -------+---------+----------------------------------------- - * n | Fixnum | treat two characters as an unsigned - * | | short in network byte order - * -------+---------+----------------------------------------- - * P | String | treat sizeof(char *) characters as a - * | | pointer, and return \emph{len} characters - * | | from the referenced location - * -------+---------+----------------------------------------- - * p | String | treat sizeof(char *) characters as a - * | | pointer to a null-terminated string - * -------+---------+----------------------------------------- - * Q | Integer | treat 8 characters as an unsigned - * | | quad word (64 bits) - * -------+---------+----------------------------------------- - * q | Integer | treat 8 characters as a signed - * | | quad word (64 bits) - * -------+---------+----------------------------------------- - * S | Fixnum | treat two (different if _ used) - * | | successive characters as an unsigned - * | | short in native byte order - * -------+---------+----------------------------------------- - * s | Fixnum | Treat two (different if _ used) - * | | successive characters as a signed short - * | | in native byte order - * -------+---------+----------------------------------------- - * U | Integer | UTF-8 characters as unsigned integers - * -------+---------+----------------------------------------- - * u | String | UU-encoded - * -------+---------+----------------------------------------- - * V | Fixnum | treat four characters as an unsigned - * | | long in little-endian byte order - * -------+---------+----------------------------------------- - * v | Fixnum | treat two characters as an unsigned - * | | short in little-endian byte order - * -------+---------+----------------------------------------- - * w | Integer | BER-compressed integer (see Array.pack) - * -------+---------+----------------------------------------- - * X | --- | skip backward one character - * -------+---------+----------------------------------------- - * x | --- | skip forward one character - * -------+---------+----------------------------------------- - * Z | String | with trailing nulls removed - * | | upto first null with * - * -------+---------+----------------------------------------- - * @ | --- | skip to the offset given by the - * | | length argument - * -------+---------+----------------------------------------- - */ + +/* unpack mode */ +#define UNPACK_ARRAY 0 +#define UNPACK_BLOCK 1 +#define UNPACK_1 2 static VALUE -pack_unpack(str, fmt) - VALUE str, fmt; +pack_unpack_internal(VALUE str, VALUE fmt, int mode) { - static char *hexdigits = "0123456789abcdef0123456789ABCDEFx"; +#define hexdigits ruby_hexdigits char *s, *send; char *p, *pend; VALUE ary; char type; long len; - int tmp, star; + AVOID_CC_BUG long tmp_len; + int star; #ifdef NATINT_PACK int natint; /* native integer */ #endif + int signed_p, integer_size, bigendian_p; +#define UNPACK_PUSH(item) do {\ + VALUE item_val = (item);\ + if ((mode) == UNPACK_BLOCK) {\ + rb_yield(item_val);\ + }\ + else if ((mode) == UNPACK_ARRAY) {\ + rb_ary_push(ary, item_val);\ + }\ + else /* if ((mode) == UNPACK_1) { */ {\ + return item_val; \ + }\ + } while (0) StringValue(str); StringValue(fmt); - s = RSTRING(str)->ptr; - send = s + RSTRING(str)->len; - p = RSTRING(fmt)->ptr; - pend = p + RSTRING(fmt)->len; + s = RSTRING_PTR(str); + send = s + RSTRING_LEN(str); + p = RSTRING_PTR(fmt); + pend = p + RSTRING_LEN(fmt); - ary = rb_ary_new(); + ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil; while (p < pend) { + int explicit_endian = 0; type = *p++; #ifdef NATINT_PACK natint = 0; @@ -1336,20 +1088,38 @@ pack_unpack(str, fmt) } continue; } + star = 0; - if (*p == '_' || *p == '!') { - char *natstr = "sSiIlL"; + { + modifiers: + switch (*p) { + case '_': + case '!': - if (strchr(natstr, type)) { + if (strchr(natstr, type)) { #ifdef NATINT_PACK - natint = 1; + natint = 1; #endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + goto modifiers; + + case '<': + case '>': + if (!strchr(endstr, type)) { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); + } + if (explicit_endian) { + rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); + } + explicit_endian = *p++; + goto modifiers; } } + if (p >= pend) len = 1; else if (*p == '*') { @@ -1358,7 +1128,11 @@ pack_unpack(str, fmt) p++; } else if (ISDIGIT(*p)) { - len = strtoul(p, (char**)&p, 10); + errno = 0; + len = STRTOUL(p, (char**)&p, 10); + if (len < 0 || errno) { + rb_raise(rb_eRangeError, "pack length too big"); + } } else { len = (type != '@'); @@ -1379,7 +1153,7 @@ pack_unpack(str, fmt) if (*t != ' ' && *t != '\0') break; t--; len--; } - rb_ary_push(ary, infected_str_new(s, len, str)); + UNPACK_PUSH(infected_str_new(s, len, str)); s += end; } break; @@ -1390,7 +1164,7 @@ pack_unpack(str, fmt) if (len > send-s) len = send-s; while (t < s+len && *t) t++; - rb_ary_push(ary, infected_str_new(s, t-s, str)); + UNPACK_PUSH(infected_str_new(s, t-s, str)); if (t < send) t++; s = star ? t : s+len; } @@ -1398,11 +1172,10 @@ pack_unpack(str, fmt) case 'a': if (len > send - s) len = send - s; - rb_ary_push(ary, infected_str_new(s, len, str)); + UNPACK_PUSH(infected_str_new(s, len, str)); s += len; break; - case 'b': { VALUE bitstr; @@ -1413,13 +1186,15 @@ pack_unpack(str, fmt) if (p[-1] == '*' || len > (send - s) * 8) len = (send - s) * 8; bits = 0; - rb_ary_push(ary, bitstr = rb_str_new(0, len)); - t = RSTRING(bitstr)->ptr; + bitstr = rb_usascii_str_new(0, len); + OBJ_INFECT(bitstr, str); + t = RSTRING_PTR(bitstr); for (i=0; i<len; i++) { if (i & 7) bits >>= 1; - else bits = *s++; + else bits = (unsigned char)*s++; *t++ = (bits & 1) ? '1' : '0'; } + UNPACK_PUSH(bitstr); } break; @@ -1433,13 +1208,15 @@ pack_unpack(str, fmt) if (p[-1] == '*' || len > (send - s) * 8) len = (send - s) * 8; bits = 0; - rb_ary_push(ary, bitstr = rb_str_new(0, len)); - t = RSTRING(bitstr)->ptr; + bitstr = rb_usascii_str_new(0, len); + OBJ_INFECT(bitstr, str); + t = RSTRING_PTR(bitstr); for (i=0; i<len; i++) { if (i & 7) bits <<= 1; - else bits = *s++; + else bits = (unsigned char)*s++; *t++ = (bits & 128) ? '1' : '0'; } + UNPACK_PUSH(bitstr); } break; @@ -1453,15 +1230,17 @@ pack_unpack(str, fmt) if (p[-1] == '*' || len > (send - s) * 2) len = (send - s) * 2; bits = 0; - rb_ary_push(ary, bitstr = rb_str_new(0, len)); - t = RSTRING(bitstr)->ptr; + bitstr = rb_usascii_str_new(0, len); + OBJ_INFECT(bitstr, str); + t = RSTRING_PTR(bitstr); for (i=0; i<len; i++) { if (i & 1) bits >>= 4; else - bits = *s++; + bits = (unsigned char)*s++; *t++ = hexdigits[bits & 15]; } + UNPACK_PUSH(bitstr); } break; @@ -1475,246 +1254,205 @@ pack_unpack(str, fmt) if (p[-1] == '*' || len > (send - s) * 2) len = (send - s) * 2; bits = 0; - rb_ary_push(ary, bitstr = rb_str_new(0, len)); - t = RSTRING(bitstr)->ptr; + bitstr = rb_usascii_str_new(0, len); + OBJ_INFECT(bitstr, str); + t = RSTRING_PTR(bitstr); for (i=0; i<len; i++) { if (i & 1) bits <<= 4; else - bits = *s++; + bits = (unsigned char)*s++; *t++ = hexdigits[(bits >> 4) & 15]; } + UNPACK_PUSH(bitstr); } break; case 'c': - PACK_LENGTH_ADJUST(char,sizeof(char)); - while (len-- > 0) { - int c = *s++; - if (c > (char)127) c-=256; - rb_ary_push(ary, INT2FIX(c)); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 1; + integer_size = 1; + bigendian_p = BIGENDIAN_P(); /* not effective */ + goto unpack_integer; case 'C': - PACK_LENGTH_ADJUST(unsigned char,sizeof(unsigned char)); - while (len-- > 0) { - unsigned char c = *s++; - rb_ary_push(ary, INT2FIX(c)); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 0; + integer_size = 1; + bigendian_p = BIGENDIAN_P(); /* not effective */ + goto unpack_integer; case 's': - PACK_LENGTH_ADJUST(short,2); - while (len-- > 0) { - short tmp = 0; - memcpy(OFF16(&tmp), s, NATINT_LEN(short,2)); - EXTEND16(tmp); - s += NATINT_LEN(short,2); - rb_ary_push(ary, INT2FIX(tmp)); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 1; + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; case 'S': - PACK_LENGTH_ADJUST(unsigned short,2); - while (len-- > 0) { - unsigned short tmp = 0; - memcpy(OFF16(&tmp), s, NATINT_LEN(unsigned short,2)); - s += NATINT_LEN(unsigned short,2); - rb_ary_push(ary, INT2FIX(tmp)); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 0; + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; case 'i': - PACK_LENGTH_ADJUST(int,sizeof(int)); - while (len-- > 0) { - int tmp; - memcpy(&tmp, s, sizeof(int)); - s += sizeof(int); - rb_ary_push(ary, INT2NUM(tmp)); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 1; + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; case 'I': - PACK_LENGTH_ADJUST(unsigned int,sizeof(unsigned int)); - while (len-- > 0) { - unsigned int tmp; - memcpy(&tmp, s, sizeof(unsigned int)); - s += sizeof(unsigned int); - rb_ary_push(ary, UINT2NUM(tmp)); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 0; + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; case 'l': - PACK_LENGTH_ADJUST(long,4); - while (len-- > 0) { - long tmp = 0; - memcpy(OFF32(&tmp), s, NATINT_LEN(long,4)); - EXTEND32(tmp); - s += NATINT_LEN(long,4); - rb_ary_push(ary, LONG2NUM(tmp)); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 1; + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + case 'L': - PACK_LENGTH_ADJUST(unsigned long,4); - while (len-- > 0) { - unsigned long tmp = 0; - memcpy(OFF32(&tmp), s, NATINT_LEN(unsigned long,4)); - s += NATINT_LEN(unsigned long,4); - rb_ary_push(ary, ULONG2NUM(tmp)); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 0; + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; case 'q': - PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE); - while (len-- > 0) { - char *tmp = (char*)s; - s += QUAD_SIZE; - rb_ary_push(ary, rb_quad_unpack(tmp, 1)); - } - PACK_ITEM_ADJUST(); - break; - case 'Q': - PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE); - while (len-- > 0) { - char *tmp = (char*)s; - s += QUAD_SIZE; - rb_ary_push(ary, rb_quad_unpack(tmp, 0)); - } - break; + signed_p = 1; + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'Q': + signed_p = 0; + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'j': + signed_p = 1; + integer_size = sizeof(intptr_t); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'J': + signed_p = 0; + integer_size = sizeof(uintptr_t); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; case 'n': - PACK_LENGTH_ADJUST(unsigned short,2); - while (len-- > 0) { - unsigned short tmp = 0; - memcpy(OFF16B(&tmp), s, NATINT_LEN(unsigned short,2)); - s += NATINT_LEN(unsigned short,2); - rb_ary_push(ary, UINT2NUM(ntohs(tmp))); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 0; + integer_size = 2; + bigendian_p = 1; + goto unpack_integer; case 'N': - PACK_LENGTH_ADJUST(unsigned long,4); - while (len-- > 0) { - unsigned long tmp = 0; - memcpy(OFF32B(&tmp), s, NATINT_LEN(unsigned long,4)); - s += NATINT_LEN(unsigned long,4); - rb_ary_push(ary, ULONG2NUM(ntohl(tmp))); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 0; + integer_size = 4; + bigendian_p = 1; + goto unpack_integer; case 'v': - PACK_LENGTH_ADJUST(unsigned short,2); - while (len-- > 0) { - unsigned short tmp = 0; - memcpy(OFF16(&tmp), s, NATINT_LEN(unsigned short,2)); - s += NATINT_LEN(unsigned short,2); - rb_ary_push(ary, UINT2NUM(vtohs(tmp))); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 0; + integer_size = 2; + bigendian_p = 0; + goto unpack_integer; case 'V': - PACK_LENGTH_ADJUST(unsigned long,4); - while (len-- > 0) { - unsigned long tmp = 0; - memcpy(OFF32(&tmp), s, NATINT_LEN(long,4)); - s += NATINT_LEN(long,4); - rb_ary_push(ary, ULONG2NUM(vtohl(tmp))); - } - PACK_ITEM_ADJUST(); - break; + signed_p = 0; + integer_size = 4; + bigendian_p = 0; + goto unpack_integer; + + unpack_integer: + if (explicit_endian) { + bigendian_p = explicit_endian == '>'; + } + PACK_LENGTH_ADJUST_SIZE(integer_size); + while (len-- > 0) { + int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN; + VALUE val; + if (signed_p) + flags |= INTEGER_PACK_2COMP; + val = rb_integer_unpack(s, integer_size, 1, 0, flags); + UNPACK_PUSH(val); + s += integer_size; + } + PACK_ITEM_ADJUST(); + break; case 'f': case 'F': - PACK_LENGTH_ADJUST(float,sizeof(float)); + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); while (len-- > 0) { float tmp; memcpy(&tmp, s, sizeof(float)); s += sizeof(float); - rb_ary_push(ary, rb_float_new((double)tmp)); + UNPACK_PUSH(DBL2NUM((double)tmp)); } PACK_ITEM_ADJUST(); break; case 'e': - PACK_LENGTH_ADJUST(float,sizeof(float)); + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); while (len-- > 0) { - float tmp; - FLOAT_CONVWITH(ftmp); - - memcpy(&tmp, s, sizeof(float)); + FLOAT_CONVWITH(tmp); + memcpy(tmp.buf, s, sizeof(float)); s += sizeof(float); - tmp = VTOHF(tmp,ftmp); - rb_ary_push(ary, rb_float_new((double)tmp)); + VTOHF(tmp); + UNPACK_PUSH(DBL2NUM(tmp.f)); } PACK_ITEM_ADJUST(); break; - + case 'E': - PACK_LENGTH_ADJUST(double,sizeof(double)); + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); while (len-- > 0) { - double tmp; - DOUBLE_CONVWITH(dtmp); - - memcpy(&tmp, s, sizeof(double)); + DOUBLE_CONVWITH(tmp); + memcpy(tmp.buf, s, sizeof(double)); s += sizeof(double); - tmp = VTOHD(tmp,dtmp); - rb_ary_push(ary, rb_float_new(tmp)); + VTOHD(tmp); + UNPACK_PUSH(DBL2NUM(tmp.d)); } PACK_ITEM_ADJUST(); break; - + case 'D': case 'd': - PACK_LENGTH_ADJUST(double,sizeof(double)); + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); while (len-- > 0) { double tmp; memcpy(&tmp, s, sizeof(double)); s += sizeof(double); - rb_ary_push(ary, rb_float_new(tmp)); + UNPACK_PUSH(DBL2NUM(tmp)); } PACK_ITEM_ADJUST(); break; case 'g': - PACK_LENGTH_ADJUST(float,sizeof(float)); + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); while (len-- > 0) { - float tmp; - FLOAT_CONVWITH(ftmp;) - - memcpy(&tmp, s, sizeof(float)); + FLOAT_CONVWITH(tmp); + memcpy(tmp.buf, s, sizeof(float)); s += sizeof(float); - tmp = NTOHF(tmp,ftmp); - rb_ary_push(ary, rb_float_new((double)tmp)); + NTOHF(tmp); + UNPACK_PUSH(DBL2NUM(tmp.f)); } PACK_ITEM_ADJUST(); break; - + case 'G': - PACK_LENGTH_ADJUST(double,sizeof(double)); + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); while (len-- > 0) { - double tmp; - DOUBLE_CONVWITH(dtmp); - - memcpy(&tmp, s, sizeof(double)); + DOUBLE_CONVWITH(tmp); + memcpy(tmp.buf, s, sizeof(double)); s += sizeof(double); - tmp = NTOHD(tmp,dtmp); - rb_ary_push(ary, rb_float_new(tmp)); + NTOHD(tmp); + UNPACK_PUSH(DBL2NUM(tmp.d)); } PACK_ITEM_ADJUST(); break; - + case 'U': if (len > send - s) len = send - s; while (len > 0 && s < send) { @@ -1723,154 +1461,187 @@ pack_unpack(str, fmt) l = utf8_to_uv(s, &alen); s += alen; len--; - rb_ary_push(ary, ULONG2NUM(l)); + UNPACK_PUSH(ULONG2NUM(l)); } break; case 'u': { VALUE buf = infected_str_new(0, (send - s)*3/4, str); - char *ptr = RSTRING(buf)->ptr; + char *ptr = RSTRING_PTR(buf); long total = 0; - while (s < send && *s > ' ' && *s < 'a') { + while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') { long a,b,c,d; - char hunk[4]; + char hunk[3]; + + len = ((unsigned char)*s++ - ' ') & 077; - hunk[3] = '\0'; - len = (*s++ - ' ') & 077; total += len; - if (total > RSTRING(buf)->len) { - len -= total - RSTRING(buf)->len; - total = RSTRING(buf)->len; + if (total > RSTRING_LEN(buf)) { + len -= total - RSTRING_LEN(buf); + total = RSTRING_LEN(buf); } while (len > 0) { long mlen = len > 3 ? 3 : len; - if (s < send && *s >= ' ') - a = (*s++ - ' ') & 077; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + a = ((unsigned char)*s++ - ' ') & 077; else a = 0; - if (s < send && *s >= ' ') - b = (*s++ - ' ') & 077; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + b = ((unsigned char)*s++ - ' ') & 077; else b = 0; - if (s < send && *s >= ' ') - c = (*s++ - ' ') & 077; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + c = ((unsigned char)*s++ - ' ') & 077; else c = 0; - if (s < send && *s >= ' ') - d = (*s++ - ' ') & 077; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + d = ((unsigned char)*s++ - ' ') & 077; else d = 0; - hunk[0] = a << 2 | b >> 4; - hunk[1] = b << 4 | c >> 2; - hunk[2] = c << 6 | d; + hunk[0] = (char)(a << 2 | b >> 4); + hunk[1] = (char)(b << 4 | c >> 2); + hunk[2] = (char)(c << 6 | d); memcpy(ptr, hunk, mlen); ptr += mlen; len -= mlen; } - if (*s == '\r') s++; - if (*s == '\n') s++; - else if (s < send && (s+1 == send || s[1] == '\n')) - s += 2; /* possible checksum byte */ + if (s < send && (unsigned char)*s != '\r' && *s != '\n') + s++; /* possible checksum byte */ + if (s < send && *s == '\r') s++; + if (s < send && *s == '\n') s++; } - - RSTRING(buf)->ptr[total] = '\0'; - RSTRING(buf)->len = total; - rb_ary_push(ary, buf); + + rb_str_set_len(buf, total); + UNPACK_PUSH(buf); } break; case 'm': { - VALUE buf = infected_str_new(0, (send - s)*3/4, str); - char *ptr = RSTRING(buf)->ptr; - int a = -1,b = -1,c = 0,d; - static int first = 1; - static int b64_xtable[256]; + VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str); /* +3 is for skipping paddings */ + char *ptr = RSTRING_PTR(buf); + int a = -1,b = -1,c = 0,d = 0; + static signed char b64_xtable[256]; - if (first) { + if (b64_xtable['/'] <= 0) { int i; - first = 0; for (i = 0; i < 256; i++) { b64_xtable[i] = -1; } for (i = 0; i < 64; i++) { - b64_xtable[(int)b64_table[i]] = i; + b64_xtable[(unsigned char)b64_table[i]] = (char)i; } } - while (s < send) { - a = b = c = d = -1; - while((a = b64_xtable[(int)(*(unsigned char*)s)]) == -1 && s < send) { s++; } - if( s >= send ) break; - s++; - while((b = b64_xtable[(int)(*(unsigned char*)s)]) == -1 && s < send) { s++; } - if( s >= send ) break; - s++; - while((c = b64_xtable[(int)(*(unsigned char*)s)]) == -1 && s < send) { if( *s == '=' ) break; s++; } - if( *s == '=' || s >= send ) break; - s++; - while((d = b64_xtable[(int)(*(unsigned char*)s)]) == -1 && s < send) { if( *s == '=' ) break; s++; } - if( *s == '=' || s >= send ) break; - s++; - *ptr++ = a << 2 | b >> 4; - *ptr++ = b << 4 | c >> 2; - *ptr++ = c << 6 | d; + if (len == 0) { + while (s < send) { + a = b = c = d = -1; + a = b64_xtable[(unsigned char)*s++]; + if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); + b = b64_xtable[(unsigned char)*s++]; + if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); + if (*s == '=') { + if (s + 2 == send && *(s + 1) == '=') break; + rb_raise(rb_eArgError, "invalid base64"); + } + c = b64_xtable[(unsigned char)*s++]; + if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); + if (s + 1 == send && *s == '=') break; + d = b64_xtable[(unsigned char)*s++]; + if (d == -1) rb_raise(rb_eArgError, "invalid base64"); + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + *ptr++ = castchar(c << 6 | d); + } + if (c == -1) { + *ptr++ = castchar(a << 2 | b >> 4); + if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); + } + else if (d == -1) { + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); + } } - if (a != -1 && b != -1) { - if (c == -1 && *s == '=') - *ptr++ = a << 2 | b >> 4; - else if (c != -1 && *s == '=') { - *ptr++ = a << 2 | b >> 4; - *ptr++ = b << 4 | c >> 2; + else { + while (s < send) { + a = b = c = d = -1; + while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + *ptr++ = castchar(c << 6 | d); + a = -1; + } + if (a != -1 && b != -1) { + if (c == -1) + *ptr++ = castchar(a << 2 | b >> 4); + else { + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + } } } - *ptr = '\0'; - RSTRING(buf)->len = ptr - RSTRING(buf)->ptr; - rb_ary_push(ary, buf); + rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); + UNPACK_PUSH(buf); } break; case 'M': { VALUE buf = infected_str_new(0, send - s, str); - char *ptr = RSTRING(buf)->ptr; + char *ptr = RSTRING_PTR(buf), *ss = s; + int csum = 0; int c1, c2; while (s < send) { if (*s == '=') { if (++s == send) break; - if (s+1 < send && *s == '\r' && *(s+1) == '\n') - s++; + if (s+1 < send && *s == '\r' && *(s+1) == '\n') + s++; if (*s != '\n') { if ((c1 = hex2num(*s)) == -1) break; if (++s == send) break; if ((c2 = hex2num(*s)) == -1) break; - *ptr++ = c1 << 4 | c2; + csum |= *ptr++ = castchar(c1 << 4 | c2); } } else { - *ptr++ = *s; + csum |= *ptr++ = *s; } s++; + ss = s; } - *ptr = '\0'; - RSTRING(buf)->len = ptr - RSTRING(buf)->ptr; - rb_ary_push(ary, buf); + rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); + rb_str_buf_cat(buf, ss, send-ss); + csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; + ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum); + UNPACK_PUSH(buf); } break; case '@': - if (len > RSTRING(str)->len) + if (len > RSTRING_LEN(str)) rb_raise(rb_eArgError, "@ outside of string"); - s = RSTRING(str)->ptr + len; + s = RSTRING_PTR(str) + len; break; case 'X': - if (len > s - RSTRING(str)->ptr) + if (len > s - RSTRING_PTR(str)) rb_raise(rb_eArgError, "X outside of string"); s -= len; break; @@ -1882,26 +1653,27 @@ pack_unpack(str, fmt) break; case 'P': - if (sizeof(char *) <= send - s) { + if (sizeof(char *) <= (size_t)(send - s)) { + VALUE tmp = Qnil; char *t; - VALUE tmp; memcpy(&t, s, sizeof(char *)); s += sizeof(char *); if (t) { - VALUE a, *p, *pend; + VALUE a; + const VALUE *p, *pend; - if (!(a = rb_str_associated(str))) { + if (!(a = str_associated(str))) { rb_raise(rb_eArgError, "no associated pointer"); } - p = RARRAY(a)->ptr; - pend = p + RARRAY(a)->len; + p = RARRAY_CONST_PTR(a); + pend = p + RARRAY_LEN(a); while (p < pend) { - if (TYPE(*p) == T_STRING && RSTRING(*p)->ptr == t) { - if (len < RSTRING(*p)->len) { + if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) { + if (len < RSTRING_LEN(*p)) { tmp = rb_tainted_str_new(t, len); - rb_str_associate(tmp, a); + str_associate(tmp, a); } else { tmp = *p; @@ -1914,36 +1686,34 @@ pack_unpack(str, fmt) rb_raise(rb_eArgError, "non associated pointer"); } } - else { - tmp = Qnil; - } - rb_ary_push(ary, tmp); + UNPACK_PUSH(tmp); } break; case 'p': - if (len > (send - s) / sizeof(char *)) + if (len > (long)((send - s) / sizeof(char *))) len = (send - s) / sizeof(char *); while (len-- > 0) { - if (send - s < sizeof(char *)) + if ((size_t)(send - s) < sizeof(char *)) break; else { - VALUE tmp; + VALUE tmp = Qnil; char *t; memcpy(&t, s, sizeof(char *)); s += sizeof(char *); if (t) { - VALUE a, *p, *pend; + VALUE a; + const VALUE *p, *pend; - if (!(a = rb_str_associated(str))) { + if (!(a = str_associated(str))) { rb_raise(rb_eArgError, "no associated pointer"); } - p = RARRAY(a)->ptr; - pend = p + RARRAY(a)->len; + p = RARRAY_CONST_PTR(a); + pend = p + RARRAY_LEN(a); while (p < pend) { - if (TYPE(*p) == T_STRING && RSTRING(*p)->ptr == t) { + if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) { tmp = *p; break; } @@ -1953,46 +1723,31 @@ pack_unpack(str, fmt) rb_raise(rb_eArgError, "non associated pointer"); } } - else { - tmp = Qnil; - } - rb_ary_push(ary, tmp); + UNPACK_PUSH(tmp); } } break; case 'w': { - unsigned long ul = 0; - unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8); - - while (len > 0 && s < send) { - ul <<= 7; - ul |= (*s & 0x7f); - if (!(*s++ & 0x80)) { - rb_ary_push(ary, ULONG2NUM(ul)); - len--; - ul = 0; - } - else if (ul & ulmask) { - VALUE big = rb_uint2big(ul); - VALUE big128 = rb_uint2big(128); - while (s < send) { - big = rb_big_mul(big, big128); - big = rb_big_plus(big, rb_uint2big(*s & 0x7f)); - if (!(*s++ & 0x80)) { - rb_ary_push(ary, big); - len--; - ul = 0; - break; - } - } - } - } + char *s0 = s; + while (len > 0 && s < send) { + if (*s & 0x80) { + s++; + } + else { + s++; + UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN)); + len--; + s0 = s; + } + } } break; default: + rb_warning("unknown unpack directive '%c' in '%s'", + type, RSTRING_PTR(fmt)); break; } } @@ -2000,53 +1755,193 @@ pack_unpack(str, fmt) return ary; } -#define BYTEWIDTH 8 +/* + * call-seq: + * str.unpack(format) -> anArray + * + * Decodes <i>str</i> (which may contain binary data) according to the + * format string, returning an array of each value extracted. The + * format string consists of a sequence of single-character directives, + * summarized in the table at the end of this entry. + * Each directive may be followed + * by a number, indicating the number of times to repeat with this + * directive. An asterisk (``<code>*</code>'') will use up all + * remaining elements. The directives <code>sSiIlL</code> may each be + * followed by an underscore (``<code>_</code>'') or + * exclamation mark (``<code>!</code>'') to use the underlying + * platform's native size for the specified type; otherwise, it uses a + * platform-independent consistent size. Spaces are ignored in the + * format string. See also <code>String#unpack1</code>, <code>Array#pack</code>. + * + * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] + * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] + * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] + * "aa".unpack('b8B8') #=> ["10000110", "01100001"] + * "aaa".unpack('h2H2c') #=> ["16", "61", 97] + * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] + * "now=20is".unpack('M*') #=> ["now is"] + * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] + * + * This table summarizes the various formats and the Ruby classes + * returned by each. + * + * Integer | | + * Directive | Returns | Meaning + * ------------------------------------------------------------------ + * C | Integer | 8-bit unsigned (unsigned char) + * S | Integer | 16-bit unsigned, native endian (uint16_t) + * L | Integer | 32-bit unsigned, native endian (uint32_t) + * Q | Integer | 64-bit unsigned, native endian (uint64_t) + * J | Integer | pointer width unsigned, native endian (uintptr_t) + * | | + * c | Integer | 8-bit signed (signed char) + * s | Integer | 16-bit signed, native endian (int16_t) + * l | Integer | 32-bit signed, native endian (int32_t) + * q | Integer | 64-bit signed, native endian (int64_t) + * j | Integer | pointer width signed, native endian (intptr_t) + * | | + * S_ S! | Integer | unsigned short, native endian + * I I_ I! | Integer | unsigned int, native endian + * L_ L! | Integer | unsigned long, native endian + * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError + * | | if the platform has no long long type.) + * J! | Integer | uintptr_t, native endian (same with J) + * | | + * s_ s! | Integer | signed short, native endian + * i i_ i! | Integer | signed int, native endian + * l_ l! | Integer | signed long, native endian + * q_ q! | Integer | signed long long, native endian (ArgumentError + * | | if the platform has no long long type.) + * j! | Integer | intptr_t, native endian (same with j) + * | | + * S> s> S!> s!> | Integer | same as the directives without ">" except + * L> l> L!> l!> | | big endian + * I!> i!> | | + * Q> q> Q!> q!> | | "S>" is same as "n" + * J> j> J!> j!> | | "L>" is same as "N" + * | | + * S< s< S!< s!< | Integer | same as the directives without "<" except + * L< l< L!< l!< | | little endian + * I!< i!< | | + * Q< q< Q!< q!< | | "S<" is same as "v" + * J< j< J!< j!< | | "L<" is same as "V" + * | | + * n | Integer | 16-bit unsigned, network (big-endian) byte order + * N | Integer | 32-bit unsigned, network (big-endian) byte order + * v | Integer | 16-bit unsigned, VAX (little-endian) byte order + * V | Integer | 32-bit unsigned, VAX (little-endian) byte order + * | | + * U | Integer | UTF-8 character + * w | Integer | BER-compressed integer (see Array.pack) + * + * Float | | + * Directive | Returns | Meaning + * ----------------------------------------------------------------- + * D d | Float | double-precision, native format + * F f | Float | single-precision, native format + * E | Float | double-precision, little-endian byte order + * e | Float | single-precision, little-endian byte order + * G | Float | double-precision, network (big-endian) byte order + * g | Float | single-precision, network (big-endian) byte order + * + * String | | + * Directive | Returns | Meaning + * ----------------------------------------------------------------- + * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces) + * a | String | arbitrary binary string + * Z | String | null-terminated string + * B | String | bit string (MSB first) + * b | String | bit string (LSB first) + * H | String | hex string (high nibble first) + * h | String | hex string (low nibble first) + * u | String | UU-encoded string + * M | String | quoted-printable, MIME encoding (see RFC2045) + * m | String | base64 encoded string (RFC 2045) (default) + * | | base64 encoded string (RFC 4648) if followed by 0 + * P | String | pointer to a structure (fixed-length string) + * p | String | pointer to a null-terminated string + * + * Misc. | | + * Directive | Returns | Meaning + * ----------------------------------------------------------------- + * @ | --- | skip to the offset given by the length argument + * X | --- | skip backward one byte + * x | --- | skip forward one byte + * + * HISTORY + * + * * J, J! j, and j! are available since Ruby 2.3. + * * Q_, Q!, q_, and q! are available since Ruby 2.1. + * * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3. + */ -static int -uv_to_utf8(buf, uv) - char *buf; - unsigned long uv; +static VALUE +pack_unpack(VALUE str, VALUE fmt) +{ + int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; + return pack_unpack_internal(str, fmt, mode); +} + +/* + * call-seq: + * str.unpack1(format) -> obj + * + * Decodes <i>str</i> (which may contain binary data) according to the + * format string, returning the first value extracted. + * See also <code>String#unpack</code>, <code>Array#pack</code>. + */ + +static VALUE +pack_unpack1(VALUE str, VALUE fmt) +{ + return pack_unpack_internal(str, fmt, UNPACK_1); +} + +int +rb_uv_to_utf8(char buf[6], unsigned long uv) { if (uv <= 0x7f) { buf[0] = (char)uv; return 1; } if (uv <= 0x7ff) { - buf[0] = ((uv>>6)&0xff)|0xc0; - buf[1] = (uv&0x3f)|0x80; + buf[0] = castchar(((uv>>6)&0xff)|0xc0); + buf[1] = castchar((uv&0x3f)|0x80); return 2; } if (uv <= 0xffff) { - buf[0] = ((uv>>12)&0xff)|0xe0; - buf[1] = ((uv>>6)&0x3f)|0x80; - buf[2] = (uv&0x3f)|0x80; + buf[0] = castchar(((uv>>12)&0xff)|0xe0); + buf[1] = castchar(((uv>>6)&0x3f)|0x80); + buf[2] = castchar((uv&0x3f)|0x80); return 3; } if (uv <= 0x1fffff) { - buf[0] = ((uv>>18)&0xff)|0xf0; - buf[1] = ((uv>>12)&0x3f)|0x80; - buf[2] = ((uv>>6)&0x3f)|0x80; - buf[3] = (uv&0x3f)|0x80; + buf[0] = castchar(((uv>>18)&0xff)|0xf0); + buf[1] = castchar(((uv>>12)&0x3f)|0x80); + buf[2] = castchar(((uv>>6)&0x3f)|0x80); + buf[3] = castchar((uv&0x3f)|0x80); return 4; } if (uv <= 0x3ffffff) { - buf[0] = ((uv>>24)&0xff)|0xf8; - buf[1] = ((uv>>18)&0x3f)|0x80; - buf[2] = ((uv>>12)&0x3f)|0x80; - buf[3] = ((uv>>6)&0x3f)|0x80; - buf[4] = (uv&0x3f)|0x80; + buf[0] = castchar(((uv>>24)&0xff)|0xf8); + buf[1] = castchar(((uv>>18)&0x3f)|0x80); + buf[2] = castchar(((uv>>12)&0x3f)|0x80); + buf[3] = castchar(((uv>>6)&0x3f)|0x80); + buf[4] = castchar((uv&0x3f)|0x80); return 5; } if (uv <= 0x7fffffff) { - buf[0] = ((uv>>30)&0xff)|0xfc; - buf[1] = ((uv>>24)&0x3f)|0x80; - buf[2] = ((uv>>18)&0x3f)|0x80; - buf[3] = ((uv>>12)&0x3f)|0x80; - buf[4] = ((uv>>6)&0x3f)|0x80; - buf[5] = (uv&0x3f)|0x80; + buf[0] = castchar(((uv>>30)&0xff)|0xfc); + buf[1] = castchar(((uv>>24)&0x3f)|0x80); + buf[2] = castchar(((uv>>18)&0x3f)|0x80); + buf[3] = castchar(((uv>>12)&0x3f)|0x80); + buf[4] = castchar(((uv>>6)&0x3f)|0x80); + buf[5] = castchar((uv&0x3f)|0x80); return 6; } rb_raise(rb_eRangeError, "pack(U): value out of range"); + + UNREACHABLE; } static const unsigned long utf8_limits[] = { @@ -2060,9 +1955,7 @@ static const unsigned long utf8_limits[] = { }; static unsigned long -utf8_to_uv(p, lenp) - char *p; - long *lenp; +utf8_to_uv(const char *p, long *lenp) { int c = *p++ & 0xff; unsigned long uv = c; @@ -2087,7 +1980,7 @@ utf8_to_uv(p, lenp) rb_raise(rb_eArgError, "malformed UTF-8 character"); } if (n > *lenp) { - rb_raise(rb_eArgError, "malformed UTF-8 character (expected %d bytes, given %d bytes)", + rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", n, *lenp); } *lenp = n--; @@ -2112,8 +2005,11 @@ utf8_to_uv(p, lenp) } void -Init_pack() +Init_pack(void) { - rb_define_method(rb_cArray, "pack", pack_pack, 1); + rb_define_method(rb_cArray, "pack", pack_pack, -1); rb_define_method(rb_cString, "unpack", pack_unpack, 1); + rb_define_method(rb_cString, "unpack1", pack_unpack1, 1); + + id_associated = rb_make_internal_id(); } |
