diff options
Diffstat (limited to 'pack.c')
| -rw-r--r-- | pack.c | 3168 |
1 files changed, 1552 insertions, 1616 deletions
@@ -3,182 +3,61 @@ pack.c - $Author$ - $Date$ created at: Thu Feb 10 15:17:05 JST 1994 - Copyright (C) 1993-2000 Yukihiro Matsumoto + Copyright (C) 1993-2007 Yukihiro Matsumoto **********************************************************************/ -#include "ruby.h" -#include <sys/types.h> -#include <ctype.h> +#include "ruby/internal/config.h" -#define SIZE16 2 -#define SIZE32 4 - -#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 -# define NATINT_PACK -#endif +#include <ctype.h> +#include <errno.h> +#include <float.h> +#include <sys/types.h> -#ifdef NATINT_PACK -# define OFF16B(p) ((char*)(p) + (natint?0:(sizeof(short) - SIZE16))) -# define OFF32B(p) ((char*)(p) + (natint?0:(sizeof(long) - SIZE32))) -# define NATINT_I32(x) (natint?NUM2LONG(x):(NUM2I32(x))) -# define NATINT_U32(x) (natint?NUM2ULONG(x):(NUM2U32(x))) -# define NATINT_LEN(type,len) (natint?sizeof(type):(len)) -# ifdef WORDS_BIGENDIAN -# define OFF16(p) OFF16B(p) -# define OFF32(p) OFF32B(p) -# endif +#include "internal.h" +#include "internal/array.h" +#include "internal/bits.h" +#include "internal/string.h" +#include "internal/symbol.h" +#include "internal/variable.h" +#include "ruby/util.h" + +#include "builtin.h" + +/* + * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG + * instead of HAVE_LONG_LONG or LONG_LONG. + * This means q! and Q! means always the standard long long type and + * causes ArgumentError for platforms which has no long long type, + * even if the platform has an implementation specific 64bit type. + * This behavior is consistent with the document of pack/unpack. + */ +#ifdef HAVE_TRUE_LONG_LONG +static const char natstr[] = "sSiIlLqQjJ"; +# define endstr natstr #else -# define NATINT_I32(x) NUM2I32(x) -# define NATINT_U32(x) NUM2U32(x) -# define NATINT_LEN(type,len) sizeof(type) -#endif - -#ifndef OFF16 -# define OFF16(p) (char*)(p) -# define OFF32(p) (char*)(p) +static const char natstr[] = "sSiIlLjJ"; +static const char endstr[] = "sSiIlLqQjJ"; #endif -#ifndef OFF16B -# define OFF16B(p) (char*)(p) -# define OFF32B(p) (char*)(p) -#endif - -#define define_swapx(x, xtype) \ -static xtype \ -TOKEN_PASTE(swap,x)(z) \ - xtype z; \ -{ \ - xtype r; \ - xtype *zp; \ - unsigned char *s, *t; \ - int i; \ - \ - zp = (xtype *)malloc(sizeof(xtype));\ - *zp = z; \ - s = (char *)zp; \ - t = (char *)malloc(sizeof(xtype)); \ - for (i=0; i<sizeof(xtype); i++) { \ - t[sizeof(xtype)-i-1] = s[i]; \ - } \ - r = *(xtype *)t; \ - free(t); \ - free(zp); \ - return r; \ -} - -#if SIZEOF_SHORT == 2 -#define swaps(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF)) -#else -#if SIZEOF_SHORT == 4 -#define swaps(x) ((((x)&0xFF)<<24) \ - |(((x)>>24)&0xFF) \ - |(((x)&0x0000FF00)<<8) \ - |(((x)&0x00FF0000)>>8) ) +#ifdef HAVE_TRUE_LONG_LONG +/* It is intentional to use long long instead of LONG_LONG. */ +# define NATINT_LEN_Q NATINT_LEN(long long, 8) #else -define_swapx(s,short); -#endif +# define NATINT_LEN_Q 8 #endif -#if SIZEOF_LONG == 4 -#define swapl(x) ((((x)&0xFF)<<24) \ - |(((x)>>24)&0xFF) \ - |(((x)&0x0000FF00)<<8) \ - |(((x)&0x00FF0000)>>8) ) -#else -#if SIZEOF_LONG == 8 -#define swapl(x) ((((x)&0x00000000000000FF)<<56) \ - |(((x)&0xFF00000000000000)>>56) \ - |(((x)&0x000000000000FF00)<<40) \ - |(((x)&0x00FF000000000000)>>40) \ - |(((x)&0x0000000000FF0000)<<24) \ - |(((x)&0x0000FF0000000000)>>24) \ - |(((x)&0x00000000FF000000)<<8) \ - |(((x)&0x000000FF00000000)>>8)) -#else -define_swapx(l,long); -#endif +#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8) +# define NATINT_PACK #endif -#if SIZEOF_FLOAT == 4 -#if SIZEOF_LONG == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_LONG */ -#define swapf(x) swapl(x) -#define FLOAT_SWAPPER unsigned long -#else -#if SIZEOF_SHORT == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_SHORT */ -#define swapf(x) swaps(x) -#define FLOAT_SWAPPER unsigned short -#else /* SIZEOF_FLOAT == 4 but undivide by known size of int */ -define_swapx(f,float); -#endif /* #if SIZEOF_SHORT == 4 */ -#endif /* #if SIZEOF_LONG == 4 */ -#else /* SIZEOF_FLOAT != 4 */ -define_swapx(f,float); -#endif /* #if SIZEOF_FLOAT == 4 */ - -#if SIZEOF_DOUBLE == 8 -#if SIZEOF_LONG == 8 /* SIZEOF_DOUBLE == 8 == SIZEOF_LONG */ -#define swapd(x) swapl(x) -#define DOUBLE_SWAPPER unsigned long -#else -#if SIZEOF_LONG == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_LONG */ -static double -swapd(d) - const double d; -{ - double dtmp = d; - unsigned long utmp[2]; - unsigned long utmp0; - - utmp[0] = 0; utmp[1] = 0; - memcpy(utmp,&dtmp,sizeof(double)); - utmp0 = utmp[0]; - utmp[0] = swapl(utmp[1]); - utmp[1] = swapl(utmp0); - memcpy(&dtmp,utmp,sizeof(double)); - return dtmp; -} -#else -#if SIZEOF_SHORT == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_SHORT */ -static double -swapd(d) - const double d; -{ - double dtmp = d; - unsigned short utmp[2]; - unsigned short utmp0; - - utmp[0] = 0; utmp[1] = 0; - memcpy(utmp,&dtmp,sizeof(double)); - utmp0 = utmp[0]; - utmp[0] = swaps(utmp[1]); - utmp[1] = swaps(utmp0); - memcpy(&dtmp,utmp,sizeof(double)); - return dtmp; -} -#else /* SIZEOF_DOUBLE == 8 but undivied by known size of int */ -define_swapx(d, double); -#endif /* #if SIZEOF_SHORT == 4 */ -#endif /* #if SIZEOF_LONG == 4 */ -#endif /* #if SIZEOF_LONG == 8 */ -#else /* SIZEOF_DOUBLE != 8 */ -define_swapx(d, double); -#endif /* #if SIZEOF_DPOUBLE == 8 */ - -#undef define_swapx - #ifdef DYNAMIC_ENDIAN -#ifdef ntohs -#undef ntohs -#undef ntohl -#undef htons -#undef htonl -#endif +/* for universal binary of NEXTSTEP and MacOS X */ +/* useless since autoconf 2.63? */ static int -endian() +is_bigendian(void) { static int init = 0; static int endian_value; @@ -189,1589 +68,1646 @@ endian() p = (char*)&init; return endian_value = p[0]?0:1; } - -#define ntohs(x) (endian()?(x):swaps(x)) -#define ntohl(x) (endian()?(x):swapl(x)) -#define ntohf(x) (endian()?(x):swapf(x)) -#define ntohd(x) (endian()?(x):swapd(x)) -#define htons(x) (endian()?(x):swaps(x)) -#define htonl(x) (endian()?(x):swapl(x)) -#define htonf(x) (endian()?(x):swapf(x)) -#define htond(x) (endian()?(x):swapd(x)) -#define htovs(x) (endian()?swaps(x):(x)) -#define htovl(x) (endian()?swapl(x):(x)) -#define htovf(x) (endian()?swapf(x):(x)) -#define htovd(x) (endian()?swapd(x):(x)) -#define vtohs(x) (endian()?swaps(x):(x)) -#define vtohl(x) (endian()?swapl(x):(x)) -#define vtohf(x) (endian()?swapf(x):(x)) -#define vtohd(x) (endian()?swapd(x):(x)) +# define BIGENDIAN_P() (is_bigendian()) +#elif defined(WORDS_BIGENDIAN) +# define BIGENDIAN_P() 1 #else -#ifdef WORDS_BIGENDIAN -#ifndef ntohs -#define ntohs(x) (x) -#define ntohl(x) (x) -#define htons(x) (x) -#define htonl(x) (x) -#endif -#define ntohf(x) (x) -#define ntohd(x) (x) -#define htonf(x) (x) -#define htond(x) (x) -#define htovs(x) swaps(x) -#define htovl(x) swapl(x) -#define htovf(x) swapf(x) -#define htovd(x) swapd(x) -#define vtohs(x) swaps(x) -#define vtohl(x) swapl(x) -#define vtohf(x) swapf(x) -#define vtohd(x) swapd(x) -#else /* LITTLE ENDIAN */ -#ifndef ntohs -#undef ntohs -#undef ntohl -#undef htons -#undef htonl -#define ntohs(x) swaps(x) -#define ntohl(x) swapl(x) -#define htons(x) swaps(x) -#define htonl(x) swapl(x) -#endif -#define ntohf(x) swapf(x) -#define ntohd(x) swapd(x) -#define htonf(x) swapf(x) -#define htond(x) swapd(x) -#define htovs(x) (x) -#define htovl(x) (x) -#define htovf(x) (x) -#define htovd(x) (x) -#define vtohs(x) (x) -#define vtohl(x) (x) -#define vtohf(x) (x) -#define vtohd(x) (x) -#endif +# define BIGENDIAN_P() 0 #endif -#ifdef FLOAT_SWAPPER -#define FLOAT_CONVWITH(y) FLOAT_SWAPPER y; -#define HTONF(x,y) (memcpy(&y,&x,sizeof(float)), \ - y = htonf((FLOAT_SWAPPER)y), \ - memcpy(&x,&y,sizeof(float)), \ - x) -#define HTOVF(x,y) (memcpy(&y,&x,sizeof(float)), \ - y = htovf((FLOAT_SWAPPER)y), \ - memcpy(&x,&y,sizeof(float)), \ - x) -#define NTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \ - y = ntohf((FLOAT_SWAPPER)y), \ - memcpy(&x,&y,sizeof(float)), \ - x) -#define VTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \ - y = vtohf((FLOAT_SWAPPER)y), \ - memcpy(&x,&y,sizeof(float)), \ - x) +#ifdef NATINT_PACK +# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len)) #else -#define FLOAT_CONVWITH(y) -#define HTONF(x,y) htonf(x) -#define HTOVF(x,y) htovf(x) -#define NTOHF(x,y) ntohf(x) -#define VTOHF(x,y) vtohf(x) +# define NATINT_LEN(type,len) ((int)sizeof(type)) #endif -#ifdef DOUBLE_SWAPPER -#define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y; -#define HTOND(x,y) (memcpy(&y,&x,sizeof(double)), \ - y = htond((DOUBLE_SWAPPER)y), \ - memcpy(&x,&y,sizeof(double)), \ - x) -#define HTOVD(x,y) (memcpy(&y,&x,sizeof(double)), \ - y = htovd((DOUBLE_SWAPPER)y), \ - memcpy(&x,&y,sizeof(double)), \ - x) -#define NTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \ - y = ntohd((DOUBLE_SWAPPER)y), \ - memcpy(&x,&y,sizeof(double)), \ - x) -#define VTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \ - y = vtohd((DOUBLE_SWAPPER)y), \ - memcpy(&x,&y,sizeof(double)), \ - x) -#else -#define DOUBLE_CONVWITH(y) -#define HTOND(x,y) htond(x) -#define HTOVD(x,y) htovd(x) -#define NTOHD(x,y) ntohd(x) -#define VTOHD(x,y) vtohd(x) -#endif +typedef union { + float f; + uint32_t u; + char buf[4]; +} FLOAT_SWAPPER; +typedef union { + double d; + uint64_t u; + char buf[8]; +} DOUBLE_SWAPPER; +#define swapf(x) swap32(x) +#define swapd(x) swap64(x) + +#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x)) +#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x)) +#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x)) +#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x)) +#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x)) +#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x)) +#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x)) +#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x)) + +#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x; +#define HTONF(x) ((x).u = rb_htonf((x).u)) +#define HTOVF(x) ((x).u = rb_htovf((x).u)) +#define NTOHF(x) ((x).u = rb_ntohf((x).u)) +#define VTOHF(x) ((x).u = rb_vtohf((x).u)) + +#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x; +#define HTOND(x) ((x).u = rb_htond((x).u)) +#define HTOVD(x) ((x).u = rb_htovd((x).u)) +#define NTOHD(x) ((x).u = rb_ntohd((x).u)) +#define VTOHD(x) ((x).u = rb_vtohd((x).u)) + +#define MAX_INTEGER_PACK_SIZE 8 + +static const char toofew[] = "too few arguments"; + +static void encodes(VALUE,const char*,long,int,int); +static void qpencode(VALUE,VALUE,long); + +static unsigned long utf8_to_uv(const char*,long*); + +static ID id_associated; -#if SIZEOF_LONG == SIZE32 -typedef long I32; -typedef unsigned long U32; -#define NUM2I32(x) NUM2LONG(x) -#define NUM2U32(x) NUM2ULONG(x) -#elif SIZEOF_INT == SIZE32 -typedef int I32; -typedef unsigned int U32; -#define NUM2I32(x) NUM2INT(x) -#define NUM2U32(x) NUM2UINT(x) -#endif +static void +str_associate(VALUE str, VALUE add) +{ + /* assert(NIL_P(rb_attr_get(str, id_associated))); */ + rb_ivar_set(str, id_associated, add); +} -static char *toofew = "too few arguments"; +static VALUE +str_associated(VALUE str) +{ + VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse); + if (!associates) + rb_raise(rb_eArgError, "no associated pointer"); + return associates; +} -static void encodes _((VALUE,char*,int,int)); -static void qpencode _((VALUE,VALUE,int)); +static VALUE +associated_pointer(VALUE associates, const char *t) +{ + const VALUE *p = RARRAY_CONST_PTR(associates); + const VALUE *pend = p + RARRAY_LEN(associates); + for (; p < pend; p++) { + VALUE tmp = *p; + if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp; + } + rb_raise(rb_eArgError, "non associated pointer"); + UNREACHABLE_RETURN(Qnil); +} -static int uv_to_utf8 _((char*,unsigned long)); -static unsigned long utf8_to_uv _((char*,int*)); +RBIMPL_ATTR_NORETURN() +static void +unknown_directive(const char *mode, char type, VALUE fmt) +{ + char unknown[5]; -static VALUE -pack_pack(ary, fmt) - VALUE ary, fmt; + if (ISPRINT(type)) { + unknown[0] = type; + unknown[1] = '\0'; + } + else { + snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff); + } + fmt = rb_str_quote_unprintable(fmt); + rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'", + mode, unknown, fmt); +} + +static float +VALUE_to_float(VALUE obj) { - static char *nul10 = "\0\0\0\0\0\0\0\0\0\0"; - static char *spc10 = " "; - char *p, *pend; - VALUE res, from; - char type; - int items, len, idx; - char *ptr; - int plen; + VALUE v = rb_to_float(obj); + double d = RFLOAT_VALUE(v); + + if (isnan(d)) { + return NAN; + } + else if (d < -FLT_MAX) { + return -INFINITY; + } + else if (d <= FLT_MAX) { + return d; + } + else { + return INFINITY; + } +} + +static void +str_expand_fill(VALUE res, int c, long len) +{ + long olen = RSTRING_LEN(res); + memset(RSTRING_PTR(res) + olen, c, len); + rb_str_set_len(res, olen + len); +} + +static char * +skip_to_eol(const char *p, const char *pend) +{ + p = memchr(p, '\n', pend - p); + return (char *)(p ? p + 1 : pend); +} + +#define skip_blank(p, type) \ + (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1))) + +#ifndef NATINT_PACK +# define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e) +#endif +static char * +pack_modifiers(const char *p, char type, int *natint, int *explicit_endian) +{ + while (1) { + switch (*p) { + case '_': + case '!': + if (strchr(natstr, type)) { #ifdef NATINT_PACK - int natint; /* native integer */ + *natint = 1; #endif - - p = rb_str2cstr(fmt, &plen); - pend = p + plen; - res = rb_str_new(0, 0); + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + break; + + case '<': + case '>': + if (!strchr(endstr, type)) { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); + } + if (*explicit_endian) { + rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); + } + *explicit_endian = *p++; + break; + default: + return (char *)p; + } + } +} + +static VALUE +pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) +{ + const char *p, *pend; + VALUE res, from, associates = 0; + long len, idx, plen; + const char *ptr; + int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ + int integer_size, bigendian_p; + + StringValue(fmt); + rb_must_asciicompat(fmt); + p = RSTRING_PTR(fmt); + pend = p + RSTRING_LEN(fmt); + + if (NIL_P(buffer)) { + res = rb_str_buf_new(0); + } + else { + if (!RB_TYPE_P(buffer, T_STRING)) + rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer)); + rb_str_modify(buffer); + res = buffer; + } - items = RARRAY(ary)->len; idx = 0; -#define NEXTFROM (items-- > 0 ? RARRAY(ary)->ptr[idx++] : (rb_raise(rb_eArgError, toofew),0)) +#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0) +#define MORE_ITEM (idx < RARRAY_LEN(ary)) +#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW) +#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW) while (p < pend) { - type = *p++; /* get data type */ + int explicit_endian = 0; + if (RSTRING_END(fmt) != pend) { + rb_raise(rb_eRuntimeError, "format string modified"); + } + const char type = *p++; /* get data type */ #ifdef NATINT_PACK - natint = 0; + int natint = 0; /* native integer */ #endif - if (ISSPACE(type)) continue; - if (*p == '_' || *p == '!') { - char *natstr = "sSiIlL"; + if (skip_blank(p, type)) continue; + p = pack_modifiers(p, type, &natint, &explicit_endian); - if (strchr(natstr, type)) { -#ifdef NATINT_PACK - natint = 1; -#endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); - } - } - if (*p == '*') { /* set data length */ - len = strchr("@Xxu", type) ? 0 : items; + if (*p == '*') { /* set data length */ + len = strchr("@Xxu", type) ? 0 + : strchr("PMm", type) ? 1 + : RARRAY_LEN(ary) - idx; p++; - } - else if (ISDIGIT(*p)) { - len = strtoul(p, (char**)&p, 10); - } - else { - len = 1; - } - - switch (type) { - case 'A': case 'a': case 'Z': - case 'B': case 'b': - case 'H': case 'h': - from = NEXTFROM; - if (NIL_P(from)) { - ptr = ""; - plen = 0; - } - else { - ptr = rb_str2cstr(from, &plen); - } - - if (p[-1] == '*') - len = plen; - - switch (type) { - case 'a': - case 'A': - case 'Z': - if (plen >= len) - rb_str_cat(res, ptr, len); - else { - rb_str_cat(res, ptr, plen); - len -= plen; - while (len >= 10) { - rb_str_cat(res, (type == 'A')?spc10:nul10, 10); - len -= 10; - } - rb_str_cat(res, (type == 'A')?spc10:nul10, len); - } - break; - - case 'b': - { - int byte = 0; - int i, j = 0; - - if (len > plen) { - j = (len - plen + 1)/2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (*ptr & 1) - byte |= 128; - if (i & 7) - byte >>= 1; - else { - char c = byte & 0xff; - rb_str_cat(res, &c, 1); - byte = 0; - } - } - if (len & 7) { - char c; - byte >>= 7 - (len & 7); - c = byte & 0xff; - rb_str_cat(res, &c, 1); - } - len = RSTRING(res)->len; - rb_str_resize(res, len+j); - MEMZERO(RSTRING(res)->ptr+len, char, j); - } - break; - - case 'B': - { - int byte = 0; - int i, j = 0; - - if (len > plen) { - j = (len - plen + 1)/2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - byte |= *ptr & 1; - if (i & 7) - byte <<= 1; - else { - char c = byte & 0xff; - rb_str_cat(res, &c, 1); - byte = 0; - } - } - if (len & 7) { - char c; - byte <<= 7 - (len & 7); - c = byte & 0xff; - rb_str_cat(res, &c, 1); - } - len = RSTRING(res)->len; - rb_str_resize(res, len+j); - MEMZERO(RSTRING(res)->ptr+len, char, j); - } - break; - - case 'h': - { - int byte = 0; - int i, j = 0; - - if (len > plen) { - j = (len - plen + 1)/2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (ISALPHA(*ptr)) - byte |= (((*ptr & 15) + 9) & 15) << 4; - else - byte |= (*ptr & 15) << 4; - if (i & 1) - byte >>= 4; - else { - char c = byte & 0xff; - rb_str_cat(res, &c, 1); - byte = 0; - } - } - if (len & 1) { - char c = byte & 0xff; - rb_str_cat(res, &c, 1); - } - len = RSTRING(res)->len; - rb_str_resize(res, len+j); - MEMZERO(RSTRING(res)->ptr+len, char, j); - } - break; - - case 'H': - { - int byte = 0; - int i, j = 0; - - if (len > plen) { - j = (len - plen + 1)/2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (ISALPHA(*ptr)) - byte |= ((*ptr & 15) + 9) & 15; - else - byte |= *ptr & 15; - if (i & 1) - byte <<= 4; - else { - char c = byte & 0xff; - rb_str_cat(res, &c, 1); - byte = 0; - } - } - if (len & 1) { - char c = byte & 0xff; - rb_str_cat(res, &c, 1); - } - len = RSTRING(res)->len; - rb_str_resize(res, len+j); - MEMZERO(RSTRING(res)->ptr+len, char, j); - } - break; - } - break; - - case 'c': - case 'C': - while (len-- > 0) { - char c; - - from = NEXTFROM; - if (NIL_P(from)) c = 0; - else { - c = NUM2INT(from); - } - rb_str_cat(res, &c, sizeof(char)); - } - break; - - case 's': - case 'S': - while (len-- > 0) { - short s; - - from = NEXTFROM; - if (NIL_P(from)) s = 0; - else { - s = NUM2INT(from); - } - rb_str_cat(res, OFF16(&s), NATINT_LEN(short,2)); - } - break; - - case 'i': - case 'I': - while (len-- > 0) { - int i; - - from = NEXTFROM; - if (NIL_P(from)) i = 0; - else { - i = NUM2UINT(from); - } - rb_str_cat(res, (char*)&i, sizeof(int)); - } - break; - - case 'l': - case 'L': - while (len-- > 0) { - long l; - - from = NEXTFROM; - if (NIL_P(from)) l = 0; - else { - l = NATINT_U32(from); - } - rb_str_cat(res, OFF32(&l), NATINT_LEN(long,4)); - } - break; - - case 'n': - while (len-- > 0) { - unsigned short s; - - from = NEXTFROM; - if (NIL_P(from)) s = 0; - else { - s = NUM2INT(from); - } - s = htons(s); - rb_str_cat(res, OFF16B(&s), NATINT_LEN(short,2)); - } - break; - - case 'N': - while (len-- > 0) { - unsigned long l; - - from = NEXTFROM; - if (NIL_P(from)) l = 0; - else { - l = NATINT_U32(from); - } - l = htonl(l); - rb_str_cat(res, OFF32B(&l), NATINT_LEN(long,4)); - } - break; - - case 'v': - while (len-- > 0) { - unsigned short s; - - from = NEXTFROM; - if (NIL_P(from)) s = 0; - else { - s = NUM2INT(from); - } - s = htovs(s); - rb_str_cat(res, OFF16(&s), NATINT_LEN(short,2)); - } - break; - - case 'V': - while (len-- > 0) { - unsigned long l; - - from = NEXTFROM; - if (NIL_P(from)) l = 0; - else { - l = NATINT_U32(from); - } - l = htovl(l); - rb_str_cat(res, OFF32(&l), NATINT_LEN(long,4)); - } - break; - - case 'f': - case 'F': - while (len-- > 0) { - float f; - - from = NEXTFROM; - switch (TYPE(from)) { - case T_FLOAT: - f = RFLOAT(from)->value; - break; - case T_STRING: - f = strtod(RSTRING(from)->ptr, 0); - default: - f = (float)NUM2INT(from); - break; - } - rb_str_cat(res, (char*)&f, sizeof(float)); - } - break; - - case 'e': - while (len-- > 0) { - float f; - FLOAT_CONVWITH(ftmp); - - from = NEXTFROM; - switch (TYPE(from)) { - case T_FLOAT: - f = RFLOAT(from)->value; - break; - case T_STRING: - f = strtod(RSTRING(from)->ptr, 0); - default: - f = (float)NUM2INT(from); - break; - } - f = HTOVF(f,ftmp); - rb_str_cat(res, (char*)&f, sizeof(float)); - } - break; - - case 'E': - while (len-- > 0) { - double d; - DOUBLE_CONVWITH(dtmp); - - from = NEXTFROM; - switch (TYPE(from)) { - case T_FLOAT: - d = RFLOAT(from)->value; - break; - case T_STRING: - d = strtod(RSTRING(from)->ptr, 0); - default: - d = (double)NUM2INT(from); - break; - } - d = HTOVD(d,dtmp); - rb_str_cat(res, (char*)&d, sizeof(double)); - } - break; - - case 'd': - case 'D': - while (len-- > 0) { - double d; - - from = NEXTFROM; - switch (TYPE(from)) { - case T_FLOAT: - d = RFLOAT(from)->value; - break; - case T_STRING: - d = strtod(RSTRING(from)->ptr, 0); - default: - d = (double)NUM2INT(from); - break; - } - rb_str_cat(res, (char*)&d, sizeof(double)); - } - break; - - case 'g': - while (len-- > 0) { - float f; - FLOAT_CONVWITH(ftmp); - - from = NEXTFROM; - switch (TYPE(from)) { - case T_FLOAT: - f = RFLOAT(from)->value; - break; - case T_STRING: - f = strtod(RSTRING(from)->ptr, 0); - default: - f = (float)NUM2INT(from); - break; - } - f = HTONF(f,ftmp); - rb_str_cat(res, (char*)&f, sizeof(float)); - } - break; - - case 'G': - while (len-- > 0) { - double d; - DOUBLE_CONVWITH(dtmp); - - from = NEXTFROM; - switch (TYPE(from)) { - case T_FLOAT: - d = RFLOAT(from)->value; - break; - case T_STRING: - d = strtod(RSTRING(from)->ptr, 0); - default: - d = (double)NUM2INT(from); - break; - } - d = HTOND(d,dtmp); - rb_str_cat(res, (char*)&d, sizeof(double)); - } - break; - - case 'x': - grow: - while (len >= 10) { - rb_str_cat(res, nul10, 10); - len -= 10; - } - rb_str_cat(res, nul10, len); - break; - - case 'X': - shrink: - if (RSTRING(res)->len < len) - rb_raise(rb_eArgError, "X outside of string"); - RSTRING(res)->len -= len; - RSTRING(res)->ptr[RSTRING(res)->len] = '\0'; - break; - - case '@': - len -= RSTRING(res)->len; - if (len > 0) goto grow; - len = -len; - if (len > 0) goto shrink; - break; - - case '%': - rb_raise(rb_eArgError, "%% is not supported"); - break; - - case 'U': - while (len-- > 0) { - unsigned long l; - char buf[8]; - int le; - - from = NEXTFROM; - if (NIL_P(from)) l = 0; - else { - l = NUM2ULONG(from); - } - le = uv_to_utf8(buf, l); - rb_str_cat(res, (char*)buf, le); - } - break; - - case 'u': - case 'm': - ptr = rb_str2cstr(NEXTFROM, &plen); - - if (len <= 2) - len = 45; - else - len = len / 3 * 3; - while (plen > 0) { - int todo; - - if (plen > len) - todo = len; - else - todo = plen; - encodes(res, ptr, todo, type); - plen -= todo; - ptr += todo; - } - break; - - case 'M': - from = rb_obj_as_string(NEXTFROM); - if (len <= 1) - len = 72; - qpencode(res, from, len); - break; - - case 'P': - len = 1; - /* FALL THROUGH */ - case 'p': - while (len-- > 0) { - char *t; - from = NEXTFROM; - if (NIL_P(from)) t = ""; - else { - t = STR2CSTR(from); - rb_str_associate(res, from); - } - rb_str_cat(res, (char*)&t, sizeof(char*)); - } - break; - - case 'w': - while (len-- > 0) { - unsigned long ul; - VALUE buf = rb_str_new(0, 0); - char c, *bufs, *bufe; - - from = NEXTFROM; - - if (TYPE(from) == T_BIGNUM) { - VALUE big128 = rb_uint2big(128); - while (TYPE(from) == T_BIGNUM) { - from = rb_big_divmod(from, big128); - c = NUM2INT(RARRAY(from)->ptr[1]) | 0x80; /* mod */ - rb_str_cat(buf, &c, sizeof(char)); - from = RARRAY(from)->ptr[0]; /* div */ - } - } - - if (NIL_P(from)) ul = 0; - else { - ul = NUM2ULONG(from); - } - - while (ul) { - c = ((ul & 0x7f) | 0x80); - rb_str_cat(buf, &c, sizeof(char)); - ul >>= 7; - } - - if (RSTRING(buf)->len) { - bufs = RSTRING(buf)->ptr; - bufe = bufs + RSTRING(buf)->len - 1; - *bufs &= 0x7f; /* clear continue bit */ - while (bufs < bufe) { /* reverse */ - c = *bufs; - *bufs++ = *bufe; - *bufe-- = c; - } - rb_str_cat(res, RSTRING(buf)->ptr, RSTRING(buf)->len); - } - else { - c = 0; - rb_str_cat(res, &c, sizeof(char)); - } - } - break; - - default: - break; - } + } + else if (ISDIGIT(*p)) { + errno = 0; + len = STRTOUL(p, (char**)&p, 10); + if (len < 0 || errno) { + rb_raise(rb_eRangeError, "pack length too big"); + } + } + else { + len = 1; + } + + switch (type) { + case 'U': + /* if encoding is US-ASCII, upgrade to UTF-8 */ + if (enc_info == 1) enc_info = 2; + break; + case 'm': case 'M': case 'u': + /* keep US-ASCII (do nothing) */ + break; + default: + /* fall back to BINARY */ + enc_info = 0; + break; + } + switch (type) { + case 'A': case 'a': case 'Z': + case 'B': case 'b': + case 'H': case 'h': + from = NEXTFROM; + if (NIL_P(from)) { + ptr = ""; + plen = 0; + } + else { + StringValue(from); + ptr = RSTRING_PTR(from); + plen = RSTRING_LEN(from); + } + + if (p[-1] == '*') + len = plen; + + switch (type) { + case 'a': /* arbitrary binary string (null padded) */ + case 'A': /* arbitrary binary string (ASCII space padded) */ + case 'Z': /* null terminated string */ + if (plen >= len) { + rb_str_buf_cat(res, ptr, len); + if (p[-1] == '*' && type == 'Z') + rb_str_buf_cat(res, "", 1); + } + else { + rb_str_modify_expand(res, len); + rb_str_buf_cat(res, ptr, plen); + str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen); + } + break; + +#define castchar(from) (char)((from) & 0xff) + + case 'b': /* bit string (ascending) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (*ptr & 1) + byte |= 128; + if (i & 7) + byte >>= 1; + else { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 7) { + char c; + byte >>= 7 - (len & 7); + c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'B': /* bit string (descending) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + byte |= *ptr & 1; + if (i & 7) + byte <<= 1; + else { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 7) { + char c; + byte <<= 7 - (len & 7); + c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'h': /* hex string (low nibble first) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len + 1) / 2 - (plen + 1) / 2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (ISALPHA(*ptr)) + byte |= (((*ptr & 15) + 9) & 15) << 4; + else + byte |= (*ptr & 15) << 4; + if (i & 1) + byte >>= 4; + else { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 1) { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'H': /* hex string (high nibble first) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len + 1) / 2 - (plen + 1) / 2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (ISALPHA(*ptr)) + byte |= ((*ptr & 15) + 9) & 15; + else + byte |= *ptr & 15; + if (i & 1) + byte <<= 4; + else { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 1) { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + } + break; + + case 'c': /* signed char */ + case 'C': /* unsigned char */ + integer_size = 1; + bigendian_p = BIGENDIAN_P(); /* not effective */ + goto pack_integer; + + case 's': /* s for int16_t, s! for signed short */ + case 'S': /* S for uint16_t, S! for unsigned short */ + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'i': /* i and i! for signed int */ + case 'I': /* I and I! for unsigned int */ + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'l': /* l for int32_t, l! for signed long */ + case 'L': /* L for uint32_t, L! for unsigned long */ + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'q': /* q for int64_t, q! for signed long long */ + case 'Q': /* Q for uint64_t, Q! for unsigned long long */ + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'j': /* j for intptr_t */ + integer_size = sizeof(intptr_t); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'J': /* J for uintptr_t */ + integer_size = sizeof(uintptr_t); + bigendian_p = BIGENDIAN_P(); + goto pack_integer; + + case 'n': /* 16 bit (2 bytes) integer (network byte-order) */ + integer_size = 2; + bigendian_p = 1; + goto pack_integer; + + case 'N': /* 32 bit (4 bytes) integer (network byte-order) */ + integer_size = 4; + bigendian_p = 1; + goto pack_integer; + + case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */ + integer_size = 2; + bigendian_p = 0; + goto pack_integer; + + case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */ + integer_size = 4; + bigendian_p = 0; + goto pack_integer; + + pack_integer: + if (explicit_endian) { + bigendian_p = explicit_endian == '>'; + } + if (integer_size > MAX_INTEGER_PACK_SIZE) + rb_bug("unexpected integer size for pack: %d", integer_size); + while (len-- > 0) { + char intbuf[MAX_INTEGER_PACK_SIZE]; + + from = NEXTFROM; + rb_integer_pack(from, intbuf, integer_size, 1, 0, + INTEGER_PACK_2COMP | + (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN)); + rb_str_buf_cat(res, intbuf, integer_size); + } + break; + + case 'f': /* single precision float in native format */ + case 'F': /* ditto */ + while (len-- > 0) { + float f; + + from = NEXTFROM; + f = VALUE_to_float(from); + rb_str_buf_cat(res, (char*)&f, sizeof(float)); + } + break; + + case 'e': /* single precision float in VAX byte-order */ + while (len-- > 0) { + FLOAT_CONVWITH(tmp); + + from = NEXTFROM; + tmp.f = VALUE_to_float(from); + HTOVF(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(float)); + } + break; + + case 'E': /* double precision float in VAX byte-order */ + while (len-- > 0) { + DOUBLE_CONVWITH(tmp); + from = NEXTFROM; + tmp.d = RFLOAT_VALUE(rb_to_float(from)); + HTOVD(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(double)); + } + break; + + case 'd': /* double precision float in native format */ + case 'D': /* ditto */ + while (len-- > 0) { + double d; + + from = NEXTFROM; + d = RFLOAT_VALUE(rb_to_float(from)); + rb_str_buf_cat(res, (char*)&d, sizeof(double)); + } + break; + + case 'g': /* single precision float in network byte-order */ + while (len-- > 0) { + FLOAT_CONVWITH(tmp); + from = NEXTFROM; + tmp.f = VALUE_to_float(from); + HTONF(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(float)); + } + break; + + case 'G': /* double precision float in network byte-order */ + while (len-- > 0) { + DOUBLE_CONVWITH(tmp); + + from = NEXTFROM; + tmp.d = RFLOAT_VALUE(rb_to_float(from)); + HTOND(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(double)); + } + break; + + case 'x': /* null byte */ + grow: + rb_str_modify_expand(res, len); + str_expand_fill(res, '\0', len); + break; + + case 'X': /* back up byte */ + shrink: + plen = RSTRING_LEN(res); + if (plen < len) + rb_raise(rb_eArgError, "X outside of string"); + rb_str_set_len(res, plen - len); + break; + + case '@': /* null fill to absolute position */ + len -= RSTRING_LEN(res); + if (len > 0) goto grow; + len = -len; + if (len > 0) goto shrink; + break; + + case '%': + rb_raise(rb_eArgError, "%% is not supported"); + break; + + case 'U': /* Unicode character */ + while (len-- > 0) { + SIGNED_VALUE l; + char buf[8]; + int le; + + from = NEXTFROM; + from = rb_to_int(from); + l = NUM2LONG(from); + if (l < 0) { + rb_raise(rb_eRangeError, "pack(U): value out of range"); + } + le = rb_uv_to_utf8(buf, l); + rb_str_buf_cat(res, (char*)buf, le); + } + break; + + case 'u': /* uuencoded string */ + case 'm': /* base64 encoded string */ + from = NEXTFROM; + StringValue(from); + ptr = RSTRING_PTR(from); + plen = RSTRING_LEN(from); + + if (len == 0 && type == 'm') { + encodes(res, ptr, plen, type, 0); + ptr += plen; + break; + } + if (len <= 2) + len = 45; + else if (len > 63 && type == 'u') + len = 63; + else + len = len / 3 * 3; + while (plen > 0) { + long todo; + + if (plen > len) + todo = len; + else + todo = plen; + encodes(res, ptr, todo, type, 1); + plen -= todo; + ptr += todo; + } + break; + + case 'M': /* quoted-printable encoded string */ + from = rb_obj_as_string(NEXTFROM); + if (len <= 1) + len = 72; + qpencode(res, from, len); + break; + + case 'P': /* pointer to packed byte string */ + from = THISFROM; + if (!NIL_P(from)) { + StringValue(from); + if (RSTRING_LEN(from) < len) { + rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", + RSTRING_LEN(from), len); + } + } + len = 1; + /* FALL THROUGH */ + case 'p': /* pointer to string */ + while (len-- > 0) { + char *t; + from = NEXTFROM; + if (NIL_P(from)) { + t = 0; + } + else { + t = StringValuePtr(from); + } + if (!associates) { + associates = rb_ary_new(); + } + rb_ary_push(associates, from); + rb_str_buf_cat(res, (char*)&t, sizeof(char*)); + } + break; + + case 'w': /* BER compressed integer */ + while (len-- > 0) { + VALUE buf; + size_t numbytes; + int sign; + char *cp; + + from = NEXTFROM; + from = rb_to_int(from); + numbytes = rb_absint_numwords(from, 7, NULL); + if (numbytes == 0) + numbytes = 1; + buf = rb_str_new(NULL, numbytes); + + sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN); + + if (sign < 0) + rb_raise(rb_eArgError, "can't compress negative numbers"); + if (sign == 2) + rb_bug("buffer size problem?"); + + cp = RSTRING_PTR(buf); + while (1 < numbytes) { + *cp |= 0x80; + cp++; + numbytes--; + } + + rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); + } + break; + + default: { + unknown_directive("pack", type, fmt); + break; + } + } } + if (associates) { + str_associate(res, associates); + } + switch (enc_info) { + case 1: + ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + break; + case 2: + rb_enc_set_index(res, rb_utf8_encindex()); + break; + default: + /* do nothing, keep ASCII-8BIT */ + break; + } return res; } -static char uu_table[] = +VALUE +rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) +{ + return pack_pack(ec, ary, fmt, buffer); +} + +static const char uu_table[] = "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; -static char b64_table[] = +static const char b64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static void -encodes(str, s, len, type) - VALUE str; - char *s; - int len; - int type; +encodes(VALUE str, const char *s0, long len, int type, int tail_lf) { - char *buff = ALLOCA_N(char, len * 4 / 3 + 6); - int i = 0; - char *trans = type == 'u' ? uu_table : b64_table; - int padding; + enum {buff_size = 4096, encoded_unit = 4, input_unit = 3}; + char buff[buff_size + 1]; /* +1 for tail_lf */ + long i = 0; + const char *const trans = type == 'u' ? uu_table : b64_table; + char padding; + const unsigned char *s = (const unsigned char *)s0; if (type == 'u') { - buff[i++] = len + ' '; - padding = '`'; + buff[i++] = (char)len + ' '; + padding = '`'; } else { - padding = '='; + padding = '='; } - while (len >= 3) { - buff[i++] = trans[077 & (*s >> 2)]; - buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; - buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; - buff[i++] = trans[077 & s[2]]; - s += 3; - len -= 3; + while (len >= input_unit) { + while (len >= input_unit && buff_size-i >= encoded_unit) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; + buff[i++] = trans[077 & s[2]]; + s += input_unit; + len -= input_unit; + } + if (buff_size-i < encoded_unit) { + rb_str_buf_cat(str, buff, i); + i = 0; + } } + if (len == 2) { - buff[i++] = trans[077 & (*s >> 2)]; - buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; - buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; - buff[i++] = padding; + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; + buff[i++] = padding; } else if (len == 1) { - buff[i++] = trans[077 & (*s >> 2)]; - buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; - buff[i++] = padding; - buff[i++] = padding; + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; + buff[i++] = padding; + buff[i++] = padding; } - buff[i++] = '\n'; - rb_str_cat(str, buff, i); + if (tail_lf) buff[i++] = '\n'; + rb_str_buf_cat(str, buff, i); + if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun"); } -static char hex_table[] = "0123456789ABCDEF"; +static const char hex_table[] = "0123456789ABCDEF"; static void -qpencode(str, from, len) - VALUE str, from; - int len; +qpencode(VALUE str, VALUE from, long len) { char buff[1024]; - int i = 0, n = 0, prev = EOF; - unsigned char *s = (unsigned char*)RSTRING(from)->ptr; - unsigned char *send = s + RSTRING(from)->len; + long i = 0, n = 0, prev = EOF; + unsigned char *s = (unsigned char*)RSTRING_PTR(from); + unsigned char *send = s + RSTRING_LEN(from); while (s < send) { if ((*s > 126) || - (*s < 32 && *s != '\n' && *s != '\t') || - (*s == '=')) { - buff[i++] = '='; - buff[i++] = hex_table[*s >> 4]; - buff[i++] = hex_table[*s & 0x0f]; + (*s < 32 && *s != '\n' && *s != '\t') || + (*s == '=')) { + buff[i++] = '='; + buff[i++] = hex_table[*s >> 4]; + buff[i++] = hex_table[*s & 0x0f]; n += 3; prev = EOF; } - else if (*s == '\n') { + else if (*s == '\n') { if (prev == ' ' || prev == '\t') { - buff[i++] = '='; - buff[i++] = *s; + buff[i++] = '='; + buff[i++] = *s; } - buff[i++] = *s; + buff[i++] = *s; n = 0; prev = *s; } - else { - buff[i++] = *s; + else { + buff[i++] = *s; n++; prev = *s; } if (n > len) { - buff[i++] = '='; - buff[i++] = '\n'; + buff[i++] = '='; + buff[i++] = '\n'; n = 0; prev = '\n'; } - if (i > 1024 - 5) { - rb_str_cat(str, buff, i); - i = 0; - } - s++; + if (i > 1024 - 5) { + rb_str_buf_cat(str, buff, i); + i = 0; + } + s++; } if (n > 0) { - buff[i++] = '='; - buff[i++] = '\n'; + buff[i++] = '='; + buff[i++] = '\n'; } if (i > 0) { - rb_str_cat(str, buff, i); + rb_str_buf_cat(str, buff, i); } } static inline int -hex2num(c) - char c; +hex2num(char c) { - switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return c - '0'; - case 'a': case 'b': case 'c': - case 'd': case 'e': case 'f': - return c - 'a' + 10; - case 'A': case 'B': case 'C': - case 'D': case 'E': case 'F': - return c - 'A' + 10; - default: - return -1; - } + int n; + n = ruby_digit36_to_number_table[(unsigned char)c]; + if (16 <= n) + n = -1; + return n; } -#ifdef NATINT_PACK -#define PACK_LENGTH_ADJUST(type,sz) do { \ - int t__len = NATINT_LEN(type,(sz)); \ - tmp = 0; \ - if (len > (send-s)/t__len) { \ +#define PACK_LENGTH_ADJUST_SIZE(sz) do { \ + tmp_len = 0; \ + if (len > (long)((send-s)/(sz))) { \ if (!star) { \ - tmp = len-(send-s)/t__len; \ + tmp_len = len-(send-s)/(sz); \ } \ - len = (send-s)/t__len; \ + len = (send-s)/(sz); \ } \ } while (0) -#else -#define PACK_LENGTH_ADJUST(type,sz) do { \ - tmp = 0; \ - if (len > (send-s)/sizeof(type)) { \ - if (!star) { \ - tmp = len - (send-s)/sizeof(type); \ - } \ - len = (send-s)/sizeof(type); \ - } \ + +#define PACK_ITEM_ADJUST() do { \ + if (tmp_len > 0 && mode == UNPACK_ARRAY) \ + rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ } while (0) + +/* Workaround for Oracle Developer Studio (Oracle Solaris Studio) + * 12.4/12.5/12.6 C compiler optimization bug + * with "-xO4" optimization option. + */ +#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150 +# define AVOID_CC_BUG volatile +#else +# define AVOID_CC_BUG #endif -#define PACK_ITEM_ADJUST() while (tmp--) rb_ary_push(ary, Qnil); +enum unpack_mode { + UNPACK_ARRAY, + UNPACK_BLOCK, + UNPACK_1 +}; static VALUE -pack_unpack(str, fmt) - VALUE str, fmt; +pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset) { - static char *hexdigits = "0123456789abcdef0123456789ABCDEFx"; +#define hexdigits ruby_hexdigits char *s, *send; char *p, *pend; - VALUE ary; - char type; - int len, tmp, star; -#ifdef NATINT_PACK - int natint; /* native integer */ -#endif - - s = rb_str2cstr(str, &len); + VALUE ary, associates = Qfalse; + long len; + AVOID_CC_BUG long tmp_len; + int signed_p, integer_size, bigendian_p; +#define UNPACK_PUSH(item) do {\ + VALUE item_val = (item);\ + if ((mode) == UNPACK_BLOCK) {\ + rb_yield(item_val);\ + }\ + else if ((mode) == UNPACK_ARRAY) {\ + rb_ary_push(ary, item_val);\ + }\ + else /* if ((mode) == UNPACK_1) { */ {\ + return item_val; \ + }\ + } while (0) + + StringValue(str); + StringValue(fmt); + rb_must_asciicompat(fmt); + + if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative"); + len = RSTRING_LEN(str); + if (offset > len) rb_raise(rb_eArgError, "offset outside of string"); + + s = RSTRING_PTR(str); send = s + len; - p = rb_str2cstr(fmt, &len); - pend = p + len; + s += offset; - ary = rb_ary_new(); + p = RSTRING_PTR(fmt); + pend = p + RSTRING_LEN(fmt); + +#define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type)) + + ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil; while (p < pend) { + int explicit_endian = 0; + const char type = *p++; #ifdef NATINT_PACK - natint = 0; + int natint = 0; /* native integer */ #endif - star = 0; - type = *p++; - if (*p == '_' || *p == '!') { - char *natstr = "sSiIlL"; + int star = 0; - if (strchr(natstr, type)) { -#ifdef NATINT_PACK - natint = 1; -#endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); - } - } - if (p >= pend) - len = 1; - else if (*p == '*') { - star = 1; - len = send - s; - p++; - } - else if (ISDIGIT(*p)) { - len = strtoul(p, (char**)&p, 10); - } - else { - len = (type != '@'); - } - - switch (type) { - case '%': - rb_raise(rb_eArgError, "%% is not supported"); - break; - - case 'A': - if (len > send - s) len = send - s; - { - int end = len; - char *t = s + len - 1; - - while (t >= s) { - if (*t != ' ' && *t != '\0') break; - t--; len--; - } - rb_ary_push(ary, rb_str_new(s, len)); - s += end; - } - break; - - case 'Z': - if (len > send - s) len = send - s; - { - int end = len; - char *t = s + len - 1; - - while (t >= s) { - if (*t) break; - t--; len--; - } - rb_ary_push(ary, rb_str_new(s, len)); - s += end; - } - break; - - case 'a': - if (len > send - s) len = send - s; - rb_ary_push(ary, rb_str_new(s, len)); - s += len; - break; - - - case 'b': - { - VALUE bitstr; - char *t; - int bits, i; - - if (p[-1] == '*' || len > (send - s) * 8) - len = (send - s) * 8; - bits = 0; - rb_ary_push(ary, bitstr = rb_str_new(0, len)); - t = RSTRING(bitstr)->ptr; - for (i=0; i<len; i++) { - if (i & 7) bits >>= 1; - else bits = *s++; - *t++ = (bits & 1) ? '1' : '0'; - } - } - break; - - case 'B': - { - VALUE bitstr; - char *t; - int bits, i; - - if (p[-1] == '*' || len > (send - s) * 8) - len = (send - s) * 8; - bits = 0; - rb_ary_push(ary, bitstr = rb_str_new(0, len)); - t = RSTRING(bitstr)->ptr; - for (i=0; i<len; i++) { - if (i & 7) bits <<= 1; - else bits = *s++; - *t++ = (bits & 128) ? '1' : '0'; - } - } - break; - - case 'h': - { - VALUE bitstr; - char *t; - int bits, i; - - if (p[-1] == '*' || len > (send - s) * 2) - len = (send - s) * 2; - bits = 0; - rb_ary_push(ary, bitstr = rb_str_new(0, len)); - t = RSTRING(bitstr)->ptr; - for (i=0; i<len; i++) { - if (i & 1) - bits >>= 4; - else - bits = *s++; - *t++ = hexdigits[bits & 15]; - } - } - break; - - case 'H': - { - VALUE bitstr; - char *t; - int bits, i; - - if (p[-1] == '*' || len > (send - s) * 2) - len = (send - s) * 2; - bits = 0; - rb_ary_push(ary, bitstr = rb_str_new(0, len)); - t = RSTRING(bitstr)->ptr; - for (i=0; i<len; i++) { - if (i & 1) - bits <<= 4; - else - bits = *s++; - *t++ = hexdigits[(bits >> 4) & 15]; - } - } - break; - - case 'c': - PACK_LENGTH_ADJUST(char,sizeof(char)); - while (len-- > 0) { - int c = *s++; - if (c > (char)127) c-=256; - rb_ary_push(ary, INT2FIX(c)); - } - PACK_ITEM_ADJUST(); - break; - - case 'C': - PACK_LENGTH_ADJUST(unsigned char,sizeof(unsigned char)); - while (len-- > 0) { - unsigned char c = *s++; - rb_ary_push(ary, INT2FIX(c)); - } - PACK_ITEM_ADJUST(); - break; - - case 's': - PACK_LENGTH_ADJUST(short,2); - while (len-- > 0) { - short tmp = 0; - memcpy(OFF16(&tmp), s, NATINT_LEN(short,2)); - s += NATINT_LEN(short,2); - rb_ary_push(ary, INT2FIX(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'S': - PACK_LENGTH_ADJUST(unsigned short,2); - while (len-- > 0) { - unsigned short tmp = 0; - memcpy(OFF16(&tmp), s, NATINT_LEN(unsigned short,2)); - s += NATINT_LEN(unsigned short,2); - rb_ary_push(ary, INT2FIX(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'i': - PACK_LENGTH_ADJUST(int,sizeof(int)); - while (len-- > 0) { - int tmp; - memcpy(&tmp, s, sizeof(int)); - s += sizeof(int); - rb_ary_push(ary, rb_int2inum(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'I': - PACK_LENGTH_ADJUST(unsigned int,sizeof(unsigned int)); - while (len-- > 0) { - unsigned int tmp; - memcpy(&tmp, s, sizeof(unsigned int)); - s += sizeof(unsigned int); - rb_ary_push(ary, rb_uint2inum(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'l': - PACK_LENGTH_ADJUST(long,4); - while (len-- > 0) { - long tmp = 0; - memcpy(OFF32(&tmp), s, NATINT_LEN(long,4)); - s += NATINT_LEN(long,4); - rb_ary_push(ary, rb_int2inum(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'L': - PACK_LENGTH_ADJUST(unsigned long,4); - while (len-- > 0) { - unsigned long tmp = 0; - memcpy(OFF32(&tmp), s, NATINT_LEN(unsigned long,4)); - s += NATINT_LEN(unsigned long,4); - rb_ary_push(ary, rb_uint2inum(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'n': - PACK_LENGTH_ADJUST(unsigned short,2); - while (len-- > 0) { - unsigned short tmp = 0; - memcpy(OFF16B(&tmp), s, NATINT_LEN(unsigned short,2)); - s += NATINT_LEN(unsigned short,2); - rb_ary_push(ary, rb_uint2inum(ntohs(tmp))); - } - PACK_ITEM_ADJUST(); - break; - - case 'N': - PACK_LENGTH_ADJUST(unsigned long,4); - while (len-- > 0) { - unsigned long tmp = 0; - memcpy(OFF32B(&tmp), s, NATINT_LEN(unsigned long,4)); - s += NATINT_LEN(unsigned long,4); - rb_ary_push(ary, rb_uint2inum(ntohl(tmp))); - } - PACK_ITEM_ADJUST(); - break; - - case 'v': - PACK_LENGTH_ADJUST(unsigned short,2); - while (len-- > 0) { - unsigned short tmp = 0; - memcpy(OFF16(&tmp), s, NATINT_LEN(unsigned short,2)); - s += NATINT_LEN(unsigned short,2); - rb_ary_push(ary, rb_uint2inum(vtohs(tmp))); - } - PACK_ITEM_ADJUST(); - break; - - case 'V': - PACK_LENGTH_ADJUST(unsigned long,4); - while (len-- > 0) { - unsigned long tmp = 0; - memcpy(OFF32(&tmp), s, NATINT_LEN(long,4)); - s += NATINT_LEN(long,4); - rb_ary_push(ary, rb_uint2inum(vtohl(tmp))); - } - PACK_ITEM_ADJUST(); - break; - - case 'f': - case 'F': - PACK_LENGTH_ADJUST(float,sizeof(float)); - while (len-- > 0) { - float tmp; - memcpy(&tmp, s, sizeof(float)); - s += sizeof(float); - rb_ary_push(ary, rb_float_new((double)tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'e': - PACK_LENGTH_ADJUST(float,sizeof(float)); - while (len-- > 0) { - float tmp; - FLOAT_CONVWITH(ftmp); - - memcpy(&tmp, s, sizeof(float)); - s += sizeof(float); - tmp = VTOHF(tmp,ftmp); - rb_ary_push(ary, rb_float_new((double)tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'E': - PACK_LENGTH_ADJUST(double,sizeof(double)); - while (len-- > 0) { - double tmp; - DOUBLE_CONVWITH(dtmp); - - memcpy(&tmp, s, sizeof(double)); - s += sizeof(double); - tmp = VTOHD(tmp,dtmp); - rb_ary_push(ary, rb_float_new(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'D': - case 'd': - PACK_LENGTH_ADJUST(double,sizeof(double)); - while (len-- > 0) { - double tmp; - memcpy(&tmp, s, sizeof(double)); - s += sizeof(double); - rb_ary_push(ary, rb_float_new(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'g': - PACK_LENGTH_ADJUST(float,sizeof(float)); - while (len-- > 0) { - float tmp; - FLOAT_CONVWITH(ftmp;) - - memcpy(&tmp, s, sizeof(float)); - s += sizeof(float); - tmp = NTOHF(tmp,ftmp); - rb_ary_push(ary, rb_float_new((double)tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'G': - PACK_LENGTH_ADJUST(double,sizeof(double)); - while (len-- > 0) { - double tmp; - DOUBLE_CONVWITH(dtmp); - - memcpy(&tmp, s, sizeof(double)); - s += sizeof(double); - tmp = NTOHD(tmp,dtmp); - rb_ary_push(ary, rb_float_new(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'U': - if (len > send - s) len = send - s; - while (len-- > 0 && s < send) { - int alen; - unsigned long l; - - l = utf8_to_uv(s, &alen); - s += alen; - rb_ary_push(ary, rb_uint2inum(l)); - } - break; - - case 'u': - { - VALUE str = rb_str_new(0, (send - s)*3/4); - char *ptr = RSTRING(str)->ptr; - int total = 0; - - while (s < send && *s > ' ' && *s < 'a') { - long a,b,c,d; - char hunk[4]; - - hunk[3] = '\0'; - len = (*s++ - ' ') & 077; - total += len; - if (total > RSTRING(str)->len) { - len -= total - RSTRING(str)->len; - total = RSTRING(str)->len; - } - - while (len > 0) { - int mlen = len > 3 ? 3 : len; - - if (s < send && *s >= ' ') - a = (*s++ - ' ') & 077; - else - a = 0; - if (s < send && *s >= ' ') - b = (*s++ - ' ') & 077; - else - b = 0; - if (s < send && *s >= ' ') - c = (*s++ - ' ') & 077; - else - c = 0; - if (s < send && *s >= ' ') - d = (*s++ - ' ') & 077; - else - d = 0; - hunk[0] = a << 2 | b >> 4; - hunk[1] = b << 4 | c >> 2; - hunk[2] = c << 6 | d; - memcpy(ptr, hunk, mlen); - ptr += mlen; - len -= mlen; - } - if (*s == '\r') s++; - if (*s == '\n') s++; - else if (s < send && (s+1 == send || s[1] == '\n')) - s += 2; /* possible checksum byte */ - } - - RSTRING(str)->ptr[total] = '\0'; - RSTRING(str)->len = total; - rb_ary_push(ary, str); - } - break; - - case 'm': - { - VALUE str = rb_str_new(0, (send - s)*3/4); - char *ptr = RSTRING(str)->ptr; - int a,b,c,d; - static int first = 1; - static int b64_xtable[256]; - - if (first) { - int i; - first = 0; - - for (i = 0; i < 256; i++) { - b64_xtable[i] = -1; - } - for (i = 0; i < 64; i++) { - b64_xtable[(int)b64_table[i]] = i; - } - } - for (;;) { - while (s[0] == '\r' || s[0] == '\n') { s++; } - if ((a = b64_xtable[(int)s[0]]) == -1) break; - if ((b = b64_xtable[(int)s[1]]) == -1) break; - if ((c = b64_xtable[(int)s[2]]) == -1) break; - if ((d = b64_xtable[(int)s[3]]) == -1) break; - *ptr++ = a << 2 | b >> 4; - *ptr++ = b << 4 | c >> 2; - *ptr++ = c << 6 | d; - s += 4; - } - if (a != -1 && b != -1 && s[2] == '=') { - *ptr++ = a << 2 | b >> 4; - } - if (a != -1 && b != -1 && c != -1 && s[3] == '=') { - *ptr++ = a << 2 | b >> 4; - *ptr++ = b << 4 | c >> 2; - } - *ptr = '\0'; - RSTRING(str)->len = ptr - RSTRING(str)->ptr; - rb_ary_push(ary, str); - } - break; - - case 'M': - { - VALUE str = rb_str_new(0, send - s); - char *ptr = RSTRING(str)->ptr; - int c1, c2; - - while (s < send) { - if (*s == '=') { - if (++s == send) break; - if (*s != '\n') { - if ((c1 = hex2num(*s)) == -1) break; - if (++s == send) break; - if ((c2 = hex2num(*s)) == -1) break; - *ptr++ = c1 << 4 | c2; - } - } - else { - *ptr++ = *s; - } - s++; - } - *ptr = '\0'; - RSTRING(str)->len = ptr - RSTRING(str)->ptr; - rb_ary_push(ary, str); - } - break; - - case '@': - s = RSTRING(str)->ptr + len; - break; - - case 'X': - if (len > s - RSTRING(str)->ptr) - rb_raise(rb_eArgError, "X outside of string"); - s -= len; - break; - - case 'x': - if (len > send - s) - rb_raise(rb_eArgError, "x outside of string"); - s += len; - break; - - case 'P': - if (sizeof(char *) <= send - s) { - char *t; - VALUE str = rb_str_new(0, 0); - memcpy(&t, s, sizeof(char *)); - s += sizeof(char *); - if (t) - rb_str_cat(str, t, len); - rb_ary_push(ary, str); - } - break; - - case 'p': - if (len > (send - s) / sizeof(char *)) - len = (send - s) / sizeof(char *); - while (len-- > 0) { - if (send - s < sizeof(char *)) - break; - else { - char *t; - VALUE str = rb_str_new(0, 0); - memcpy(&t, s, sizeof(char *)); - s += sizeof(char *); - if (t) { - rb_str_cat2(str, t); - } - rb_ary_push(ary, str); - } - } - break; - - case 'w': - { - unsigned long ul = 0; - unsigned long ulmask = 0xfeL << ((sizeof(unsigned long) - 1) * 8); - - while (len > 0 && s < send) { - ul <<= 7; - ul |= (*s & 0x7f); - if (!(*s++ & 0x80)) { - rb_ary_push(ary, rb_uint2inum(ul)); - len--; - ul = 0; - } - else if (ul & ulmask) { - VALUE big = rb_uint2big(ul); - VALUE big128 = rb_uint2big(128); - while (s < send) { - big = rb_big_mul(big, big128); - big = rb_big_plus(big, rb_uint2big(*s & 0x7f)); - if (!(*s++ & 0x80)) { - rb_ary_push(ary, big); - len--; - ul = 0; - break; - } - } - } - } - } - break; - - default: - break; - } + if (skip_blank(p, type)) continue; + p = pack_modifiers(p, type, &natint, &explicit_endian); + + if (p >= pend) + len = 1; + else if (*p == '*') { + star = 1; + len = send - s; + p++; + } + else if (ISDIGIT(*p)) { + errno = 0; + len = STRTOUL(p, (char**)&p, 10); + if (len < 0 || errno) { + rb_raise(rb_eRangeError, "pack length too big"); + } + } + else { + len = (type != '@'); + } + + switch (type) { + case '%': + rb_raise(rb_eArgError, "%% is not supported"); + break; + + case 'A': + if (len > send - s) len = send - s; + { + long end = len; + char *t = s + len - 1; + + while (t >= s) { + if (*t != ' ' && *t != '\0') break; + t--; len--; + } + UNPACK_PUSH(rb_str_new(s, len)); + s += end; + } + break; + + case 'Z': + { + char *t = s; + + if (len > send-s) len = send-s; + while (t < s+len && *t) t++; + UNPACK_PUSH(rb_str_new(s, t-s)); + if (t < send) t++; + s = star ? t : s+len; + } + break; + + case 'a': + if (len > send - s) len = send - s; + UNPACK_PUSH(rb_str_new(s, len)); + s += len; + break; + + case 'b': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 8) + len = (send - s) * 8; + bits = 0; + bitstr = rb_usascii_str_new(0, len); + t = RSTRING_PTR(bitstr); + for (i=0; i<len; i++) { + if (i & 7) bits >>= 1; + else bits = (unsigned char)*s++; + *t++ = (bits & 1) ? '1' : '0'; + } + UNPACK_PUSH(bitstr); + } + break; + + case 'B': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 8) + len = (send - s) * 8; + bits = 0; + bitstr = rb_usascii_str_new(0, len); + t = RSTRING_PTR(bitstr); + for (i=0; i<len; i++) { + if (i & 7) bits <<= 1; + else bits = (unsigned char)*s++; + *t++ = (bits & 128) ? '1' : '0'; + } + UNPACK_PUSH(bitstr); + } + break; + + case 'h': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 2) + len = (send - s) * 2; + bits = 0; + bitstr = rb_usascii_str_new(0, len); + t = RSTRING_PTR(bitstr); + for (i=0; i<len; i++) { + if (i & 1) + bits >>= 4; + else + bits = (unsigned char)*s++; + *t++ = hexdigits[bits & 15]; + } + UNPACK_PUSH(bitstr); + } + break; + + case 'H': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 2) + len = (send - s) * 2; + bits = 0; + bitstr = rb_usascii_str_new(0, len); + t = RSTRING_PTR(bitstr); + for (i=0; i<len; i++) { + if (i & 1) + bits <<= 4; + else + bits = (unsigned char)*s++; + *t++ = hexdigits[(bits >> 4) & 15]; + } + UNPACK_PUSH(bitstr); + } + break; + + case 'c': + signed_p = 1; + integer_size = 1; + bigendian_p = BIGENDIAN_P(); /* not effective */ + goto unpack_integer; + + case 'C': + signed_p = 0; + integer_size = 1; + bigendian_p = BIGENDIAN_P(); /* not effective */ + goto unpack_integer; + + case 's': + signed_p = 1; + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'S': + signed_p = 0; + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'i': + signed_p = 1; + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'I': + signed_p = 0; + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'l': + signed_p = 1; + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'L': + signed_p = 0; + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'q': + signed_p = 1; + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'Q': + signed_p = 0; + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'j': + signed_p = 1; + integer_size = sizeof(intptr_t); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'J': + signed_p = 0; + integer_size = sizeof(uintptr_t); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'n': + signed_p = 0; + integer_size = 2; + bigendian_p = 1; + goto unpack_integer; + + case 'N': + signed_p = 0; + integer_size = 4; + bigendian_p = 1; + goto unpack_integer; + + case 'v': + signed_p = 0; + integer_size = 2; + bigendian_p = 0; + goto unpack_integer; + + case 'V': + signed_p = 0; + integer_size = 4; + bigendian_p = 0; + goto unpack_integer; + + unpack_integer: + if (explicit_endian) { + bigendian_p = explicit_endian == '>'; + } + PACK_LENGTH_ADJUST_SIZE(integer_size); + while (len-- > 0) { + int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN; + VALUE val; + if (signed_p) + flags |= INTEGER_PACK_2COMP; + val = rb_integer_unpack(s, integer_size, 1, 0, flags); + UNPACK_PUSH(val); + s += integer_size; + } + PACK_ITEM_ADJUST(); + break; + + case 'f': + case 'F': + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); + while (len-- > 0) { + float tmp; + UNPACK_FETCH(&tmp, float); + UNPACK_PUSH(DBL2NUM((double)tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'e': + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); + while (len-- > 0) { + FLOAT_CONVWITH(tmp); + UNPACK_FETCH(tmp.buf, float); + VTOHF(tmp); + UNPACK_PUSH(DBL2NUM(tmp.f)); + } + PACK_ITEM_ADJUST(); + break; + + case 'E': + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); + while (len-- > 0) { + DOUBLE_CONVWITH(tmp); + UNPACK_FETCH(tmp.buf, double); + VTOHD(tmp); + UNPACK_PUSH(DBL2NUM(tmp.d)); + } + PACK_ITEM_ADJUST(); + break; + + case 'D': + case 'd': + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); + while (len-- > 0) { + double tmp; + UNPACK_FETCH(&tmp, double); + UNPACK_PUSH(DBL2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'g': + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); + while (len-- > 0) { + FLOAT_CONVWITH(tmp); + UNPACK_FETCH(tmp.buf, float); + NTOHF(tmp); + UNPACK_PUSH(DBL2NUM(tmp.f)); + } + PACK_ITEM_ADJUST(); + break; + + case 'G': + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); + while (len-- > 0) { + DOUBLE_CONVWITH(tmp); + UNPACK_FETCH(tmp.buf, double); + NTOHD(tmp); + UNPACK_PUSH(DBL2NUM(tmp.d)); + } + PACK_ITEM_ADJUST(); + break; + + case 'U': + if (len > send - s) len = send - s; + while (len > 0 && s < send) { + long alen = send - s; + unsigned long l; + + l = utf8_to_uv(s, &alen); + s += alen; len--; + UNPACK_PUSH(ULONG2NUM(l)); + } + break; + + case 'u': + { + VALUE buf = rb_str_new(0, (send - s)*3/4); + char *ptr = RSTRING_PTR(buf); + long total = 0; + + while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') { + long a,b,c,d; + char hunk[3]; + + len = ((unsigned char)*s++ - ' ') & 077; + + total += len; + if (total > RSTRING_LEN(buf)) { + len -= total - RSTRING_LEN(buf); + total = RSTRING_LEN(buf); + } + + while (len > 0) { + long mlen = len > 3 ? 3 : len; + + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + a = ((unsigned char)*s++ - ' ') & 077; + else + a = 0; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + b = ((unsigned char)*s++ - ' ') & 077; + else + b = 0; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + c = ((unsigned char)*s++ - ' ') & 077; + else + c = 0; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + d = ((unsigned char)*s++ - ' ') & 077; + else + d = 0; + hunk[0] = (char)(a << 2 | b >> 4); + hunk[1] = (char)(b << 4 | c >> 2); + hunk[2] = (char)(c << 6 | d); + memcpy(ptr, hunk, mlen); + ptr += mlen; + len -= mlen; + } + if (s < send && (unsigned char)*s != '\r' && *s != '\n') + s++; /* possible checksum byte */ + if (s < send && *s == '\r') s++; + if (s < send && *s == '\n') s++; + } + + rb_str_set_len(buf, total); + UNPACK_PUSH(buf); + } + break; + + case 'm': + { + VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */ + char *ptr = RSTRING_PTR(buf); + int a = -1,b = -1,c = 0,d = 0; + static signed char b64_xtable[256]; + + if (b64_xtable['/'] <= 0) { + int i; + + for (i = 0; i < 256; i++) { + b64_xtable[i] = -1; + } + for (i = 0; i < 64; i++) { + b64_xtable[(unsigned char)b64_table[i]] = (char)i; + } + } + if (len == 0) { + while (s < send) { + a = b = c = d = -1; + a = b64_xtable[(unsigned char)*s++]; + if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); + b = b64_xtable[(unsigned char)*s++]; + if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); + if (*s == '=') { + if (s + 2 == send && *(s + 1) == '=') break; + rb_raise(rb_eArgError, "invalid base64"); + } + c = b64_xtable[(unsigned char)*s++]; + if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); + if (s + 1 == send && *s == '=') break; + d = b64_xtable[(unsigned char)*s++]; + if (d == -1) rb_raise(rb_eArgError, "invalid base64"); + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + *ptr++ = castchar(c << 6 | d); + } + if (c == -1) { + *ptr++ = castchar(a << 2 | b >> 4); + if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); + } + else if (d == -1) { + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); + } + } + else { + while (s < send) { + a = b = c = d = -1; + while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + *ptr++ = castchar(c << 6 | d); + a = -1; + } + if (a != -1 && b != -1) { + if (c == -1) + *ptr++ = castchar(a << 2 | b >> 4); + else { + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + } + } + } + rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); + UNPACK_PUSH(buf); + } + break; + + case 'M': + { + VALUE buf = rb_str_new(0, send - s); + char *ptr = RSTRING_PTR(buf), *ss = s; + int csum = 0; + int c1, c2; + + while (s < send) { + if (*s == '=') { + if (++s == send) break; + if (s+1 < send && *s == '\r' && *(s+1) == '\n') + s++; + if (*s != '\n') { + if ((c1 = hex2num(*s)) == -1) break; + if (++s == send) break; + if ((c2 = hex2num(*s)) == -1) break; + csum |= *ptr++ = castchar(c1 << 4 | c2); + } + } + else { + csum |= *ptr++ = *s; + } + s++; + ss = s; + } + rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); + rb_str_buf_cat(buf, ss, send-ss); + csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; + ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum); + UNPACK_PUSH(buf); + } + break; + + case '@': + if (len > RSTRING_LEN(str)) + rb_raise(rb_eArgError, "@ outside of string"); + s = RSTRING_PTR(str) + len; + break; + + case 'X': + if (len > s - RSTRING_PTR(str)) + rb_raise(rb_eArgError, "X outside of string"); + s -= len; + break; + + case 'x': + if (len > send - s) + rb_raise(rb_eArgError, "x outside of string"); + s += len; + break; + + case 'P': + if (sizeof(char *) <= (size_t)(send - s)) { + VALUE tmp = Qnil; + char *t; + + UNPACK_FETCH(&t, char *); + if (t) { + if (!associates) associates = str_associated(str); + tmp = associated_pointer(associates, t); + if (len < RSTRING_LEN(tmp)) { + tmp = rb_str_new(t, len); + str_associate(tmp, associates); + } + } + UNPACK_PUSH(tmp); + } + break; + + case 'p': + if (len > (long)((send - s) / sizeof(char *))) + len = (send - s) / sizeof(char *); + while (len-- > 0) { + if ((size_t)(send - s) < sizeof(char *)) + break; + else { + VALUE tmp = Qnil; + char *t; + + UNPACK_FETCH(&t, char *); + if (t) { + if (!associates) associates = str_associated(str); + tmp = associated_pointer(associates, t); + } + UNPACK_PUSH(tmp); + } + } + break; + + case 'w': + { + char *s0 = s; + while (len > 0 && s < send) { + if (*s & 0x80) { + s++; + } + else { + s++; + UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN)); + len--; + s0 = s; + } + } + } + break; + + default: + unknown_directive("unpack", type, fmt); + break; + } } return ary; } -#define BYTEWIDTH 8 +static VALUE +pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) +{ + enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; + return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset)); +} -static int -uv_to_utf8(buf, uv) - char *buf; - unsigned long uv; +static VALUE +pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) +{ + return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset)); +} + +int +rb_uv_to_utf8(char buf[6], unsigned long uv) { if (uv <= 0x7f) { - buf[0] = (char)uv; - return 1; + buf[0] = (char)uv; + return 1; } if (uv <= 0x7ff) { - buf[0] = ((uv>>6)&0xff)|0xc0; - buf[1] = (uv&0x3f)|0x80; - return 2; + buf[0] = castchar(((uv>>6)&0xff)|0xc0); + buf[1] = castchar((uv&0x3f)|0x80); + return 2; } if (uv <= 0xffff) { - buf[0] = ((uv>>12)&0xff)|0xe0; - buf[1] = ((uv>>6)&0x3f)|0x80; - buf[2] = (uv&0x3f)|0x80; - return 3; + buf[0] = castchar(((uv>>12)&0xff)|0xe0); + buf[1] = castchar(((uv>>6)&0x3f)|0x80); + buf[2] = castchar((uv&0x3f)|0x80); + return 3; } if (uv <= 0x1fffff) { - buf[0] = ((uv>>18)&0xff)|0xf0; - buf[1] = ((uv>>12)&0x3f)|0x80; - buf[2] = ((uv>>6)&0x3f)|0x80; - buf[3] = (uv&0x3f)|0x80; - return 4; + buf[0] = castchar(((uv>>18)&0xff)|0xf0); + buf[1] = castchar(((uv>>12)&0x3f)|0x80); + buf[2] = castchar(((uv>>6)&0x3f)|0x80); + buf[3] = castchar((uv&0x3f)|0x80); + return 4; } if (uv <= 0x3ffffff) { - buf[0] = ((uv>>24)&0xff)|0xf8; - buf[1] = ((uv>>18)&0x3f)|0x80; - buf[2] = ((uv>>12)&0x3f)|0x80; - buf[3] = ((uv>>6)&0x3f)|0x80; - buf[4] = (uv&0x3f)|0x80; - return 5; + buf[0] = castchar(((uv>>24)&0xff)|0xf8); + buf[1] = castchar(((uv>>18)&0x3f)|0x80); + buf[2] = castchar(((uv>>12)&0x3f)|0x80); + buf[3] = castchar(((uv>>6)&0x3f)|0x80); + buf[4] = castchar((uv&0x3f)|0x80); + return 5; } if (uv <= 0x7fffffff) { - buf[0] = ((uv>>30)&0xff)|0xfc; - buf[1] = ((uv>>24)&0x3f)|0x80; - buf[2] = ((uv>>18)&0x3f)|0x80; - buf[3] = ((uv>>12)&0x3f)|0x80; - buf[4] = ((uv>>6)&0x3f)|0x80; - buf[5] = (uv&0x3f)|0x80; - return 6; + buf[0] = castchar(((uv>>30)&0xff)|0xfc); + buf[1] = castchar(((uv>>24)&0x3f)|0x80); + buf[2] = castchar(((uv>>18)&0x3f)|0x80); + buf[3] = castchar(((uv>>12)&0x3f)|0x80); + buf[4] = castchar(((uv>>6)&0x3f)|0x80); + buf[5] = castchar((uv&0x3f)|0x80); + return 6; } -#if SIZEOF_LONG > 4 - if (uv <= 0xfffffffff) { -#endif - buf[0] = 0xfe; - buf[1] = ((uv>>30)&0x3f)|0x80; - buf[2] = ((uv>>24)&0x3f)|0x80; - buf[3] = ((uv>>18)&0x3f)|0x80; - buf[4] = ((uv>>12)&0x3f)|0x80; - buf[5] = ((uv>>6)&0x3f)|0x80; - buf[6] = (uv&0x3f)|0x80; - return 7; -#if SIZEOF_LONG > 4 - } - rb_raise(rb_eArgError, "uv_to_utf8(); too big value"); -#endif + rb_raise(rb_eRangeError, "pack(U): value out of range"); + + UNREACHABLE_RETURN(Qnil); } +static const unsigned long utf8_limits[] = { + 0x0, /* 1 */ + 0x80, /* 2 */ + 0x800, /* 3 */ + 0x10000, /* 4 */ + 0x200000, /* 5 */ + 0x4000000, /* 6 */ + 0x80000000, /* 7 */ +}; + static unsigned long -utf8_to_uv(p, lenp) - char *p; - int *lenp; +utf8_to_uv(const char *p, long *lenp) { - int c = (*p++)&0xff; - unsigned long uv; - int n = 1; - - if (c < 0xc0) n = 1; - else if (c < 0xe0) n = 2; - else if (c < 0xf0) n = 3; - else if (c < 0xf8) n = 4; - else if (c < 0xfc) n = 5; - else if (c < 0xfe) n = 6; - else if (c == 0xfe) n = 7; - *lenp = n--; + int c = *p++ & 0xff; + unsigned long uv = c; + long n; - uv = c; + if (!(uv & 0x80)) { + *lenp = 1; + return uv; + } + if (!(uv & 0x40)) { + *lenp = 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + + if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } + else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } + else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } + else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } + else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } + else { + *lenp = 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + if (n > *lenp) { + rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", + n, *lenp); + } + *lenp = n--; if (n != 0) { - uv &= (1<<(BYTEWIDTH-2-n)) - 1; - while (n--) { - uv = uv << 6 | *p++ & ((1<<6)-1); - } + while (n--) { + c = *p++ & 0xff; + if ((c & 0xc0) != 0x80) { + *lenp -= n + 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + else { + c &= 0x3f; + uv = uv << 6 | c; + } + } + } + n = *lenp - 1; + if (uv < utf8_limits[n]) { + rb_raise(rb_eArgError, "redundant UTF-8 sequence"); } return uv; } +#include "pack.rbinc" + void -Init_pack() +Init_pack(void) { - rb_define_method(rb_cArray, "pack", pack_pack, 1); - rb_define_method(rb_cString, "unpack", pack_unpack, 1); + id_associated = rb_make_internal_id(); } |
