diff options
Diffstat (limited to 'pack.c')
-rw-r--r-- | pack.c | 2353 |
1 files changed, 1162 insertions, 1191 deletions
@@ -36,10 +36,11 @@ */ #ifdef HAVE_TRUE_LONG_LONG static const char natstr[] = "sSiIlLqQjJ"; +# define endstr natstr #else static const char natstr[] = "sSiIlLjJ"; -#endif static const char endstr[] = "sSiIlLqQjJ"; +#endif #ifdef HAVE_TRUE_LONG_LONG /* It is intentional to use long long instead of LONG_LONG. */ @@ -135,9 +136,26 @@ str_associate(VALUE str, VALUE add) static VALUE str_associated(VALUE str) { - return rb_ivar_lookup(str, id_associated, Qfalse); + VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse); + if (!associates) + rb_raise(rb_eArgError, "no associated pointer"); + return associates; } +static VALUE +associated_pointer(VALUE associates, const char *t) +{ + const VALUE *p = RARRAY_CONST_PTR(associates); + const VALUE *pend = p + RARRAY_LEN(associates); + for (; p < pend; p++) { + VALUE tmp = *p; + if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp; + } + rb_raise(rb_eArgError, "non associated pointer"); + UNREACHABLE_RETURN(Qnil); +} + +RBIMPL_ATTR_NORETURN() static void unknown_directive(const char *mode, char type, VALUE fmt) { @@ -151,8 +169,8 @@ unknown_directive(const char *mode, char type, VALUE fmt) snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff); } fmt = rb_str_quote_unprintable(fmt); - rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'", - mode, unknown, fmt); + rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'", + mode, unknown, fmt); } static float @@ -192,16 +210,18 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) int integer_size, bigendian_p; StringValue(fmt); + rb_must_asciicompat(fmt); p = RSTRING_PTR(fmt); pend = p + RSTRING_LEN(fmt); if (NIL_P(buffer)) { - res = rb_str_buf_new(0); + res = rb_str_buf_new(0); } else { if (!RB_TYPE_P(buffer, T_STRING)) rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer)); - res = buffer; + rb_str_modify(buffer); + res = buffer; } idx = 0; @@ -212,329 +232,313 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW) while (p < pend) { - int explicit_endian = 0; - if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { - rb_raise(rb_eRuntimeError, "format string modified"); - } - type = *p++; /* get data type */ + int explicit_endian = 0; + if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { + rb_raise(rb_eRuntimeError, "format string modified"); + } + type = *p++; /* get data type */ #ifdef NATINT_PACK - natint = 0; + natint = 0; #endif - if (ISSPACE(type)) continue; - if (type == '#') { - while ((p < pend) && (*p != '\n')) { - p++; - } - continue; - } + if (ISSPACE(type)) continue; + if (type == '#') { + while ((p < pend) && (*p != '\n')) { + p++; + } + continue; + } - { + { modifiers: - switch (*p) { - case '_': - case '!': - if (strchr(natstr, type)) { + switch (*p) { + case '_': + case '!': + if (strchr(natstr, type)) { #ifdef NATINT_PACK - natint = 1; + natint = 1; #endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); - } - goto modifiers; - - case '<': - case '>': - if (!strchr(endstr, type)) { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); - } - if (explicit_endian) { - rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); - } - explicit_endian = *p++; - goto modifiers; - } - } - - if (*p == '*') { /* set data length */ - len = strchr("@Xxu", type) ? 0 + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + goto modifiers; + + case '<': + case '>': + if (!strchr(endstr, type)) { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); + } + if (explicit_endian) { + rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); + } + explicit_endian = *p++; + goto modifiers; + } + } + + if (*p == '*') { /* set data length */ + len = strchr("@Xxu", type) ? 0 : strchr("PMm", type) ? 1 : RARRAY_LEN(ary) - idx; - p++; - } - else if (ISDIGIT(*p)) { - errno = 0; - len = STRTOUL(p, (char**)&p, 10); - if (errno) { - rb_raise(rb_eRangeError, "pack length too big"); - } - } - else { - len = 1; - } - - switch (type) { - case 'U': - /* if encoding is US-ASCII, upgrade to UTF-8 */ - if (enc_info == 1) enc_info = 2; - break; - case 'm': case 'M': case 'u': - /* keep US-ASCII (do nothing) */ - break; - default: - /* fall back to BINARY */ - enc_info = 0; - break; - } - switch (type) { - case 'A': case 'a': case 'Z': - case 'B': case 'b': - case 'H': case 'h': - from = NEXTFROM; - if (NIL_P(from)) { - ptr = ""; - plen = 0; - } - else { - StringValue(from); - ptr = RSTRING_PTR(from); - plen = RSTRING_LEN(from); - } - - if (p[-1] == '*') - len = plen; - - switch (type) { - case 'a': /* arbitrary binary string (null padded) */ - case 'A': /* arbitrary binary string (ASCII space padded) */ - case 'Z': /* null terminated string */ - if (plen >= len) { - rb_str_buf_cat(res, ptr, len); - if (p[-1] == '*' && type == 'Z') - rb_str_buf_cat(res, nul10, 1); - } - else { - rb_str_buf_cat(res, ptr, plen); - len -= plen; - while (len >= 10) { - rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); - len -= 10; - } - rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); - } - break; + p++; + } + else if (ISDIGIT(*p)) { + errno = 0; + len = STRTOUL(p, (char**)&p, 10); + if (errno) { + rb_raise(rb_eRangeError, "pack length too big"); + } + } + else { + len = 1; + } + + switch (type) { + case 'U': + /* if encoding is US-ASCII, upgrade to UTF-8 */ + if (enc_info == 1) enc_info = 2; + break; + case 'm': case 'M': case 'u': + /* keep US-ASCII (do nothing) */ + break; + default: + /* fall back to BINARY */ + enc_info = 0; + break; + } + switch (type) { + case 'A': case 'a': case 'Z': + case 'B': case 'b': + case 'H': case 'h': + from = NEXTFROM; + if (NIL_P(from)) { + ptr = ""; + plen = 0; + } + else { + StringValue(from); + ptr = RSTRING_PTR(from); + plen = RSTRING_LEN(from); + } + + if (p[-1] == '*') + len = plen; + + switch (type) { + case 'a': /* arbitrary binary string (null padded) */ + case 'A': /* arbitrary binary string (ASCII space padded) */ + case 'Z': /* null terminated string */ + if (plen >= len) { + rb_str_buf_cat(res, ptr, len); + if (p[-1] == '*' && type == 'Z') + rb_str_buf_cat(res, nul10, 1); + } + else { + rb_str_buf_cat(res, ptr, plen); + len -= plen; + while (len >= 10) { + rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); + len -= 10; + } + rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); + } + break; #define castchar(from) (char)((from) & 0xff) - case 'b': /* bit string (ascending) */ - { - int byte = 0; - long i, j = 0; - - if (len > plen) { - j = (len - plen + 1)/2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (*ptr & 1) - byte |= 128; - if (i & 7) - byte >>= 1; - else { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - byte = 0; - } - } - if (len & 7) { - char c; - byte >>= 7 - (len & 7); - c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - } - len = j; - goto grow; - } - break; - - case 'B': /* bit string (descending) */ - { - int byte = 0; - long i, j = 0; - - if (len > plen) { - j = (len - plen + 1)/2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - byte |= *ptr & 1; - if (i & 7) - byte <<= 1; - else { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - byte = 0; - } - } - if (len & 7) { - char c; - byte <<= 7 - (len & 7); - c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - } - len = j; - goto grow; - } - break; - - case 'h': /* hex string (low nibble first) */ - { - int byte = 0; - long i, j = 0; - - if (len > plen) { - j = (len + 1) / 2 - (plen + 1) / 2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (ISALPHA(*ptr)) - byte |= (((*ptr & 15) + 9) & 15) << 4; - else - byte |= (*ptr & 15) << 4; - if (i & 1) - byte >>= 4; - else { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - byte = 0; - } - } - if (len & 1) { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - } - len = j; - goto grow; - } - break; - - case 'H': /* hex string (high nibble first) */ - { - int byte = 0; - long i, j = 0; - - if (len > plen) { - j = (len + 1) / 2 - (plen + 1) / 2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (ISALPHA(*ptr)) - byte |= ((*ptr & 15) + 9) & 15; - else - byte |= *ptr & 15; - if (i & 1) - byte <<= 4; - else { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - byte = 0; - } - } - if (len & 1) { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - } - len = j; - goto grow; - } - break; - } - break; - - case 'c': /* signed char */ - case 'C': /* unsigned char */ + case 'b': /* bit string (ascending) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (*ptr & 1) + byte |= 128; + if (i & 7) + byte >>= 1; + else { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 7) { + char c; + byte >>= 7 - (len & 7); + c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'B': /* bit string (descending) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + byte |= *ptr & 1; + if (i & 7) + byte <<= 1; + else { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 7) { + char c; + byte <<= 7 - (len & 7); + c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'h': /* hex string (low nibble first) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len + 1) / 2 - (plen + 1) / 2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (ISALPHA(*ptr)) + byte |= (((*ptr & 15) + 9) & 15) << 4; + else + byte |= (*ptr & 15) << 4; + if (i & 1) + byte >>= 4; + else { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 1) { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'H': /* hex string (high nibble first) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len + 1) / 2 - (plen + 1) / 2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (ISALPHA(*ptr)) + byte |= ((*ptr & 15) + 9) & 15; + else + byte |= *ptr & 15; + if (i & 1) + byte <<= 4; + else { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 1) { + char c = castchar(byte); + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + } + break; + + case 'c': /* signed char */ + case 'C': /* unsigned char */ integer_size = 1; bigendian_p = BIGENDIAN_P(); /* not effective */ goto pack_integer; - case 's': /* s for int16_t, s! for signed short */ + case 's': /* s for int16_t, s! for signed short */ + case 'S': /* S for uint16_t, S! for unsigned short */ integer_size = NATINT_LEN(short, 2); bigendian_p = BIGENDIAN_P(); goto pack_integer; - case 'S': /* S for uint16_t, S! for unsigned short */ - integer_size = NATINT_LEN(short, 2); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'i': /* i and i! for signed int */ - integer_size = (int)sizeof(int); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'I': /* I and I! for unsigned int */ + case 'i': /* i and i! for signed int */ + case 'I': /* I and I! for unsigned int */ integer_size = (int)sizeof(int); bigendian_p = BIGENDIAN_P(); goto pack_integer; - case 'l': /* l for int32_t, l! for signed long */ + case 'l': /* l for int32_t, l! for signed long */ + case 'L': /* L for uint32_t, L! for unsigned long */ integer_size = NATINT_LEN(long, 4); bigendian_p = BIGENDIAN_P(); goto pack_integer; - case 'L': /* L for uint32_t, L! for unsigned long */ - integer_size = NATINT_LEN(long, 4); + case 'q': /* q for int64_t, q! for signed long long */ + case 'Q': /* Q for uint64_t, Q! for unsigned long long */ + integer_size = NATINT_LEN_Q; bigendian_p = BIGENDIAN_P(); goto pack_integer; - case 'q': /* q for int64_t, q! for signed long long */ - integer_size = NATINT_LEN_Q; + case 'j': /* j for intptr_t */ + integer_size = sizeof(intptr_t); bigendian_p = BIGENDIAN_P(); goto pack_integer; - case 'Q': /* Q for uint64_t, Q! for unsigned long long */ - integer_size = NATINT_LEN_Q; + case 'J': /* J for uintptr_t */ + integer_size = sizeof(uintptr_t); bigendian_p = BIGENDIAN_P(); goto pack_integer; - case 'j': /* j for intptr_t */ - integer_size = sizeof(intptr_t); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'J': /* J for uintptr_t */ - integer_size = sizeof(uintptr_t); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'n': /* 16 bit (2 bytes) integer (network byte-order) */ + case 'n': /* 16 bit (2 bytes) integer (network byte-order) */ integer_size = 2; bigendian_p = 1; goto pack_integer; - case 'N': /* 32 bit (4 bytes) integer (network byte-order) */ + case 'N': /* 32 bit (4 bytes) integer (network byte-order) */ integer_size = 4; bigendian_p = 1; goto pack_integer; - case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */ + case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */ integer_size = 2; bigendian_p = 0; goto pack_integer; - case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */ + case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */ integer_size = 4; bigendian_p = 0; goto pack_integer; pack_integer: - if (explicit_endian) { - bigendian_p = explicit_endian == '>'; - } + if (explicit_endian) { + bigendian_p = explicit_endian == '>'; + } if (integer_size > MAX_INTEGER_PACK_SIZE) - rb_bug("unexpected intger size for pack: %d", integer_size); + rb_bug("unexpected integer size for pack: %d", integer_size); while (len-- > 0) { char intbuf[MAX_INTEGER_PACK_SIZE]; @@ -544,192 +548,192 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN)); rb_str_buf_cat(res, intbuf, integer_size); } - break; + break; - case 'f': /* single precision float in native format */ - case 'F': /* ditto */ - while (len-- > 0) { - float f; + case 'f': /* single precision float in native format */ + case 'F': /* ditto */ + while (len-- > 0) { + float f; - from = NEXTFROM; + from = NEXTFROM; f = VALUE_to_float(from); - rb_str_buf_cat(res, (char*)&f, sizeof(float)); - } - break; + rb_str_buf_cat(res, (char*)&f, sizeof(float)); + } + break; - case 'e': /* single precision float in VAX byte-order */ - while (len-- > 0) { - FLOAT_CONVWITH(tmp); + case 'e': /* single precision float in VAX byte-order */ + while (len-- > 0) { + FLOAT_CONVWITH(tmp); - from = NEXTFROM; + from = NEXTFROM; tmp.f = VALUE_to_float(from); - HTOVF(tmp); - rb_str_buf_cat(res, tmp.buf, sizeof(float)); - } - break; - - case 'E': /* double precision float in VAX byte-order */ - while (len-- > 0) { - DOUBLE_CONVWITH(tmp); - from = NEXTFROM; - tmp.d = RFLOAT_VALUE(rb_to_float(from)); - HTOVD(tmp); - rb_str_buf_cat(res, tmp.buf, sizeof(double)); - } - break; - - case 'd': /* double precision float in native format */ - case 'D': /* ditto */ - while (len-- > 0) { - double d; - - from = NEXTFROM; - d = RFLOAT_VALUE(rb_to_float(from)); - rb_str_buf_cat(res, (char*)&d, sizeof(double)); - } - break; - - case 'g': /* single precision float in network byte-order */ - while (len-- > 0) { - FLOAT_CONVWITH(tmp); - from = NEXTFROM; + HTOVF(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(float)); + } + break; + + case 'E': /* double precision float in VAX byte-order */ + while (len-- > 0) { + DOUBLE_CONVWITH(tmp); + from = NEXTFROM; + tmp.d = RFLOAT_VALUE(rb_to_float(from)); + HTOVD(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(double)); + } + break; + + case 'd': /* double precision float in native format */ + case 'D': /* ditto */ + while (len-- > 0) { + double d; + + from = NEXTFROM; + d = RFLOAT_VALUE(rb_to_float(from)); + rb_str_buf_cat(res, (char*)&d, sizeof(double)); + } + break; + + case 'g': /* single precision float in network byte-order */ + while (len-- > 0) { + FLOAT_CONVWITH(tmp); + from = NEXTFROM; tmp.f = VALUE_to_float(from); - HTONF(tmp); - rb_str_buf_cat(res, tmp.buf, sizeof(float)); - } - break; - - case 'G': /* double precision float in network byte-order */ - while (len-- > 0) { - DOUBLE_CONVWITH(tmp); - - from = NEXTFROM; - tmp.d = RFLOAT_VALUE(rb_to_float(from)); - HTOND(tmp); - rb_str_buf_cat(res, tmp.buf, sizeof(double)); - } - break; - - case 'x': /* null byte */ - grow: - while (len >= 10) { - rb_str_buf_cat(res, nul10, 10); - len -= 10; - } - rb_str_buf_cat(res, nul10, len); - break; - - case 'X': /* back up byte */ - shrink: - plen = RSTRING_LEN(res); - if (plen < len) - rb_raise(rb_eArgError, "X outside of string"); - rb_str_set_len(res, plen - len); - break; - - case '@': /* null fill to absolute position */ - len -= RSTRING_LEN(res); - if (len > 0) goto grow; - len = -len; - if (len > 0) goto shrink; - break; - - case '%': - rb_raise(rb_eArgError, "%% is not supported"); - break; - - case 'U': /* Unicode character */ - while (len-- > 0) { - SIGNED_VALUE l; - char buf[8]; - int le; - - from = NEXTFROM; - from = rb_to_int(from); - l = NUM2LONG(from); - if (l < 0) { - rb_raise(rb_eRangeError, "pack(U): value out of range"); - } - le = rb_uv_to_utf8(buf, l); - rb_str_buf_cat(res, (char*)buf, le); - } - break; - - case 'u': /* uuencoded string */ - case 'm': /* base64 encoded string */ - from = NEXTFROM; - StringValue(from); - ptr = RSTRING_PTR(from); - plen = RSTRING_LEN(from); - - if (len == 0 && type == 'm') { - encodes(res, ptr, plen, type, 0); - ptr += plen; - break; - } - if (len <= 2) - len = 45; - else if (len > 63 && type == 'u') - len = 63; - else - len = len / 3 * 3; - while (plen > 0) { - long todo; - - if (plen > len) - todo = len; - else - todo = plen; - encodes(res, ptr, todo, type, 1); - plen -= todo; - ptr += todo; - } - break; - - case 'M': /* quoted-printable encoded string */ - from = rb_obj_as_string(NEXTFROM); - if (len <= 1) - len = 72; - qpencode(res, from, len); - break; - - case 'P': /* pointer to packed byte string */ - from = THISFROM; - if (!NIL_P(from)) { - StringValue(from); - if (RSTRING_LEN(from) < len) { - rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", - RSTRING_LEN(from), len); - } - } - len = 1; - /* FALL THROUGH */ - case 'p': /* pointer to string */ - while (len-- > 0) { - char *t; - from = NEXTFROM; - if (NIL_P(from)) { - t = 0; - } - else { - t = StringValuePtr(from); - } - if (!associates) { - associates = rb_ary_new(); - } - rb_ary_push(associates, from); - rb_str_buf_cat(res, (char*)&t, sizeof(char*)); - } - break; - - case 'w': /* BER compressed integer */ - while (len-- > 0) { - VALUE buf = rb_str_new(0, 0); + HTONF(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(float)); + } + break; + + case 'G': /* double precision float in network byte-order */ + while (len-- > 0) { + DOUBLE_CONVWITH(tmp); + + from = NEXTFROM; + tmp.d = RFLOAT_VALUE(rb_to_float(from)); + HTOND(tmp); + rb_str_buf_cat(res, tmp.buf, sizeof(double)); + } + break; + + case 'x': /* null byte */ + grow: + while (len >= 10) { + rb_str_buf_cat(res, nul10, 10); + len -= 10; + } + rb_str_buf_cat(res, nul10, len); + break; + + case 'X': /* back up byte */ + shrink: + plen = RSTRING_LEN(res); + if (plen < len) + rb_raise(rb_eArgError, "X outside of string"); + rb_str_set_len(res, plen - len); + break; + + case '@': /* null fill to absolute position */ + len -= RSTRING_LEN(res); + if (len > 0) goto grow; + len = -len; + if (len > 0) goto shrink; + break; + + case '%': + rb_raise(rb_eArgError, "%% is not supported"); + break; + + case 'U': /* Unicode character */ + while (len-- > 0) { + SIGNED_VALUE l; + char buf[8]; + int le; + + from = NEXTFROM; + from = rb_to_int(from); + l = NUM2LONG(from); + if (l < 0) { + rb_raise(rb_eRangeError, "pack(U): value out of range"); + } + le = rb_uv_to_utf8(buf, l); + rb_str_buf_cat(res, (char*)buf, le); + } + break; + + case 'u': /* uuencoded string */ + case 'm': /* base64 encoded string */ + from = NEXTFROM; + StringValue(from); + ptr = RSTRING_PTR(from); + plen = RSTRING_LEN(from); + + if (len == 0 && type == 'm') { + encodes(res, ptr, plen, type, 0); + ptr += plen; + break; + } + if (len <= 2) + len = 45; + else if (len > 63 && type == 'u') + len = 63; + else + len = len / 3 * 3; + while (plen > 0) { + long todo; + + if (plen > len) + todo = len; + else + todo = plen; + encodes(res, ptr, todo, type, 1); + plen -= todo; + ptr += todo; + } + break; + + case 'M': /* quoted-printable encoded string */ + from = rb_obj_as_string(NEXTFROM); + if (len <= 1) + len = 72; + qpencode(res, from, len); + break; + + case 'P': /* pointer to packed byte string */ + from = THISFROM; + if (!NIL_P(from)) { + StringValue(from); + if (RSTRING_LEN(from) < len) { + rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", + RSTRING_LEN(from), len); + } + } + len = 1; + /* FALL THROUGH */ + case 'p': /* pointer to string */ + while (len-- > 0) { + char *t; + from = NEXTFROM; + if (NIL_P(from)) { + t = 0; + } + else { + t = StringValuePtr(from); + } + if (!associates) { + associates = rb_ary_new(); + } + rb_ary_push(associates, from); + rb_str_buf_cat(res, (char*)&t, sizeof(char*)); + } + break; + + case 'w': /* BER compressed integer */ + while (len-- > 0) { + VALUE buf = rb_str_new(0, 0); size_t numbytes; int sign; char *cp; - from = NEXTFROM; + from = NEXTFROM; from = rb_to_int(from); numbytes = rb_absint_numwords(from, 7, NULL); if (numbytes == 0) @@ -751,29 +755,29 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) } rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); - } - break; + } + break; - default: { + default: { unknown_directive("pack", type, fmt); - break; - } - } + break; + } + } } if (associates) { - str_associate(res, associates); + str_associate(res, associates); } switch (enc_info) { case 1: - ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); - break; + ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + break; case 2: - rb_enc_set_index(res, rb_utf8_encindex()); - break; + rb_enc_set_index(res, rb_utf8_encindex()); + break; default: - /* do nothing, keep ASCII-8BIT */ - break; + /* do nothing, keep ASCII-8BIT */ + break; } return res; } @@ -794,11 +798,11 @@ encodes(VALUE str, const char *s0, long len, int type, int tail_lf) const unsigned char *s = (const unsigned char *)s0; if (type == 'u') { - buff[i++] = (char)len + ' '; - padding = '`'; + buff[i++] = (char)len + ' '; + padding = '`'; } else { - padding = '='; + padding = '='; } while (len >= input_unit) { while (len >= input_unit && buff_size-i >= encoded_unit) { @@ -816,16 +820,16 @@ encodes(VALUE str, const char *s0, long len, int type, int tail_lf) } if (len == 2) { - buff[i++] = trans[077 & (*s >> 2)]; - buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; - buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; - buff[i++] = padding; + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; + buff[i++] = padding; } else if (len == 1) { - buff[i++] = trans[077 & (*s >> 2)]; - buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; - buff[i++] = padding; - buff[i++] = padding; + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; + buff[i++] = padding; + buff[i++] = padding; } if (tail_lf) buff[i++] = '\n'; rb_str_buf_cat(str, buff, i); @@ -844,46 +848,46 @@ qpencode(VALUE str, VALUE from, long len) while (s < send) { if ((*s > 126) || - (*s < 32 && *s != '\n' && *s != '\t') || - (*s == '=')) { - buff[i++] = '='; - buff[i++] = hex_table[*s >> 4]; - buff[i++] = hex_table[*s & 0x0f]; + (*s < 32 && *s != '\n' && *s != '\t') || + (*s == '=')) { + buff[i++] = '='; + buff[i++] = hex_table[*s >> 4]; + buff[i++] = hex_table[*s & 0x0f]; n += 3; prev = EOF; } - else if (*s == '\n') { + else if (*s == '\n') { if (prev == ' ' || prev == '\t') { - buff[i++] = '='; - buff[i++] = *s; + buff[i++] = '='; + buff[i++] = *s; } - buff[i++] = *s; + buff[i++] = *s; n = 0; prev = *s; } - else { - buff[i++] = *s; + else { + buff[i++] = *s; n++; prev = *s; } if (n > len) { - buff[i++] = '='; - buff[i++] = '\n'; + buff[i++] = '='; + buff[i++] = '\n'; n = 0; prev = '\n'; } - if (i > 1024 - 5) { - rb_str_buf_cat(str, buff, i); - i = 0; - } - s++; + if (i > 1024 - 5) { + rb_str_buf_cat(str, buff, i); + i = 0; + } + s++; } if (n > 0) { - buff[i++] = '='; - buff[i++] = '\n'; + buff[i++] = '='; + buff[i++] = '\n'; } if (i > 0) { - rb_str_buf_cat(str, buff, i); + rb_str_buf_cat(str, buff, i); } } @@ -901,15 +905,15 @@ hex2num(char c) tmp_len = 0; \ if (len > (long)((send-s)/(sz))) { \ if (!star) { \ - tmp_len = len-(send-s)/(sz); \ + tmp_len = len-(send-s)/(sz); \ } \ - len = (send-s)/(sz); \ + len = (send-s)/(sz); \ } \ } while (0) #define PACK_ITEM_ADJUST() do { \ if (tmp_len > 0 && mode == UNPACK_ARRAY) \ - rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ + rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ } while (0) /* Workaround for Oracle Developer Studio (Oracle Solaris Studio) @@ -922,18 +926,19 @@ hex2num(char c) # define AVOID_CC_BUG #endif -/* unpack mode */ -#define UNPACK_ARRAY 0 -#define UNPACK_BLOCK 1 -#define UNPACK_1 2 +enum unpack_mode { + UNPACK_ARRAY, + UNPACK_BLOCK, + UNPACK_1 +}; static VALUE -pack_unpack_internal(VALUE str, VALUE fmt, int mode) +pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset) { #define hexdigits ruby_hexdigits char *s, *send; char *p, *pend; - VALUE ary; + VALUE ary, associates = Qfalse; char type; long len; AVOID_CC_BUG long tmp_len; @@ -943,316 +948,326 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode) #endif int signed_p, integer_size, bigendian_p; #define UNPACK_PUSH(item) do {\ - VALUE item_val = (item);\ - if ((mode) == UNPACK_BLOCK) {\ - rb_yield(item_val);\ - }\ - else if ((mode) == UNPACK_ARRAY) {\ - rb_ary_push(ary, item_val);\ - }\ - else /* if ((mode) == UNPACK_1) { */ {\ - return item_val; \ - }\ + VALUE item_val = (item);\ + if ((mode) == UNPACK_BLOCK) {\ + rb_yield(item_val);\ + }\ + else if ((mode) == UNPACK_ARRAY) {\ + rb_ary_push(ary, item_val);\ + }\ + else /* if ((mode) == UNPACK_1) { */ {\ + return item_val; \ + }\ } while (0) StringValue(str); StringValue(fmt); + rb_must_asciicompat(fmt); + + if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative"); + len = RSTRING_LEN(str); + if (offset > len) rb_raise(rb_eArgError, "offset outside of string"); + s = RSTRING_PTR(str); - send = s + RSTRING_LEN(str); + send = s + len; + s += offset; + p = RSTRING_PTR(fmt); pend = p + RSTRING_LEN(fmt); +#define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type)) + ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil; while (p < pend) { - int explicit_endian = 0; - type = *p++; + int explicit_endian = 0; + type = *p++; #ifdef NATINT_PACK - natint = 0; + natint = 0; #endif - if (ISSPACE(type)) continue; - if (type == '#') { - while ((p < pend) && (*p != '\n')) { - p++; - } - continue; - } + if (ISSPACE(type)) continue; + if (type == '#') { + while ((p < pend) && (*p != '\n')) { + p++; + } + continue; + } - star = 0; - { + star = 0; + { modifiers: - switch (*p) { - case '_': - case '!': + switch (*p) { + case '_': + case '!': - if (strchr(natstr, type)) { + if (strchr(natstr, type)) { #ifdef NATINT_PACK - natint = 1; + natint = 1; #endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); - } - goto modifiers; - - case '<': - case '>': - if (!strchr(endstr, type)) { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); - } - if (explicit_endian) { - rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); - } - explicit_endian = *p++; - goto modifiers; - } - } - - if (p >= pend) - len = 1; - else if (*p == '*') { - star = 1; - len = send - s; - p++; - } - else if (ISDIGIT(*p)) { - errno = 0; - len = STRTOUL(p, (char**)&p, 10); - if (len < 0 || errno) { - rb_raise(rb_eRangeError, "pack length too big"); - } - } - else { - len = (type != '@'); - } - - switch (type) { - case '%': - rb_raise(rb_eArgError, "%% is not supported"); - break; - - case 'A': - if (len > send - s) len = send - s; - { - long end = len; - char *t = s + len - 1; - - while (t >= s) { - if (*t != ' ' && *t != '\0') break; - t--; len--; - } + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + goto modifiers; + + case '<': + case '>': + if (!strchr(endstr, type)) { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); + } + if (explicit_endian) { + rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); + } + explicit_endian = *p++; + goto modifiers; + } + } + + if (p >= pend) + len = 1; + else if (*p == '*') { + star = 1; + len = send - s; + p++; + } + else if (ISDIGIT(*p)) { + errno = 0; + len = STRTOUL(p, (char**)&p, 10); + if (len < 0 || errno) { + rb_raise(rb_eRangeError, "pack length too big"); + } + } + else { + len = (type != '@'); + } + + switch (type) { + case '%': + rb_raise(rb_eArgError, "%% is not supported"); + break; + + case 'A': + if (len > send - s) len = send - s; + { + long end = len; + char *t = s + len - 1; + + while (t >= s) { + if (*t != ' ' && *t != '\0') break; + t--; len--; + } UNPACK_PUSH(rb_str_new(s, len)); - s += end; - } - break; + s += end; + } + break; - case 'Z': - { - char *t = s; + case 'Z': + { + char *t = s; - if (len > send-s) len = send-s; - while (t < s+len && *t) t++; + if (len > send-s) len = send-s; + while (t < s+len && *t) t++; UNPACK_PUSH(rb_str_new(s, t-s)); - if (t < send) t++; - s = star ? t : s+len; - } - break; + if (t < send) t++; + s = star ? t : s+len; + } + break; - case 'a': - if (len > send - s) len = send - s; + case 'a': + if (len > send - s) len = send - s; UNPACK_PUSH(rb_str_new(s, len)); - s += len; - break; - - case 'b': - { - VALUE bitstr; - char *t; - int bits; - long i; - - if (p[-1] == '*' || len > (send - s) * 8) - len = (send - s) * 8; - bits = 0; - bitstr = rb_usascii_str_new(0, len); - t = RSTRING_PTR(bitstr); - for (i=0; i<len; i++) { - if (i & 7) bits >>= 1; - else bits = (unsigned char)*s++; - *t++ = (bits & 1) ? '1' : '0'; - } - UNPACK_PUSH(bitstr); - } - break; - - case 'B': - { - VALUE bitstr; - char *t; - int bits; - long i; - - if (p[-1] == '*' || len > (send - s) * 8) - len = (send - s) * 8; - bits = 0; - bitstr = rb_usascii_str_new(0, len); - t = RSTRING_PTR(bitstr); - for (i=0; i<len; i++) { - if (i & 7) bits <<= 1; - else bits = (unsigned char)*s++; - *t++ = (bits & 128) ? '1' : '0'; - } - UNPACK_PUSH(bitstr); - } - break; - - case 'h': - { - VALUE bitstr; - char *t; - int bits; - long i; - - if (p[-1] == '*' || len > (send - s) * 2) - len = (send - s) * 2; - bits = 0; - bitstr = rb_usascii_str_new(0, len); - t = RSTRING_PTR(bitstr); - for (i=0; i<len; i++) { - if (i & 1) - bits >>= 4; - else - bits = (unsigned char)*s++; - *t++ = hexdigits[bits & 15]; - } - UNPACK_PUSH(bitstr); - } - break; - - case 'H': - { - VALUE bitstr; - char *t; - int bits; - long i; - - if (p[-1] == '*' || len > (send - s) * 2) - len = (send - s) * 2; - bits = 0; - bitstr = rb_usascii_str_new(0, len); - t = RSTRING_PTR(bitstr); - for (i=0; i<len; i++) { - if (i & 1) - bits <<= 4; - else - bits = (unsigned char)*s++; - *t++ = hexdigits[(bits >> 4) & 15]; - } - UNPACK_PUSH(bitstr); - } - break; - - case 'c': - signed_p = 1; - integer_size = 1; - bigendian_p = BIGENDIAN_P(); /* not effective */ - goto unpack_integer; - - case 'C': - signed_p = 0; - integer_size = 1; - bigendian_p = BIGENDIAN_P(); /* not effective */ - goto unpack_integer; - - case 's': - signed_p = 1; - integer_size = NATINT_LEN(short, 2); - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'S': - signed_p = 0; - integer_size = NATINT_LEN(short, 2); - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'i': - signed_p = 1; - integer_size = (int)sizeof(int); - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'I': - signed_p = 0; - integer_size = (int)sizeof(int); - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'l': - signed_p = 1; - integer_size = NATINT_LEN(long, 4); - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'L': - signed_p = 0; - integer_size = NATINT_LEN(long, 4); - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'q': - signed_p = 1; - integer_size = NATINT_LEN_Q; - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'Q': - signed_p = 0; - integer_size = NATINT_LEN_Q; - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'j': - signed_p = 1; - integer_size = sizeof(intptr_t); - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'J': - signed_p = 0; - integer_size = sizeof(uintptr_t); - bigendian_p = BIGENDIAN_P(); - goto unpack_integer; - - case 'n': - signed_p = 0; - integer_size = 2; - bigendian_p = 1; - goto unpack_integer; - - case 'N': - signed_p = 0; - integer_size = 4; - bigendian_p = 1; - goto unpack_integer; - - case 'v': - signed_p = 0; - integer_size = 2; - bigendian_p = 0; - goto unpack_integer; - - case 'V': - signed_p = 0; - integer_size = 4; - bigendian_p = 0; - goto unpack_integer; - - unpack_integer: - if (explicit_endian) { - bigendian_p = explicit_endian == '>'; - } + s += len; + break; + + case 'b': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 8) + len = (send - s) * 8; + bits = 0; + bitstr = rb_usascii_str_new(0, len); + t = RSTRING_PTR(bitstr); + for (i=0; i<len; i++) { + if (i & 7) bits >>= 1; + else bits = (unsigned char)*s++; + *t++ = (bits & 1) ? '1' : '0'; + } + UNPACK_PUSH(bitstr); + } + break; + + case 'B': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 8) + len = (send - s) * 8; + bits = 0; + bitstr = rb_usascii_str_new(0, len); + t = RSTRING_PTR(bitstr); + for (i=0; i<len; i++) { + if (i & 7) bits <<= 1; + else bits = (unsigned char)*s++; + *t++ = (bits & 128) ? '1' : '0'; + } + UNPACK_PUSH(bitstr); + } + break; + + case 'h': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 2) + len = (send - s) * 2; + bits = 0; + bitstr = rb_usascii_str_new(0, len); + t = RSTRING_PTR(bitstr); + for (i=0; i<len; i++) { + if (i & 1) + bits >>= 4; + else + bits = (unsigned char)*s++; + *t++ = hexdigits[bits & 15]; + } + UNPACK_PUSH(bitstr); + } + break; + + case 'H': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 2) + len = (send - s) * 2; + bits = 0; + bitstr = rb_usascii_str_new(0, len); + t = RSTRING_PTR(bitstr); + for (i=0; i<len; i++) { + if (i & 1) + bits <<= 4; + else + bits = (unsigned char)*s++; + *t++ = hexdigits[(bits >> 4) & 15]; + } + UNPACK_PUSH(bitstr); + } + break; + + case 'c': + signed_p = 1; + integer_size = 1; + bigendian_p = BIGENDIAN_P(); /* not effective */ + goto unpack_integer; + + case 'C': + signed_p = 0; + integer_size = 1; + bigendian_p = BIGENDIAN_P(); /* not effective */ + goto unpack_integer; + + case 's': + signed_p = 1; + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'S': + signed_p = 0; + integer_size = NATINT_LEN(short, 2); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'i': + signed_p = 1; + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'I': + signed_p = 0; + integer_size = (int)sizeof(int); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'l': + signed_p = 1; + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'L': + signed_p = 0; + integer_size = NATINT_LEN(long, 4); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'q': + signed_p = 1; + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'Q': + signed_p = 0; + integer_size = NATINT_LEN_Q; + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'j': + signed_p = 1; + integer_size = sizeof(intptr_t); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'J': + signed_p = 0; + integer_size = sizeof(uintptr_t); + bigendian_p = BIGENDIAN_P(); + goto unpack_integer; + + case 'n': + signed_p = 0; + integer_size = 2; + bigendian_p = 1; + goto unpack_integer; + + case 'N': + signed_p = 0; + integer_size = 4; + bigendian_p = 1; + goto unpack_integer; + + case 'v': + signed_p = 0; + integer_size = 2; + bigendian_p = 0; + goto unpack_integer; + + case 'V': + signed_p = 0; + integer_size = 4; + bigendian_p = 0; + goto unpack_integer; + + unpack_integer: + if (explicit_endian) { + bigendian_p = explicit_endian == '>'; + } PACK_LENGTH_ADJUST_SIZE(integer_size); while (len-- > 0) { int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN; @@ -1266,355 +1281,311 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode) PACK_ITEM_ADJUST(); break; - case 'f': - case 'F': - PACK_LENGTH_ADJUST_SIZE(sizeof(float)); - while (len-- > 0) { - float tmp; - memcpy(&tmp, s, sizeof(float)); - s += sizeof(float); - UNPACK_PUSH(DBL2NUM((double)tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'e': - PACK_LENGTH_ADJUST_SIZE(sizeof(float)); - while (len-- > 0) { - FLOAT_CONVWITH(tmp); - memcpy(tmp.buf, s, sizeof(float)); - s += sizeof(float); - VTOHF(tmp); - UNPACK_PUSH(DBL2NUM(tmp.f)); - } - PACK_ITEM_ADJUST(); - break; - - case 'E': - PACK_LENGTH_ADJUST_SIZE(sizeof(double)); - while (len-- > 0) { - DOUBLE_CONVWITH(tmp); - memcpy(tmp.buf, s, sizeof(double)); - s += sizeof(double); - VTOHD(tmp); - UNPACK_PUSH(DBL2NUM(tmp.d)); - } - PACK_ITEM_ADJUST(); - break; - - case 'D': - case 'd': - PACK_LENGTH_ADJUST_SIZE(sizeof(double)); - while (len-- > 0) { - double tmp; - memcpy(&tmp, s, sizeof(double)); - s += sizeof(double); - UNPACK_PUSH(DBL2NUM(tmp)); - } - PACK_ITEM_ADJUST(); - break; - - case 'g': - PACK_LENGTH_ADJUST_SIZE(sizeof(float)); - while (len-- > 0) { - FLOAT_CONVWITH(tmp); - memcpy(tmp.buf, s, sizeof(float)); - s += sizeof(float); - NTOHF(tmp); - UNPACK_PUSH(DBL2NUM(tmp.f)); - } - PACK_ITEM_ADJUST(); - break; - - case 'G': - PACK_LENGTH_ADJUST_SIZE(sizeof(double)); - while (len-- > 0) { - DOUBLE_CONVWITH(tmp); - memcpy(tmp.buf, s, sizeof(double)); - s += sizeof(double); - NTOHD(tmp); - UNPACK_PUSH(DBL2NUM(tmp.d)); - } - PACK_ITEM_ADJUST(); - break; - - case 'U': - if (len > send - s) len = send - s; - while (len > 0 && s < send) { - long alen = send - s; - unsigned long l; - - l = utf8_to_uv(s, &alen); - s += alen; len--; - UNPACK_PUSH(ULONG2NUM(l)); - } - break; - - case 'u': - { + case 'f': + case 'F': + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); + while (len-- > 0) { + float tmp; + UNPACK_FETCH(&tmp, float); + UNPACK_PUSH(DBL2NUM((double)tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'e': + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); + while (len-- > 0) { + FLOAT_CONVWITH(tmp); + UNPACK_FETCH(tmp.buf, float); + VTOHF(tmp); + UNPACK_PUSH(DBL2NUM(tmp.f)); + } + PACK_ITEM_ADJUST(); + break; + + case 'E': + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); + while (len-- > 0) { + DOUBLE_CONVWITH(tmp); + UNPACK_FETCH(tmp.buf, double); + VTOHD(tmp); + UNPACK_PUSH(DBL2NUM(tmp.d)); + } + PACK_ITEM_ADJUST(); + break; + + case 'D': + case 'd': + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); + while (len-- > 0) { + double tmp; + UNPACK_FETCH(&tmp, double); + UNPACK_PUSH(DBL2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'g': + PACK_LENGTH_ADJUST_SIZE(sizeof(float)); + while (len-- > 0) { + FLOAT_CONVWITH(tmp); + UNPACK_FETCH(tmp.buf, float); + NTOHF(tmp); + UNPACK_PUSH(DBL2NUM(tmp.f)); + } + PACK_ITEM_ADJUST(); + break; + + case 'G': + PACK_LENGTH_ADJUST_SIZE(sizeof(double)); + while (len-- > 0) { + DOUBLE_CONVWITH(tmp); + UNPACK_FETCH(tmp.buf, double); + NTOHD(tmp); + UNPACK_PUSH(DBL2NUM(tmp.d)); + } + PACK_ITEM_ADJUST(); + break; + + case 'U': + if (len > send - s) len = send - s; + while (len > 0 && s < send) { + long alen = send - s; + unsigned long l; + + l = utf8_to_uv(s, &alen); + s += alen; len--; + UNPACK_PUSH(ULONG2NUM(l)); + } + break; + + case 'u': + { VALUE buf = rb_str_new(0, (send - s)*3/4); - char *ptr = RSTRING_PTR(buf); - long total = 0; - - while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') { - long a,b,c,d; - char hunk[3]; - - len = ((unsigned char)*s++ - ' ') & 077; - - total += len; - if (total > RSTRING_LEN(buf)) { - len -= total - RSTRING_LEN(buf); - total = RSTRING_LEN(buf); - } - - while (len > 0) { - long mlen = len > 3 ? 3 : len; - - if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') - a = ((unsigned char)*s++ - ' ') & 077; - else - a = 0; - if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') - b = ((unsigned char)*s++ - ' ') & 077; - else - b = 0; - if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') - c = ((unsigned char)*s++ - ' ') & 077; - else - c = 0; - if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') - d = ((unsigned char)*s++ - ' ') & 077; - else - d = 0; - hunk[0] = (char)(a << 2 | b >> 4); - hunk[1] = (char)(b << 4 | c >> 2); - hunk[2] = (char)(c << 6 | d); - memcpy(ptr, hunk, mlen); - ptr += mlen; - len -= mlen; - } - if (s < send && (unsigned char)*s != '\r' && *s != '\n') - s++; /* possible checksum byte */ - if (s < send && *s == '\r') s++; - if (s < send && *s == '\n') s++; - } - - rb_str_set_len(buf, total); - UNPACK_PUSH(buf); - } - break; - - case 'm': - { + char *ptr = RSTRING_PTR(buf); + long total = 0; + + while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') { + long a,b,c,d; + char hunk[3]; + + len = ((unsigned char)*s++ - ' ') & 077; + + total += len; + if (total > RSTRING_LEN(buf)) { + len -= total - RSTRING_LEN(buf); + total = RSTRING_LEN(buf); + } + + while (len > 0) { + long mlen = len > 3 ? 3 : len; + + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + a = ((unsigned char)*s++ - ' ') & 077; + else + a = 0; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + b = ((unsigned char)*s++ - ' ') & 077; + else + b = 0; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + c = ((unsigned char)*s++ - ' ') & 077; + else + c = 0; + if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a') + d = ((unsigned char)*s++ - ' ') & 077; + else + d = 0; + hunk[0] = (char)(a << 2 | b >> 4); + hunk[1] = (char)(b << 4 | c >> 2); + hunk[2] = (char)(c << 6 | d); + memcpy(ptr, hunk, mlen); + ptr += mlen; + len -= mlen; + } + if (s < send && (unsigned char)*s != '\r' && *s != '\n') + s++; /* possible checksum byte */ + if (s < send && *s == '\r') s++; + if (s < send && *s == '\n') s++; + } + + rb_str_set_len(buf, total); + UNPACK_PUSH(buf); + } + break; + + case 'm': + { VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */ - char *ptr = RSTRING_PTR(buf); - int a = -1,b = -1,c = 0,d = 0; - static signed char b64_xtable[256]; - - if (b64_xtable['/'] <= 0) { - int i; - - for (i = 0; i < 256; i++) { - b64_xtable[i] = -1; - } - for (i = 0; i < 64; i++) { - b64_xtable[(unsigned char)b64_table[i]] = (char)i; - } - } - if (len == 0) { - while (s < send) { - a = b = c = d = -1; - a = b64_xtable[(unsigned char)*s++]; - if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); - b = b64_xtable[(unsigned char)*s++]; - if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); - if (*s == '=') { - if (s + 2 == send && *(s + 1) == '=') break; - rb_raise(rb_eArgError, "invalid base64"); - } - c = b64_xtable[(unsigned char)*s++]; - if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); - if (s + 1 == send && *s == '=') break; - d = b64_xtable[(unsigned char)*s++]; - if (d == -1) rb_raise(rb_eArgError, "invalid base64"); - *ptr++ = castchar(a << 2 | b >> 4); - *ptr++ = castchar(b << 4 | c >> 2); - *ptr++ = castchar(c << 6 | d); - } - if (c == -1) { - *ptr++ = castchar(a << 2 | b >> 4); - if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); - } - else if (d == -1) { - *ptr++ = castchar(a << 2 | b >> 4); - *ptr++ = castchar(b << 4 | c >> 2); - if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); - } - } - else { - while (s < send) { - a = b = c = d = -1; - while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} - if (s >= send) break; - s++; - while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} - if (s >= send) break; - s++; - while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} - if (*s == '=' || s >= send) break; - s++; - while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} - if (*s == '=' || s >= send) break; - s++; - *ptr++ = castchar(a << 2 | b >> 4); - *ptr++ = castchar(b << 4 | c >> 2); - *ptr++ = castchar(c << 6 | d); - a = -1; - } - if (a != -1 && b != -1) { - if (c == -1) - *ptr++ = castchar(a << 2 | b >> 4); - else { - *ptr++ = castchar(a << 2 | b >> 4); - *ptr++ = castchar(b << 4 | c >> 2); - } - } - } - rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); - UNPACK_PUSH(buf); - } - break; - - case 'M': - { + char *ptr = RSTRING_PTR(buf); + int a = -1,b = -1,c = 0,d = 0; + static signed char b64_xtable[256]; + + if (b64_xtable['/'] <= 0) { + int i; + + for (i = 0; i < 256; i++) { + b64_xtable[i] = -1; + } + for (i = 0; i < 64; i++) { + b64_xtable[(unsigned char)b64_table[i]] = (char)i; + } + } + if (len == 0) { + while (s < send) { + a = b = c = d = -1; + a = b64_xtable[(unsigned char)*s++]; + if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); + b = b64_xtable[(unsigned char)*s++]; + if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); + if (*s == '=') { + if (s + 2 == send && *(s + 1) == '=') break; + rb_raise(rb_eArgError, "invalid base64"); + } + c = b64_xtable[(unsigned char)*s++]; + if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); + if (s + 1 == send && *s == '=') break; + d = b64_xtable[(unsigned char)*s++]; + if (d == -1) rb_raise(rb_eArgError, "invalid base64"); + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + *ptr++ = castchar(c << 6 | d); + } + if (c == -1) { + *ptr++ = castchar(a << 2 | b >> 4); + if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); + } + else if (d == -1) { + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); + } + } + else { + while (s < send) { + a = b = c = d = -1; + while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + *ptr++ = castchar(c << 6 | d); + a = -1; + } + if (a != -1 && b != -1) { + if (c == -1) + *ptr++ = castchar(a << 2 | b >> 4); + else { + *ptr++ = castchar(a << 2 | b >> 4); + *ptr++ = castchar(b << 4 | c >> 2); + } + } + } + rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); + UNPACK_PUSH(buf); + } + break; + + case 'M': + { VALUE buf = rb_str_new(0, send - s); - char *ptr = RSTRING_PTR(buf), *ss = s; - int csum = 0; - int c1, c2; - - while (s < send) { - if (*s == '=') { - if (++s == send) break; - if (s+1 < send && *s == '\r' && *(s+1) == '\n') - s++; - if (*s != '\n') { - if ((c1 = hex2num(*s)) == -1) break; - if (++s == send) break; - if ((c2 = hex2num(*s)) == -1) break; - csum |= *ptr++ = castchar(c1 << 4 | c2); - } - } - else { - csum |= *ptr++ = *s; - } - s++; - ss = s; - } - rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); - rb_str_buf_cat(buf, ss, send-ss); - csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; - ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum); - UNPACK_PUSH(buf); - } - break; - - case '@': - if (len > RSTRING_LEN(str)) - rb_raise(rb_eArgError, "@ outside of string"); - s = RSTRING_PTR(str) + len; - break; - - case 'X': - if (len > s - RSTRING_PTR(str)) - rb_raise(rb_eArgError, "X outside of string"); - s -= len; - break; - - case 'x': - if (len > send - s) - rb_raise(rb_eArgError, "x outside of string"); - s += len; - break; - - case 'P': - if (sizeof(char *) <= (size_t)(send - s)) { - VALUE tmp = Qnil; - char *t; - - memcpy(&t, s, sizeof(char *)); - s += sizeof(char *); - - if (t) { - VALUE a; - const VALUE *p, *pend; - - if (!(a = str_associated(str))) { - rb_raise(rb_eArgError, "no associated pointer"); - } - p = RARRAY_CONST_PTR(a); - pend = p + RARRAY_LEN(a); - while (p < pend) { - if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) { - if (len < RSTRING_LEN(*p)) { - tmp = rb_str_new(t, len); - str_associate(tmp, a); - } - else { - tmp = *p; - } - break; - } - p++; - } - if (p == pend) { - rb_raise(rb_eArgError, "non associated pointer"); - } - } - UNPACK_PUSH(tmp); - } - break; - - case 'p': - if (len > (long)((send - s) / sizeof(char *))) - len = (send - s) / sizeof(char *); - while (len-- > 0) { - if ((size_t)(send - s) < sizeof(char *)) - break; - else { - VALUE tmp = Qnil; - char *t; - - memcpy(&t, s, sizeof(char *)); - s += sizeof(char *); - - if (t) { - VALUE a; - const VALUE *p, *pend; - - if (!(a = str_associated(str))) { - rb_raise(rb_eArgError, "no associated pointer"); - } - p = RARRAY_CONST_PTR(a); - pend = p + RARRAY_LEN(a); - while (p < pend) { - if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) { - tmp = *p; - break; - } - p++; - } - if (p == pend) { - rb_raise(rb_eArgError, "non associated pointer"); - } - } - UNPACK_PUSH(tmp); - } - } - break; - - case 'w': - { + char *ptr = RSTRING_PTR(buf), *ss = s; + int csum = 0; + int c1, c2; + + while (s < send) { + if (*s == '=') { + if (++s == send) break; + if (s+1 < send && *s == '\r' && *(s+1) == '\n') + s++; + if (*s != '\n') { + if ((c1 = hex2num(*s)) == -1) break; + if (++s == send) break; + if ((c2 = hex2num(*s)) == -1) break; + csum |= *ptr++ = castchar(c1 << 4 | c2); + } + } + else { + csum |= *ptr++ = *s; + } + s++; + ss = s; + } + rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); + rb_str_buf_cat(buf, ss, send-ss); + csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; + ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum); + UNPACK_PUSH(buf); + } + break; + + case '@': + if (len > RSTRING_LEN(str)) + rb_raise(rb_eArgError, "@ outside of string"); + s = RSTRING_PTR(str) + len; + break; + + case 'X': + if (len > s - RSTRING_PTR(str)) + rb_raise(rb_eArgError, "X outside of string"); + s -= len; + break; + + case 'x': + if (len > send - s) + rb_raise(rb_eArgError, "x outside of string"); + s += len; + break; + + case 'P': + if (sizeof(char *) <= (size_t)(send - s)) { + VALUE tmp = Qnil; + char *t; + + UNPACK_FETCH(&t, char *); + if (t) { + if (!associates) associates = str_associated(str); + tmp = associated_pointer(associates, t); + if (len < RSTRING_LEN(tmp)) { + tmp = rb_str_new(t, len); + str_associate(tmp, associates); + } + } + UNPACK_PUSH(tmp); + } + break; + + case 'p': + if (len > (long)((send - s) / sizeof(char *))) + len = (send - s) / sizeof(char *); + while (len-- > 0) { + if ((size_t)(send - s) < sizeof(char *)) + break; + else { + VALUE tmp = Qnil; + char *t; + + UNPACK_FETCH(&t, char *); + if (t) { + if (!associates) associates = str_associated(str); + tmp = associated_pointer(associates, t); + } + UNPACK_PUSH(tmp); + } + } + break; + + case 'w': + { char *s0 = s; while (len > 0 && s < send) { if (*s & 0x80) { @@ -1627,72 +1598,72 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode) s0 = s; } } - } - break; + } + break; - default: + default: unknown_directive("unpack", type, fmt); - break; - } + break; + } } return ary; } static VALUE -pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt) +pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) { - int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; - return pack_unpack_internal(str, fmt, mode); + enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; + return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset)); } static VALUE -pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt) +pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) { - return pack_unpack_internal(str, fmt, UNPACK_1); + return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset)); } int rb_uv_to_utf8(char buf[6], unsigned long uv) { if (uv <= 0x7f) { - buf[0] = (char)uv; - return 1; + buf[0] = (char)uv; + return 1; } if (uv <= 0x7ff) { - buf[0] = castchar(((uv>>6)&0xff)|0xc0); - buf[1] = castchar((uv&0x3f)|0x80); - return 2; + buf[0] = castchar(((uv>>6)&0xff)|0xc0); + buf[1] = castchar((uv&0x3f)|0x80); + return 2; } if (uv <= 0xffff) { - buf[0] = castchar(((uv>>12)&0xff)|0xe0); - buf[1] = castchar(((uv>>6)&0x3f)|0x80); - buf[2] = castchar((uv&0x3f)|0x80); - return 3; + buf[0] = castchar(((uv>>12)&0xff)|0xe0); + buf[1] = castchar(((uv>>6)&0x3f)|0x80); + buf[2] = castchar((uv&0x3f)|0x80); + return 3; } if (uv <= 0x1fffff) { - buf[0] = castchar(((uv>>18)&0xff)|0xf0); - buf[1] = castchar(((uv>>12)&0x3f)|0x80); - buf[2] = castchar(((uv>>6)&0x3f)|0x80); - buf[3] = castchar((uv&0x3f)|0x80); - return 4; + buf[0] = castchar(((uv>>18)&0xff)|0xf0); + buf[1] = castchar(((uv>>12)&0x3f)|0x80); + buf[2] = castchar(((uv>>6)&0x3f)|0x80); + buf[3] = castchar((uv&0x3f)|0x80); + return 4; } if (uv <= 0x3ffffff) { - buf[0] = castchar(((uv>>24)&0xff)|0xf8); - buf[1] = castchar(((uv>>18)&0x3f)|0x80); - buf[2] = castchar(((uv>>12)&0x3f)|0x80); - buf[3] = castchar(((uv>>6)&0x3f)|0x80); - buf[4] = castchar((uv&0x3f)|0x80); - return 5; + buf[0] = castchar(((uv>>24)&0xff)|0xf8); + buf[1] = castchar(((uv>>18)&0x3f)|0x80); + buf[2] = castchar(((uv>>12)&0x3f)|0x80); + buf[3] = castchar(((uv>>6)&0x3f)|0x80); + buf[4] = castchar((uv&0x3f)|0x80); + return 5; } if (uv <= 0x7fffffff) { - buf[0] = castchar(((uv>>30)&0xff)|0xfc); - buf[1] = castchar(((uv>>24)&0x3f)|0x80); - buf[2] = castchar(((uv>>18)&0x3f)|0x80); - buf[3] = castchar(((uv>>12)&0x3f)|0x80); - buf[4] = castchar(((uv>>6)&0x3f)|0x80); - buf[5] = castchar((uv&0x3f)|0x80); - return 6; + buf[0] = castchar(((uv>>30)&0xff)|0xfc); + buf[1] = castchar(((uv>>24)&0x3f)|0x80); + buf[2] = castchar(((uv>>18)&0x3f)|0x80); + buf[3] = castchar(((uv>>12)&0x3f)|0x80); + buf[4] = castchar(((uv>>6)&0x3f)|0x80); + buf[5] = castchar((uv&0x3f)|0x80); + return 6; } rb_raise(rb_eRangeError, "pack(U): value out of range"); @@ -1717,12 +1688,12 @@ utf8_to_uv(const char *p, long *lenp) long n; if (!(uv & 0x80)) { - *lenp = 1; + *lenp = 1; return uv; } if (!(uv & 0x40)) { - *lenp = 1; - rb_raise(rb_eArgError, "malformed UTF-8 character"); + *lenp = 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); } if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } @@ -1731,30 +1702,30 @@ utf8_to_uv(const char *p, long *lenp) else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } else { - *lenp = 1; - rb_raise(rb_eArgError, "malformed UTF-8 character"); + *lenp = 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); } if (n > *lenp) { - rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", - n, *lenp); + rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", + n, *lenp); } *lenp = n--; if (n != 0) { - while (n--) { - c = *p++ & 0xff; - if ((c & 0xc0) != 0x80) { - *lenp -= n + 1; - rb_raise(rb_eArgError, "malformed UTF-8 character"); - } - else { - c &= 0x3f; - uv = uv << 6 | c; - } - } + while (n--) { + c = *p++ & 0xff; + if ((c & 0xc0) != 0x80) { + *lenp -= n + 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + else { + c &= 0x3f; + uv = uv << 6 | c; + } + } } n = *lenp - 1; if (uv < utf8_limits[n]) { - rb_raise(rb_eArgError, "redundant UTF-8 sequence"); + rb_raise(rb_eArgError, "redundant UTF-8 sequence"); } return uv; } |