diff options
Diffstat (limited to 'pack.c')
| -rw-r--r-- | pack.c | 354 |
1 files changed, 204 insertions, 150 deletions
@@ -19,6 +19,7 @@ #include "internal.h" #include "internal/array.h" #include "internal/bits.h" +#include "internal/numeric.h" #include "internal/string.h" #include "internal/symbol.h" #include "internal/variable.h" @@ -36,10 +37,11 @@ */ #ifdef HAVE_TRUE_LONG_LONG static const char natstr[] = "sSiIlLqQjJ"; +# define endstr natstr #else static const char natstr[] = "sSiIlLjJ"; -#endif static const char endstr[] = "sSiIlLqQjJ"; +#endif #ifdef HAVE_TRUE_LONG_LONG /* It is intentional to use long long instead of LONG_LONG. */ @@ -60,7 +62,7 @@ is_bigendian(void) { static int init = 0; static int endian_value; - char *p; + const char *p; if (init) return endian_value; init = 1; @@ -117,6 +119,7 @@ typedef union { #define MAX_INTEGER_PACK_SIZE 8 static const char toofew[] = "too few arguments"; +static const char intoitself[] = "cannot pack buffer object into itself"; static void encodes(VALUE,const char*,long,int,int); static void qpencode(VALUE,VALUE,long); @@ -154,6 +157,7 @@ associated_pointer(VALUE associates, const char *t) UNREACHABLE_RETURN(Qnil); } +RBIMPL_ATTR_NORETURN() static void unknown_directive(const char *mode, char type, VALUE fmt) { @@ -167,8 +171,8 @@ unknown_directive(const char *mode, char type, VALUE fmt) snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff); } fmt = rb_str_quote_unprintable(fmt); - rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'", - mode, unknown, fmt); + rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'", + mode, unknown, fmt); } static float @@ -191,23 +195,73 @@ VALUE_to_float(VALUE obj) } } +static void +str_expand_fill(VALUE res, int c, long len) +{ + long olen = RSTRING_LEN(res); + memset(RSTRING_PTR(res) + olen, c, len); + rb_str_set_len(res, olen + len); +} + +static char * +skip_to_eol(const char *p, const char *pend) +{ + p = memchr(p, '\n', pend - p); + return (char *)(p ? p + 1 : pend); +} + +#define skip_blank(p, type) \ + (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1))) + +#ifndef NATINT_PACK +# define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e) +#endif +static char * +pack_modifiers(const char *p, char type, int *natint, int *explicit_endian) +{ + while (1) { + switch (*p) { + case '_': + case '!': + if (strchr(natstr, type)) { +#ifdef NATINT_PACK + *natint = 1; +#endif + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + break; + + case '<': + case '>': + if (!strchr(endstr, type)) { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); + } + if (*explicit_endian) { + rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); + } + *explicit_endian = *p++; + break; + default: + return (char *)p; + } + } +} + static VALUE pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) { - static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0"; - static const char spc10[] = " "; const char *p, *pend; VALUE res, from, associates = 0; - char type; long len, idx, plen; const char *ptr; int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ -#ifdef NATINT_PACK - int natint; /* native integer */ -#endif int integer_size, bigendian_p; StringValue(fmt); + rb_must_asciicompat(fmt); p = RSTRING_PTR(fmt); pend = p + RSTRING_LEN(fmt); @@ -217,6 +271,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) else { if (!RB_TYPE_P(buffer, T_STRING)) rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer)); + rb_str_modify(buffer); res = buffer; } @@ -226,53 +281,21 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) #define MORE_ITEM (idx < RARRAY_LEN(ary)) #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW) #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW) +#define NOT_BUFFER(val) (((val) == res) ? rb_raise(rb_eArgError, intoitself) : (void)0) +#define STR_FROM(val) NOT_BUFFER(StringValue(val)) while (p < pend) { int explicit_endian = 0; - if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { + if (RSTRING_END(fmt) != pend) { rb_raise(rb_eRuntimeError, "format string modified"); } - type = *p++; /* get data type */ + const char type = *p++; /* get data type */ #ifdef NATINT_PACK - natint = 0; + int natint = 0; /* native integer */ #endif - if (ISSPACE(type)) continue; - if (type == '#') { - while ((p < pend) && (*p != '\n')) { - p++; - } - continue; - } - - { - modifiers: - switch (*p) { - case '_': - case '!': - if (strchr(natstr, type)) { -#ifdef NATINT_PACK - natint = 1; -#endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); - } - goto modifiers; - - case '<': - case '>': - if (!strchr(endstr, type)) { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); - } - if (explicit_endian) { - rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); - } - explicit_endian = *p++; - goto modifiers; - } - } + if (skip_blank(p, type)) continue; + p = pack_modifiers(p, type, &natint, &explicit_endian); if (*p == '*') { /* set data length */ len = strchr("@Xxu", type) ? 0 @@ -283,7 +306,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) else if (ISDIGIT(*p)) { errno = 0; len = STRTOUL(p, (char**)&p, 10); - if (errno) { + if (len < 0 || errno) { rb_raise(rb_eRangeError, "pack length too big"); } } @@ -314,7 +337,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) plen = 0; } else { - StringValue(from); + STR_FROM(from); ptr = RSTRING_PTR(from); plen = RSTRING_LEN(from); } @@ -329,16 +352,12 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) if (plen >= len) { rb_str_buf_cat(res, ptr, len); if (p[-1] == '*' && type == 'Z') - rb_str_buf_cat(res, nul10, 1); + rb_str_buf_cat(res, "", 1); } else { + rb_str_modify_expand(res, len); rb_str_buf_cat(res, ptr, plen); - len -= plen; - while (len >= 10) { - rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); - len -= 10; - } - rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); + str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen); } break; @@ -476,40 +495,24 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) goto pack_integer; case 's': /* s for int16_t, s! for signed short */ - integer_size = NATINT_LEN(short, 2); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - case 'S': /* S for uint16_t, S! for unsigned short */ integer_size = NATINT_LEN(short, 2); bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'i': /* i and i! for signed int */ - integer_size = (int)sizeof(int); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - case 'I': /* I and I! for unsigned int */ integer_size = (int)sizeof(int); bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'l': /* l for int32_t, l! for signed long */ - integer_size = NATINT_LEN(long, 4); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - case 'L': /* L for uint32_t, L! for unsigned long */ integer_size = NATINT_LEN(long, 4); bigendian_p = BIGENDIAN_P(); goto pack_integer; case 'q': /* q for int64_t, q! for signed long long */ - integer_size = NATINT_LEN_Q; - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - case 'Q': /* Q for uint64_t, Q! for unsigned long long */ integer_size = NATINT_LEN_Q; bigendian_p = BIGENDIAN_P(); @@ -550,7 +553,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) bigendian_p = explicit_endian == '>'; } if (integer_size > MAX_INTEGER_PACK_SIZE) - rb_bug("unexpected intger size for pack: %d", integer_size); + rb_bug("unexpected integer size for pack: %d", integer_size); while (len-- > 0) { char intbuf[MAX_INTEGER_PACK_SIZE]; @@ -628,11 +631,8 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) case 'x': /* null byte */ grow: - while (len >= 10) { - rb_str_buf_cat(res, nul10, 10); - len -= 10; - } - rb_str_buf_cat(res, nul10, len); + rb_str_modify_expand(res, len); + str_expand_fill(res, '\0', len); break; case 'X': /* back up byte */ @@ -671,10 +671,58 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) } break; + case 'r': /* r for SLEB128 encoding (signed) */ + case 'R': /* R for ULEB128 encoding (unsigned) */ + { + int pack_flags = INTEGER_PACK_LITTLE_ENDIAN; + + if (type == 'r') { + pack_flags |= INTEGER_PACK_2COMP; + } + + while (len-- > 0) { + size_t numbytes, nlz_bits; + int sign, extra = 0; + char *cp; + const long start = RSTRING_LEN(res); + + from = NEXTFROM; + from = rb_to_int(from); + if (type == 'R' && rb_int_negative_p(from)) { + rb_raise(rb_eArgError, "can't encode negative numbers in ULEB128"); + } + + numbytes = rb_absint_numwords(from, 7, &nlz_bits); + if (numbytes == 0) { + numbytes = 1; + } + else if (nlz_bits == 0 && type == 'r') { + /* No leading zero bits, we need an extra byte for sign extension */ + extra = 1; + } + rb_str_modify_expand(res, numbytes + extra); + + cp = RSTRING_PTR(res) + start; + sign = rb_integer_pack(from, cp, numbytes, 1, 1, pack_flags); + + if (extra) { + /* Need an extra byte */ + cp[numbytes++] = sign < 0 ? 0x7f : 0x00; + } + rb_str_set_len(res, start + numbytes); + + while (1 < numbytes) { + *cp |= 0x80; + cp++; + numbytes--; + } + } + } + break; case 'u': /* uuencoded string */ case 'm': /* base64 encoded string */ from = NEXTFROM; - StringValue(from); + STR_FROM(from); ptr = RSTRING_PTR(from); plen = RSTRING_LEN(from); @@ -704,6 +752,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) case 'M': /* quoted-printable encoded string */ from = rb_obj_as_string(NEXTFROM); + NOT_BUFFER(from); if (len <= 1) len = 72; qpencode(res, from, len); @@ -712,7 +761,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) case 'P': /* pointer to packed byte string */ from = THISFROM; if (!NIL_P(from)) { - StringValue(from); + STR_FROM(from); if (RSTRING_LEN(from) < len) { rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", RSTRING_LEN(from), len); @@ -722,13 +771,11 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) /* FALL THROUGH */ case 'p': /* pointer to string */ while (len-- > 0) { - char *t; + const char *t = 0; from = NEXTFROM; - if (NIL_P(from)) { - t = 0; - } - else { - t = StringValuePtr(from); + if (!NIL_P(from)) { + STR_FROM(from); + t = RSTRING_PTR(from); } if (!associates) { associates = rb_ary_new(); @@ -740,7 +787,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) case 'w': /* BER compressed integer */ while (len-- > 0) { - VALUE buf = rb_str_new(0, 0); + VALUE buf; size_t numbytes; int sign; char *cp; @@ -794,6 +841,12 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) return res; } +VALUE +rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) +{ + return pack_pack(ec, ary, fmt, buffer); +} + static const char uu_table[] = "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; static const char b64_table[] = @@ -938,25 +991,21 @@ hex2num(char c) # define AVOID_CC_BUG #endif -/* unpack mode */ -#define UNPACK_ARRAY 0 -#define UNPACK_BLOCK 1 -#define UNPACK_1 2 +enum unpack_mode { + UNPACK_ARRAY, + UNPACK_BLOCK, + UNPACK_1 +}; static VALUE -pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) +pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset) { #define hexdigits ruby_hexdigits - char *s, *send; - char *p, *pend; + const char *s, *send; + const char *p, *pend; VALUE ary, associates = Qfalse; - char type; long len; AVOID_CC_BUG long tmp_len; - int star; -#ifdef NATINT_PACK - int natint; /* native integer */ -#endif int signed_p, integer_size, bigendian_p; #define UNPACK_PUSH(item) do {\ VALUE item_val = (item);\ @@ -973,10 +1022,12 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) StringValue(str); StringValue(fmt); + rb_must_asciicompat(fmt); - if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative"); len = RSTRING_LEN(str); - if (offset > len) rb_raise(rb_eArgError, "offset outside of string"); + if (offset < 0 ? (offset += len) < 0 : offset > len) { + rb_raise(rb_eArgError, "offset outside of string"); + } s = RSTRING_PTR(str); send = s + len; @@ -990,49 +1041,14 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil; while (p < pend) { int explicit_endian = 0; - type = *p++; -#ifdef NATINT_PACK - natint = 0; -#endif - - if (ISSPACE(type)) continue; - if (type == '#') { - while ((p < pend) && (*p != '\n')) { - p++; - } - continue; - } - - star = 0; - { - modifiers: - switch (*p) { - case '_': - case '!': - - if (strchr(natstr, type)) { + const char type = *p++; #ifdef NATINT_PACK - natint = 1; + int natint = 0; /* native integer */ #endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); - } - goto modifiers; + int star = 0; - case '<': - case '>': - if (!strchr(endstr, type)) { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); - } - if (explicit_endian) { - rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); - } - explicit_endian = *p++; - goto modifiers; - } - } + if (skip_blank(p, type)) continue; + p = pack_modifiers(p, type, &natint, &explicit_endian); if (p >= pend) len = 1; @@ -1061,7 +1077,7 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) if (len > send - s) len = send - s; { long end = len; - char *t = s + len - 1; + const char *t = s + len - 1; while (t >= s) { if (*t != ' ' && *t != '\0') break; @@ -1074,7 +1090,7 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) case 'Z': { - char *t = s; + const char *t = s; if (len > send-s) len = send-s; while (t < s+len && *t) t++; @@ -1508,7 +1524,8 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) case 'M': { VALUE buf = rb_str_new(0, send - s); - char *ptr = RSTRING_PTR(buf), *ss = s; + char *ptr = RSTRING_PTR(buf); + const char *ss = s; int csum = 0; int c1, c2; @@ -1556,10 +1573,14 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) s += len; break; + case '^': + UNPACK_PUSH(SSIZET2NUM(s - RSTRING_PTR(str))); + break; + case 'P': if (sizeof(char *) <= (size_t)(send - s)) { VALUE tmp = Qnil; - char *t; + const char *t; UNPACK_FETCH(&t, char *); if (t) { @@ -1582,7 +1603,7 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) break; else { VALUE tmp = Qnil; - char *t; + const char *t; UNPACK_FETCH(&t, char *); if (t) { @@ -1594,9 +1615,42 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) } break; + case 'r': + case 'R': + { + int pack_flags = INTEGER_PACK_LITTLE_ENDIAN; + + if (type == 'r') { + pack_flags |= INTEGER_PACK_2COMP; + } + const char *s0 = s; + while (len > 0 && s < send) { + if (*s & 0x80) { + s++; + } + else { + s++; + UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, pack_flags)); + len--; + s0 = s; + } + } + /* Handle incomplete value and remaining expected values with nil (only if not using *) */ + if (!star) { + if (s0 != s && len > 0) { + UNPACK_PUSH(Qnil); + len--; + } + while (len-- > 0) { + UNPACK_PUSH(Qnil); + } + } + } + break; + case 'w': { - char *s0 = s; + const char *s0 = s; while (len > 0 && s < send) { if (*s & 0x80) { s++; @@ -1623,7 +1677,7 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset) static VALUE pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset) { - int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; + enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset)); } |
