summary refs log tree commit diff
path: root/pack.c
diff options
context:
space:
mode:
Diffstat (limited to 'pack.c')
-rw-r--r-- pack.c 312
1 files changed, 188 insertions, 124 deletions
diff --git a/pack.c b/pack.c
index 4fdaf7fd89..24221bc3d6 100644
--- a/pack.c
+++ b/pack.c
@@ -19,6 +19,7 @@
#include "internal.h"
#include "internal/array.h"
#include "internal/bits.h"
+#include "internal/numeric.h"
#include "internal/string.h"
#include "internal/symbol.h"
#include "internal/variable.h"
@@ -61,7 +62,7 @@ is_bigendian(void)
{
static int init = 0;
static int endian_value;
- char *p;
+ const char *p;
if (init) return endian_value;
init = 1;
@@ -118,6 +119,7 @@ typedef union {
#define MAX_INTEGER_PACK_SIZE 8
static const char toofew[] = "too few arguments";
+static const char intoitself[] = "cannot pack buffer object into itself";
static void encodes(VALUE,const char*,long,int,int);
static void qpencode(VALUE,VALUE,long);
@@ -193,20 +195,69 @@ VALUE_to_float(VALUE obj)
}
}
+/*
+ * Appends `len` copies of the byte `c` to `res` and extends its length
+ * accordingly.  Nothing is allocated here: the bytes are written directly
+ * past the current end of the string, so the caller has to reserve the
+ * room beforehand (pack_pack() calls rb_str_modify_expand() first).
+ */
+static void
+str_expand_fill(VALUE res, int c, long len)
+{
+    const long used = RSTRING_LEN(res);
+    char *const tail = RSTRING_PTR(res) + used;
+
+    memset(tail, c, len);
+    rb_str_set_len(res, used + len);
+}
+
+/*
+ * Returns the position just after the first '\n' found in [p, pend), or
+ * `pend` itself when the range contains no newline.  The const qualifier
+ * of the input is cast away on the returned pointer.
+ */
+static char *
+skip_to_eol(const char *p, const char *pend)
+{
+    const char *const newline = memchr(p, '\n', pend - p);
+
+    if (!newline) {
+        return (char *)pend;
+    }
+    return (char *)(newline + 1);
+}
+
+/*
+ * True when the directive character `type` is not a real directive: either
+ * whitespace, or '#', in which case `p` is also advanced past the rest of
+ * the comment line via skip_to_eol().  The expansion reads a variable named
+ * `pend` from the enclosing scope and evaluates `type` more than once, so
+ * pass plain lvalues/identifiers only.
+ */
+#define skip_blank(p, type) \
+    (ISSPACE(type) || ((type) == '#' && ((p) = skip_to_eol((p), pend), 1)))
+
+#ifndef NATINT_PACK
+/* Without NATINT_PACK the `natint` argument is dropped, both from the
+ * definition below and from every call, by this same-named macro. */
+# define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e)
+#endif
+/*
+ * Consumes the modifier characters that follow the directive `type`,
+ * starting at `p`, and returns a pointer to the first character that is
+ * not a modifier.
+ *
+ *   '_' / '!'  allowed only when `type` occurs in natstr; sets *natint to 1
+ *              when NATINT_PACK is defined.
+ *   '<' / '>'  allowed only when `type` occurs in endstr; the character is
+ *              stored in *explicit_endian, which must still be 0.
+ *
+ * Raises ArgumentError for a modifier that does not fit `type`, and
+ * RangeError when a second endian modifier is seen.  The scan has no end
+ * pointer; it stops at the first non-modifier character.
+ */
+static char *
+pack_modifiers(const char *p, char type, int *natint, int *explicit_endian)
+{
+    for (;;) {
+        const char mod = *p;
+
+        if (mod == '_' || mod == '!') {
+            if (!strchr(natstr, type)) {
+                rb_raise(rb_eArgError, "'%c' allowed only after types %s", mod, natstr);
+            }
+#ifdef NATINT_PACK
+            *natint = 1;
+#endif
+            p++;
+        }
+        else if (mod == '<' || mod == '>') {
+            if (!strchr(endstr, type)) {
+                rb_raise(rb_eArgError, "'%c' allowed only after types %s", mod, endstr);
+            }
+            if (*explicit_endian) {
+                rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
+            }
+            *explicit_endian = mod;
+            p++;
+        }
+        else {
+            /* Not a modifier: hand the position back to the caller. */
+            return (char *)p;
+        }
+    }
+}
+
static VALUE
pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
{
- static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
- static const char spc10[] = " ";
const char *p, *pend;
VALUE res, from, associates = 0;
- char type;
long len, idx, plen;
const char *ptr;
int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
-#ifdef NATINT_PACK
- int natint; /* native integer */
-#endif
int integer_size, bigendian_p;
StringValue(fmt);
@@ -230,53 +281,21 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
#define MORE_ITEM (idx < RARRAY_LEN(ary))
#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
+#define NOT_BUFFER(val) (((val) == res) ? rb_raise(rb_eArgError, intoitself) : (void)0)
+#define STR_FROM(val) NOT_BUFFER(StringValue(val))
while (p < pend) {
int explicit_endian = 0;
- if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
+ if (RSTRING_END(fmt) != pend) {
rb_raise(rb_eRuntimeError, "format string modified");
}
- type = *p++; /* get data type */
+ const char type = *p++; /* get data type */
#ifdef NATINT_PACK
- natint = 0;
+ int natint = 0; /* native integer */
#endif
- if (ISSPACE(type)) continue;
- if (type == '#') {
- while ((p < pend) && (*p != '\n')) {
- p++;
- }
- continue;
- }
-
- {
- modifiers:
- switch (*p) {
- case '_':
- case '!':
- if (strchr(natstr, type)) {
-#ifdef NATINT_PACK
- natint = 1;
-#endif
- p++;
- }
- else {
- rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
- }
- goto modifiers;
-
- case '<':
- case '>':
- if (!strchr(endstr, type)) {
- rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
- }
- if (explicit_endian) {
- rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
- }
- explicit_endian = *p++;
- goto modifiers;
- }
- }
+ if (skip_blank(p, type)) continue;
+ p = pack_modifiers(p, type, &natint, &explicit_endian);
if (*p == '*') { /* set data length */
len = strchr("@Xxu", type) ? 0
@@ -287,7 +306,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
else if (ISDIGIT(*p)) {
errno = 0;
len = STRTOUL(p, (char**)&p, 10);
- if (errno) {
+ if (len < 0 || errno) {
rb_raise(rb_eRangeError, "pack length too big");
}
}
@@ -318,7 +337,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
plen = 0;
}
else {
- StringValue(from);
+ STR_FROM(from);
ptr = RSTRING_PTR(from);
plen = RSTRING_LEN(from);
}
@@ -333,16 +352,12 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
if (plen >= len) {
rb_str_buf_cat(res, ptr, len);
if (p[-1] == '*' && type == 'Z')
- rb_str_buf_cat(res, nul10, 1);
+ rb_str_buf_cat(res, "", 1);
}
else {
+ rb_str_modify_expand(res, len);
rb_str_buf_cat(res, ptr, plen);
- len -= plen;
- while (len >= 10) {
- rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
- len -= 10;
- }
- rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
+ str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen);
}
break;
@@ -616,11 +631,8 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
case 'x': /* null byte */
grow:
- while (len >= 10) {
- rb_str_buf_cat(res, nul10, 10);
- len -= 10;
- }
- rb_str_buf_cat(res, nul10, len);
+ rb_str_modify_expand(res, len);
+ str_expand_fill(res, '\0', len);
break;
case 'X': /* back up byte */
@@ -659,10 +671,58 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
}
break;
+ case 'r': /* r for SLEB128 encoding (signed) */
+ case 'R': /* R for ULEB128 encoding (unsigned) */
+ {
+ int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
+
+ if (type == 'r') {
+ pack_flags |= INTEGER_PACK_2COMP;
+ }
+
+ while (len-- > 0) {
+ size_t numbytes, nlz_bits;
+ int sign, extra = 0;
+ char *cp;
+ const long start = RSTRING_LEN(res);
+
+ from = NEXTFROM;
+ from = rb_to_int(from);
+ if (type == 'R' && rb_int_negative_p(from)) {
+ rb_raise(rb_eArgError, "can't encode negative numbers in ULEB128");
+ }
+
+ numbytes = rb_absint_numwords(from, 7, &nlz_bits);
+ if (numbytes == 0) {
+ numbytes = 1;
+ }
+ else if (nlz_bits == 0 && type == 'r') {
+ /* No leading zero bits, we need an extra byte for sign extension */
+ extra = 1;
+ }
+ rb_str_modify_expand(res, numbytes + extra);
+
+ cp = RSTRING_PTR(res) + start;
+ sign = rb_integer_pack(from, cp, numbytes, 1, 1, pack_flags);
+
+ if (extra) {
+ /* Need an extra byte */
+ cp[numbytes++] = sign < 0 ? 0x7f : 0x00;
+ }
+ rb_str_set_len(res, start + numbytes);
+
+ while (1 < numbytes) {
+ *cp |= 0x80;
+ cp++;
+ numbytes--;
+ }
+ }
+ }
+ break;
case 'u': /* uuencoded string */
case 'm': /* base64 encoded string */
from = NEXTFROM;
- StringValue(from);
+ STR_FROM(from);
ptr = RSTRING_PTR(from);
plen = RSTRING_LEN(from);
@@ -692,6 +752,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
case 'M': /* quoted-printable encoded string */
from = rb_obj_as_string(NEXTFROM);
+ NOT_BUFFER(from);
if (len <= 1)
len = 72;
qpencode(res, from, len);
@@ -700,7 +761,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
case 'P': /* pointer to packed byte string */
from = THISFROM;
if (!NIL_P(from)) {
- StringValue(from);
+ STR_FROM(from);
if (RSTRING_LEN(from) < len) {
rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
RSTRING_LEN(from), len);
@@ -710,13 +771,11 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
/* FALL THROUGH */
case 'p': /* pointer to string */
while (len-- > 0) {
- char *t;
+ const char *t = 0;
from = NEXTFROM;
- if (NIL_P(from)) {
- t = 0;
- }
- else {
- t = StringValuePtr(from);
+ if (!NIL_P(from)) {
+ STR_FROM(from);
+ t = RSTRING_PTR(from);
}
if (!associates) {
associates = rb_ary_new();
@@ -728,7 +787,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
case 'w': /* BER compressed integer */
while (len-- > 0) {
- VALUE buf = rb_str_new(0, 0);
+ VALUE buf;
size_t numbytes;
int sign;
char *cp;
@@ -782,6 +841,12 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
return res;
}
+/*
+ * Externally visible (non-static) wrapper that forwards its arguments
+ * unchanged to the file-local pack_pack() and returns its result.
+ */
+VALUE
+rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
+{
+    VALUE packed = pack_pack(ec, ary, fmt, buffer);
+
+    return packed;
+}
+
static const char uu_table[] =
"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
static const char b64_table[] =
@@ -936,16 +1001,11 @@ static VALUE
pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
{
#define hexdigits ruby_hexdigits
- char *s, *send;
- char *p, *pend;
+ const char *s, *send;
+ const char *p, *pend;
VALUE ary, associates = Qfalse;
- char type;
long len;
AVOID_CC_BUG long tmp_len;
- int star;
-#ifdef NATINT_PACK
- int natint; /* native integer */
-#endif
int signed_p, integer_size, bigendian_p;
#define UNPACK_PUSH(item) do {\
VALUE item_val = (item);\
@@ -964,9 +1024,10 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
StringValue(fmt);
rb_must_asciicompat(fmt);
- if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
len = RSTRING_LEN(str);
- if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
+ if (offset < 0 ? (offset += len) < 0 : offset > len) {
+ rb_raise(rb_eArgError, "offset outside of string");
+ }
s = RSTRING_PTR(str);
send = s + len;
@@ -980,49 +1041,14 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
while (p < pend) {
int explicit_endian = 0;
- type = *p++;
+ const char type = *p++;
#ifdef NATINT_PACK
- natint = 0;
+ int natint = 0; /* native integer */
#endif
+ int star = 0;
- if (ISSPACE(type)) continue;
- if (type == '#') {
- while ((p < pend) && (*p != '\n')) {
- p++;
- }
- continue;
- }
-
- star = 0;
- {
- modifiers:
- switch (*p) {
- case '_':
- case '!':
-
- if (strchr(natstr, type)) {
-#ifdef NATINT_PACK
- natint = 1;
-#endif
- p++;
- }
- else {
- rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
- }
- goto modifiers;
-
- case '<':
- case '>':
- if (!strchr(endstr, type)) {
- rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
- }
- if (explicit_endian) {
- rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
- }
- explicit_endian = *p++;
- goto modifiers;
- }
- }
+ if (skip_blank(p, type)) continue;
+ p = pack_modifiers(p, type, &natint, &explicit_endian);
if (p >= pend)
len = 1;
@@ -1051,7 +1077,7 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
if (len > send - s) len = send - s;
{
long end = len;
- char *t = s + len - 1;
+ const char *t = s + len - 1;
while (t >= s) {
if (*t != ' ' && *t != '\0') break;
@@ -1064,7 +1090,7 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
case 'Z':
{
- char *t = s;
+ const char *t = s;
if (len > send-s) len = send-s;
while (t < s+len && *t) t++;
@@ -1498,7 +1524,8 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
case 'M':
{
VALUE buf = rb_str_new(0, send - s);
- char *ptr = RSTRING_PTR(buf), *ss = s;
+ char *ptr = RSTRING_PTR(buf);
+ const char *ss = s;
int csum = 0;
int c1, c2;
@@ -1546,10 +1573,14 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
s += len;
break;
+ case '^':
+ UNPACK_PUSH(SSIZET2NUM(s - RSTRING_PTR(str)));
+ break;
+
case 'P':
if (sizeof(char *) <= (size_t)(send - s)) {
VALUE tmp = Qnil;
- char *t;
+ const char *t;
UNPACK_FETCH(&t, char *);
if (t) {
@@ -1572,7 +1603,7 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
break;
else {
VALUE tmp = Qnil;
- char *t;
+ const char *t;
UNPACK_FETCH(&t, char *);
if (t) {
@@ -1584,9 +1615,42 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
}
break;
+ case 'r':
+ case 'R':
+ {
+ int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
+
+ if (type == 'r') {
+ pack_flags |= INTEGER_PACK_2COMP;
+ }
+ const char *s0 = s;
+ while (len > 0 && s < send) {
+ if (*s & 0x80) {
+ s++;
+ }
+ else {
+ s++;
+ UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, pack_flags));
+ len--;
+ s0 = s;
+ }
+ }
+ /* Handle incomplete value and remaining expected values with nil (only if not using *) */
+ if (!star) {
+ if (s0 != s && len > 0) {
+ UNPACK_PUSH(Qnil);
+ len--;
+ }
+ while (len-- > 0) {
+ UNPACK_PUSH(Qnil);
+ }
+ }
+ }
+ break;
+
case 'w':
{
- char *s0 = s;
+ const char *s0 = s;
while (len > 0 && s < send) {
if (*s & 0x80) {
s++;