summaryrefslogtreecommitdiff
path: root/pack.c
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-12-01 14:18:32 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-12-01 14:18:32 +0000
commit306f43acfef7959a9a41a13ce627b2493eb8993e (patch)
tree6f4622f4e7dcca09b6ac3ce4d9ddf34637202b22 /pack.c
parentb6e137e93c5fe62f3f6dc1a0112a6c067d456f9b (diff)
String#unpack1 [Feature #12752]
Returns the first value of String#unpack. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56959 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'pack.c')
-rw-r--r--pack.c280
1 files changed, 155 insertions, 125 deletions
diff --git a/pack.c b/pack.c
index 88adf74f5f..35dec48533 100644
--- a/pack.c
+++ b/pack.c
@@ -1021,7 +1021,7 @@ hex2num(char c)
} while (0)
#define PACK_ITEM_ADJUST() do { \
- if (tmp_len > 0 && !block_p) \
+ if (tmp_len > 0 && mode == UNPACK_ARRAY) \
rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
@@ -1043,128 +1043,13 @@ infected_str_new(const char *ptr, long len, VALUE str)
return s;
}
-/*
- * call-seq:
- * str.unpack(format) -> anArray
- *
- * Decodes <i>str</i> (which may contain binary data) according to the
- * format string, returning an array of each value extracted. The
- * format string consists of a sequence of single-character directives,
- * summarized in the table at the end of this entry.
- * Each directive may be followed
- * by a number, indicating the number of times to repeat with this
- * directive. An asterisk (``<code>*</code>'') will use up all
- * remaining elements. The directives <code>sSiIlL</code> may each be
- * followed by an underscore (``<code>_</code>'') or
- * exclamation mark (``<code>!</code>'') to use the underlying
- * platform's native size for the specified type; otherwise, it uses a
- * platform-independent consistent size. Spaces are ignored in the
- * format string. See also <code>Array#pack</code>.
- *
- * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
- * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
- * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
- * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
- * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
- * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
- * "now=20is".unpack('M*') #=> ["now is"]
- * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
- *
- * This table summarizes the various formats and the Ruby classes
- * returned by each.
- *
- * Integer | |
- * Directive | Returns | Meaning
- * ------------------------------------------------------------------
- * C | Integer | 8-bit unsigned (unsigned char)
- * S | Integer | 16-bit unsigned, native endian (uint16_t)
- * L | Integer | 32-bit unsigned, native endian (uint32_t)
- * Q | Integer | 64-bit unsigned, native endian (uint64_t)
- * J | Integer | pointer width unsigned, native endian (uintptr_t)
- * | | (J is available since Ruby 2.3.)
- * | |
- * c | Integer | 8-bit signed (signed char)
- * s | Integer | 16-bit signed, native endian (int16_t)
- * l | Integer | 32-bit signed, native endian (int32_t)
- * q | Integer | 64-bit signed, native endian (int64_t)
- * j | Integer | pointer width signed, native endian (intptr_t)
- * | | (j is available since Ruby 2.3.)
- * | |
- * S_ S! | Integer | unsigned short, native endian
- * I I_ I! | Integer | unsigned int, native endian
- * L_ L! | Integer | unsigned long, native endian
- * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
- * | | if the platform has no long long type.)
- * | | (Q_ and Q! is available since Ruby 2.1.)
- * J! | Integer | uintptr_t, native endian (same with J)
- * | | (J! is available since Ruby 2.3.)
- * | |
- * s_ s! | Integer | signed short, native endian
- * i i_ i! | Integer | signed int, native endian
- * l_ l! | Integer | signed long, native endian
- * q_ q! | Integer | signed long long, native endian (ArgumentError
- * | | if the platform has no long long type.)
- * | | (q_ and q! is available since Ruby 2.1.)
- * j! | Integer | intptr_t, native endian (same with j)
- * | | (j! is available since Ruby 2.3.)
- * | |
- * S> s> S!> s!> | Integer | same as the directives without ">" except
- * L> l> L!> l!> | | big endian
- * I!> i!> | | (available since Ruby 1.9.3)
- * Q> q> Q!> q!> | | "S>" is same as "n"
- * J> j> J!> j!> | | "L>" is same as "N"
- * | |
- * S< s< S!< s!< | Integer | same as the directives without "<" except
- * L< l< L!< l!< | | little endian
- * I!< i!< | | (available since Ruby 1.9.3)
- * Q< q< Q!< q!< | | "S<" is same as "v"
- * J< j< J!< j!< | | "L<" is same as "V"
- * | |
- * n | Integer | 16-bit unsigned, network (big-endian) byte order
- * N | Integer | 32-bit unsigned, network (big-endian) byte order
- * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
- * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
- * | |
- * U | Integer | UTF-8 character
- * w | Integer | BER-compressed integer (see Array.pack)
- *
- * Float | |
- * Directive | Returns | Meaning
- * -----------------------------------------------------------------
- * D d | Float | double-precision, native format
- * F f | Float | single-precision, native format
- * E | Float | double-precision, little-endian byte order
- * e | Float | single-precision, little-endian byte order
- * G | Float | double-precision, network (big-endian) byte order
- * g | Float | single-precision, network (big-endian) byte order
- *
- * String | |
- * Directive | Returns | Meaning
- * -----------------------------------------------------------------
- * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
- * a | String | arbitrary binary string
- * Z | String | null-terminated string
- * B | String | bit string (MSB first)
- * b | String | bit string (LSB first)
- * H | String | hex string (high nibble first)
- * h | String | hex string (low nibble first)
- * u | String | UU-encoded string
- * M | String | quoted-printable, MIME encoding (see RFC2045)
- * m | String | base64 encoded string (RFC 2045) (default)
- * | | base64 encoded string (RFC 4648) if followed by 0
- * P | String | pointer to a structure (fixed-length string)
- * p | String | pointer to a null-terminated string
- *
- * Misc. | |
- * Directive | Returns | Meaning
- * -----------------------------------------------------------------
- * @ | --- | skip to the offset given by the length argument
- * X | --- | skip backward one byte
- * x | --- | skip forward one byte
- */
+/* unpack mode */
+#define UNPACK_ARRAY 0
+#define UNPACK_BLOCK 1
+#define UNPACK_1 2
static VALUE
-pack_unpack(VALUE str, VALUE fmt)
+pack_unpack_internal(VALUE str, VALUE fmt, int mode)
{
#define hexdigits ruby_hexdigits
char *s, *send;
@@ -1177,16 +1062,18 @@ pack_unpack(VALUE str, VALUE fmt)
#ifdef NATINT_PACK
int natint; /* native integer */
#endif
- int block_p = rb_block_given_p();
int signed_p, integer_size, bigendian_p;
#define UNPACK_PUSH(item) do {\
VALUE item_val = (item);\
- if (block_p) {\
+ if ((mode) == UNPACK_BLOCK) {\
rb_yield(item_val);\
}\
- else {\
+ else if ((mode) == UNPACK_ARRAY) {\
rb_ary_push(ary, item_val);\
}\
+ else /* if ((mode) == UNPACK_1) { */ {\
+ return item_val; \
+ }\
} while (0)
StringValue(str);
@@ -1196,7 +1083,7 @@ pack_unpack(VALUE str, VALUE fmt)
p = RSTRING_PTR(fmt);
pend = p + RSTRING_LEN(fmt);
- ary = block_p ? Qnil : rb_ary_new();
+ ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
while (p < pend) {
int explicit_endian = 0;
type = *p++;
@@ -1868,6 +1755,148 @@ pack_unpack(VALUE str, VALUE fmt)
return ary;
}
+/*
+ * call-seq:
+ * str.unpack(format) -> anArray
+ *
+ * Decodes <i>str</i> (which may contain binary data) according to the
+ * format string, returning an array of each value extracted. The
+ * format string consists of a sequence of single-character directives,
+ * summarized in the table at the end of this entry.
+ * Each directive may be followed
+ * by a number, indicating the number of times to repeat with this
+ * directive. An asterisk (``<code>*</code>'') will use up all
+ * remaining elements. The directives <code>sSiIlL</code> may each be
+ * followed by an underscore (``<code>_</code>'') or
+ * exclamation mark (``<code>!</code>'') to use the underlying
+ * platform's native size for the specified type; otherwise, it uses a
+ * platform-independent consistent size. Spaces are ignored in the
+ * format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
+ *
+ * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
+ * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
+ * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
+ * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
+ * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
+ * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
+ * "now=20is".unpack('M*') #=> ["now is"]
+ * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
+ *
+ * This table summarizes the various formats and the Ruby classes
+ * returned by each.
+ *
+ * Integer | |
+ * Directive | Returns | Meaning
+ * ------------------------------------------------------------------
+ * C | Integer | 8-bit unsigned (unsigned char)
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
+ * J | Integer | pointer width unsigned, native endian (uintptr_t)
+ * | |
+ * c | Integer | 8-bit signed (signed char)
+ * s | Integer | 16-bit signed, native endian (int16_t)
+ * l | Integer | 32-bit signed, native endian (int32_t)
+ * q | Integer | 64-bit signed, native endian (int64_t)
+ * j | Integer | pointer width signed, native endian (intptr_t)
+ * | |
+ * S_ S! | Integer | unsigned short, native endian
+ * I I_ I! | Integer | unsigned int, native endian
+ * L_ L! | Integer | unsigned long, native endian
+ * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
+ * | | if the platform has no long long type.)
+ * J! | Integer | uintptr_t, native endian (same as J)
+ * | |
+ * s_ s! | Integer | signed short, native endian
+ * i i_ i! | Integer | signed int, native endian
+ * l_ l! | Integer | signed long, native endian
+ * q_ q! | Integer | signed long long, native endian (ArgumentError
+ * | | if the platform has no long long type.)
+ * j! | Integer | intptr_t, native endian (same as j)
+ * | |
+ * S> s> S!> s!> | Integer | same as the directives without ">" except
+ * L> l> L!> l!> | | big endian
+ * I!> i!> | |
+ * Q> q> Q!> q!> | | "S>" is same as "n"
+ * J> j> J!> j!> | | "L>" is same as "N"
+ * | |
+ * S< s< S!< s!< | Integer | same as the directives without "<" except
+ * L< l< L!< l!< | | little endian
+ * I!< i!< | |
+ * Q< q< Q!< q!< | | "S<" is same as "v"
+ * J< j< J!< j!< | | "L<" is same as "V"
+ * | |
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
+ * | |
+ * U | Integer | UTF-8 character
+ * w | Integer | BER-compressed integer (see Array#pack)
+ *
+ * Float | |
+ * Directive | Returns | Meaning
+ * -----------------------------------------------------------------
+ * D d | Float | double-precision, native format
+ * F f | Float | single-precision, native format
+ * E | Float | double-precision, little-endian byte order
+ * e | Float | single-precision, little-endian byte order
+ * G | Float | double-precision, network (big-endian) byte order
+ * g | Float | single-precision, network (big-endian) byte order
+ *
+ * String | |
+ * Directive | Returns | Meaning
+ * -----------------------------------------------------------------
+ * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
+ * a | String | arbitrary binary string
+ * Z | String | null-terminated string
+ * B | String | bit string (MSB first)
+ * b | String | bit string (LSB first)
+ * H | String | hex string (high nibble first)
+ * h | String | hex string (low nibble first)
+ * u | String | UU-encoded string
+ * M | String | quoted-printable, MIME encoding (see RFC2045)
+ * m | String | base64 encoded string (RFC 2045) (default)
+ * | | base64 encoded string (RFC 4648) if followed by 0
+ * P | String | pointer to a structure (fixed-length string)
+ * p | String | pointer to a null-terminated string
+ *
+ * Misc. | |
+ * Directive | Returns | Meaning
+ * -----------------------------------------------------------------
+ * @ | --- | skip to the offset given by the length argument
+ * X | --- | skip backward one byte
+ * x | --- | skip forward one byte
+ *
+ * HISTORY
+ *
+ * * J, J!, j, and j! are available since Ruby 2.3.
+ * * Q_, Q!, q_, and q! are available since Ruby 2.1.
+ * * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
+ */
+
+static VALUE
+pack_unpack(VALUE str, VALUE fmt)
+{
+ int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
+ return pack_unpack_internal(str, fmt, mode);
+}
+
+/*
+ * call-seq:
+ * str.unpack1(format) -> obj
+ *
+ * Decodes <i>str</i> (which may contain binary data) according to the
+ * format string, returning the first value extracted.
+ * See also <code>String#unpack</code>, <code>Array#pack</code>.
+ */
+
+static VALUE
+pack_unpack1(VALUE str, VALUE fmt)
+{
+ return pack_unpack_internal(str, fmt, UNPACK_1);
+}
+
int
rb_uv_to_utf8(char buf[6], unsigned long uv)
{
@@ -1980,6 +2009,7 @@ Init_pack(void)
{
rb_define_method(rb_cArray, "pack", pack_pack, -1);
rb_define_method(rb_cString, "unpack", pack_unpack, 1);
+ rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);
id_associated = rb_make_internal_id();
}