From 306f43acfef7959a9a41a13ce627b2493eb8993e Mon Sep 17 00:00:00 2001 From: naruse Date: Thu, 1 Dec 2016 14:18:32 +0000 Subject: String#unpack1 [Feature #12752] Returns the first value of String#unpack. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56959 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- pack.c | 280 ++++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 155 insertions(+), 125 deletions(-) (limited to 'pack.c') diff --git a/pack.c b/pack.c index 88adf74f5f..35dec48533 100644 --- a/pack.c +++ b/pack.c @@ -1021,7 +1021,7 @@ hex2num(char c) } while (0) #define PACK_ITEM_ADJUST() do { \ - if (tmp_len > 0 && !block_p) \ + if (tmp_len > 0 && mode == UNPACK_ARRAY) \ rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ } while (0) @@ -1043,128 +1043,13 @@ infected_str_new(const char *ptr, long len, VALUE str) return s; } -/* - * call-seq: - * str.unpack(format) -> anArray - * - * Decodes str (which may contain binary data) according to the - * format string, returning an array of each value extracted. The - * format string consists of a sequence of single-character directives, - * summarized in the table at the end of this entry. - * Each directive may be followed - * by a number, indicating the number of times to repeat with this - * directive. An asterisk (``*'') will use up all - * remaining elements. The directives sSiIlL may each be - * followed by an underscore (``_'') or - * exclamation mark (``!'') to use the underlying - * platform's native size for the specified type; otherwise, it uses a - * platform-independent consistent size. Spaces are ignored in the - * format string. See also Array#pack. 
- * - * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] - * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] - * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] - * "aa".unpack('b8B8') #=> ["10000110", "01100001"] - * "aaa".unpack('h2H2c') #=> ["16", "61", 97] - * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] - * "now=20is".unpack('M*') #=> ["now is"] - * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] - * - * This table summarizes the various formats and the Ruby classes - * returned by each. - * - * Integer | | - * Directive | Returns | Meaning - * ------------------------------------------------------------------ - * C | Integer | 8-bit unsigned (unsigned char) - * S | Integer | 16-bit unsigned, native endian (uint16_t) - * L | Integer | 32-bit unsigned, native endian (uint32_t) - * Q | Integer | 64-bit unsigned, native endian (uint64_t) - * J | Integer | pointer width unsigned, native endian (uintptr_t) - * | | (J is available since Ruby 2.3.) - * | | - * c | Integer | 8-bit signed (signed char) - * s | Integer | 16-bit signed, native endian (int16_t) - * l | Integer | 32-bit signed, native endian (int32_t) - * q | Integer | 64-bit signed, native endian (int64_t) - * j | Integer | pointer width signed, native endian (intptr_t) - * | | (j is available since Ruby 2.3.) - * | | - * S_ S! | Integer | unsigned short, native endian - * I I_ I! | Integer | unsigned int, native endian - * L_ L! | Integer | unsigned long, native endian - * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError - * | | if the platform has no long long type.) - * | | (Q_ and Q! is available since Ruby 2.1.) - * J! | Integer | uintptr_t, native endian (same with J) - * | | (J! is available since Ruby 2.3.) - * | | - * s_ s! | Integer | signed short, native endian - * i i_ i! | Integer | signed int, native endian - * l_ l! | Integer | signed long, native endian - * q_ q! 
| Integer | signed long long, native endian (ArgumentError - * | | if the platform has no long long type.) - * | | (q_ and q! is available since Ruby 2.1.) - * j! | Integer | intptr_t, native endian (same with j) - * | | (j! is available since Ruby 2.3.) - * | | - * S> s> S!> s!> | Integer | same as the directives without ">" except - * L> l> L!> l!> | | big endian - * I!> i!> | | (available since Ruby 1.9.3) - * Q> q> Q!> q!> | | "S>" is same as "n" - * J> j> J!> j!> | | "L>" is same as "N" - * | | - * S< s< S!< s!< | Integer | same as the directives without "<" except - * L< l< L!< l!< | | little endian - * I!< i!< | | (available since Ruby 1.9.3) - * Q< q< Q!< q!< | | "S<" is same as "v" - * J< j< J!< j!< | | "L<" is same as "V" - * | | - * n | Integer | 16-bit unsigned, network (big-endian) byte order - * N | Integer | 32-bit unsigned, network (big-endian) byte order - * v | Integer | 16-bit unsigned, VAX (little-endian) byte order - * V | Integer | 32-bit unsigned, VAX (little-endian) byte order - * | | - * U | Integer | UTF-8 character - * w | Integer | BER-compressed integer (see Array.pack) - * - * Float | | - * Directive | Returns | Meaning - * ----------------------------------------------------------------- - * D d | Float | double-precision, native format - * F f | Float | single-precision, native format - * E | Float | double-precision, little-endian byte order - * e | Float | single-precision, little-endian byte order - * G | Float | double-precision, network (big-endian) byte order - * g | Float | single-precision, network (big-endian) byte order - * - * String | | - * Directive | Returns | Meaning - * ----------------------------------------------------------------- - * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces) - * a | String | arbitrary binary string - * Z | String | null-terminated string - * B | String | bit string (MSB first) - * b | String | bit string (LSB first) - * H | String | hex string (high nibble 
first) - * h | String | hex string (low nibble first) - * u | String | UU-encoded string - * M | String | quoted-printable, MIME encoding (see RFC2045) - * m | String | base64 encoded string (RFC 2045) (default) - * | | base64 encoded string (RFC 4648) if followed by 0 - * P | String | pointer to a structure (fixed-length string) - * p | String | pointer to a null-terminated string - * - * Misc. | | - * Directive | Returns | Meaning - * ----------------------------------------------------------------- - * @ | --- | skip to the offset given by the length argument - * X | --- | skip backward one byte - * x | --- | skip forward one byte - */ +/* unpack mode */ +#define UNPACK_ARRAY 0 +#define UNPACK_BLOCK 1 +#define UNPACK_1 2 static VALUE -pack_unpack(VALUE str, VALUE fmt) +pack_unpack_internal(VALUE str, VALUE fmt, int mode) { #define hexdigits ruby_hexdigits char *s, *send; @@ -1177,16 +1062,18 @@ pack_unpack(VALUE str, VALUE fmt) #ifdef NATINT_PACK int natint; /* native integer */ #endif - int block_p = rb_block_given_p(); int signed_p, integer_size, bigendian_p; #define UNPACK_PUSH(item) do {\ VALUE item_val = (item);\ - if (block_p) {\ + if ((mode) == UNPACK_BLOCK) {\ rb_yield(item_val);\ }\ - else {\ + else if ((mode) == UNPACK_ARRAY) {\ rb_ary_push(ary, item_val);\ }\ + else /* if ((mode) == UNPACK_1) { */ {\ + return item_val; \ + }\ } while (0) StringValue(str); @@ -1196,7 +1083,7 @@ pack_unpack(VALUE str, VALUE fmt) p = RSTRING_PTR(fmt); pend = p + RSTRING_LEN(fmt); - ary = block_p ? Qnil : rb_ary_new(); + ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil; while (p < pend) { int explicit_endian = 0; type = *p++; @@ -1868,6 +1755,148 @@ pack_unpack(VALUE str, VALUE fmt) return ary; } +/* + * call-seq: + * str.unpack(format) -> anArray + * + * Decodes str (which may contain binary data) according to the + * format string, returning an array of each value extracted. 
The + * format string consists of a sequence of single-character directives, + * summarized in the table at the end of this entry. + * Each directive may be followed + * by a number, indicating the number of times to repeat with this + * directive. An asterisk (``*'') will use up all + * remaining elements. The directives sSiIlL may each be + * followed by an underscore (``_'') or + * exclamation mark (``!'') to use the underlying + * platform's native size for the specified type; otherwise, it uses a + * platform-independent consistent size. Spaces are ignored in the + * format string. See also String#unpack1, Array#pack. + * + * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] + * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] + * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] + * "aa".unpack('b8B8') #=> ["10000110", "01100001"] + * "aaa".unpack('h2H2c') #=> ["16", "61", 97] + * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] + * "now=20is".unpack('M*') #=> ["now is"] + * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] + * + * This table summarizes the various formats and the Ruby classes + * returned by each. + * + * Integer | | + * Directive | Returns | Meaning + * ------------------------------------------------------------------ + * C | Integer | 8-bit unsigned (unsigned char) + * S | Integer | 16-bit unsigned, native endian (uint16_t) + * L | Integer | 32-bit unsigned, native endian (uint32_t) + * Q | Integer | 64-bit unsigned, native endian (uint64_t) + * J | Integer | pointer width unsigned, native endian (uintptr_t) + * | | + * c | Integer | 8-bit signed (signed char) + * s | Integer | 16-bit signed, native endian (int16_t) + * l | Integer | 32-bit signed, native endian (int32_t) + * q | Integer | 64-bit signed, native endian (int64_t) + * j | Integer | pointer width signed, native endian (intptr_t) + * | | + * S_ S! | Integer | unsigned short, native endian + * I I_ I! | Integer | unsigned int, native endian + * L_ L! 
| Integer | unsigned long, native endian + * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError + * | | if the platform has no long long type.) + * J! | Integer | uintptr_t, native endian (same with J) + * | | + * s_ s! | Integer | signed short, native endian + * i i_ i! | Integer | signed int, native endian + * l_ l! | Integer | signed long, native endian + * q_ q! | Integer | signed long long, native endian (ArgumentError + * | | if the platform has no long long type.) + * j! | Integer | intptr_t, native endian (same with j) + * | | + * S> s> S!> s!> | Integer | same as the directives without ">" except + * L> l> L!> l!> | | big endian + * I!> i!> | | + * Q> q> Q!> q!> | | "S>" is same as "n" + * J> j> J!> j!> | | "L>" is same as "N" + * | | + * S< s< S!< s!< | Integer | same as the directives without "<" except + * L< l< L!< l!< | | little endian + * I!< i!< | | + * Q< q< Q!< q!< | | "S<" is same as "v" + * J< j< J!< j!< | | "L<" is same as "V" + * | | + * n | Integer | 16-bit unsigned, network (big-endian) byte order + * N | Integer | 32-bit unsigned, network (big-endian) byte order + * v | Integer | 16-bit unsigned, VAX (little-endian) byte order + * V | Integer | 32-bit unsigned, VAX (little-endian) byte order + * | | + * U | Integer | UTF-8 character + * w | Integer | BER-compressed integer (see Array.pack) + * + * Float | | + * Directive | Returns | Meaning + * ----------------------------------------------------------------- + * D d | Float | double-precision, native format + * F f | Float | single-precision, native format + * E | Float | double-precision, little-endian byte order + * e | Float | single-precision, little-endian byte order + * G | Float | double-precision, network (big-endian) byte order + * g | Float | single-precision, network (big-endian) byte order + * + * String | | + * Directive | Returns | Meaning + * ----------------------------------------------------------------- + * A | String | arbitrary binary string (remove 
trailing nulls and ASCII spaces) + * a | String | arbitrary binary string + * Z | String | null-terminated string + * B | String | bit string (MSB first) + * b | String | bit string (LSB first) + * H | String | hex string (high nibble first) + * h | String | hex string (low nibble first) + * u | String | UU-encoded string + * M | String | quoted-printable, MIME encoding (see RFC2045) + * m | String | base64 encoded string (RFC 2045) (default) + * | | base64 encoded string (RFC 4648) if followed by 0 + * P | String | pointer to a structure (fixed-length string) + * p | String | pointer to a null-terminated string + * + * Misc. | | + * Directive | Returns | Meaning + * ----------------------------------------------------------------- + * @ | --- | skip to the offset given by the length argument + * X | --- | skip backward one byte + * x | --- | skip forward one byte + * + * HISTORY + * + * * J, J!, j, and j! are available since Ruby 2.3. + * * Q_, Q!, q_, and q! are available since Ruby 2.1. + * * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3. + */ + +static VALUE +pack_unpack(VALUE str, VALUE fmt) +{ + int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; + return pack_unpack_internal(str, fmt, mode); +} + +/* + * call-seq: + * str.unpack1(format) -> obj + * + * Decodes str (which may contain binary data) according to the + * format string, returning the first value extracted. + * See also String#unpack, Array#pack. + */ + +static VALUE +pack_unpack1(VALUE str, VALUE fmt) +{ + return pack_unpack_internal(str, fmt, UNPACK_1); +} + int rb_uv_to_utf8(char buf[6], unsigned long uv) { @@ -1980,6 +2009,7 @@ Init_pack(void) { rb_define_method(rb_cArray, "pack", pack_pack, -1); rb_define_method(rb_cString, "unpack", pack_unpack, 1); + rb_define_method(rb_cString, "unpack1", pack_unpack1, 1); id_associated = rb_make_internal_id(); } -- cgit v1.2.3