pack.c: add an offset argument to unpack and unpack1

[Feature #18254] This is useful to avoid repeatedly copying strings when parsing binary formats.
author: Jean Boussier <jean.boussier@gmail.com> 2021-10-18 16:23:54 +0200
committer: Jean Boussier <jean.boussier@gmail.com> 2021-10-26 22:27:30 +0200
commit: e5319dc9856298f38aa9cdc6ed55e39ad0e8e070 (patch)
tree: 7c1d685ae65979ce2ae3bee4ccce54507d3f49f8 /pack.rb
parent: 717ab0bb2ee63dfe76076e0c9f91fbac3a0de4fd (diff)
1 files changed, 24 insertions, 7 deletions
diff --git a/pack.rb b/pack.rb
index e4d629e0f2..8f01861dc9 100644
--- a/pack.rb
+++ b/pack.rb
@@ -148,10 +148,11 @@ end
 class String
   # call-seq:
   #    str.unpack(format)    ->  anArray
+  #    str.unpack(format, offset: anInteger)    ->  anArray
   #
   # Decodes <i>str</i> (which may contain binary data) according to the
-  # format string, returning an array of each value extracted. The
-  # format string consists of a sequence of single-character directives,
+  # format string, returning an array of each value extracted.
+  # The format string consists of a sequence of single-character directives,
   # summarized in the table at the end of this entry.
   # Each directive may be followed
   # by a number, indicating the number of times to repeat with this
@@ -161,7 +162,15 @@ class String
   # exclamation mark (``<code>!</code>'') to use the underlying
   # platform's native size for the specified type; otherwise, it uses a
   # platform-independent consistent size. Spaces are ignored in the
-  # format string. See also String#unpack1,  Array#pack.
+  # format string.
+  #
+  # The keyword <i>offset</i> can be given to start the decoding after skipping
+  # the specified number of bytes:
+  #   "abc".unpack("C*") # => [97, 98, 99]
+  #   "abc".unpack("C*", offset: 2) # => [99]
+  #   "abc".unpack("C*", offset: 4) # => offset outside of string (ArgumentError)
+  #
+  # See also String#unpack1,  Array#pack.
   #
   #    "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
   #    "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
@@ -263,15 +272,23 @@ class String
   # * J, J! j, and j! are available since Ruby 2.3.
   # * Q_, Q!, q_, and q! are available since Ruby 2.1.
   # * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
-  def unpack(fmt)
-    Primitive.pack_unpack(fmt)
+  def unpack(fmt, offset: 0)
+    Primitive.pack_unpack(fmt, offset)
   end
 
   # call-seq:
   #    str.unpack1(format)    ->  obj
+  #    str.unpack1(format, offset: anInteger)    ->  obj
   #
   # Decodes <i>str</i> (which may contain binary data) according to the
   # format string, returning the first value extracted.
+  #
+  # The keyword <i>offset</i> can be given to start the decoding after skipping
+  # the specified number of bytes:
+  #   "abc".unpack1("C*") # => 97
+  #   "abc".unpack1("C*", offset: 2) # => 99
+  #   "abc".unpack1("C*", offset: 4) # => offset outside of string (ArgumentError)
+  #
   # See also String#unpack, Array#pack.
   #
   # Contrast with String#unpack:
@@ -287,7 +304,7 @@ class String
   #
   # Thus unpack1 is convenient, makes clear the intention and signals
   # the expected return value to those reading the code.
-  def unpack1(fmt)
-    Primitive.pack_unpack1(fmt)
+  def unpack1(fmt, offset: 0)
+    Primitive.pack_unpack1(fmt, offset)
   end
 end
author	Jean Boussier <jean.boussier@gmail.com>	2021-10-18 16:23:54 +0200
committer	Jean Boussier <jean.boussier@gmail.com>	2021-10-26 22:27:30 +0200
commit	e5319dc9856298f38aa9cdc6ed55e39ad0e8e070 (patch)
tree	7c1d685ae65979ce2ae3bee4ccce54507d3f49f8 /pack.rb
parent	717ab0bb2ee63dfe76076e0c9f91fbac3a0de4fd (diff)