summaryrefslogtreecommitdiff
path: root/ext/zlib/zlib.c
diff options
context:
space:
mode:
authordrbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-07-10 18:28:40 +0000
committerdrbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-07-10 18:28:40 +0000
commit3cf7d1b57e3622430065f6a6ce8cbd5548d3d894 (patch)
treea5ddd1f0cdd3ee638521f1b275c252f1e49fb00e /ext/zlib/zlib.c
parentc51a826764c3307a7fe9258e1d18ddca93cb7b5f (diff)
* ext/zlib/zlib.c: Added streaming support to inflate processing.
This allows zlib streams to be processed without huge memory growth. [Feature #6612] * NEWS: ditto * ext/zlib/zlib.c (zstream_expand_buffer): Uses rb_yield when a block is given for streaming support. Refactored to use zstream_expand_buffer_into to remove duplicate code. * ext/zlib/zlib.c (zstream_expand_buffer_protect): Added wrapper function to pass jump state back through GVL-free section to allow zstream clean-up before terminating the ruby call. * ext/zlib/zlib.c (zstream_expand_buffer_without_gvl): Acquire GVL to yield processed chunk of output stream. * ext/zlib/zlib.c (zstream_detach_buffer): When a block is given, returns Qnil mid-stream and yields the output buffer at the end of the stream. * test/zlib/test_zlib.rb: Updated tests git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36356 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/zlib/zlib.c')
-rw-r--r--ext/zlib/zlib.c184
1 files changed, 133 insertions, 51 deletions
diff --git a/ext/zlib/zlib.c b/ext/zlib/zlib.c
index 3fb1be9..e2b4f44 100644
--- a/ext/zlib/zlib.c
+++ b/ext/zlib/zlib.c
@@ -545,12 +545,17 @@ struct zstream {
#define ZSTREAM_FLAG_IN_STREAM 0x2
#define ZSTREAM_FLAG_FINISHED 0x4
#define ZSTREAM_FLAG_CLOSING 0x8
-#define ZSTREAM_FLAG_UNUSED 0x10
+#define ZSTREAM_FLAG_GZFILE 0x10 /* disallows yield from expand_buffer for
+ gzip*/
+#define ZSTREAM_FLAG_UNUSED 0x20
#define ZSTREAM_READY(z) ((z)->flags |= ZSTREAM_FLAG_READY)
#define ZSTREAM_IS_READY(z) ((z)->flags & ZSTREAM_FLAG_READY)
#define ZSTREAM_IS_FINISHED(z) ((z)->flags & ZSTREAM_FLAG_FINISHED)
#define ZSTREAM_IS_CLOSING(z) ((z)->flags & ZSTREAM_FLAG_CLOSING)
+#define ZSTREAM_IS_GZFILE(z) ((z)->flags & ZSTREAM_FLAG_GZFILE)
+
+#define ZSTREAM_EXPAND_BUFFER_OK 0
/* I think that more better value should be found,
but I gave up finding it. B) */
@@ -569,8 +574,10 @@ static const struct zstream_funcs inflate_funcs = {
struct zstream_run_args {
struct zstream * z;
- int flush;
- int interrupt;
+ int flush; /* stream flush value for inflate() or deflate() */
+ int interrupt; /* stop processing the stream and return to ruby */
+ int jump_state; /* for buffer expansion block break or exception */
+ int stream_output; /* for streaming zlib processing */
};
static voidpf
@@ -615,33 +622,50 @@ zstream_init(struct zstream *z, const struct zstream_funcs *func)
static void
zstream_expand_buffer(struct zstream *z)
{
- long inc;
-
if (NIL_P(z->buf)) {
- /* I uses rb_str_new here not rb_str_buf_new because
- rb_str_buf_new makes a zero-length string. */
- z->buf = rb_str_new(0, ZSTREAM_INITIAL_BUFSIZE);
- z->buf_filled = 0;
- z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf);
- z->stream.avail_out = ZSTREAM_INITIAL_BUFSIZE;
- RBASIC(z->buf)->klass = 0;
+ zstream_expand_buffer_into(z, ZSTREAM_INITIAL_BUFSIZE);
return;
}
- if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
- /* to keep other threads from freezing */
- z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX;
+ if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) {
+ if (z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
+ int state = 0;
+ VALUE self = (VALUE)z->stream.opaque;
+
+ rb_str_resize(z->buf, z->buf_filled);
+ RBASIC(z->buf)->klass = rb_cString;
+ OBJ_INFECT(z->buf, self);
+
+ rb_protect(rb_yield, z->buf, &state);
+
+ z->buf = Qnil;
+ zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX);
+
+ if (state)
+ rb_jump_tag(state);
+
+ return;
+ }
+ else {
+ zstream_expand_buffer_into(z,
+ ZSTREAM_AVAIL_OUT_STEP_MAX - z->buf_filled);
+ }
}
else {
- inc = z->buf_filled / 2;
- if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) {
- inc = ZSTREAM_AVAIL_OUT_STEP_MIN;
+ if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
+ z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX;
}
- rb_str_resize(z->buf, z->buf_filled + inc);
- z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ?
- (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX;
+ else {
+ long inc = z->buf_filled / 2;
+ if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) {
+ inc = ZSTREAM_AVAIL_OUT_STEP_MIN;
+ }
+ rb_str_resize(z->buf, z->buf_filled + inc);
+ z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ?
+ (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX;
+ }
+ z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
}
- z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
}
static void
@@ -663,6 +687,17 @@ zstream_expand_buffer_into(struct zstream *z, unsigned long size)
}
}
+static void *
+zstream_expand_buffer_protect(void *ptr)
+{
+ struct zstream *z = (struct zstream *)ptr;
+ int state = 0;
+
+ rb_protect((VALUE (*)(VALUE))zstream_expand_buffer, (VALUE)z, &state);
+
+ return (void *)state;
+}
+
static int
zstream_expand_buffer_without_gvl(struct zstream *z)
{
@@ -682,9 +717,6 @@ zstream_expand_buffer_without_gvl(struct zstream *z)
new_str = ruby_xrealloc(RSTRING(z->buf)->as.heap.ptr, len + 1);
- if (!new_str)
- return 0;
-
/* from rb_str_resize */
RSTRING(z->buf)->as.heap.ptr = new_str;
RSTRING(z->buf)->as.heap.ptr[len] = '\0'; /* sentinel */
@@ -696,7 +728,7 @@ zstream_expand_buffer_without_gvl(struct zstream *z)
}
z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
- return 1;
+ return ZSTREAM_EXPAND_BUFFER_OK;
}
static void
@@ -737,6 +769,13 @@ zstream_detach_buffer(struct zstream *z)
{
VALUE dst, self = (VALUE)z->stream.opaque;
+ if (!ZSTREAM_IS_FINISHED(z) && !ZSTREAM_IS_GZFILE(z) &&
+ rb_block_given_p()) {
+ /* prevent tiny yields mid-stream, save for next
+ * zstream_expand_buffer() or stream end */
+ return Qnil;
+ }
+
if (NIL_P(z->buf)) {
dst = rb_str_new(0, 0);
}
@@ -752,6 +791,12 @@ zstream_detach_buffer(struct zstream *z)
z->buf_filled = 0;
z->stream.next_out = 0;
z->stream.avail_out = 0;
+
+ if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) {
+ rb_yield(dst);
+ dst = Qnil;
+ }
+
return dst;
}
@@ -921,7 +966,7 @@ static void *
zstream_run_func(void *ptr)
{
struct zstream_run_args *args = (struct zstream_run_args *)ptr;
- int err, flush = args->flush;
+ int err, state, flush = args->flush;
struct zstream *z = args->z;
uInt n;
@@ -945,8 +990,16 @@ zstream_run_func(void *ptr)
break;
}
- if (!zstream_expand_buffer_without_gvl(z)) {
- err = Z_MEM_ERROR; /* realloc failed */
+ if (args->stream_output) {
+ state = (int)rb_thread_call_with_gvl(zstream_expand_buffer_protect,
+ (void *)z);
+ } else {
+ state = zstream_expand_buffer_without_gvl(z);
+ }
+
+ if (state) {
+ err = Z_OK; /* buffer expanded but stream processing was stopped */
+ args->jump_state = state;
break;
}
}
@@ -975,6 +1028,8 @@ zstream_run(struct zstream *z, Bytef *src, long len, int flush)
args.z = z;
args.flush = flush;
args.interrupt = 0;
+ args.jump_state = 0;
+ args.stream_output = !ZSTREAM_IS_GZFILE(z) && rb_block_given_p();
if (NIL_P(z->input) && len == 0) {
z->stream.next_in = (Bytef*)"";
@@ -1026,6 +1081,9 @@ loop:
zstream_append_input(z, z->stream.next_in, z->stream.avail_in);
guard = Qnil; /* prevent tail call to make guard effective */
}
+
+ if (args.jump_state)
+ rb_jump_tag(args.jump_state);
}
static VALUE
@@ -1208,8 +1266,13 @@ rb_zstream_reset(VALUE obj)
}
/*
- * Finishes the stream and flushes output buffer. See Zlib::Deflate#finish and
- * Zlib::Inflate#finish for details of this behavior.
+ * call-seq:
+ * finish -> String
+ * finish { |chunk| ... } -> nil
+ *
+ * Finishes the stream and flushes output buffer. If a block is given each
+ * chunk is yielded to the block until the input buffer has been flushed to
+ * the output buffer.
*/
static VALUE
rb_zstream_finish(VALUE obj)
@@ -1222,7 +1285,13 @@ rb_zstream_finish(VALUE obj)
}
/*
- * Flushes input buffer and returns all data in that buffer.
+ * call-seq:
+ * flush_next_out -> String
+ * flush_next_out { |chunk| ... } -> nil
+ *
+ * Flushes output buffer and returns all data in that buffer. If a block is
+ * given each chunk is yielded to the block until the current output buffer
+ * has been flushed.
*/
static VALUE
rb_zstream_flush_next_in(VALUE obj)
@@ -1504,13 +1573,13 @@ deflate_run(VALUE args)
/*
* Document-method: Zlib::Deflate.deflate
*
- * call-seq: Zlib.deflate(string[, level])
- * Zlib::Deflate.deflate(string[, level])
+ * call-seq:
+ * Zlib.deflate(string[, level])
+ * Zlib::Deflate.deflate(string[, level])
*
* Compresses the given +string+. Valid values of level are
- * <tt>NO_COMPRESSION</tt>, <tt>BEST_SPEED</tt>,
- * <tt>BEST_COMPRESSION</tt>, <tt>DEFAULT_COMPRESSION</tt>, and an
- * integer from 0 to 9 (the default is 6).
+ * Zlib::NO_COMPRESSION, Zlib::BEST_SPEED, Zlib::BEST_COMPRESSION,
+ * Zlib::DEFAULT_COMPRESSION, or an integer from 0 to 9 (the default is 6).
*
* This method is almost equivalent to the following code:
*
@@ -1564,17 +1633,19 @@ do_deflate(struct zstream *z, VALUE src, int flush)
}
/*
- * Document-method: Zlib#deflate
+ * Document-method: Zlib::Deflate#deflate
*
* call-seq:
- * deflate(string, flush = Zlib::NO_FLUSH)
+ * z.deflate(string, flush = Zlib::NO_FLUSH) -> String
+ * z.deflate(string, flush = Zlib::NO_FLUSH) { |chunk| ... } -> nil
*
* Inputs +string+ into the deflate stream and returns the output from the
* stream. On calling this method, both the input and the output buffers of
- * the stream are flushed.
+ * the stream are flushed. If +string+ is nil, this method finishes the
+ * stream, just like Zlib::ZStream#finish.
*
- * If +string+ is nil, this method finishes the stream, just like
- * Zlib::ZStream#finish.
+ * If a block is given consecutive deflated chunks from the +string+ are
+ * yielded to the block and +nil+ is returned.
*
* The +flush+ parameter specifies the flush mode. The following constants
* may be used:
@@ -1621,10 +1692,13 @@ rb_deflate_addstr(VALUE obj, VALUE src)
* Document-method: Zlib::Deflate#flush
*
* call-seq:
- * flush(flush = Zlib::SYNC_FLUSH)
+ * flush(flush = Zlib::SYNC_FLUSH) -> String
+ * flush(flush = Zlib::SYNC_FLUSH) { |chunk| ... } -> nil
*
* This method is equivalent to <tt>deflate('', flush)</tt>. This method is
- * just provided to improve the readability of your Ruby program.
+ * just provided to improve the readability of your Ruby program. If a block
+ * is given chunks of deflate output are yielded to the block until the buffer
+ * is flushed.
*
* See Zlib::Deflate#deflate for detail on the +flush+ constants NO_FLUSH,
* SYNC_FLUSH, FULL_FLUSH and FINISH.
@@ -1812,9 +1886,11 @@ inflate_run(VALUE args)
}
/*
- * Document-method: Zlib::Inflate.inflate
+ * Document-method: Zlib::inflate
*
- * call-seq: Zlib::Inflate.inflate(string)
+ * call-seq:
+ * Zlib.inflate(string)
+ * Zlib::Inflate.inflate(string)
*
* Decompresses +string+. Raises a Zlib::NeedDict exception if a preset
* dictionary is needed for decompression.
@@ -1890,12 +1966,17 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary) {
/*
* Document-method: Zlib::Inflate#inflate
*
- * call-seq: inflate(string)
+ * call-seq:
+ * inflate(deflate_string) -> String
+ * inflate(deflate_string) { |chunk| ... } -> nil
+ *
+ * Inputs +deflate_string+ into the inflate stream and returns the output from
+ * the stream. Calling this method, both the input and the output buffer of
+ * the stream are flushed. If string is +nil+, this method finishes the
+ * stream, just like Zlib::ZStream#finish.
*
- * Inputs +string+ into the inflate stream and returns the output from the
- * stream. Calling this method, both the input and the output buffer of the
- * stream are flushed. If string is +nil+, this method finishes the stream,
- * just like Zlib::ZStream#finish.
+ * If a block is given consecutive inflated chunks from the +deflate_string+
+ * are yielded to the block and +nil+ is returned.
*
* Raises a Zlib::NeedDict exception if a preset dictionary is needed to
* decompress. Set the dictionary by Zlib::Inflate#set_dictionary and then
@@ -2169,6 +2250,7 @@ gzfile_new(klass, funcs, endfunc)
obj = Data_Make_Struct(klass, struct gzfile, gzfile_mark, gzfile_free, gz);
zstream_init(&gz->z, funcs);
+ gz->z.flags |= ZSTREAM_FLAG_GZFILE;
gz->io = Qnil;
gz->level = 0;
gz->mtime = 0;