summaryrefslogtreecommitdiff
path: root/enumerator.c
diff options
context:
space:
mode:
Diffstat (limited to 'enumerator.c')
-rw-r--r--enumerator.c363
1 files changed, 236 insertions, 127 deletions
diff --git a/enumerator.c b/enumerator.c
index 0d54058215..69c96b2d8f 100644
--- a/enumerator.c
+++ b/enumerator.c
@@ -18,6 +18,7 @@
#include <float.h>
#endif
+#include <limits.h>
#include "id.h"
#include "internal.h"
#include "internal/class.h"
@@ -33,79 +34,93 @@
/*
* Document-class: Enumerator
*
- * A class which allows both internal and external iteration.
+ * \Class \Enumerator supports:
*
- * An Enumerator can be created by the following methods.
- * - Object#to_enum
- * - Object#enum_for
- * - Enumerator.new
+ * - {External iteration}[rdoc-ref:Enumerator@External+Iteration].
+ * - {Internal iteration}[rdoc-ref:Enumerator@Internal+Iteration].
*
- * Most methods have two forms: a block form where the contents
- * are evaluated for each item in the enumeration, and a non-block form
- * which returns a new Enumerator wrapping the iteration.
+ * An \Enumerator may be created by the following methods:
*
- * enumerator = %w(one two three).each
- * puts enumerator.class # => Enumerator
+ * - Object#to_enum.
+ * - Object#enum_for.
+ * - Enumerator.new.
*
- * enumerator.each_with_object("foo") do |item, obj|
- * puts "#{obj}: #{item}"
- * end
+ * In addition, certain Ruby methods return \Enumerator objects:
+ * a Ruby iterator method that accepts a block
+ * may return an \Enumerator if no block is given.
+ * There are many such methods, for example, in classes Array and Hash.
+ * (In the documentation for those classes, search for `new_enumerator`.)
*
- * # foo: one
- * # foo: two
- * # foo: three
+ * == Internal Iteration
*
- * enum_with_obj = enumerator.each_with_object("foo")
- * puts enum_with_obj.class # => Enumerator
+ * In _internal iteration_, an iterator method drives the iteration
+ * and the caller's block handles the processing;
+ * this example uses method #each_with_index:
*
- * enum_with_obj.each do |item, obj|
- * puts "#{obj}: #{item}"
- * end
+ * words = %w[foo bar baz] # => ["foo", "bar", "baz"]
+ * enumerator = words.each # => #<Enumerator: ...>
+ * enumerator.each_with_index {|word, i| puts "#{i}: #{word}" }
+ * 0: foo
+ * 1: bar
+ * 2: baz
*
- * # foo: one
- * # foo: two
- * # foo: three
+ * Iterator methods in class \Enumerator include:
*
- * This allows you to chain Enumerators together. For example, you
- * can map a list's elements to strings containing the index
- * and the element as a string via:
+ * - #each:
+ * passes each item to the block.
+ * - #each_with_index:
+ * passes each item and its index to the block.
+ * - #each_with_object (aliased as #with_object):
+ * passes each item and a given object to the block.
+ * - #with_index:
+ * like #each_with_index, but starting at a given offset (instead of zero).
*
- * puts %w[foo bar baz].map.with_index { |w, i| "#{i}:#{w}" }
- * # => ["0:foo", "1:bar", "2:baz"]
+ * \Class \Enumerator includes module Enumerable,
+ * which provides many more iterator methods.
*
* == External Iteration
*
- * An Enumerator can also be used as an external iterator.
- * For example, Enumerator#next returns the next value of the iterator
- * or raises StopIteration if the Enumerator is at the end.
- *
- * e = [1,2,3].each # returns an enumerator object.
- * puts e.next # => 1
- * puts e.next # => 2
- * puts e.next # => 3
- * puts e.next # raises StopIteration
- *
- * +next+, +next_values+, +peek+, and +peek_values+ are the only methods
- * which use external iteration (and Array#zip(Enumerable-not-Array) which uses +next+ internally).
- *
- * These methods do not affect other internal enumeration methods,
- * unless the underlying iteration method itself has side-effect, e.g. IO#each_line.
- *
- * FrozenError will be raised if these methods are called against a frozen enumerator.
- * Since +rewind+ and +feed+ also change state for external iteration,
- * these methods may raise FrozenError too.
- *
- * External iteration differs *significantly* from internal iteration
- * due to using a Fiber:
- * - The Fiber adds some overhead compared to internal enumeration.
- * - The stacktrace will only include the stack from the Enumerator, not above.
- * - Fiber-local variables are *not* inherited inside the Enumerator Fiber,
- * which instead starts with no Fiber-local variables.
- * - Fiber storage variables *are* inherited and are designed
- * to handle Enumerator Fibers. Assigning to a Fiber storage variable
- * only affects the current Fiber, so if you want to change state
- * in the caller Fiber of the Enumerator Fiber, you need to use an
- * extra indirection (e.g., use some object in the Fiber storage
+ * In _external iteration_, the user's program both drives the iteration
+ * and handles the processing in stream-like fashion;
+ * this example uses method #next:
+ *
+ * words = %w[foo bar baz]
+ * enumerator = words.each
+ * enumerator.next # => "foo"
+ * enumerator.next # => "bar"
+ * enumerator.next # => "baz"
+ * enumerator.next # Raises StopIteration: iteration reached an end
+ *
+ * External iteration methods in class \Enumerator include:
+ *
+ * - #feed:
+ * sets the value to be returned by the next +yield+ inside the enumerator.
+ * - #next:
+ * returns the next value and increments the position.
+ * - #next_values:
+ * returns the next value(s) as an array and increments the position.
+ * - #peek:
+ * returns the next value but does not increment the position.
+ * - #peek_values:
+ * returns the next value(s) as an array but does not increment the position.
+ * - #rewind:
+ * sets the position to zero.
+ *
+ * Each of these methods raises FrozenError if called on a frozen \Enumerator.
+ *
+ * == External Iteration and \Fiber
+ *
+ * External iteration that uses Fiber differs *significantly* from internal iteration:
+ *
+ * - Using \Fiber adds some overhead compared to internal enumeration.
+ * - The stacktrace will only include the stack from the \Enumerator, not above.
+ * - \Fiber-local variables are *not* inherited inside the \Enumerator \Fiber,
+ * which instead starts with no \Fiber-local variables.
+ * - \Fiber storage variables *are* inherited and are designed
+ * to handle \Enumerator Fibers. Assigning to a \Fiber storage variable
+ * only affects the current \Fiber, so if you want to change state
+ * in the caller \Fiber of the \Enumerator \Fiber, you need to use an
+ * extra indirection (e.g., use some object in the \Fiber storage
* variable and mutate some ivar of it).
*
* Concretely:
@@ -125,7 +140,7 @@
* e.each { p _1 }
* p Fiber[:storage_var] # => 2 (it ran in the same Fiber/"stack" as the current Fiber)
*
- * == Convert External Iteration to Internal Iteration
+ * == Converting External Iteration to Internal Iteration
*
* You can use an external iterator to implement an internal iterator as follows:
*
@@ -221,6 +236,7 @@ struct yielder {
struct producer {
VALUE init;
VALUE proc;
+ VALUE size;
};
typedef struct MEMO *lazyenum_proc_func(VALUE, struct MEMO *, VALUE, long);
@@ -443,28 +459,31 @@ convert_to_feasible_size_value(VALUE obj)
/*
* call-seq:
- * Enumerator.new(size = nil) { |yielder| ... }
+ * Enumerator.new(size = nil) {|yielder| ... }
*
- * Creates a new Enumerator object, which can be used as an
- * Enumerable.
+ * Returns a new \Enumerator object that can be used for iteration.
*
- * Iteration is defined by the given block, in
- * which a "yielder" object, given as block parameter, can be used to
- * yield a value by calling the +yield+ method (aliased as <code><<</code>):
+ * The given block defines the iteration;
+ * it is called with a "yielder" object that can yield an object
+ * via a call to method <tt>yielder.yield</tt>:
*
- * fib = Enumerator.new do |y|
- * a = b = 1
- * loop do
- * y << a
- * a, b = b, a + b
+ * fib = Enumerator.new do |yielder|
+ * n = next_n = 1
+ * while true do
+ * yielder.yield(n)
+ * n, next_n = next_n, n + next_n
* end
* end
*
* fib.take(10) # => [1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
*
- * The optional parameter can be used to specify how to calculate the size
- * in a lazy fashion (see Enumerator#size). It can either be a value or
- * a callable object.
+ * Parameter +size+ specifies how the size is to be calculated (see #size);
+ * it can either be a value or a callable object:
+ *
+ * Enumerator.new{}.size # => nil
+ * Enumerator.new(42){}.size # => 42
+ * Enumerator.new(-> {42}){}.size # => 42
+ *
*/
static VALUE
enumerator_initialize(int argc, VALUE *argv, VALUE obj)
@@ -670,7 +689,7 @@ enumerator_with_index(int argc, VALUE *argv, VALUE obj)
rb_check_arity(argc, 0, 1);
RETURN_SIZED_ENUMERATOR(obj, argc, argv, enumerator_enum_size);
memo = (!argc || NIL_P(memo = argv[0])) ? INT2FIX(0) : rb_to_int(memo);
- return enumerator_block_call(obj, enumerator_with_index_i, (VALUE)MEMO_NEW(memo, 0, 0));
+ return enumerator_block_call(obj, enumerator_with_index_i, (VALUE)rb_imemo_memo_new(memo, 0, 0));
}
/*
@@ -1084,6 +1103,7 @@ enumerator_rewind(VALUE obj)
static struct generator *generator_ptr(VALUE obj);
static VALUE append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args);
+static VALUE append_method_args(VALUE obj, VALUE str, VALUE default_args);
static VALUE
inspect_enumerator(VALUE obj, VALUE dummy, int recur)
@@ -1156,7 +1176,7 @@ kwd_append(VALUE key, VALUE val, VALUE str)
static VALUE
append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args)
{
- VALUE method, eargs;
+ VALUE method;
method = rb_attr_get(obj, id_method);
if (method != Qfalse) {
@@ -1170,6 +1190,13 @@ append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args)
rb_str_buf_cat2(str, ":");
rb_str_buf_append(str, method);
}
+ return append_method_args(obj, str, default_args);
+}
+
+static VALUE
+append_method_args(VALUE obj, VALUE str, VALUE default_args)
+{
+ VALUE eargs;
eargs = rb_attr_get(obj, id_arguments);
if (NIL_P(eargs)) {
@@ -1199,10 +1226,11 @@ append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args)
if (!NIL_P(kwds)) {
rb_hash_foreach(kwds, kwd_append, str);
}
- rb_str_set_len(str, RSTRING_LEN(str)-2);
+ rb_str_set_len(str, RSTRING_LEN(str)-2); /* drop the last ", " */
rb_str_buf_cat2(str, ")");
}
}
+ RB_GC_GUARD(eargs);
return str;
}
@@ -1229,6 +1257,24 @@ enumerator_inspect(VALUE obj)
* (1..100).to_a.permutation(4).size # => 94109400
* loop.size # => Float::INFINITY
* (1..100).drop_while.size # => nil
+ *
+ * Note that an enumerator's size might be inaccurate and should be treated as a hint rather than an exact count.
+ * For example, there is no check that the size provided to ::new is accurate:
+ *
+ * e = Enumerator.new(5) { |y| 2.times { y << it} }
+ * e.size # => 5
+ * e.to_a.size # => 2
+ *
+ * Another example is an enumerator created by ::produce without a +size+ argument.
+ * Such enumerators return +Infinity+ for size, but this is inaccurate if the passed
+ * block raises StopIteration:
+ *
+ * e = Enumerator.produce(1) { it + 1 }
+ * e.size # => Infinity
+ *
+ * e = Enumerator.produce(1) { it > 3 ? raise(StopIteration) : it + 1 }
+ * e.size # => Infinity
+ * e.to_a.size # => 4
*/
static VALUE
@@ -1594,7 +1640,7 @@ lazy_init_yielder(RB_BLOCK_CALL_FUNC_ARGLIST(_, m))
VALUE memos = rb_attr_get(yielder, id_memo);
struct MEMO *result;
- result = MEMO_NEW(m, rb_enum_values_pack(argc, argv),
+ result = rb_imemo_memo_new(m, rb_enum_values_pack(argc, argv),
argc > 1 ? LAZY_MEMO_PACKED : 0);
return lazy_yielder_result(result, yielder, procs_array, memos, 0);
}
@@ -2734,6 +2780,52 @@ lazy_with_index(int argc, VALUE *argv, VALUE obj)
return lazy_add_method(obj, 0, 0, memo, rb_ary_new_from_values(1, &memo), &lazy_with_index_funcs);
}
+static struct MEMO *
+lazy_tap_each_proc(VALUE proc_entry, struct MEMO *result, VALUE memos, long memo_index)
+{
+ struct proc_entry *entry = proc_entry_ptr(proc_entry);
+
+ rb_proc_call_with_block(entry->proc, 1, &result->memo_value, Qnil);
+
+ return result;
+}
+
+static const lazyenum_funcs lazy_tap_each_funcs = {
+ lazy_tap_each_proc, 0,
+};
+
+/*
+ * call-seq:
+ * lazy.tap_each { |item| ... } -> lazy_enumerator
+ *
+ * Passes each element through to the block for side effects only,
+ * without modifying the element or affecting the enumeration.
+ * Returns a new lazy enumerator.
+ *
+ * This is useful for debugging or logging inside lazy chains,
+ * without breaking laziness or misusing +map+.
+ *
+ * (1..).lazy
+ * .tap_each { |x| puts "got #{x}" }
+ * .select(&:even?)
+ * .first(3)
+ * # prints: got 1, got 2, ..., got 6
+ * # returns: [2, 4, 6]
+ *
+ * Similar in intent to Java's Stream#peek.
+ */
+
+static VALUE
+lazy_tap_each(VALUE obj)
+{
+ if (!rb_block_given_p())
+ {
+ rb_raise(rb_eArgError, "tried to call lazy tap_each without a block");
+ }
+
+ return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_tap_each_funcs);
+}
+
#if 0 /* for RDoc */
/*
@@ -2876,6 +2968,7 @@ producer_mark_and_move(void *p)
struct producer *ptr = p;
rb_gc_mark_and_move(&ptr->init);
rb_gc_mark_and_move(&ptr->proc);
+ rb_gc_mark_and_move(&ptr->size);
}
#define producer_free RUBY_TYPED_DEFAULT_FREE
@@ -2919,12 +3012,13 @@ producer_allocate(VALUE klass)
obj = TypedData_Make_Struct(klass, struct producer, &producer_data_type, ptr);
ptr->init = Qundef;
ptr->proc = Qundef;
+ ptr->size = Qnil;
return obj;
}
static VALUE
-producer_init(VALUE obj, VALUE init, VALUE proc)
+producer_init(VALUE obj, VALUE init, VALUE proc, VALUE size)
{
struct producer *ptr;
@@ -2936,6 +3030,7 @@ producer_init(VALUE obj, VALUE init, VALUE proc)
RB_OBJ_WRITE(obj, &ptr->init, init);
RB_OBJ_WRITE(obj, &ptr->proc, proc);
+ RB_OBJ_WRITE(obj, &ptr->size, size);
return obj;
}
@@ -2986,12 +3081,18 @@ producer_each(VALUE obj)
static VALUE
producer_size(VALUE obj, VALUE args, VALUE eobj)
{
- return DBL2NUM(HUGE_VAL);
+ struct producer *ptr = producer_ptr(obj);
+ VALUE size = ptr->size;
+
+ if (NIL_P(size)) return Qnil;
+ if (RB_INTEGER_TYPE_P(size) || RB_FLOAT_TYPE_P(size)) return size;
+
+ return rb_funcall(size, id_call, 0);
}
/*
* call-seq:
- * Enumerator.produce(initial = nil) { |prev| block } -> enumerator
+ * Enumerator.produce(initial = nil, size: nil) { |prev| block } -> enumerator
*
* Creates an infinite enumerator from any block, just called over and
* over. The result of the previous iteration is passed to the next one.
@@ -3023,19 +3124,50 @@ producer_size(VALUE obj, VALUE args, VALUE eobj)
* PATTERN = %r{\d+|[-/+*]}
* Enumerator.produce { scanner.scan(PATTERN) }.slice_after { scanner.eos? }.first
* # => ["7", "+", "38", "/", "6"]
+ *
+ * The optional +size+ keyword argument specifies the size of the enumerator,
+ * which can be retrieved by Enumerator#size. It can be an integer,
+ * +Float::INFINITY+, a callable object (such as a lambda), or +nil+ to
+ * indicate unknown size. When not specified, the size defaults to
+ * +Float::INFINITY+.
+ *
+ * # Infinite enumerator
+ * enum = Enumerator.produce(1, size: Float::INFINITY, &:succ)
+ * enum.size # => Float::INFINITY
+ *
+ * # Finite enumerator with known/computable size
+ * abs_dir = File.expand_path("./baz") # => "/foo/bar/baz"
+ * traverser = Enumerator.produce(abs_dir, size: -> { abs_dir.count("/") + 1 }) {
+ * raise StopIteration if it == "/"
+ * File.dirname(it)
+ * }
+ * traverser.size # => 4
+ *
+ * # Finite enumerator with unknown size
+ * calendar = Enumerator.produce(Date.today, size: nil) {
+ * it.monday? ? raise(StopIteration) : it + 1
+ * }
+ * calendar.size # => nil
*/
static VALUE
enumerator_s_produce(int argc, VALUE *argv, VALUE klass)
{
- VALUE init, producer;
+ VALUE init, producer, opts, size;
+ ID keyword_ids[1];
if (!rb_block_given_p()) rb_raise(rb_eArgError, "no block given");
- if (rb_scan_args(argc, argv, "01", &init) == 0) {
+ keyword_ids[0] = rb_intern("size");
+ rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "01:", &init, &opts);
+ rb_get_kwargs(opts, keyword_ids, 0, 1, &size);
+
+ size = UNDEF_P(size) ? DBL2NUM(HUGE_VAL) : convert_to_feasible_size_value(size);
+
+ if (argc == 0 || (argc == 1 && !NIL_P(opts))) {
init = Qundef;
}
- producer = producer_init(producer_allocate(rb_cEnumProducer), init, rb_block_proc());
+ producer = producer_init(producer_allocate(rb_cEnumProducer), init, rb_block_proc(), size);
return rb_enumeratorize_with_size_kw(producer, sym_each, 0, 0, producer_size, RB_NO_KEYWORDS);
}
@@ -3522,9 +3654,9 @@ enum_product_enum_size(VALUE obj, VALUE args, VALUE eobj)
struct product_state {
VALUE obj;
VALUE block;
+ int index;
int argc;
VALUE *argv;
- int index;
};
static VALUE product_each(VALUE, struct product_state *);
@@ -3563,15 +3695,23 @@ enum_product_run(VALUE obj, VALUE block)
{
struct enum_product *ptr = enum_product_ptr(obj);
int argc = RARRAY_LENINT(ptr->enums);
+ if (argc == 0) { /* no need to allocate state.argv */
+ rb_funcall(block, id_call, 1, rb_ary_new());
+ return obj;
+ }
+
+ VALUE argsbuf = 0;
struct product_state state = {
.obj = obj,
.block = block,
.index = 0,
.argc = argc,
- .argv = ALLOCA_N(VALUE, argc),
+ .argv = ALLOCV_N(VALUE, argsbuf, argc),
};
- return product_each(obj, &state);
+ VALUE ret = product_each(obj, &state);
+ ALLOCV_END(argsbuf);
+ return ret;
}
/*
@@ -3941,7 +4081,7 @@ arith_seq_take(VALUE self, VALUE num)
ary = rb_ary_new_capa((n < len) ? n : len);
while (n > 0 && i < end) {
rb_ary_push(ary, LONG2FIX(i));
- if (i + unit < i) break;
+ if (i > LONG_MAX - unit) break;
i += unit;
--n;
}
@@ -3954,7 +4094,7 @@ arith_seq_take(VALUE self, VALUE num)
ary = rb_ary_new_capa((n < len) ? n : len);
while (n > 0 && i > end) {
rb_ary_push(ary, LONG2FIX(i));
- if (i + unit > i) break;
+ if (i < LONG_MIN - unit) break;
i += unit;
--n;
}
@@ -4209,7 +4349,7 @@ static VALUE
arith_seq_inspect(VALUE self)
{
struct enumerator *e;
- VALUE eobj, str, eargs;
+ VALUE eobj, str;
int range_p;
TypedData_Get_Struct(self, struct enumerator, &enumerator_data_type, e);
@@ -4223,39 +4363,7 @@ arith_seq_inspect(VALUE self)
str = rb_sprintf("(%s%"PRIsVALUE"%s.", range_p ? "(" : "", eobj, range_p ? ")" : "");
rb_str_buf_append(str, rb_id2str(e->meth));
-
- eargs = rb_attr_get(eobj, id_arguments);
- if (NIL_P(eargs)) {
- eargs = e->args;
- }
- if (eargs != Qfalse) {
- long argc = RARRAY_LEN(eargs);
- const VALUE *argv = RARRAY_CONST_PTR(eargs); /* WB: no new reference */
-
- if (argc > 0) {
- VALUE kwds = Qnil;
-
- rb_str_buf_cat2(str, "(");
-
- if (RB_TYPE_P(argv[argc-1], T_HASH)) {
- int all_key = TRUE;
- rb_hash_foreach(argv[argc-1], key_symbol_p, (VALUE)&all_key);
- if (all_key) kwds = argv[--argc];
- }
-
- while (argc--) {
- VALUE arg = *argv++;
-
- rb_str_append(str, rb_inspect(arg));
- rb_str_buf_cat2(str, ", ");
- }
- if (!NIL_P(kwds)) {
- rb_hash_foreach(kwds, kwd_append, str);
- }
- rb_str_set_len(str, RSTRING_LEN(str)-2); /* drop the last ", " */
- rb_str_buf_cat2(str, ")");
- }
- }
+ append_method_args(eobj, str, e->args);
rb_str_buf_cat2(str, ")");
@@ -4561,6 +4669,7 @@ InitVM_Enumerator(void)
rb_define_method(rb_cLazy, "uniq", lazy_uniq, 0);
rb_define_method(rb_cLazy, "compact", lazy_compact, 0);
rb_define_method(rb_cLazy, "with_index", lazy_with_index, -1);
+ rb_define_method(rb_cLazy, "tap_each", lazy_tap_each, 0);
lazy_use_super_method = rb_hash_new_with_size(18);
rb_hash_aset(lazy_use_super_method, sym("map"), sym("_enumerable_map"));
@@ -4597,7 +4706,7 @@ InitVM_Enumerator(void)
rb_eStopIteration = rb_define_class("StopIteration", rb_eIndexError);
rb_define_method(rb_eStopIteration, "result", stop_result, 0);
- /* Generator */
+ /* :nodoc: Generator */
rb_cGenerator = rb_define_class_under(rb_cEnumerator, "Generator", rb_cObject);
rb_include_module(rb_cGenerator, rb_mEnumerable);
rb_define_alloc_func(rb_cGenerator, generator_allocate);
@@ -4605,7 +4714,7 @@ InitVM_Enumerator(void)
rb_define_method(rb_cGenerator, "initialize_copy", generator_init_copy, 1);
rb_define_method(rb_cGenerator, "each", generator_each, -1);
- /* Yielder */
+ /* :nodoc: Yielder */
rb_cYielder = rb_define_class_under(rb_cEnumerator, "Yielder", rb_cObject);
rb_define_alloc_func(rb_cYielder, yielder_allocate);
rb_define_method(rb_cYielder, "initialize", yielder_initialize, 0);
@@ -4613,7 +4722,7 @@ InitVM_Enumerator(void)
rb_define_method(rb_cYielder, "<<", yielder_yield_push, 1);
rb_define_method(rb_cYielder, "to_proc", yielder_to_proc, 0);
- /* Producer */
+ /* :nodoc: Producer */
rb_cEnumProducer = rb_define_class_under(rb_cEnumerator, "Producer", rb_cObject);
rb_define_alloc_func(rb_cEnumProducer, producer_allocate);
rb_define_method(rb_cEnumProducer, "each", producer_each, 0);