diff options
Diffstat (limited to 'enumerator.c')
| -rw-r--r-- | enumerator.c | 1063 |
1 files changed, 618 insertions, 445 deletions
diff --git a/enumerator.c b/enumerator.c index 2c9858cda6..69c96b2d8f 100644 --- a/enumerator.c +++ b/enumerator.c @@ -18,8 +18,10 @@ #include <float.h> #endif +#include <limits.h> #include "id.h" #include "internal.h" +#include "internal/class.h" #include "internal/enumerator.h" #include "internal/error.h" #include "internal/hash.h" @@ -32,65 +34,115 @@ /* * Document-class: Enumerator * - * A class which allows both internal and external iteration. - * - * An Enumerator can be created by the following methods. - * - Object#to_enum - * - Object#enum_for - * - Enumerator.new - * - * Most methods have two forms: a block form where the contents - * are evaluated for each item in the enumeration, and a non-block form - * which returns a new Enumerator wrapping the iteration. - * - * enumerator = %w(one two three).each - * puts enumerator.class # => Enumerator - * - * enumerator.each_with_object("foo") do |item, obj| - * puts "#{obj}: #{item}" + * \Class \Enumerator supports: + * + * - {External iteration}[rdoc-ref:Enumerator@External+Iteration]. + * - {Internal iteration}[rdoc-ref:Enumerator@Internal+Iteration]. + * + * An \Enumerator may be created by the following methods: + * + * - Object#to_enum. + * - Object#enum_for. + * - Enumerator.new. + * + * In addition, certain Ruby methods return \Enumerator objects: + * a Ruby iterator method that accepts a block + * may return an \Enumerator if no block is given. + * There are many such methods, for example, in classes Array and Hash. + * (In the documentation for those classes, search for `new_enumerator`.) + * + * == Internal Iteration + * + * In _internal iteration_, an iterator method drives the iteration + * and the caller's block handles the processing; + * this example uses method #each_with_index: + * + * words = %w[foo bar baz] # => ["foo", "bar", "baz"] + * enumerator = words.each # => #<Enumerator: ...> + * enumerator.each_with_index {|word, i| puts "#{i}: #{word}" } + * 0: foo + * 1: bar + * 2: baz + * + * Iterator methods in class \Enumerator include: + * + * - #each: + * passes each item to the block. + * - #each_with_index: + * passes each item and its index to the block. + * - #each_with_object (aliased as #with_object): + * passes each item and a given object to the block. + * - #with_index: + * like #each_with_index, but starting at a given offset (instead of zero). + * + * \Class \Enumerator includes module Enumerable, + * which provides many more iterator methods. + * + * == External Iteration + * + * In _external iteration_, the user's program both drives the iteration + * and handles the processing in stream-like fashion; + * this example uses method #next: + * + * words = %w[foo bar baz] + * enumerator = words.each + * enumerator.next # => "foo" + * enumerator.next # => "bar" + * enumerator.next # => "baz" + * enumerator.next # Raises StopIteration: iteration reached an end + * + * External iteration methods in class \Enumerator include: + * + * - #feed: + * sets the value that is next to be returned. + * - #next: + * returns the next value and increments the position. + * - #next_values: + * returns the next value in a 1-element array and increments the position. + * - #peek: + * returns the next value but does not increment the position. + * - #peek_values: + * returns the next value in a 1-element array but does not increment the position. + * - #rewind: + * sets the position to zero. + * + * Each of these methods raises FrozenError if called from a frozen \Enumerator. + * + * == External Iteration and \Fiber + * + * External iteration that uses Fiber differs *significantly* from internal iteration: + * + * - Using \Fiber adds some overhead compared to internal enumeration. + * - The stacktrace will only include the stack from the \Enumerator, not above. + * - \Fiber-local variables are *not* inherited inside the \Enumerator \Fiber, + * which instead starts with no \Fiber-local variables. + * - \Fiber storage variables *are* inherited and are designed + * to handle \Enumerator Fibers. Assigning to a \Fiber storage variable + * only affects the current \Fiber, so if you want to change state + * in the caller \Fiber of the \Enumerator \Fiber, you need to use an + * extra indirection (e.g., use some object in the \Fiber storage + * variable and mutate some ivar of it). + * + * Concretely: + * + * Thread.current[:fiber_local] = 1 + * Fiber[:storage_var] = 1 + * e = Enumerator.new do |y| + * p Thread.current[:fiber_local] # for external iteration: nil, for internal iteration: 1 + * p Fiber[:storage_var] # => 1, inherited + * Fiber[:storage_var] += 1 + * y << 42 * end * - * # foo: one - * # foo: two - * # foo: three - * - * enum_with_obj = enumerator.each_with_object("foo") - * puts enum_with_obj.class # => Enumerator - * - * enum_with_obj.each do |item, obj| - * puts "#{obj}: #{item}" - * end - * - * # foo: one - * # foo: two - * # foo: three - * - * This allows you to chain Enumerators together. For example, you - * can map a list's elements to strings containing the index - * and the element as a string via: - * - * puts %w[foo bar baz].map.with_index { |w, i| "#{i}:#{w}" } - * # => ["0:foo", "1:bar", "2:baz"] - * - * An Enumerator can also be used as an external iterator. - * For example, Enumerator#next returns the next value of the iterator - * or raises StopIteration if the Enumerator is at the end. + * p e.next # => 42 + * p Fiber[:storage_var] # => 1 (it ran in a different Fiber) * - * e = [1,2,3].each # returns an enumerator object. - * puts e.next # => 1 - * puts e.next # => 2 - * puts e.next # => 3 - * puts e.next # raises StopIteration + * e.each { p _1 } + * p Fiber[:storage_var] # => 2 (it ran in the same Fiber/"stack" as the current Fiber) * - * Note that enumeration sequence by +next+, +next_values+, +peek+ and - * +peek_values+ do not affect other non-external - * enumeration methods, unless the underlying iteration method itself has - * side-effect, e.g. IO#each_line. + * == Converting External Iteration to Internal Iteration * - * Moreover, implementation typically uses fibers so performance could be - * slower and exception stacktraces different than expected. - * - * You can use this to implement an internal iterator as follows: + * You can use an external iterator to implement an internal iterator as follows: * * def ext_each(e) * while true @@ -125,14 +177,16 @@ */ VALUE rb_cEnumerator; static VALUE rb_cLazy; -static ID id_rewind, id_new, id_to_enum, id_each_entry; +static ID id_rewind, id_to_enum, id_each_entry; static ID id_next, id_result, id_receiver, id_arguments, id_memo, id_method, id_force; -static ID id_begin, id_end, id_step, id_exclude_end; -static VALUE sym_each, sym_cycle, sym_yield; +static VALUE sym_each, sym_yield; static VALUE lazy_use_super_method; +extern ID ruby_static_id_cause; + #define id_call idCall +#define id_cause ruby_static_id_cause #define id_each idEach #define id_eqq idEqq #define id_initialize idInitialize @@ -155,6 +209,19 @@ struct enumerator { int kw_splat; }; +RUBY_REFERENCES(enumerator_refs) = { + RUBY_REF_EDGE(struct enumerator, obj), + RUBY_REF_EDGE(struct enumerator, args), + RUBY_REF_EDGE(struct enumerator, fib), + RUBY_REF_EDGE(struct enumerator, dst), + RUBY_REF_EDGE(struct enumerator, lookahead), + RUBY_REF_EDGE(struct enumerator, feedvalue), + RUBY_REF_EDGE(struct enumerator, stop_exc), + RUBY_REF_EDGE(struct enumerator, size), + RUBY_REF_EDGE(struct enumerator, procs), + RUBY_REF_END +}; + static VALUE rb_cGenerator, rb_cYielder, rb_cEnumProducer; struct generator { @@ -169,13 +236,16 @@ struct yielder { struct producer { VALUE init; VALUE proc; + VALUE size; }; typedef struct MEMO *lazyenum_proc_func(VALUE, struct MEMO *, VALUE, long); typedef VALUE lazyenum_size_func(VALUE, VALUE); +typedef int lazyenum_precheck_func(VALUE proc_entry); typedef struct { lazyenum_proc_func *proc; lazyenum_size_func *size; + lazyenum_precheck_func *precheck; } lazyenum_funcs; struct proc_entry { @@ -202,56 +272,15 @@ struct enum_product { VALUE rb_cArithSeq; -/* - * Enumerator - */ -static void -enumerator_mark(void *p) -{ - struct enumerator *ptr = p; - rb_gc_mark_movable(ptr->obj); - rb_gc_mark_movable(ptr->args); - rb_gc_mark_movable(ptr->fib); - rb_gc_mark_movable(ptr->dst); - rb_gc_mark_movable(ptr->lookahead); - rb_gc_mark_movable(ptr->feedvalue); - rb_gc_mark_movable(ptr->stop_exc); - rb_gc_mark_movable(ptr->size); - rb_gc_mark_movable(ptr->procs); -} - -static void -enumerator_compact(void *p) -{ - struct enumerator *ptr = p; - ptr->obj = rb_gc_location(ptr->obj); - ptr->args = rb_gc_location(ptr->args); - ptr->fib = rb_gc_location(ptr->fib); - ptr->dst = rb_gc_location(ptr->dst); - ptr->lookahead = rb_gc_location(ptr->lookahead); - ptr->feedvalue = rb_gc_location(ptr->feedvalue); - ptr->stop_exc = rb_gc_location(ptr->stop_exc); - ptr->size = rb_gc_location(ptr->size); - ptr->procs = rb_gc_location(ptr->procs); -} - -#define enumerator_free RUBY_TYPED_DEFAULT_FREE - -static size_t -enumerator_memsize(const void *p) -{ - return sizeof(struct enumerator); -} - static const rb_data_type_t enumerator_data_type = { "enumerator", { - enumerator_mark, - enumerator_free, - enumerator_memsize, - enumerator_compact, + RUBY_REFS_LIST_PTR(enumerator_refs), + RUBY_TYPED_DEFAULT_FREE, + NULL, // Nothing allocated externally, so don't need a memsize function + NULL, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE }; static struct enumerator * @@ -260,44 +289,29 @@ enumerator_ptr(VALUE obj) struct enumerator *ptr; TypedData_Get_Struct(obj, struct enumerator, &enumerator_data_type, ptr); - if (!ptr || ptr->obj == Qundef) { + if (!ptr || UNDEF_P(ptr->obj)) { rb_raise(rb_eArgError, "uninitialized enumerator"); } return ptr; } static void -proc_entry_mark(void *p) +proc_entry_mark_and_move(void *p) { struct proc_entry *ptr = p; - rb_gc_mark_movable(ptr->proc); - rb_gc_mark_movable(ptr->memo); -} - -static void -proc_entry_compact(void *p) -{ - struct proc_entry *ptr = p; - ptr->proc = rb_gc_location(ptr->proc); - ptr->memo = rb_gc_location(ptr->memo); -} - -#define proc_entry_free RUBY_TYPED_DEFAULT_FREE - -static size_t -proc_entry_memsize(const void *p) -{ - return p ? sizeof(struct proc_entry) : 0; + rb_gc_mark_and_move(&ptr->proc); + rb_gc_mark_and_move(&ptr->memo); } static const rb_data_type_t proc_entry_data_type = { "proc_entry", { - proc_entry_mark, - proc_entry_free, - proc_entry_memsize, - proc_entry_compact, + proc_entry_mark_and_move, + RUBY_TYPED_DEFAULT_FREE, + NULL, // Nothing allocated externally, so don't need a memsize function + proc_entry_mark_and_move, }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; static struct proc_entry * @@ -382,7 +396,7 @@ obj_to_enum(int argc, VALUE *argv, VALUE obj) } enumerator = rb_enumeratorize_with_size(obj, meth, argc, argv, 0); if (rb_block_given_p()) { - enumerator_ptr(enumerator)->size = rb_block_proc(); + RB_OBJ_WRITE(enumerator, &enumerator_ptr(enumerator)->size, rb_block_proc()); } return enumerator; } @@ -411,15 +425,15 @@ enumerator_init(VALUE enum_obj, VALUE obj, VALUE meth, int argc, const VALUE *ar rb_raise(rb_eArgError, "unallocated enumerator"); } - ptr->obj = obj; + RB_OBJ_WRITE(enum_obj, &ptr->obj, obj); ptr->meth = rb_to_id(meth); - if (argc) ptr->args = rb_ary_new4(argc, argv); + if (argc) RB_OBJ_WRITE(enum_obj, &ptr->args, rb_ary_new4(argc, argv)); ptr->fib = 0; ptr->dst = Qnil; ptr->lookahead = Qundef; ptr->feedvalue = Qundef; ptr->stop_exc = Qfalse; - ptr->size = size; + RB_OBJ_WRITE(enum_obj, &ptr->size, size); ptr->size_fn = size_fn; ptr->kw_splat = kw_splat; @@ -445,28 +459,31 @@ convert_to_feasible_size_value(VALUE obj) /* * call-seq: - * Enumerator.new(size = nil) { |yielder| ... } + * Enumerator.new(size = nil) {|yielder| ... } * - * Creates a new Enumerator object, which can be used as an - * Enumerable. + * Returns a new \Enumerator object that can be used for iteration. * - * Iteration is defined by the given block, in - * which a "yielder" object, given as block parameter, can be used to - * yield a value by calling the +yield+ method (aliased as <code><<</code>): + * The given block defines the iteration; + * it is called with a "yielder" object that can yield an object + * via a call to method <tt>yielder.yield</tt>: * - * fib = Enumerator.new do |y| - * a = b = 1 - * loop do - * y << a - * a, b = b, a + b + * fib = Enumerator.new do |yielder| + * n = next_n = 1 + * while true do + * yielder.yield(n) + * n, next_n = next_n, n + next_n * end * end * * fib.take(10) # => [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] * - * The optional parameter can be used to specify how to calculate the size - * in a lazy fashion (see Enumerator#size). It can either be a value or - * a callable object. + * Parameter +size+ specifies how the size is to be calculated (see #size); + * it can either be a value or a callable object: + * + * Enumerator.new{}.size # => nil + * Enumerator.new(42){}.size # => 42 + * Enumerator.new(-> {42}){}.size # => 42 + * */ static VALUE enumerator_initialize(int argc, VALUE *argv, VALUE obj) @@ -498,13 +515,13 @@ enumerator_init_copy(VALUE obj, VALUE orig) rb_raise(rb_eArgError, "unallocated enumerator"); } - ptr1->obj = ptr0->obj; + RB_OBJ_WRITE(obj, &ptr1->obj, ptr0->obj); ptr1->meth = ptr0->meth; - ptr1->args = ptr0->args; + RB_OBJ_WRITE(obj, &ptr1->args, ptr0->args); ptr1->fib = 0; ptr1->lookahead = Qundef; ptr1->feedvalue = Qundef; - ptr1->size = ptr0->size; + RB_OBJ_WRITE(obj, &ptr1->size, ptr0->size); ptr1->size_fn = ptr0->size_fn; return obj; @@ -519,8 +536,8 @@ rb_enumeratorize(VALUE obj, VALUE meth, int argc, const VALUE *argv) return rb_enumeratorize_with_size(obj, meth, argc, argv, 0); } -static VALUE -lazy_to_enum_i(VALUE self, VALUE meth, int argc, const VALUE *argv, rb_enumerator_size_func *size_fn, int kw_splat); +static VALUE lazy_to_enum_i(VALUE self, VALUE meth, int argc, const VALUE *argv, rb_enumerator_size_func *size_fn, int kw_splat); +static int lazy_precheck(VALUE procs); VALUE rb_enumeratorize_with_size_kw(VALUE obj, VALUE meth, int argc, const VALUE *argv, rb_enumerator_size_func *size_fn, int kw_splat) @@ -552,11 +569,17 @@ enumerator_block_call(VALUE obj, rb_block_call_func *func, VALUE arg) const struct enumerator *e = enumerator_ptr(obj); ID meth = e->meth; - if (e->args) { - argc = RARRAY_LENINT(e->args); - argv = RARRAY_CONST_PTR(e->args); + VALUE args = e->args; + if (args) { + argc = RARRAY_LENINT(args); + argv = RARRAY_CONST_PTR(args); } - return rb_block_call_kw(e->obj, meth, argc, argv, func, arg, e->kw_splat); + + VALUE ret = rb_block_call_kw(e->obj, meth, argc, argv, func, arg, e->kw_splat); + + RB_GC_GUARD(args); + + return ret; } /* @@ -598,9 +621,10 @@ enumerator_block_call(VALUE obj, rb_block_call_func *func, VALUE arg) static VALUE enumerator_each(int argc, VALUE *argv, VALUE obj) { + struct enumerator *e = enumerator_ptr(obj); + if (argc > 0) { - struct enumerator *e = enumerator_ptr(obj = rb_obj_dup(obj)); - VALUE args = e->args; + VALUE args = (e = enumerator_ptr(obj = rb_obj_dup(obj)))->args; if (args) { #if SIZEOF_INT < SIZEOF_LONG /* check int range overflow */ @@ -612,11 +636,14 @@ enumerator_each(int argc, VALUE *argv, VALUE obj) else { args = rb_ary_new4(argc, argv); } - e->args = args; + RB_OBJ_WRITE(obj, &e->args, args); e->size = Qnil; e->size_fn = 0; } if (!rb_block_given_p()) return obj; + + if (!lazy_precheck(e->procs)) return Qnil; + return enumerator_block_call(obj, 0, obj); } @@ -662,7 +689,7 @@ enumerator_with_index(int argc, VALUE *argv, VALUE obj) rb_check_arity(argc, 0, 1); RETURN_SIZED_ENUMERATOR(obj, argc, argv, enumerator_enum_size); memo = (!argc || NIL_P(memo = argv[0])) ? INT2FIX(0) : rb_to_int(memo); - return enumerator_block_call(obj, enumerator_with_index_i, (VALUE)MEMO_NEW(memo, 0, 0)); + return enumerator_block_call(obj, enumerator_with_index_i, (VALUE)rb_imemo_memo_new(memo, 0, 0)); } /* @@ -735,7 +762,7 @@ next_ii(RB_BLOCK_CALL_FUNC_ARGLIST(i, obj)) VALUE feedvalue = Qnil; VALUE args = rb_ary_new4(argc, argv); rb_fiber_yield(1, &args); - if (e->feedvalue != Qundef) { + if (!UNDEF_P(e->feedvalue)) { feedvalue = e->feedvalue; e->feedvalue = Qundef; } @@ -750,7 +777,7 @@ next_i(RB_BLOCK_CALL_FUNC_ARGLIST(_, obj)) VALUE result; result = rb_block_call(obj, id_each, 0, 0, next_ii, obj); - e->stop_exc = rb_exc_new2(rb_eStopIteration, "iteration reached an end"); + RB_OBJ_WRITE(obj, &e->stop_exc, rb_exc_new2(rb_eStopIteration, "iteration reached an end")); rb_ivar_set(e->stop_exc, id_result, result); return rb_fiber_yield(1, &nil); } @@ -759,8 +786,8 @@ static void next_init(VALUE obj, struct enumerator *e) { VALUE curr = rb_fiber_current(); - e->dst = curr; - e->fib = rb_fiber_new(next_i, obj); + RB_OBJ_WRITE(obj, &e->dst, curr); + RB_OBJ_WRITE(obj, &e->fib, rb_fiber_new(next_i, obj)); e->lookahead = Qundef; } @@ -769,8 +796,16 @@ get_next_values(VALUE obj, struct enumerator *e) { VALUE curr, vs; - if (e->stop_exc) - rb_exc_raise(e->stop_exc); + if (e->stop_exc) { + VALUE exc = e->stop_exc; + VALUE result = rb_attr_get(exc, id_result); + VALUE mesg = rb_attr_get(exc, idMesg); + if (!NIL_P(mesg)) mesg = rb_str_dup(mesg); + VALUE stop_exc = rb_exc_new_str(rb_eStopIteration, mesg); + rb_ivar_set(stop_exc, id_cause, exc); + rb_ivar_set(stop_exc, id_result, result); + rb_exc_raise(stop_exc); + } curr = rb_fiber_current(); @@ -840,7 +875,9 @@ enumerator_next_values(VALUE obj) struct enumerator *e = enumerator_ptr(obj); VALUE vs; - if (e->lookahead != Qundef) { + rb_check_frozen(obj); + + if (!UNDEF_P(e->lookahead)) { vs = e->lookahead; e->lookahead = Qundef; return vs; @@ -901,9 +938,12 @@ enumerator_peek_values(VALUE obj) { struct enumerator *e = enumerator_ptr(obj); - if (e->lookahead == Qundef) { - e->lookahead = get_next_values(obj, e); + rb_check_frozen(obj); + + if (UNDEF_P(e->lookahead)) { + RB_OBJ_WRITE(obj, &e->lookahead, get_next_values(obj, e)); } + return e->lookahead; } @@ -1025,10 +1065,12 @@ enumerator_feed(VALUE obj, VALUE v) { struct enumerator *e = enumerator_ptr(obj); - if (e->feedvalue != Qundef) { + rb_check_frozen(obj); + + if (!UNDEF_P(e->feedvalue)) { rb_raise(rb_eTypeError, "feed value already set"); } - e->feedvalue = v; + RB_OBJ_WRITE(obj, &e->feedvalue, v); return Qnil; } @@ -1047,6 +1089,8 @@ enumerator_rewind(VALUE obj) { struct enumerator *e = enumerator_ptr(obj); + rb_check_frozen(obj); + rb_check_funcall(e->obj, id_rewind, 0, 0); e->fib = 0; @@ -1059,6 +1103,7 @@ enumerator_rewind(VALUE obj) static struct generator *generator_ptr(VALUE obj); static VALUE append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args); +static VALUE append_method_args(VALUE obj, VALUE str, VALUE default_args); static VALUE inspect_enumerator(VALUE obj, VALUE dummy, int recur) @@ -1070,7 +1115,7 @@ inspect_enumerator(VALUE obj, VALUE dummy, int recur) cname = rb_obj_class(obj); - if (!e || e->obj == Qundef) { + if (!e || UNDEF_P(e->obj)) { return rb_sprintf("#<%"PRIsVALUE": uninitialized>", rb_class_path(cname)); } @@ -1131,7 +1176,7 @@ kwd_append(VALUE key, VALUE val, VALUE str) static VALUE append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args) { - VALUE method, eargs; + VALUE method; method = rb_attr_get(obj, id_method); if (method != Qfalse) { @@ -1145,6 +1190,13 @@ append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args) rb_str_buf_cat2(str, ":"); rb_str_buf_append(str, method); } + return append_method_args(obj, str, default_args); +} + +static VALUE +append_method_args(VALUE obj, VALUE str, VALUE default_args) +{ + VALUE eargs; eargs = rb_attr_get(obj, id_arguments); if (NIL_P(eargs)) { @@ -1174,10 +1226,11 @@ append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args) if (!NIL_P(kwds)) { rb_hash_foreach(kwds, kwd_append, str); } - rb_str_set_len(str, RSTRING_LEN(str)-2); + rb_str_set_len(str, RSTRING_LEN(str)-2); /* drop the last ", " */ rb_str_buf_cat2(str, ")"); } } + RB_GC_GUARD(eargs); return str; } @@ -1204,6 +1257,24 @@ enumerator_inspect(VALUE obj) * (1..100).to_a.permutation(4).size # => 94109400 * loop.size # => Float::INFINITY * (1..100).drop_while.size # => nil + * + * Note that enumerator size might be inaccurate, and should be rather treated as a hint. + * For example, there is no check that the size provided to ::new is accurate: + * + * e = Enumerator.new(5) { |y| 2.times { y << it} } + * e.size # => 5 + * e.to_a.size # => 2 + * + * Another example is an enumerator created by ::produce without a +size+ argument. + * Such enumerators return +Infinity+ for size, but this is inaccurate if the passed + * block raises StopIteration: + * + * e = Enumerator.produce(1) { it + 1 } + * e.size # => Infinity + * + * e = Enumerator.produce(1) { it > 3 ? raise(StopIteration) : it + 1 } + * e.size # => Infinity + * e.to_a.size # => 4 */ static VALUE @@ -1239,7 +1310,7 @@ enumerator_size(VALUE obj) argv = RARRAY_CONST_PTR(e->args); } size = rb_check_funcall_kw(e->size, id_call, argc, argv, e->kw_splat); - if (size != Qundef) return size; + if (!UNDEF_P(size)) return size; return e->size; } @@ -1247,36 +1318,21 @@ enumerator_size(VALUE obj) * Yielder */ static void -yielder_mark(void *p) -{ - struct yielder *ptr = p; - rb_gc_mark_movable(ptr->proc); -} - -static void -yielder_compact(void *p) +yielder_mark_and_move(void *p) { struct yielder *ptr = p; - ptr->proc = rb_gc_location(ptr->proc); -} - -#define yielder_free RUBY_TYPED_DEFAULT_FREE - -static size_t -yielder_memsize(const void *p) -{ - return sizeof(struct yielder); + rb_gc_mark_and_move(&ptr->proc); } static const rb_data_type_t yielder_data_type = { "yielder", { - yielder_mark, - yielder_free, - yielder_memsize, - yielder_compact, + yielder_mark_and_move, + RUBY_TYPED_DEFAULT_FREE, + NULL, + yielder_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; static struct yielder * @@ -1285,7 +1341,7 @@ yielder_ptr(VALUE obj) struct yielder *ptr; TypedData_Get_Struct(obj, struct yielder, &yielder_data_type, ptr); - if (!ptr || ptr->proc == Qundef) { + if (!ptr || UNDEF_P(ptr->proc)) { rb_raise(rb_eArgError, "uninitialized yielder"); } return ptr; @@ -1315,7 +1371,7 @@ yielder_init(VALUE obj, VALUE proc) rb_raise(rb_eArgError, "unallocated yielder"); } - ptr->proc = proc; + RB_OBJ_WRITE(obj, &ptr->proc, proc); return obj; } @@ -1385,38 +1441,22 @@ yielder_new(void) * Generator */ static void -generator_mark(void *p) +generator_mark_and_move(void *p) { struct generator *ptr = p; - rb_gc_mark_movable(ptr->proc); - rb_gc_mark_movable(ptr->obj); -} - -static void -generator_compact(void *p) -{ - struct generator *ptr = p; - ptr->proc = rb_gc_location(ptr->proc); - ptr->obj = rb_gc_location(ptr->obj); -} - -#define generator_free RUBY_TYPED_DEFAULT_FREE - -static size_t -generator_memsize(const void *p) -{ - return sizeof(struct generator); + rb_gc_mark_and_move(&ptr->proc); + rb_gc_mark_and_move(&ptr->obj); } static const rb_data_type_t generator_data_type = { "generator", { - generator_mark, - generator_free, - generator_memsize, - generator_compact, + generator_mark_and_move, + RUBY_TYPED_DEFAULT_FREE, + NULL, + generator_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; static struct generator * @@ -1425,7 +1465,7 @@ generator_ptr(VALUE obj) struct generator *ptr; TypedData_Get_Struct(obj, struct generator, &generator_data_type, ptr); - if (!ptr || ptr->proc == Qundef) { + if (!ptr || UNDEF_P(ptr->proc)) { rb_raise(rb_eArgError, "uninitialized generator"); } return ptr; @@ -1456,7 +1496,7 @@ generator_init(VALUE obj, VALUE proc) rb_raise(rb_eArgError, "unallocated generator"); } - ptr->proc = proc; + RB_OBJ_WRITE(obj, &ptr->proc, proc); return obj; } @@ -1504,7 +1544,7 @@ generator_init_copy(VALUE obj, VALUE orig) rb_raise(rb_eArgError, "unallocated generator"); } - ptr1->proc = ptr0->proc; + RB_OBJ_WRITE(obj, &ptr1->proc, ptr0->proc); return obj; } @@ -1529,7 +1569,7 @@ static VALUE enum_size(VALUE self) { VALUE r = rb_check_funcall(self, id_size, 0, 0); - return (r == Qundef) ? Qnil : r; + return UNDEF_P(r) ? Qnil : r; } static VALUE @@ -1562,7 +1602,7 @@ lazy_init_iterator(RB_BLOCK_CALL_FUNC_ARGLIST(val, m)) result = rb_yield_values2(len, nargv); ALLOCV_END(args); } - if (result == Qundef) rb_iter_break(); + if (UNDEF_P(result)) rb_iter_break(); return Qnil; } @@ -1585,6 +1625,11 @@ lazy_init_block_i(RB_BLOCK_CALL_FUNC_ARGLIST(val, m)) #define LAZY_MEMO_SET_PACKED(memo) ((memo)->memo_flags |= LAZY_MEMO_PACKED) #define LAZY_MEMO_RESET_PACKED(memo) ((memo)->memo_flags &= ~LAZY_MEMO_PACKED) +#define LAZY_NEED_BLOCK(func) \ + if (!rb_block_given_p()) { \ + rb_raise(rb_eArgError, "tried to call lazy " #func " without a block"); \ + } + static VALUE lazy_yielder_result(struct MEMO *result, VALUE yielder, VALUE procs_array, VALUE memos, long i); static VALUE @@ -1595,7 +1640,7 @@ lazy_init_yielder(RB_BLOCK_CALL_FUNC_ARGLIST(_, m)) VALUE memos = rb_attr_get(yielder, id_memo); struct MEMO *result; - result = MEMO_NEW(m, rb_enum_values_pack(argc, argv), + result = rb_imemo_memo_new(m, rb_enum_values_pack(argc, argv), argc > 1 ? LAZY_MEMO_PACKED : 0); return lazy_yielder_result(result, yielder, procs_array, memos, 0); } @@ -1671,11 +1716,27 @@ lazy_generator_init(VALUE enumerator, VALUE procs) lazy_init_block, rb_ary_new3(2, obj, procs)); gen_ptr = generator_ptr(generator); - gen_ptr->obj = obj; + RB_OBJ_WRITE(generator, &gen_ptr->obj, obj); return generator; } +static int +lazy_precheck(VALUE procs) +{ + if (RTEST(procs)) { + long num_procs = RARRAY_LEN(procs), i = num_procs; + while (i-- > 0) { + VALUE proc = RARRAY_AREF(procs, i); + struct proc_entry *entry = proc_entry_ptr(proc); + lazyenum_precheck_func *precheck = entry->fn->precheck; + if (precheck && !precheck(proc)) return FALSE; + } + } + + return TRUE; +} + /* * Document-class: Enumerator::Lazy * @@ -1772,9 +1833,7 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) VALUE generator; rb_check_arity(argc, 1, 2); - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy new without a block"); - } + LAZY_NEED_BLOCK(new); obj = argv[0]; if (argc > 1) { size = argv[1]; @@ -1840,10 +1899,10 @@ lazy_add_method(VALUE obj, int argc, VALUE *argv, VALUE args, VALUE memo, VALUE entry_obj = TypedData_Make_Struct(rb_cObject, struct proc_entry, &proc_entry_data_type, entry); if (rb_block_given_p()) { - entry->proc = rb_block_proc(); + RB_OBJ_WRITE(entry_obj, &entry->proc, rb_block_proc()); } entry->fn = fn; - entry->memo = args; + RB_OBJ_WRITE(entry_obj, &entry->memo, args); lazy_set_args(entry_obj, memo); @@ -1852,9 +1911,9 @@ lazy_add_method(VALUE obj, int argc, VALUE *argv, VALUE args, VALUE memo, rb_ary_push(new_procs, entry_obj); new_obj = enumerator_init_copy(enumerator_allocate(rb_cLazy), obj); - new_e = DATA_PTR(new_obj); - new_e->obj = new_generator; - new_e->procs = new_procs; + new_e = RTYPEDDATA_GET_DATA(new_obj); + RB_OBJ_WRITE(new_obj, &new_e->obj, new_generator); + RB_OBJ_WRITE(new_obj, &new_e->procs, new_procs); if (argc > 0) { new_e->meth = rb_to_id(*argv++); @@ -1863,7 +1922,9 @@ lazy_add_method(VALUE obj, int argc, VALUE *argv, VALUE args, VALUE memo, else { new_e->meth = id_each; } - new_e->args = rb_ary_new4(argc, argv); + + RB_OBJ_WRITE(new_obj, &new_e->args, rb_ary_new4(argc, argv)); + return new_obj; } @@ -1949,7 +2010,7 @@ lazy_to_enum(int argc, VALUE *argv, VALUE self) } lazy = lazy_to_enum_i(self, meth, argc, argv, 0, rb_keyword_given_p()); if (rb_block_given_p()) { - enumerator_ptr(lazy)->size = rb_block_proc(); + RB_OBJ_WRITE(lazy, &enumerator_ptr(lazy)->size, rb_block_proc()); } return lazy; } @@ -2030,10 +2091,7 @@ static const lazyenum_funcs lazy_map_funcs = { static VALUE lazy_map(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy map without a block"); - } - + LAZY_NEED_BLOCK(map); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_map_funcs); } @@ -2115,10 +2173,7 @@ static const lazyenum_funcs lazy_flat_map_funcs = { static VALUE lazy_flat_map(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy flat_map without a block"); - } - + LAZY_NEED_BLOCK(flat_map); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_flat_map_funcs); } @@ -2145,10 +2200,7 @@ static const lazyenum_funcs lazy_select_funcs = { static VALUE lazy_select(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy select without a block"); - } - + LAZY_NEED_BLOCK(select); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_select_funcs); } @@ -2179,10 +2231,7 @@ static const lazyenum_funcs lazy_filter_map_funcs = { static VALUE lazy_filter_map(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy filter_map without a block"); - } - + LAZY_NEED_BLOCK(filter_map); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_filter_map_funcs); } @@ -2208,10 +2257,7 @@ static const lazyenum_funcs lazy_reject_funcs = { static VALUE lazy_reject(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy reject without a block"); - } - + LAZY_NEED_BLOCK(reject); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_reject_funcs); } @@ -2335,7 +2381,6 @@ lazy_zip_arrays_func(VALUE proc_entry, struct MEMO *result, VALUE memos, long me rb_ary_push(ary, rb_ary_entry(RARRAY_AREF(arrays, i), count)); } LAZY_MEMO_SET_VALUE(result, ary); - LAZY_MEMO_SET_PACKED(result); rb_ary_store(memos, memo_index, LONG2NUM(++count)); return result; } @@ -2365,7 +2410,6 @@ lazy_zip_func(VALUE proc_entry, struct MEMO *result, VALUE memos, long memo_inde rb_ary_push(ary, v); } LAZY_MEMO_SET_VALUE(result, ary); - LAZY_MEMO_SET_PACKED(result); return result; } @@ -2425,13 +2469,8 @@ lazy_take_proc(VALUE proc_entry, struct MEMO *result, VALUE memos, long memo_ind } remain = NUM2LONG(memo); - if (remain == 0) { - LAZY_MEMO_SET_BREAK(result); - } - else { - if (--remain == 0) LAZY_MEMO_SET_BREAK(result); - rb_ary_store(memos, memo_index, LONG2NUM(remain)); - } + if (--remain == 0) LAZY_MEMO_SET_BREAK(result); + rb_ary_store(memos, memo_index, LONG2NUM(remain)); return result; } @@ -2444,8 +2483,15 @@ lazy_take_size(VALUE entry, VALUE receiver) return LONG2NUM(len); } +static int +lazy_take_precheck(VALUE proc_entry) +{ + struct proc_entry *entry = proc_entry_ptr(proc_entry); + return entry->memo != INT2FIX(0); +} + static const lazyenum_funcs lazy_take_funcs = { - lazy_take_proc, lazy_take_size, + lazy_take_proc, lazy_take_size, lazy_take_precheck, }; /* @@ -2459,20 +2505,14 @@ static VALUE lazy_take(VALUE obj, VALUE n) { long len = NUM2LONG(n); - int argc = 0; - VALUE argv[2]; if (len < 0) { rb_raise(rb_eArgError, "attempt to take negative size"); } - if (len == 0) { - argv[0] = sym_cycle; - argv[1] = INT2NUM(0); - argc = 2; - } + n = LONG2NUM(len); /* no more conversion */ - return lazy_add_method(obj, argc, argv, n, rb_ary_new3(1, n), &lazy_take_funcs); + return lazy_add_method(obj, 0, 0, n, rb_ary_new3(1, n), &lazy_take_funcs); } static struct MEMO * @@ -2500,10 +2540,7 @@ static const lazyenum_funcs lazy_take_while_funcs = { static VALUE lazy_take_while(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy take_while without a block"); - } - + LAZY_NEED_BLOCK(take_while); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_take_while_funcs); } @@ -2598,10 +2635,7 @@ static const lazyenum_funcs lazy_drop_while_funcs = { static VALUE lazy_drop_while(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy drop_while without a block"); - } - + LAZY_NEED_BLOCK(drop_while); return lazy_add_method(obj, 0, 0, Qfalse, Qnil, &lazy_drop_while_funcs); } @@ -2746,6 +2780,52 @@ lazy_with_index(int argc, VALUE *argv, VALUE obj) return lazy_add_method(obj, 0, 0, memo, rb_ary_new_from_values(1, &memo), &lazy_with_index_funcs); } +static struct MEMO * +lazy_tap_each_proc(VALUE proc_entry, struct MEMO *result, VALUE memos, long memo_index) +{ + struct proc_entry *entry = proc_entry_ptr(proc_entry); + + rb_proc_call_with_block(entry->proc, 1, &result->memo_value, Qnil); + + return result; +} + +static const lazyenum_funcs lazy_tap_each_funcs = { + lazy_tap_each_proc, 0, +}; + +/* + * call-seq: + * lazy.tap_each { |item| ... } -> lazy_enumerator + * + * Passes each element through to the block for side effects only, + * without modifying the element or affecting the enumeration. + * Returns a new lazy enumerator. + * + * This is useful for debugging or logging inside lazy chains, + * without breaking laziness or misusing +map+. + * + * (1..).lazy + * .tap_each { |x| puts "got #{x}" } + * .select(&:even?) + * .first(3) + * # prints: got 1, got 2, ..., got 6 + * # returns: [2, 4, 6] + * + * Similar in intent to Java's Stream#peek. + */ + +static VALUE +lazy_tap_each(VALUE obj) +{ + if (!rb_block_given_p()) + { + rb_raise(rb_eArgError, "tried to call lazy tap_each without a block"); + } + + return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_tap_each_funcs); +} + #if 0 /* for RDoc */ /* @@ -2883,19 +2963,12 @@ stop_result(VALUE self) */ static void -producer_mark(void *p) -{ - struct producer *ptr = p; - rb_gc_mark_movable(ptr->init); - rb_gc_mark_movable(ptr->proc); -} - -static void -producer_compact(void *p) +producer_mark_and_move(void *p) { struct producer *ptr = p; - ptr->init = rb_gc_location(ptr->init); - ptr->proc = rb_gc_location(ptr->proc); + rb_gc_mark_and_move(&ptr->init); + rb_gc_mark_and_move(&ptr->proc); + rb_gc_mark_and_move(&ptr->size); } #define producer_free RUBY_TYPED_DEFAULT_FREE @@ -2909,12 +2982,12 @@ producer_memsize(const void *p) static const rb_data_type_t producer_data_type = { "producer", { - producer_mark, + producer_mark_and_move, producer_free, producer_memsize, - producer_compact, + producer_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; static struct producer * @@ -2923,7 +2996,7 @@ producer_ptr(VALUE obj) struct producer *ptr; TypedData_Get_Struct(obj, struct producer, &producer_data_type, ptr); - if (!ptr || ptr->proc == Qundef) { + if (!ptr || UNDEF_P(ptr->proc)) { rb_raise(rb_eArgError, "uninitialized producer"); } return ptr; @@ -2939,12 +3012,13 @@ producer_allocate(VALUE klass) obj = TypedData_Make_Struct(klass, struct producer, &producer_data_type, ptr); ptr->init = Qundef; ptr->proc = Qundef; + ptr->size = Qnil; return obj; } static VALUE -producer_init(VALUE obj, VALUE init, VALUE proc) +producer_init(VALUE obj, VALUE init, VALUE proc, VALUE size) { struct producer *ptr; @@ -2954,8 +3028,9 @@ producer_init(VALUE obj, VALUE init, VALUE proc) rb_raise(rb_eArgError, "unallocated producer"); } - ptr->init = init; - ptr->proc = proc; + RB_OBJ_WRITE(obj, &ptr->init, init); + RB_OBJ_WRITE(obj, &ptr->proc, proc); + RB_OBJ_WRITE(obj, &ptr->size, size); return obj; } @@ -2978,7 +3053,7 @@ producer_each_i(VALUE obj) init = ptr->init; proc = ptr->proc; - if (init == Qundef) { + if (UNDEF_P(init)) { curr = Qnil; } else { @@ -3006,12 +3081,18 @@ producer_each(VALUE obj) static VALUE producer_size(VALUE obj, VALUE args, VALUE eobj) { - return DBL2NUM(HUGE_VAL); + struct producer *ptr = producer_ptr(obj); + VALUE size = ptr->size; + + if (NIL_P(size)) return Qnil; + if (RB_INTEGER_TYPE_P(size) || RB_FLOAT_TYPE_P(size)) return size; + + return rb_funcall(size, id_call, 0); } /* * call-seq: - * Enumerator.produce(initial = nil) { |prev| block } -> enumerator + * Enumerator.produce(initial = nil, size: nil) { |prev| block } -> enumerator * * Creates an infinite enumerator from any block, just called over and * over. The result of the previous iteration is passed to the next one. @@ -3043,19 +3124,50 @@ producer_size(VALUE obj, VALUE args, VALUE eobj) * PATTERN = %r{\d+|[-/+*]} * Enumerator.produce { scanner.scan(PATTERN) }.slice_after { scanner.eos? }.first * # => ["7", "+", "38", "/", "6"] + * + * The optional +size+ keyword argument specifies the size of the enumerator, + * which can be retrieved by Enumerator#size. It can be an integer, + * +Float::INFINITY+, a callable object (such as a lambda), or +nil+ to + * indicate unknown size. When not specified, the size defaults to + * +Float::INFINITY+. + * + * # Infinite enumerator + * enum = Enumerator.produce(1, size: Float::INFINITY, &:succ) + * enum.size # => Float::INFINITY + * + * # Finite enumerator with known/computable size + * abs_dir = File.expand_path("./baz") # => "/foo/bar/baz" + * traverser = Enumerator.produce(abs_dir, size: -> { abs_dir.count("/") + 1 }) { + * raise StopIteration if it == "/" + * File.dirname(it) + * } + * traverser.size # => 4 + * + * # Finite enumerator with unknown size + * calendar = Enumerator.produce(Date.today, size: nil) { + * it.monday? ? raise(StopIteration) : it + 1 + * } + * calendar.size # => nil */ static VALUE enumerator_s_produce(int argc, VALUE *argv, VALUE klass) { - VALUE init, producer; + VALUE init, producer, opts, size; + ID keyword_ids[1]; if (!rb_block_given_p()) rb_raise(rb_eArgError, "no block given"); - if (rb_scan_args(argc, argv, "01", &init) == 0) { + keyword_ids[0] = rb_intern("size"); + rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "01:", &init, &opts); + rb_get_kwargs(opts, keyword_ids, 0, 1, &size); + + size = UNDEF_P(size) ? DBL2NUM(HUGE_VAL) : convert_to_feasible_size_value(size); + + if (argc == 0 || (argc == 1 && !NIL_P(opts))) { init = Qundef; } - producer = producer_init(producer_allocate(rb_cEnumProducer), init, rb_block_proc()); + producer = producer_init(producer_allocate(rb_cEnumProducer), init, rb_block_proc(), size); return rb_enumeratorize_with_size_kw(producer, sym_each, 0, 0, producer_size, RB_NO_KEYWORDS); } @@ -3071,17 +3183,10 @@ enumerator_s_produce(int argc, VALUE *argv, VALUE klass) */ static void -enum_chain_mark(void *p) +enum_chain_mark_and_move(void *p) { struct enum_chain *ptr = p; - rb_gc_mark_movable(ptr->enums); -} - -static void -enum_chain_compact(void *p) -{ - struct enum_chain *ptr = p; - ptr->enums = rb_gc_location(ptr->enums); + rb_gc_mark_and_move(&ptr->enums); } #define enum_chain_free RUBY_TYPED_DEFAULT_FREE @@ -3095,12 +3200,12 @@ enum_chain_memsize(const void *p) static const rb_data_type_t enum_chain_data_type = { "chain", { - enum_chain_mark, + enum_chain_mark_and_move, enum_chain_free, enum_chain_memsize, - enum_chain_compact, + enum_chain_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; static struct enum_chain * @@ -3109,7 +3214,7 @@ enum_chain_ptr(VALUE obj) struct enum_chain *ptr; TypedData_Get_Struct(obj, struct enum_chain, &enum_chain_data_type, ptr); - if (!ptr || ptr->enums == Qundef) { + if (!ptr || UNDEF_P(ptr->enums)) { rb_raise(rb_eArgError, "uninitialized chain"); } return ptr; @@ -3150,7 +3255,7 @@ enum_chain_initialize(VALUE obj, VALUE enums) if (!ptr) rb_raise(rb_eArgError, "unallocated chain"); - ptr->enums = rb_obj_freeze(enums); + RB_OBJ_WRITE(obj, &ptr->enums, rb_ary_freeze(enums)); ptr->pos = -1; return obj; @@ -3184,7 +3289,7 @@ enum_chain_init_copy(VALUE obj, VALUE orig) if (!ptr1) rb_raise(rb_eArgError, "unallocated chain"); - ptr1->enums = ptr0->enums; + RB_OBJ_WRITE(obj, &ptr1->enums, ptr0->enums); ptr1->pos = ptr0->pos; return obj; @@ -3302,7 +3407,7 @@ inspect_enum_chain(VALUE obj, VALUE dummy, int recur) TypedData_Get_Struct(obj, struct enum_chain, &enum_chain_data_type, ptr); - if (!ptr || ptr->enums == Qundef) { + if (!ptr || UNDEF_P(ptr->enums)) { return rb_sprintf("#<%"PRIsVALUE": uninitialized>", rb_class_path(klass)); } @@ -3384,7 +3489,7 @@ enumerator_plus(VALUE obj, VALUE eobj) * * The method used against each enumerable object is `each_entry` * instead of `each` so that the product of N enumerable objects - * yields exactly N arguments in each iteration. + * yields an array of exactly N elements in each iteration. * * When no enumerator is given, it calls a given block once yielding * an empty argument list. @@ -3393,17 +3498,10 @@ enumerator_plus(VALUE obj, VALUE eobj) */ static void -enum_product_mark(void *p) -{ - struct enum_product *ptr = p; - rb_gc_mark_movable(ptr->enums); -} - -static void -enum_product_compact(void *p) +enum_product_mark_and_move(void *p) { struct enum_product *ptr = p; - ptr->enums = rb_gc_location(ptr->enums); + rb_gc_mark_and_move(&ptr->enums); } #define enum_product_free RUBY_TYPED_DEFAULT_FREE @@ -3417,12 +3515,12 @@ enum_product_memsize(const void *p) static const rb_data_type_t enum_product_data_type = { "product", { - enum_product_mark, + enum_product_mark_and_move, enum_product_free, enum_product_memsize, - enum_product_compact, + enum_product_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; static struct enum_product * @@ -3431,7 +3529,7 @@ enum_product_ptr(VALUE obj) struct enum_product *ptr; TypedData_Get_Struct(obj, struct enum_product, &enum_product_data_type, ptr); - if (!ptr || ptr->enums == Qundef) { + if (!ptr || UNDEF_P(ptr->enums)) { rb_raise(rb_eArgError, "uninitialized product"); } return ptr; @@ -3462,16 +3560,23 @@ enum_product_allocate(VALUE klass) * e.size #=> 6 */ static VALUE -enum_product_initialize(VALUE obj, VALUE enums) +enum_product_initialize(int argc, VALUE *argv, VALUE obj) { struct enum_product *ptr; + VALUE enums = Qnil, options = Qnil; + + rb_scan_args(argc, argv, "*:", &enums, &options); + + if (!NIL_P(options) && !RHASH_EMPTY_P(options)) { + rb_exc_raise(rb_keyword_error_new("unknown", rb_hash_keys(options))); + } rb_check_frozen(obj); TypedData_Get_Struct(obj, struct enum_product, &enum_product_data_type, ptr); if (!ptr) rb_raise(rb_eArgError, "unallocated product"); - ptr->enums = rb_obj_freeze(enums); + RB_OBJ_WRITE(obj, &ptr->enums, rb_ary_freeze(enums)); return obj; } @@ -3489,7 +3594,7 @@ enum_product_init_copy(VALUE obj, VALUE orig) if (!ptr1) rb_raise(rb_eArgError, "unallocated product"); - ptr1->enums = ptr0->enums; + RB_OBJ_WRITE(obj, &ptr1->enums, ptr0->enums); return obj; } @@ -3498,10 +3603,19 @@ static VALUE enum_product_total_size(VALUE enums) { VALUE total = INT2FIX(1); + VALUE sizes = rb_ary_hidden_new(RARRAY_LEN(enums)); long i; for (i = 0; i < RARRAY_LEN(enums); i++) { VALUE size = enum_size(RARRAY_AREF(enums, i)); + if (size == INT2FIX(0)) { + rb_ary_resize(sizes, 0); + return size; + } + rb_ary_push(sizes, size); + } + for (i = 0; i < RARRAY_LEN(sizes); i++) { + VALUE size = RARRAY_AREF(sizes, i); if (NIL_P(size) || (RB_TYPE_P(size, T_FLOAT) && isinf(NUM2DBL(size)))) { return size; @@ -3540,9 +3654,9 @@ enum_product_enum_size(VALUE obj, VALUE args, VALUE eobj) struct product_state { VALUE obj; VALUE block; + int index; int argc; VALUE *argv; - int index; }; static VALUE product_each(VALUE, struct product_state *); @@ -3570,7 +3684,7 @@ product_each(VALUE obj, struct product_state *pstate) rb_block_call(eobj, id_each_entry, 0, NULL, product_each_i, (VALUE)pstate); } else { - rb_funcallv(pstate->block, id_call, pstate->argc, pstate->argv); + rb_funcall(pstate->block, id_call, 1, rb_ary_new_from_values(pstate->argc, pstate->argv)); } return obj; @@ -3581,15 +3695,23 @@ enum_product_run(VALUE obj, VALUE block) { struct enum_product *ptr = enum_product_ptr(obj); int argc = RARRAY_LENINT(ptr->enums); + if (argc == 0) { /* no need to allocate state.argv */ + rb_funcall(block, id_call, 1, rb_ary_new()); + return obj; + } + + VALUE argsbuf = 0; struct product_state state = { .obj = obj, .block = block, .index = 0, .argc = argc, - .argv = ALLOCA_N(VALUE, argc), + .argv = ALLOCV_N(VALUE, argsbuf, argc), }; - return product_each(obj, &state); + VALUE ret = product_each(obj, &state); + ALLOCV_END(argsbuf); + return ret; } /* @@ -3642,7 +3764,7 @@ inspect_enum_product(VALUE obj, VALUE dummy, int recur) TypedData_Get_Struct(obj, struct enum_product, &enum_product_data_type, ptr); - if (!ptr || ptr->enums == Qundef) { + if (!ptr || UNDEF_P(ptr->enums)) { return rb_sprintf("#<%"PRIsVALUE": uninitialized>", rb_class_path(klass)); } @@ -3668,6 +3790,7 @@ enum_product_inspect(VALUE obj) /* * call-seq: * Enumerator.product(*enums) -> enumerator + * Enumerator.product(*enums) { |elts| ... } -> enumerator * * Generates a new enumerator object that generates a Cartesian * product of given enumerable objects. This is equivalent to @@ -3676,18 +3799,78 @@ enum_product_inspect(VALUE obj) * e = Enumerator.product(1..3, [4, 5]) * e.to_a #=> [[1, 4], [1, 5], [2, 4], [2, 5], [3, 4], [3, 5]] * e.size #=> 6 + * + * When a block is given, calls the block with each N-element array + * generated and returns +nil+. */ static VALUE -enumerator_s_product(VALUE klass, VALUE enums) +enumerator_s_product(int argc, VALUE *argv, VALUE klass) { - VALUE obj = enum_product_initialize(enum_product_allocate(rb_cEnumProduct), enums); + VALUE enums = Qnil, options = Qnil, block = Qnil; - if (rb_block_given_p()) { - return enum_product_run(obj, rb_block_proc()); + rb_scan_args(argc, argv, "*:&", &enums, &options, &block); + + if (!NIL_P(options) && !RHASH_EMPTY_P(options)) { + rb_exc_raise(rb_keyword_error_new("unknown", rb_hash_keys(options))); } - else { - return obj; + + VALUE obj = enum_product_initialize(argc, argv, enum_product_allocate(rb_cEnumProduct)); + + if (!NIL_P(block)) { + enum_product_run(obj, block); + return Qnil; } + + return obj; +} + +struct arith_seq { + struct enumerator enumerator; + VALUE begin; + VALUE end; + VALUE step; + bool exclude_end; +}; + +RUBY_REFERENCES(arith_seq_refs) = { + RUBY_REF_EDGE(struct enumerator, obj), + RUBY_REF_EDGE(struct enumerator, args), + RUBY_REF_EDGE(struct enumerator, fib), + RUBY_REF_EDGE(struct enumerator, dst), + RUBY_REF_EDGE(struct enumerator, lookahead), + RUBY_REF_EDGE(struct enumerator, feedvalue), + RUBY_REF_EDGE(struct enumerator, stop_exc), + RUBY_REF_EDGE(struct enumerator, size), + RUBY_REF_EDGE(struct enumerator, procs), + + RUBY_REF_EDGE(struct arith_seq, begin), + RUBY_REF_EDGE(struct arith_seq, end), + RUBY_REF_EDGE(struct arith_seq, step), + RUBY_REF_END +}; + +static const rb_data_type_t arith_seq_data_type = { + "arithmetic_sequence", + { + RUBY_REFS_LIST_PTR(arith_seq_refs), + RUBY_TYPED_DEFAULT_FREE, + NULL, // Nothing allocated externally, so don't need a memsize function + NULL, + }, + .parent = &enumerator_data_type, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE +}; + +static VALUE +arith_seq_allocate(VALUE klass) +{ + struct arith_seq *ptr; + VALUE enum_obj; + + enum_obj = TypedData_Make_Struct(klass, struct arith_seq, &arith_seq_data_type, ptr); + ptr->enumerator.obj = Qundef; + + return enum_obj; } /* @@ -3707,12 +3890,16 @@ rb_arith_seq_new(VALUE obj, VALUE meth, int argc, VALUE const *argv, rb_enumerator_size_func *size_fn, VALUE beg, VALUE end, VALUE step, int excl) { - VALUE aseq = enumerator_init(enumerator_allocate(rb_cArithSeq), + VALUE aseq = enumerator_init(arith_seq_allocate(rb_cArithSeq), obj, meth, argc, argv, size_fn, Qnil, rb_keyword_given_p()); - rb_ivar_set(aseq, id_begin, beg); - rb_ivar_set(aseq, id_end, end); - rb_ivar_set(aseq, id_step, step); - rb_ivar_set(aseq, id_exclude_end, RBOOL(excl)); + struct arith_seq *ptr; + TypedData_Get_Struct(aseq, struct arith_seq, &enumerator_data_type, ptr); + + RB_OBJ_WRITE(aseq, &ptr->begin, beg); + RB_OBJ_WRITE(aseq, &ptr->end, end); + RB_OBJ_WRITE(aseq, &ptr->step, step); + ptr->exclude_end = excl; + return aseq; } @@ -3725,7 +3912,9 @@ rb_arith_seq_new(VALUE obj, VALUE meth, int argc, VALUE const *argv, static inline VALUE arith_seq_begin(VALUE self) { - return rb_ivar_get(self, id_begin); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return ptr->begin; } /* @@ -3736,7 +3925,9 @@ arith_seq_begin(VALUE self) static inline VALUE arith_seq_end(VALUE self) { - return rb_ivar_get(self, id_end); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return ptr->end; } /* @@ -3748,7 +3939,9 @@ arith_seq_end(VALUE self) static inline VALUE arith_seq_step(VALUE self) { - return rb_ivar_get(self, id_step); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return ptr->step; } /* @@ -3759,13 +3952,17 @@ arith_seq_step(VALUE self) static inline VALUE arith_seq_exclude_end(VALUE self) { - return rb_ivar_get(self, id_exclude_end); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return RBOOL(ptr->exclude_end); } static inline int arith_seq_exclude_end_p(VALUE self) { - return RTEST(arith_seq_exclude_end(self)); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return ptr->exclude_end; } int @@ -3832,46 +4029,14 @@ rb_arithmetic_sequence_beg_len_step(VALUE obj, long *begp, long *lenp, long *ste return Qnil; } -/* - * call-seq: - * aseq.first -> num or nil - * aseq.first(n) -> an_array - * - * Returns the first number in this arithmetic sequence, - * or an array of the first +n+ elements. - */ static VALUE -arith_seq_first(int argc, VALUE *argv, VALUE self) +arith_seq_take(VALUE self, VALUE num) { VALUE b, e, s, ary; long n; int x; - rb_check_arity(argc, 0, 1); - - b = arith_seq_begin(self); - e = arith_seq_end(self); - s = arith_seq_step(self); - if (argc == 0) { - if (NIL_P(b)) { - return Qnil; - } - if (!NIL_P(e)) { - VALUE zero = INT2FIX(0); - int r = rb_cmpint(rb_num_coerce_cmp(s, zero, idCmp), s, zero); - if (r > 0 && RTEST(rb_funcall(b, '>', 1, e))) { - return Qnil; - } - if (r < 0 && RTEST(rb_funcall(b, '<', 1, e))) { - return Qnil; - } - } - return b; - } - - // TODO: the following code should be extracted as arith_seq_take - - n = NUM2LONG(argv[0]); + n = NUM2LONG(num); if (n < 0) { rb_raise(rb_eArgError, "attempt to take negative size"); } @@ -3879,6 +4044,9 @@ arith_seq_first(int argc, VALUE *argv, VALUE self) return rb_ary_new_capa(0); } + b = arith_seq_begin(self); + e = arith_seq_end(self); + s = arith_seq_step(self); x = arith_seq_exclude_end_p(self); if (FIXNUM_P(b) && NIL_P(e) && FIXNUM_P(s)) { @@ -3913,7 +4081,7 @@ arith_seq_first(int argc, VALUE *argv, VALUE self) ary = rb_ary_new_capa((n < len) ? n : len); while (n > 0 && i < end) { rb_ary_push(ary, LONG2FIX(i)); - if (i + unit < i) break; + if (i > LONG_MAX - unit) break; i += unit; --n; } @@ -3926,7 +4094,7 @@ arith_seq_first(int argc, VALUE *argv, VALUE self) ary = rb_ary_new_capa((n < len) ? n : len); while (n > 0 && i > end) { rb_ary_push(ary, LONG2FIX(i)); - if (i + unit > i) break; + if (i < LONG_MIN - unit) break; i += unit; --n; } @@ -3973,7 +4141,49 @@ arith_seq_first(int argc, VALUE *argv, VALUE self) return ary; } - return rb_call_super(argc, argv); + { + VALUE argv[1]; + argv[0] = num; + return rb_call_super(1, argv); + } +} + +/* + * call-seq: + * aseq.first -> num or nil + * aseq.first(n) -> an_array + * + * Returns the first number in this arithmetic sequence, + * or an array of the first +n+ elements. + */ +static VALUE +arith_seq_first(int argc, VALUE *argv, VALUE self) +{ + VALUE b, e, s; + + rb_check_arity(argc, 0, 1); + + b = arith_seq_begin(self); + e = arith_seq_end(self); + s = arith_seq_step(self); + if (argc == 0) { + if (NIL_P(b)) { + return Qnil; + } + if (!NIL_P(e)) { + VALUE zero = INT2FIX(0); + int r = rb_cmpint(rb_num_coerce_cmp(s, zero, idCmp), s, zero); + if (r > 0 && RTEST(rb_funcall(b, '>', 1, e))) { + return Qnil; + } + if (r < 0 && RTEST(rb_funcall(b, '<', 1, e))) { + return Qnil; + } + } + return b; + } + + return arith_seq_take(self, argv[0]); } static inline VALUE @@ -4139,7 +4349,7 @@ static VALUE arith_seq_inspect(VALUE self) { struct enumerator *e; - VALUE eobj, str, eargs; + VALUE eobj, str; int range_p; TypedData_Get_Struct(self, struct enumerator, &enumerator_data_type, e); @@ -4153,39 +4363,7 @@ arith_seq_inspect(VALUE self) str = rb_sprintf("(%s%"PRIsVALUE"%s.", range_p ? "(" : "", eobj, range_p ? ")" : ""); rb_str_buf_append(str, rb_id2str(e->meth)); - - eargs = rb_attr_get(eobj, id_arguments); - if (NIL_P(eargs)) { - eargs = e->args; - } - if (eargs != Qfalse) { - long argc = RARRAY_LEN(eargs); - const VALUE *argv = RARRAY_CONST_PTR(eargs); /* WB: no new reference */ - - if (argc > 0) { - VALUE kwds = Qnil; - - rb_str_buf_cat2(str, "("); - - if (RB_TYPE_P(argv[argc-1], T_HASH)) { - int all_key = TRUE; - rb_hash_foreach(argv[argc-1], key_symbol_p, (VALUE)&all_key); - if (all_key) kwds = argv[--argc]; - } - - while (argc--) { - VALUE arg = *argv++; - - rb_str_append(str, rb_inspect(arg)); - rb_str_buf_cat2(str, ", "); - } - if (!NIL_P(kwds)) { - rb_hash_foreach(kwds, kwd_append, str); - } - rb_str_set_len(str, RSTRING_LEN(str)-2); /* drop the last ", " */ - rb_str_buf_cat2(str, ")"); - } - } + append_method_args(eobj, str, e->args); rb_str_buf_cat2(str, ")"); @@ -4491,6 +4669,7 @@ InitVM_Enumerator(void) rb_define_method(rb_cLazy, "uniq", lazy_uniq, 0); rb_define_method(rb_cLazy, "compact", lazy_compact, 0); rb_define_method(rb_cLazy, "with_index", lazy_with_index, -1); + rb_define_method(rb_cLazy, "tap_each", lazy_tap_each, 0); lazy_use_super_method = rb_hash_new_with_size(18); rb_hash_aset(lazy_use_super_method, sym("map"), sym("_enumerable_map")); @@ -4512,7 +4691,7 @@ InitVM_Enumerator(void) rb_hash_aset(lazy_use_super_method, sym("uniq"), sym("_enumerable_uniq")); rb_hash_aset(lazy_use_super_method, sym("with_index"), sym("_enumerable_with_index")); rb_obj_freeze(lazy_use_super_method); - rb_gc_register_mark_object(lazy_use_super_method); + rb_vm_register_global_object(lazy_use_super_method); #if 0 /* for RDoc */ rb_define_method(rb_cLazy, "to_a", lazy_to_a, 0); @@ -4527,7 +4706,7 @@ InitVM_Enumerator(void) rb_eStopIteration = rb_define_class("StopIteration", rb_eIndexError); rb_define_method(rb_eStopIteration, "result", stop_result, 0); - /* Generator */ + /* :nodoc: Generator */ rb_cGenerator = rb_define_class_under(rb_cEnumerator, "Generator", rb_cObject); rb_include_module(rb_cGenerator, rb_mEnumerable); rb_define_alloc_func(rb_cGenerator, generator_allocate); @@ -4535,7 +4714,7 @@ InitVM_Enumerator(void) rb_define_method(rb_cGenerator, "initialize_copy", generator_init_copy, 1); rb_define_method(rb_cGenerator, "each", generator_each, -1); - /* Yielder */ + /* :nodoc: Yielder */ rb_cYielder = rb_define_class_under(rb_cEnumerator, "Yielder", rb_cObject); rb_define_alloc_func(rb_cYielder, yielder_allocate); rb_define_method(rb_cYielder, "initialize", yielder_initialize, 0); @@ -4543,7 +4722,7 @@ InitVM_Enumerator(void) rb_define_method(rb_cYielder, "<<", yielder_yield_push, 1); rb_define_method(rb_cYielder, "to_proc", yielder_to_proc, 0); - /* Producer */ + /* :nodoc: Producer */ rb_cEnumProducer = rb_define_class_under(rb_cEnumerator, "Producer", rb_cObject); rb_define_alloc_func(rb_cEnumProducer, producer_allocate); rb_define_method(rb_cEnumProducer, "each", producer_each, 0); @@ -4567,7 +4746,7 @@ InitVM_Enumerator(void) /* Product */ rb_cEnumProduct = rb_define_class_under(rb_cEnumerator, "Product", rb_cEnumerator); rb_define_alloc_func(rb_cEnumProduct, enum_product_allocate); - rb_define_method(rb_cEnumProduct, "initialize", enum_product_initialize, -2); + rb_define_method(rb_cEnumProduct, "initialize", enum_product_initialize, -1); rb_define_method(rb_cEnumProduct, "initialize_copy", enum_product_init_copy, 1); rb_define_method(rb_cEnumProduct, "each", enum_product_each, 0); rb_define_method(rb_cEnumProduct, "size", enum_product_size, 0); @@ -4578,7 +4757,7 @@ InitVM_Enumerator(void) rb_undef_method(rb_cEnumProduct, "next_values"); rb_undef_method(rb_cEnumProduct, "peek"); rb_undef_method(rb_cEnumProduct, "peek_values"); - rb_define_singleton_method(rb_cEnumerator, "product", enumerator_s_product, -2); + rb_define_singleton_method(rb_cEnumerator, "product", enumerator_s_product, -1); /* ArithmeticSequence */ rb_cArithSeq = rb_define_class_under(rb_cEnumerator, "ArithmeticSequence", rb_cEnumerator); @@ -4606,7 +4785,6 @@ void Init_Enumerator(void) { id_rewind = rb_intern_const("rewind"); - id_new = rb_intern_const("new"); id_next = rb_intern_const("next"); id_result = rb_intern_const("result"); id_receiver = rb_intern_const("receiver"); @@ -4616,12 +4794,7 @@ Init_Enumerator(void) id_force = rb_intern_const("force"); id_to_enum = rb_intern_const("to_enum"); id_each_entry = rb_intern_const("each_entry"); - id_begin = rb_intern_const("begin"); - id_end = rb_intern_const("end"); - id_step = rb_intern_const("step"); - id_exclude_end = rb_intern_const("exclude_end"); sym_each = ID2SYM(id_each); - sym_cycle = ID2SYM(rb_intern_const("cycle")); sym_yield = ID2SYM(rb_intern_const("yield")); InitVM(Enumerator); |
