diff options
Diffstat (limited to 'enumerator.c')
| -rw-r--r-- | enumerator.c | 695 |
1 files changed, 405 insertions, 290 deletions
diff --git a/enumerator.c b/enumerator.c index faaa77cb49..69c96b2d8f 100644 --- a/enumerator.c +++ b/enumerator.c @@ -18,6 +18,7 @@ #include <float.h> #endif +#include <limits.h> #include "id.h" #include "internal.h" #include "internal/class.h" @@ -33,79 +34,93 @@ /* * Document-class: Enumerator * - * A class which allows both internal and external iteration. + * \Class \Enumerator supports: * - * An Enumerator can be created by the following methods. - * - Object#to_enum - * - Object#enum_for - * - Enumerator.new + * - {External iteration}[rdoc-ref:Enumerator@External+Iteration]. + * - {Internal iteration}[rdoc-ref:Enumerator@Internal+Iteration]. * - * Most methods have two forms: a block form where the contents - * are evaluated for each item in the enumeration, and a non-block form - * which returns a new Enumerator wrapping the iteration. + * An \Enumerator may be created by the following methods: * - * enumerator = %w(one two three).each - * puts enumerator.class # => Enumerator + * - Object#to_enum. + * - Object#enum_for. + * - Enumerator.new. * - * enumerator.each_with_object("foo") do |item, obj| - * puts "#{obj}: #{item}" - * end + * In addition, certain Ruby methods return \Enumerator objects: + * a Ruby iterator method that accepts a block + * may return an \Enumerator if no block is given. + * There are many such methods, for example, in classes Array and Hash. + * (In the documentation for those classes, search for `new_enumerator`.) * - * # foo: one - * # foo: two - * # foo: three + * == Internal Iteration * - * enum_with_obj = enumerator.each_with_object("foo") - * puts enum_with_obj.class # => Enumerator + * In _internal iteration_, an iterator method drives the iteration + * and the caller's block handles the processing; + * this example uses method #each_with_index: * - * enum_with_obj.each do |item, obj| - * puts "#{obj}: #{item}" - * end + * words = %w[foo bar baz] # => ["foo", "bar", "baz"] + * enumerator = words.each # => #<Enumerator: ...> + * enumerator.each_with_index {|word, i| puts "#{i}: #{word}" } + * 0: foo + * 1: bar + * 2: baz * - * # foo: one - * # foo: two - * # foo: three + * Iterator methods in class \Enumerator include: * - * This allows you to chain Enumerators together. For example, you - * can map a list's elements to strings containing the index - * and the element as a string via: + * - #each: + * passes each item to the block. + * - #each_with_index: + * passes each item and its index to the block. + * - #each_with_object (aliased as #with_object): + * passes each item and a given object to the block. + * - #with_index: + * like #each_with_index, but starting at a given offset (instead of zero). * - * puts %w[foo bar baz].map.with_index { |w, i| "#{i}:#{w}" } - * # => ["0:foo", "1:bar", "2:baz"] + * \Class \Enumerator includes module Enumerable, + * which provides many more iterator methods. * * == External Iteration * - * An Enumerator can also be used as an external iterator. - * For example, Enumerator#next returns the next value of the iterator - * or raises StopIteration if the Enumerator is at the end. - * - * e = [1,2,3].each # returns an enumerator object. - * puts e.next # => 1 - * puts e.next # => 2 - * puts e.next # => 3 - * puts e.next # raises StopIteration - * - * +next+, +next_values+, +peek+, and +peek_values+ are the only methods - * which use external iteration (and Array#zip(Enumerable-not-Array) which uses +next+ internally). - * - * These methods do not affect other internal enumeration methods, - * unless the underlying iteration method itself has side-effect, e.g. IO#each_line. - * - * FrozenError will be raised if these methods are called against a frozen enumerator. - * Since +rewind+ and +feed+ also change state for external iteration, - * these methods may raise FrozenError too. - * - * External iteration differs *significantly* from internal iteration - * due to using a Fiber: - * - The Fiber adds some overhead compared to internal enumeration. - * - The stacktrace will only include the stack from the Enumerator, not above. - * - Fiber-local variables are *not* inherited inside the Enumerator Fiber, - * which instead starts with no Fiber-local variables. - * - Fiber storage variables *are* inherited and are designed - * to handle Enumerator Fibers. Assigning to a Fiber storage variable - * only affects the current Fiber, so if you want to change state - * in the caller Fiber of the Enumerator Fiber, you need to use an - * extra indirection (e.g., use some object in the Fiber storage + * In _external iteration_, the user's program both drives the iteration + * and handles the processing in stream-like fashion; + * this example uses method #next: + * + * words = %w[foo bar baz] + * enumerator = words.each + * enumerator.next # => "foo" + * enumerator.next # => "bar" + * enumerator.next # => "baz" + * enumerator.next # Raises StopIteration: iteration reached an end + * + * External iteration methods in class \Enumerator include: + * + * - #feed: + * sets the value that is next to be returned. + * - #next: + * returns the next value and increments the position. + * - #next_values: + * returns the next value in a 1-element array and increments the position. + * - #peek: + * returns the next value but does not increment the position. + * - #peek_values: + * returns the next value in a 1-element array but does not increment the position. + * - #rewind: + * sets the position to zero. + * + * Each of these methods raises FrozenError if called from a frozen \Enumerator. + * + * == External Iteration and \Fiber + * + * External iteration that uses Fiber differs *significantly* from internal iteration: + * + * - Using \Fiber adds some overhead compared to internal enumeration. + * - The stacktrace will only include the stack from the \Enumerator, not above. + * - \Fiber-local variables are *not* inherited inside the \Enumerator \Fiber, + * which instead starts with no \Fiber-local variables. + * - \Fiber storage variables *are* inherited and are designed + * to handle \Enumerator Fibers. Assigning to a \Fiber storage variable + * only affects the current \Fiber, so if you want to change state + * in the caller \Fiber of the \Enumerator \Fiber, you need to use an + * extra indirection (e.g., use some object in the \Fiber storage * variable and mutate some ivar of it). * * Concretely: @@ -125,7 +140,7 @@ * e.each { p _1 } * p Fiber[:storage_var] # => 2 (it ran in the same Fiber/"stack" as the current Fiber) * - * == Convert External Iteration to Internal Iteration + * == Converting External Iteration to Internal Iteration * * You can use an external iterator to implement an internal iterator as follows: * @@ -162,10 +177,9 @@ */ VALUE rb_cEnumerator; static VALUE rb_cLazy; -static ID id_rewind, id_new, id_to_enum, id_each_entry; +static ID id_rewind, id_to_enum, id_each_entry; static ID id_next, id_result, id_receiver, id_arguments, id_memo, id_method, id_force; -static ID id_begin, id_end, id_step, id_exclude_end; -static VALUE sym_each, sym_cycle, sym_yield; +static VALUE sym_each, sym_yield; static VALUE lazy_use_super_method; @@ -222,6 +236,7 @@ struct yielder { struct producer { VALUE init; VALUE proc; + VALUE size; }; typedef struct MEMO *lazyenum_proc_func(VALUE, struct MEMO *, VALUE, long); @@ -281,28 +296,20 @@ enumerator_ptr(VALUE obj) } static void -proc_entry_mark(void *p) -{ - struct proc_entry *ptr = p; - rb_gc_mark_movable(ptr->proc); - rb_gc_mark_movable(ptr->memo); -} - -static void -proc_entry_compact(void *p) +proc_entry_mark_and_move(void *p) { struct proc_entry *ptr = p; - ptr->proc = rb_gc_location(ptr->proc); - ptr->memo = rb_gc_location(ptr->memo); + rb_gc_mark_and_move(&ptr->proc); + rb_gc_mark_and_move(&ptr->memo); } static const rb_data_type_t proc_entry_data_type = { "proc_entry", { - proc_entry_mark, + proc_entry_mark_and_move, RUBY_TYPED_DEFAULT_FREE, NULL, // Nothing allocated externally, so don't need a memsize function - proc_entry_compact, + proc_entry_mark_and_move, }, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; @@ -452,28 +459,31 @@ convert_to_feasible_size_value(VALUE obj) /* * call-seq: - * Enumerator.new(size = nil) { |yielder| ... } + * Enumerator.new(size = nil) {|yielder| ... } * - * Creates a new Enumerator object, which can be used as an - * Enumerable. + * Returns a new \Enumerator object that can be used for iteration. * - * Iteration is defined by the given block, in - * which a "yielder" object, given as block parameter, can be used to - * yield a value by calling the +yield+ method (aliased as <code><<</code>): + * The given block defines the iteration; + * it is called with a "yielder" object that can yield an object + * via a call to method <tt>yielder.yield</tt>: * - * fib = Enumerator.new do |y| - * a = b = 1 - * loop do - * y << a - * a, b = b, a + b + * fib = Enumerator.new do |yielder| + * n = next_n = 1 + * while true do + * yielder.yield(n) + * n, next_n = next_n, n + next_n * end * end * * fib.take(10) # => [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] * - * The optional parameter can be used to specify how to calculate the size - * in a lazy fashion (see Enumerator#size). It can either be a value or - * a callable object. + * Parameter +size+ specifies how the size is to be calculated (see #size); + * it can either be a value or a callable object: + * + * Enumerator.new{}.size # => nil + * Enumerator.new(42){}.size # => 42 + * Enumerator.new(-> {42}){}.size # => 42 + * */ static VALUE enumerator_initialize(int argc, VALUE *argv, VALUE obj) @@ -679,7 +689,7 @@ enumerator_with_index(int argc, VALUE *argv, VALUE obj) rb_check_arity(argc, 0, 1); RETURN_SIZED_ENUMERATOR(obj, argc, argv, enumerator_enum_size); memo = (!argc || NIL_P(memo = argv[0])) ? INT2FIX(0) : rb_to_int(memo); - return enumerator_block_call(obj, enumerator_with_index_i, (VALUE)MEMO_NEW(memo, 0, 0)); + return enumerator_block_call(obj, enumerator_with_index_i, (VALUE)rb_imemo_memo_new(memo, 0, 0)); } /* @@ -1093,6 +1103,7 @@ enumerator_rewind(VALUE obj) static struct generator *generator_ptr(VALUE obj); static VALUE append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args); +static VALUE append_method_args(VALUE obj, VALUE str, VALUE default_args); static VALUE inspect_enumerator(VALUE obj, VALUE dummy, int recur) @@ -1165,7 +1176,7 @@ kwd_append(VALUE key, VALUE val, VALUE str) static VALUE append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args) { - VALUE method, eargs; + VALUE method; method = rb_attr_get(obj, id_method); if (method != Qfalse) { @@ -1179,6 +1190,13 @@ append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args) rb_str_buf_cat2(str, ":"); rb_str_buf_append(str, method); } + return append_method_args(obj, str, default_args); +} + +static VALUE +append_method_args(VALUE obj, VALUE str, VALUE default_args) +{ + VALUE eargs; eargs = rb_attr_get(obj, id_arguments); if (NIL_P(eargs)) { @@ -1208,10 +1226,11 @@ append_method(VALUE obj, VALUE str, ID default_method, VALUE default_args) if (!NIL_P(kwds)) { rb_hash_foreach(kwds, kwd_append, str); } - rb_str_set_len(str, RSTRING_LEN(str)-2); + rb_str_set_len(str, RSTRING_LEN(str)-2); /* drop the last ", " */ rb_str_buf_cat2(str, ")"); } } + RB_GC_GUARD(eargs); return str; } @@ -1238,6 +1257,24 @@ enumerator_inspect(VALUE obj) * (1..100).to_a.permutation(4).size # => 94109400 * loop.size # => Float::INFINITY * (1..100).drop_while.size # => nil + * + * Note that enumerator size might be inaccurate, and should be rather treated as a hint. + * For example, there is no check that the size provided to ::new is accurate: + * + * e = Enumerator.new(5) { |y| 2.times { y << it} } + * e.size # => 5 + * e.to_a.size # => 2 + * + * Another example is an enumerator created by ::produce without a +size+ argument. + * Such enumerators return +Infinity+ for size, but this is inaccurate if the passed + * block raises StopIteration: + * + * e = Enumerator.produce(1) { it + 1 } + * e.size # => Infinity + * + * e = Enumerator.produce(1) { it > 3 ? raise(StopIteration) : it + 1 } + * e.size # => Infinity + * e.to_a.size # => 4 */ static VALUE @@ -1281,26 +1318,19 @@ enumerator_size(VALUE obj) * Yielder */ static void -yielder_mark(void *p) +yielder_mark_and_move(void *p) { struct yielder *ptr = p; - rb_gc_mark_movable(ptr->proc); -} - -static void -yielder_compact(void *p) -{ - struct yielder *ptr = p; - ptr->proc = rb_gc_location(ptr->proc); + rb_gc_mark_and_move(&ptr->proc); } static const rb_data_type_t yielder_data_type = { "yielder", { - yielder_mark, + yielder_mark_and_move, RUBY_TYPED_DEFAULT_FREE, NULL, - yielder_compact, + yielder_mark_and_move, }, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; @@ -1411,28 +1441,20 @@ yielder_new(void) * Generator */ static void -generator_mark(void *p) -{ - struct generator *ptr = p; - rb_gc_mark_movable(ptr->proc); - rb_gc_mark_movable(ptr->obj); -} - -static void -generator_compact(void *p) +generator_mark_and_move(void *p) { struct generator *ptr = p; - ptr->proc = rb_gc_location(ptr->proc); - ptr->obj = rb_gc_location(ptr->obj); + rb_gc_mark_and_move(&ptr->proc); + rb_gc_mark_and_move(&ptr->obj); } static const rb_data_type_t generator_data_type = { "generator", { - generator_mark, + generator_mark_and_move, RUBY_TYPED_DEFAULT_FREE, NULL, - generator_compact, + generator_mark_and_move, }, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; @@ -1603,6 +1625,11 @@ lazy_init_block_i(RB_BLOCK_CALL_FUNC_ARGLIST(val, m)) #define LAZY_MEMO_SET_PACKED(memo) ((memo)->memo_flags |= LAZY_MEMO_PACKED) #define LAZY_MEMO_RESET_PACKED(memo) ((memo)->memo_flags &= ~LAZY_MEMO_PACKED) +#define LAZY_NEED_BLOCK(func) \ + if (!rb_block_given_p()) { \ + rb_raise(rb_eArgError, "tried to call lazy " #func " without a block"); \ + } + static VALUE lazy_yielder_result(struct MEMO *result, VALUE yielder, VALUE procs_array, VALUE memos, long i); static VALUE @@ -1613,7 +1640,7 @@ lazy_init_yielder(RB_BLOCK_CALL_FUNC_ARGLIST(_, m)) VALUE memos = rb_attr_get(yielder, id_memo); struct MEMO *result; - result = MEMO_NEW(m, rb_enum_values_pack(argc, argv), + result = rb_imemo_memo_new(m, rb_enum_values_pack(argc, argv), argc > 1 ? LAZY_MEMO_PACKED : 0); return lazy_yielder_result(result, yielder, procs_array, memos, 0); } @@ -1806,9 +1833,7 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) VALUE generator; rb_check_arity(argc, 1, 2); - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy new without a block"); - } + LAZY_NEED_BLOCK(new); obj = argv[0]; if (argc > 1) { size = argv[1]; @@ -2066,10 +2091,7 @@ static const lazyenum_funcs lazy_map_funcs = { static VALUE lazy_map(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy map without a block"); - } - + LAZY_NEED_BLOCK(map); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_map_funcs); } @@ -2151,10 +2173,7 @@ static const lazyenum_funcs lazy_flat_map_funcs = { static VALUE lazy_flat_map(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy flat_map without a block"); - } - + LAZY_NEED_BLOCK(flat_map); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_flat_map_funcs); } @@ -2181,10 +2200,7 @@ static const lazyenum_funcs lazy_select_funcs = { static VALUE lazy_select(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy select without a block"); - } - + LAZY_NEED_BLOCK(select); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_select_funcs); } @@ -2215,10 +2231,7 @@ static const lazyenum_funcs lazy_filter_map_funcs = { static VALUE lazy_filter_map(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy filter_map without a block"); - } - + LAZY_NEED_BLOCK(filter_map); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_filter_map_funcs); } @@ -2244,10 +2257,7 @@ static const lazyenum_funcs lazy_reject_funcs = { static VALUE lazy_reject(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy reject without a block"); - } - + LAZY_NEED_BLOCK(reject); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_reject_funcs); } @@ -2530,10 +2540,7 @@ static const lazyenum_funcs lazy_take_while_funcs = { static VALUE lazy_take_while(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy take_while without a block"); - } - + LAZY_NEED_BLOCK(take_while); return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_take_while_funcs); } @@ -2628,10 +2635,7 @@ static const lazyenum_funcs lazy_drop_while_funcs = { static VALUE lazy_drop_while(VALUE obj) { - if (!rb_block_given_p()) { - rb_raise(rb_eArgError, "tried to call lazy drop_while without a block"); - } - + LAZY_NEED_BLOCK(drop_while); return lazy_add_method(obj, 0, 0, Qfalse, Qnil, &lazy_drop_while_funcs); } @@ -2776,6 +2780,52 @@ lazy_with_index(int argc, VALUE *argv, VALUE obj) return lazy_add_method(obj, 0, 0, memo, rb_ary_new_from_values(1, &memo), &lazy_with_index_funcs); } +static struct MEMO * +lazy_tap_each_proc(VALUE proc_entry, struct MEMO *result, VALUE memos, long memo_index) +{ + struct proc_entry *entry = proc_entry_ptr(proc_entry); + + rb_proc_call_with_block(entry->proc, 1, &result->memo_value, Qnil); + + return result; +} + +static const lazyenum_funcs lazy_tap_each_funcs = { + lazy_tap_each_proc, 0, +}; + +/* + * call-seq: + * lazy.tap_each { |item| ... } -> lazy_enumerator + * + * Passes each element through to the block for side effects only, + * without modifying the element or affecting the enumeration. + * Returns a new lazy enumerator. + * + * This is useful for debugging or logging inside lazy chains, + * without breaking laziness or misusing +map+. + * + * (1..).lazy + * .tap_each { |x| puts "got #{x}" } + * .select(&:even?) + * .first(3) + * # prints: got 1, got 2, ..., got 6 + * # returns: [2, 4, 6] + * + * Similar in intent to Java's Stream#peek. + */ + +static VALUE +lazy_tap_each(VALUE obj) +{ + if (!rb_block_given_p()) + { + rb_raise(rb_eArgError, "tried to call lazy tap_each without a block"); + } + + return lazy_add_method(obj, 0, 0, Qnil, Qnil, &lazy_tap_each_funcs); +} + #if 0 /* for RDoc */ /* @@ -2913,19 +2963,12 @@ stop_result(VALUE self) */ static void -producer_mark(void *p) -{ - struct producer *ptr = p; - rb_gc_mark_movable(ptr->init); - rb_gc_mark_movable(ptr->proc); -} - -static void -producer_compact(void *p) +producer_mark_and_move(void *p) { struct producer *ptr = p; - ptr->init = rb_gc_location(ptr->init); - ptr->proc = rb_gc_location(ptr->proc); + rb_gc_mark_and_move(&ptr->init); + rb_gc_mark_and_move(&ptr->proc); + rb_gc_mark_and_move(&ptr->size); } #define producer_free RUBY_TYPED_DEFAULT_FREE @@ -2939,10 +2982,10 @@ producer_memsize(const void *p) static const rb_data_type_t producer_data_type = { "producer", { - producer_mark, + producer_mark_and_move, producer_free, producer_memsize, - producer_compact, + producer_mark_and_move, }, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; @@ -2969,12 +3012,13 @@ producer_allocate(VALUE klass) obj = TypedData_Make_Struct(klass, struct producer, &producer_data_type, ptr); ptr->init = Qundef; ptr->proc = Qundef; + ptr->size = Qnil; return obj; } static VALUE -producer_init(VALUE obj, VALUE init, VALUE proc) +producer_init(VALUE obj, VALUE init, VALUE proc, VALUE size) { struct producer *ptr; @@ -2986,6 +3030,7 @@ producer_init(VALUE obj, VALUE init, VALUE proc) RB_OBJ_WRITE(obj, &ptr->init, init); RB_OBJ_WRITE(obj, &ptr->proc, proc); + RB_OBJ_WRITE(obj, &ptr->size, size); return obj; } @@ -3036,12 +3081,18 @@ producer_each(VALUE obj) static VALUE producer_size(VALUE obj, VALUE args, VALUE eobj) { - return DBL2NUM(HUGE_VAL); + struct producer *ptr = producer_ptr(obj); + VALUE size = ptr->size; + + if (NIL_P(size)) return Qnil; + if (RB_INTEGER_TYPE_P(size) || RB_FLOAT_TYPE_P(size)) return size; + + return rb_funcall(size, id_call, 0); } /* * call-seq: - * Enumerator.produce(initial = nil) { |prev| block } -> enumerator + * Enumerator.produce(initial = nil, size: nil) { |prev| block } -> enumerator * * Creates an infinite enumerator from any block, just called over and * over. The result of the previous iteration is passed to the next one. @@ -3073,19 +3124,50 @@ producer_size(VALUE obj, VALUE args, VALUE eobj) * PATTERN = %r{\d+|[-/+*]} * Enumerator.produce { scanner.scan(PATTERN) }.slice_after { scanner.eos? }.first * # => ["7", "+", "38", "/", "6"] + * + * The optional +size+ keyword argument specifies the size of the enumerator, + * which can be retrieved by Enumerator#size. It can be an integer, + * +Float::INFINITY+, a callable object (such as a lambda), or +nil+ to + * indicate unknown size. When not specified, the size defaults to + * +Float::INFINITY+. + * + * # Infinite enumerator + * enum = Enumerator.produce(1, size: Float::INFINITY, &:succ) + * enum.size # => Float::INFINITY + * + * # Finite enumerator with known/computable size + * abs_dir = File.expand_path("./baz") # => "/foo/bar/baz" + * traverser = Enumerator.produce(abs_dir, size: -> { abs_dir.count("/") + 1 }) { + * raise StopIteration if it == "/" + * File.dirname(it) + * } + * traverser.size # => 4 + * + * # Finite enumerator with unknown size + * calendar = Enumerator.produce(Date.today, size: nil) { + * it.monday? ? raise(StopIteration) : it + 1 + * } + * calendar.size # => nil */ static VALUE enumerator_s_produce(int argc, VALUE *argv, VALUE klass) { - VALUE init, producer; + VALUE init, producer, opts, size; + ID keyword_ids[1]; if (!rb_block_given_p()) rb_raise(rb_eArgError, "no block given"); - if (rb_scan_args(argc, argv, "01", &init) == 0) { + keyword_ids[0] = rb_intern("size"); + rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "01:", &init, &opts); + rb_get_kwargs(opts, keyword_ids, 0, 1, &size); + + size = UNDEF_P(size) ? DBL2NUM(HUGE_VAL) : convert_to_feasible_size_value(size); + + if (argc == 0 || (argc == 1 && !NIL_P(opts))) { init = Qundef; } - producer = producer_init(producer_allocate(rb_cEnumProducer), init, rb_block_proc()); + producer = producer_init(producer_allocate(rb_cEnumProducer), init, rb_block_proc(), size); return rb_enumeratorize_with_size_kw(producer, sym_each, 0, 0, producer_size, RB_NO_KEYWORDS); } @@ -3101,17 +3183,10 @@ enumerator_s_produce(int argc, VALUE *argv, VALUE klass) */ static void -enum_chain_mark(void *p) -{ - struct enum_chain *ptr = p; - rb_gc_mark_movable(ptr->enums); -} - -static void -enum_chain_compact(void *p) +enum_chain_mark_and_move(void *p) { struct enum_chain *ptr = p; - ptr->enums = rb_gc_location(ptr->enums); + rb_gc_mark_and_move(&ptr->enums); } #define enum_chain_free RUBY_TYPED_DEFAULT_FREE @@ -3125,12 +3200,12 @@ enum_chain_memsize(const void *p) static const rb_data_type_t enum_chain_data_type = { "chain", { - enum_chain_mark, + enum_chain_mark_and_move, enum_chain_free, enum_chain_memsize, - enum_chain_compact, + enum_chain_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; static struct enum_chain * @@ -3180,7 +3255,7 @@ enum_chain_initialize(VALUE obj, VALUE enums) if (!ptr) rb_raise(rb_eArgError, "unallocated chain"); - ptr->enums = rb_ary_freeze(enums); + RB_OBJ_WRITE(obj, &ptr->enums, rb_ary_freeze(enums)); ptr->pos = -1; return obj; @@ -3214,7 +3289,7 @@ enum_chain_init_copy(VALUE obj, VALUE orig) if (!ptr1) rb_raise(rb_eArgError, "unallocated chain"); - ptr1->enums = ptr0->enums; + RB_OBJ_WRITE(obj, &ptr1->enums, ptr0->enums); ptr1->pos = ptr0->pos; return obj; @@ -3423,17 +3498,10 @@ enumerator_plus(VALUE obj, VALUE eobj) */ static void -enum_product_mark(void *p) +enum_product_mark_and_move(void *p) { struct enum_product *ptr = p; - rb_gc_mark_movable(ptr->enums); -} - -static void -enum_product_compact(void *p) -{ - struct enum_product *ptr = p; - ptr->enums = rb_gc_location(ptr->enums); + rb_gc_mark_and_move(&ptr->enums); } #define enum_product_free RUBY_TYPED_DEFAULT_FREE @@ -3447,12 +3515,12 @@ enum_product_memsize(const void *p) static const rb_data_type_t enum_product_data_type = { "product", { - enum_product_mark, + enum_product_mark_and_move, enum_product_free, enum_product_memsize, - enum_product_compact, + enum_product_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; static struct enum_product * @@ -3508,7 +3576,7 @@ enum_product_initialize(int argc, VALUE *argv, VALUE obj) if (!ptr) rb_raise(rb_eArgError, "unallocated product"); - ptr->enums = rb_ary_freeze(enums); + RB_OBJ_WRITE(obj, &ptr->enums, rb_ary_freeze(enums)); return obj; } @@ -3526,7 +3594,7 @@ enum_product_init_copy(VALUE obj, VALUE orig) if (!ptr1) rb_raise(rb_eArgError, "unallocated product"); - ptr1->enums = ptr0->enums; + RB_OBJ_WRITE(obj, &ptr1->enums, ptr0->enums); return obj; } @@ -3586,9 +3654,9 @@ enum_product_enum_size(VALUE obj, VALUE args, VALUE eobj) struct product_state { VALUE obj; VALUE block; + int index; int argc; VALUE *argv; - int index; }; static VALUE product_each(VALUE, struct product_state *); @@ -3627,15 +3695,23 @@ enum_product_run(VALUE obj, VALUE block) { struct enum_product *ptr = enum_product_ptr(obj); int argc = RARRAY_LENINT(ptr->enums); + if (argc == 0) { /* no need to allocate state.argv */ + rb_funcall(block, id_call, 1, rb_ary_new()); + return obj; + } + + VALUE argsbuf = 0; struct product_state state = { .obj = obj, .block = block, .index = 0, .argc = argc, - .argv = ALLOCA_N(VALUE, argc), + .argv = ALLOCV_N(VALUE, argsbuf, argc), }; - return product_each(obj, &state); + VALUE ret = product_each(obj, &state); + ALLOCV_END(argsbuf); + return ret; } /* @@ -3748,6 +3824,55 @@ enumerator_s_product(int argc, VALUE *argv, VALUE klass) return obj; } +struct arith_seq { + struct enumerator enumerator; + VALUE begin; + VALUE end; + VALUE step; + bool exclude_end; +}; + +RUBY_REFERENCES(arith_seq_refs) = { + RUBY_REF_EDGE(struct enumerator, obj), + RUBY_REF_EDGE(struct enumerator, args), + RUBY_REF_EDGE(struct enumerator, fib), + RUBY_REF_EDGE(struct enumerator, dst), + RUBY_REF_EDGE(struct enumerator, lookahead), + RUBY_REF_EDGE(struct enumerator, feedvalue), + RUBY_REF_EDGE(struct enumerator, stop_exc), + RUBY_REF_EDGE(struct enumerator, size), + RUBY_REF_EDGE(struct enumerator, procs), + + RUBY_REF_EDGE(struct arith_seq, begin), + RUBY_REF_EDGE(struct arith_seq, end), + RUBY_REF_EDGE(struct arith_seq, step), + RUBY_REF_END +}; + +static const rb_data_type_t arith_seq_data_type = { + "arithmetic_sequence", + { + RUBY_REFS_LIST_PTR(arith_seq_refs), + RUBY_TYPED_DEFAULT_FREE, + NULL, // Nothing allocated externally, so don't need a memsize function + NULL, + }, + .parent = &enumerator_data_type, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE +}; + +static VALUE +arith_seq_allocate(VALUE klass) +{ + struct arith_seq *ptr; + VALUE enum_obj; + + enum_obj = TypedData_Make_Struct(klass, struct arith_seq, &arith_seq_data_type, ptr); + ptr->enumerator.obj = Qundef; + + return enum_obj; +} + /* * Document-class: Enumerator::ArithmeticSequence * @@ -3765,12 +3890,16 @@ rb_arith_seq_new(VALUE obj, VALUE meth, int argc, VALUE const *argv, rb_enumerator_size_func *size_fn, VALUE beg, VALUE end, VALUE step, int excl) { - VALUE aseq = enumerator_init(enumerator_allocate(rb_cArithSeq), + VALUE aseq = enumerator_init(arith_seq_allocate(rb_cArithSeq), obj, meth, argc, argv, size_fn, Qnil, rb_keyword_given_p()); - rb_ivar_set(aseq, id_begin, beg); - rb_ivar_set(aseq, id_end, end); - rb_ivar_set(aseq, id_step, step); - rb_ivar_set(aseq, id_exclude_end, RBOOL(excl)); + struct arith_seq *ptr; + TypedData_Get_Struct(aseq, struct arith_seq, &enumerator_data_type, ptr); + + RB_OBJ_WRITE(aseq, &ptr->begin, beg); + RB_OBJ_WRITE(aseq, &ptr->end, end); + RB_OBJ_WRITE(aseq, &ptr->step, step); + ptr->exclude_end = excl; + return aseq; } @@ -3783,7 +3912,9 @@ rb_arith_seq_new(VALUE obj, VALUE meth, int argc, VALUE const *argv, static inline VALUE arith_seq_begin(VALUE self) { - return rb_ivar_get(self, id_begin); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return ptr->begin; } /* @@ -3794,7 +3925,9 @@ arith_seq_begin(VALUE self) static inline VALUE arith_seq_end(VALUE self) { - return rb_ivar_get(self, id_end); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return ptr->end; } /* @@ -3806,7 +3939,9 @@ arith_seq_end(VALUE self) static inline VALUE arith_seq_step(VALUE self) { - return rb_ivar_get(self, id_step); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return ptr->step; } /* @@ -3817,13 +3952,17 @@ arith_seq_step(VALUE self) static inline VALUE arith_seq_exclude_end(VALUE self) { - return rb_ivar_get(self, id_exclude_end); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return RBOOL(ptr->exclude_end); } static inline int arith_seq_exclude_end_p(VALUE self) { - return RTEST(arith_seq_exclude_end(self)); + struct arith_seq *ptr; + TypedData_Get_Struct(self, struct arith_seq, &enumerator_data_type, ptr); + return ptr->exclude_end; } int @@ -3890,46 +4029,14 @@ rb_arithmetic_sequence_beg_len_step(VALUE obj, long *begp, long *lenp, long *ste return Qnil; } -/* - * call-seq: - * aseq.first -> num or nil - * aseq.first(n) -> an_array - * - * Returns the first number in this arithmetic sequence, - * or an array of the first +n+ elements. - */ static VALUE -arith_seq_first(int argc, VALUE *argv, VALUE self) +arith_seq_take(VALUE self, VALUE num) { VALUE b, e, s, ary; long n; int x; - rb_check_arity(argc, 0, 1); - - b = arith_seq_begin(self); - e = arith_seq_end(self); - s = arith_seq_step(self); - if (argc == 0) { - if (NIL_P(b)) { - return Qnil; - } - if (!NIL_P(e)) { - VALUE zero = INT2FIX(0); - int r = rb_cmpint(rb_num_coerce_cmp(s, zero, idCmp), s, zero); - if (r > 0 && RTEST(rb_funcall(b, '>', 1, e))) { - return Qnil; - } - if (r < 0 && RTEST(rb_funcall(b, '<', 1, e))) { - return Qnil; - } - } - return b; - } - - // TODO: the following code should be extracted as arith_seq_take - - n = NUM2LONG(argv[0]); + n = NUM2LONG(num); if (n < 0) { rb_raise(rb_eArgError, "attempt to take negative size"); } @@ -3937,6 +4044,9 @@ arith_seq_first(int argc, VALUE *argv, VALUE self) return rb_ary_new_capa(0); } + b = arith_seq_begin(self); + e = arith_seq_end(self); + s = arith_seq_step(self); x = arith_seq_exclude_end_p(self); if (FIXNUM_P(b) && NIL_P(e) && FIXNUM_P(s)) { @@ -3971,7 +4081,7 @@ arith_seq_first(int argc, VALUE *argv, VALUE self) ary = rb_ary_new_capa((n < len) ? n : len); while (n > 0 && i < end) { rb_ary_push(ary, LONG2FIX(i)); - if (i + unit < i) break; + if (i > LONG_MAX - unit) break; i += unit; --n; } @@ -3984,7 +4094,7 @@ arith_seq_first(int argc, VALUE *argv, VALUE self) ary = rb_ary_new_capa((n < len) ? n : len); while (n > 0 && i > end) { rb_ary_push(ary, LONG2FIX(i)); - if (i + unit > i) break; + if (i < LONG_MIN - unit) break; i += unit; --n; } @@ -4031,7 +4141,49 @@ arith_seq_first(int argc, VALUE *argv, VALUE self) return ary; } - return rb_call_super(argc, argv); + { + VALUE argv[1]; + argv[0] = num; + return rb_call_super(1, argv); + } +} + +/* + * call-seq: + * aseq.first -> num or nil + * aseq.first(n) -> an_array + * + * Returns the first number in this arithmetic sequence, + * or an array of the first +n+ elements. + */ +static VALUE +arith_seq_first(int argc, VALUE *argv, VALUE self) +{ + VALUE b, e, s; + + rb_check_arity(argc, 0, 1); + + b = arith_seq_begin(self); + e = arith_seq_end(self); + s = arith_seq_step(self); + if (argc == 0) { + if (NIL_P(b)) { + return Qnil; + } + if (!NIL_P(e)) { + VALUE zero = INT2FIX(0); + int r = rb_cmpint(rb_num_coerce_cmp(s, zero, idCmp), s, zero); + if (r > 0 && RTEST(rb_funcall(b, '>', 1, e))) { + return Qnil; + } + if (r < 0 && RTEST(rb_funcall(b, '<', 1, e))) { + return Qnil; + } + } + return b; + } + + return arith_seq_take(self, argv[0]); } static inline VALUE @@ -4197,7 +4349,7 @@ static VALUE arith_seq_inspect(VALUE self) { struct enumerator *e; - VALUE eobj, str, eargs; + VALUE eobj, str; int range_p; TypedData_Get_Struct(self, struct enumerator, &enumerator_data_type, e); @@ -4211,39 +4363,7 @@ arith_seq_inspect(VALUE self) str = rb_sprintf("(%s%"PRIsVALUE"%s.", range_p ? "(" : "", eobj, range_p ? ")" : ""); rb_str_buf_append(str, rb_id2str(e->meth)); - - eargs = rb_attr_get(eobj, id_arguments); - if (NIL_P(eargs)) { - eargs = e->args; - } - if (eargs != Qfalse) { - long argc = RARRAY_LEN(eargs); - const VALUE *argv = RARRAY_CONST_PTR(eargs); /* WB: no new reference */ - - if (argc > 0) { - VALUE kwds = Qnil; - - rb_str_buf_cat2(str, "("); - - if (RB_TYPE_P(argv[argc-1], T_HASH)) { - int all_key = TRUE; - rb_hash_foreach(argv[argc-1], key_symbol_p, (VALUE)&all_key); - if (all_key) kwds = argv[--argc]; - } - - while (argc--) { - VALUE arg = *argv++; - - rb_str_append(str, rb_inspect(arg)); - rb_str_buf_cat2(str, ", "); - } - if (!NIL_P(kwds)) { - rb_hash_foreach(kwds, kwd_append, str); - } - rb_str_set_len(str, RSTRING_LEN(str)-2); /* drop the last ", " */ - rb_str_buf_cat2(str, ")"); - } - } + append_method_args(eobj, str, e->args); rb_str_buf_cat2(str, ")"); @@ -4549,6 +4669,7 @@ InitVM_Enumerator(void) rb_define_method(rb_cLazy, "uniq", lazy_uniq, 0); rb_define_method(rb_cLazy, "compact", lazy_compact, 0); rb_define_method(rb_cLazy, "with_index", lazy_with_index, -1); + rb_define_method(rb_cLazy, "tap_each", lazy_tap_each, 0); lazy_use_super_method = rb_hash_new_with_size(18); rb_hash_aset(lazy_use_super_method, sym("map"), sym("_enumerable_map")); @@ -4585,7 +4706,7 @@ InitVM_Enumerator(void) rb_eStopIteration = rb_define_class("StopIteration", rb_eIndexError); rb_define_method(rb_eStopIteration, "result", stop_result, 0); - /* Generator */ + /* :nodoc: Generator */ rb_cGenerator = rb_define_class_under(rb_cEnumerator, "Generator", rb_cObject); rb_include_module(rb_cGenerator, rb_mEnumerable); rb_define_alloc_func(rb_cGenerator, generator_allocate); @@ -4593,7 +4714,7 @@ InitVM_Enumerator(void) rb_define_method(rb_cGenerator, "initialize_copy", generator_init_copy, 1); rb_define_method(rb_cGenerator, "each", generator_each, -1); - /* Yielder */ + /* :nodoc: Yielder */ rb_cYielder = rb_define_class_under(rb_cEnumerator, "Yielder", rb_cObject); rb_define_alloc_func(rb_cYielder, yielder_allocate); rb_define_method(rb_cYielder, "initialize", yielder_initialize, 0); @@ -4601,7 +4722,7 @@ InitVM_Enumerator(void) rb_define_method(rb_cYielder, "<<", yielder_yield_push, 1); rb_define_method(rb_cYielder, "to_proc", yielder_to_proc, 0); - /* Producer */ + /* :nodoc: Producer */ rb_cEnumProducer = rb_define_class_under(rb_cEnumerator, "Producer", rb_cObject); rb_define_alloc_func(rb_cEnumProducer, producer_allocate); rb_define_method(rb_cEnumProducer, "each", producer_each, 0); @@ -4664,7 +4785,6 @@ void Init_Enumerator(void) { id_rewind = rb_intern_const("rewind"); - id_new = rb_intern_const("new"); id_next = rb_intern_const("next"); id_result = rb_intern_const("result"); id_receiver = rb_intern_const("receiver"); @@ -4674,12 +4794,7 @@ Init_Enumerator(void) id_force = rb_intern_const("force"); id_to_enum = rb_intern_const("to_enum"); id_each_entry = rb_intern_const("each_entry"); - id_begin = rb_intern_const("begin"); - id_end = rb_intern_const("end"); - id_step = rb_intern_const("step"); - id_exclude_end = rb_intern_const("exclude_end"); sym_each = ID2SYM(id_each); - sym_cycle = ID2SYM(rb_intern_const("cycle")); sym_yield = ID2SYM(rb_intern_const("yield")); InitVM(Enumerator); |
