diff options
Diffstat (limited to 'array.c')
| -rw-r--r-- | array.c | 1512 |
1 files changed, 1081 insertions, 431 deletions
@@ -11,89 +11,26 @@ **********************************************************************/ -#include "ruby/ruby.h" +#include "internal.h" #include "ruby/util.h" #include "ruby/st.h" -#include "ruby/encoding.h" -#include "internal.h" #include "probes.h" #include "id.h" +#include "debug_counter.h" #ifndef ARRAY_DEBUG # define NDEBUG #endif -#include <assert.h> +#include "ruby_assert.h" VALUE rb_cArray; -static ID id_cmp, id_div, id_power; +/* for OPTIMIZED_CMP: */ +#define id_cmp idCmp #define ARY_DEFAULT_SIZE 16 #define ARY_MAX_SIZE (LONG_MAX / (int)sizeof(VALUE)) - -void -rb_mem_clear(register VALUE *mem, register long size) -{ - while (size--) { - *mem++ = Qnil; - } -} - -static void -ary_mem_clear(VALUE ary, long beg, long size) -{ - RARRAY_PTR_USE(ary, ptr, { - rb_mem_clear(ptr + beg, size); - }); -} - -static inline void -memfill(register VALUE *mem, register long size, register VALUE val) -{ - while (size--) { - *mem++ = val; - } -} - -static void -ary_memfill(VALUE ary, long beg, long size, VALUE val) -{ - RARRAY_PTR_USE(ary, ptr, { - memfill(ptr + beg, size, val); - RB_OBJ_WRITTEN(ary, Qundef, val); - }); -} - -static void -ary_memcpy(VALUE ary, long beg, long argc, const VALUE *argv) -{ -#if 1 - if (OBJ_PROMOTED(ary)) { - if (argc > (int)(128/sizeof(VALUE)) /* is magic number (cache line size) */) { - rb_gc_writebarrier_remember_promoted(ary); - RARRAY_PTR_USE(ary, ptr, { - MEMCPY(ptr+beg, argv, VALUE, argc); - }); - } - else { - int i; - RARRAY_PTR_USE(ary, ptr, { - for (i=0; i<argc; i++) { - RB_OBJ_WRITE(ary, &ptr[i+beg], argv[i]); - } - }); - } - } - else { - RARRAY_PTR_USE(ary, ptr, { - MEMCPY(ptr+beg, argv, VALUE, argc); - }); - } -#else - /* giveup write barrier (traditional way) */ - MEMCPY(RARRAY_PTR(ary)+beg, argv, VALUE, argc); -#endif -} +#define SMALL_ARRAY_LEN 16 # define ARY_SHARED_P(ary) \ (assert(!FL_TEST((ary), ELTS_SHARED) || !FL_TEST((ary), RARRAY_EMBED_FLAG)), \ @@ -195,6 +132,74 @@ ary_memcpy(VALUE ary, long beg, long argc, const VALUE *argv) FL_SET((ary), RARRAY_SHARED_ROOT_FLAG); \ } while (0) +#define ARY_SET(a, i, v) RARRAY_ASET((assert(!ARY_SHARED_P(a)), (a)), (i), (v)) + +void +rb_mem_clear(register VALUE *mem, register long size) +{ + while (size--) { + *mem++ = Qnil; + } +} + +static void +ary_mem_clear(VALUE ary, long beg, long size) +{ + RARRAY_PTR_USE(ary, ptr, { + rb_mem_clear(ptr + beg, size); + }); +} + +static inline void +memfill(register VALUE *mem, register long size, register VALUE val) +{ + while (size--) { + *mem++ = val; + } +} + +static void +ary_memfill(VALUE ary, long beg, long size, VALUE val) +{ + RARRAY_PTR_USE(ary, ptr, { + memfill(ptr + beg, size, val); + RB_OBJ_WRITTEN(ary, Qundef, val); + }); +} + +static void +ary_memcpy0(VALUE ary, long beg, long argc, const VALUE *argv, VALUE buff_owner_ary) +{ +#if 1 + assert(!ARY_SHARED_P(buff_owner_ary)); + + if (argc > (int)(128/sizeof(VALUE)) /* is magic number (cache line size) */) { + rb_gc_writebarrier_remember(buff_owner_ary); + RARRAY_PTR_USE(ary, ptr, { + MEMCPY(ptr+beg, argv, VALUE, argc); + }); + } + else { + int i; + RARRAY_PTR_USE(ary, ptr, { + for (i=0; i<argc; i++) { + RB_OBJ_WRITE(buff_owner_ary, &ptr[i+beg], argv[i]); + } + }); + } +#else + /* giveup write barrier (traditional way) */ + RARRAY_PTR(buff_owner_ary); + MEMCPY(RARRAY_PTR(ary)+beg, argv, VALUE, argc); +#endif +} + +static void +ary_memcpy(VALUE ary, long beg, long argc, const VALUE *argv) +{ + ary_memcpy0(ary, beg, argc, argv, ary); +} + static void ary_resize_capa(VALUE ary, long capacity) { @@ -344,14 +349,11 @@ rb_ary_modify(VALUE ary) ARY_SET_PTR(ary, ptr); } - /* TODO: age2 promotion, OBJ_PROMOTED() checks not infant. */ - if (OBJ_PROMOTED(ary) && !OBJ_PROMOTED(shared)) { - rb_gc_writebarrier_remember_promoted(ary); - } + rb_gc_writebarrier_remember(ary); } } -static void +static VALUE ary_ensure_room_for_push(VALUE ary, long add_len) { long old_len = RARRAY_LEN(ary); @@ -367,6 +369,7 @@ ary_ensure_room_for_push(VALUE ary, long add_len) if (ARY_SHARED_OCCUPIED(shared)) { if (RARRAY_CONST_PTR(ary) - RARRAY_CONST_PTR(shared) + new_len <= RARRAY_LEN(shared)) { rb_ary_modify_check(ary); + return shared; } else { /* if array is shared, then it is likely it participate in push/shift pattern */ @@ -375,16 +378,21 @@ ary_ensure_room_for_push(VALUE ary, long add_len) if (new_len > capa - (capa >> 6)) { ary_double_capa(ary, new_len); } + return ary; } - return; } } + rb_ary_modify(ary); + } + else { + rb_ary_modify_check(ary); } - rb_ary_modify(ary); capa = ARY_CAPA(ary); if (new_len > capa) { ary_double_capa(ary, new_len); } + + return ary; } /* @@ -451,10 +459,7 @@ ary_alloc(VALUE klass) static VALUE empty_ary_alloc(VALUE klass) { - if (RUBY_DTRACE_ARRAY_CREATE_ENABLED()) { - RUBY_DTRACE_ARRAY_CREATE(0, rb_sourcefile(), rb_sourceline()); - } - + RUBY_DTRACE_CREATE_HOOK(ARRAY, 0); return ary_alloc(klass); } @@ -470,21 +475,16 @@ ary_new(VALUE klass, long capa) rb_raise(rb_eArgError, "array size too big"); } - if (RUBY_DTRACE_ARRAY_CREATE_ENABLED()) { - RUBY_DTRACE_ARRAY_CREATE(capa, rb_sourcefile(), rb_sourceline()); - } + RUBY_DTRACE_CREATE_HOOK(ARRAY, capa); + ary = ary_alloc(klass); if (capa > RARRAY_EMBED_LEN_MAX) { ptr = ALLOC_N(VALUE, capa); - ary = ary_alloc(klass); FL_UNSET_EMBED(ary); ARY_SET_PTR(ary, ptr); ARY_SET_CAPA(ary, capa); ARY_SET_HEAP_LEN(ary, 0); } - else { - ary = ary_alloc(klass); - } return ary; } @@ -502,7 +502,7 @@ rb_ary_new(void) } VALUE -rb_ary_new_from_args(long n, ...) +(rb_ary_new_from_args)(long n, ...) { va_list ar; VALUE ary; @@ -512,7 +512,7 @@ rb_ary_new_from_args(long n, ...) va_start(ar, n); for (i=0; i<n; i++) { - RARRAY_ASET(ary, i, va_arg(ar, VALUE)); + ARY_SET(ary, i, va_arg(ar, VALUE)); } va_end(ar); @@ -521,11 +521,11 @@ rb_ary_new_from_args(long n, ...) } VALUE -rb_ary_new_from_values(long n, const VALUE *elts) +rb_ary_tmp_new_from_values(VALUE klass, long n, const VALUE *elts) { VALUE ary; - ary = rb_ary_new2(n); + ary = ary_new(klass, n); if (n > 0 && elts) { ary_memcpy(ary, 0, n, elts); ARY_SET_LEN(ary, n); @@ -535,24 +535,43 @@ rb_ary_new_from_values(long n, const VALUE *elts) } VALUE +rb_ary_new_from_values(long n, const VALUE *elts) +{ + return rb_ary_tmp_new_from_values(rb_cArray, n, elts); +} + +VALUE rb_ary_tmp_new(long capa) { return ary_new(0, capa); } +VALUE +rb_ary_tmp_new_fill(long capa) +{ + VALUE ary = ary_new(0, capa); + ary_memfill(ary, 0, capa, Qnil); + ARY_SET_LEN(ary, capa); + return ary; +} + void rb_ary_free(VALUE ary) { if (ARY_OWNS_HEAP_P(ary)) { + RB_DEBUG_COUNTER_INC(obj_ary_ptr); ruby_sized_xfree((void *)ARY_HEAP_PTR(ary), ARY_HEAP_SIZE(ary)); } + else { + RB_DEBUG_COUNTER_INC(obj_ary_embed); + } } RUBY_FUNC_EXPORTED size_t rb_ary_memsize(VALUE ary) { if (ARY_OWNS_HEAP_P(ary)) { - return RARRAY(ary)->as.heap.aux.capa * sizeof(VALUE); + return ARY_CAPA(ary) * sizeof(VALUE); } else { return 0; @@ -585,7 +604,7 @@ ary_make_shared(VALUE ary) } else { long capa = ARY_CAPA(ary), len = RARRAY_LEN(ary); - NEWOBJ_OF(shared, struct RArray, 0, T_ARRAY); /* keep shared ary as non-WB-protected */ + NEWOBJ_OF(shared, struct RArray, 0, T_ARRAY | (RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0)); FL_UNSET_EMBED(shared); ARY_SET_LEN((VALUE)shared, capa); @@ -622,16 +641,17 @@ rb_assoc_new(VALUE car, VALUE cdr) return rb_ary_new3(2, car, cdr); } -static VALUE -to_ary(VALUE ary) +VALUE +rb_to_array_type(VALUE ary) { - return rb_convert_type(ary, T_ARRAY, "Array", "to_ary"); + return rb_convert_type_with_id(ary, T_ARRAY, "Array", idTo_ary); } +#define to_ary rb_to_array_type VALUE rb_check_array_type(VALUE ary) { - return rb_check_convert_type(ary, T_ARRAY, "Array", "to_ary"); + return rb_check_convert_type_with_id(ary, T_ARRAY, "Array", idTo_ary); } /* @@ -661,16 +681,16 @@ rb_ary_s_try_convert(VALUE dummy, VALUE ary) /* * call-seq: - * Array.new(size=0, obj=nil) + * Array.new(size=0, default=nil) * Array.new(array) * Array.new(size) {|index| block } * * Returns a new array. * * In the first form, if no arguments are sent, the new array will be empty. - * When a +size+ and an optional +obj+ are sent, an array is created with - * +size+ copies of +obj+. Take notice that all elements will reference the - * same object +obj+. + * When a +size+ and an optional +default+ are sent, an array is created with + * +size+ copies of +default+. Take notice that all elements will reference the + * same object +default+. * * The second form creates a copy of the array passed as a parameter (the * array is generated by calling to_ary on the parameter). @@ -744,12 +764,14 @@ rb_ary_initialize(int argc, VALUE *argv, VALUE ary) } len = NUM2LONG(size); + /* NUM2LONG() may call size.to_int, ary can be frozen, modified, etc */ if (len < 0) { rb_raise(rb_eArgError, "negative array size"); } if (len > ARY_MAX_SIZE) { rb_raise(rb_eArgError, "array size too big"); } + /* recheck after argument conversion */ rb_ary_modify(ary); ary_resize_capa(ary, len); if (rb_block_given_p()) { @@ -817,7 +839,7 @@ rb_ary_store(VALUE ary, long idx, VALUE val) if (idx >= len) { ARY_SET_LEN(ary, idx + 1); } - RARRAY_ASET(ary, idx, val); + ARY_SET(ary, idx, val); } static VALUE @@ -861,7 +883,7 @@ enum ary_take_pos_flags }; static VALUE -ary_take_first_or_last(int argc, VALUE *argv, VALUE ary, enum ary_take_pos_flags last) +ary_take_first_or_last(int argc, const VALUE *argv, VALUE ary, enum ary_take_pos_flags last) { VALUE nv; long n; @@ -891,7 +913,10 @@ ary_take_first_or_last(int argc, VALUE *argv, VALUE ary, enum ary_take_pos_flags * expression returns the array itself, so several appends * may be chained together. * - * [ 1, 2 ] << "c" << "d" << [ 3, 4 ] + * a = [ 1, 2 ] + * a << "c" << "d" << [ 3, 4 ] + * #=> [ 1, 2, "c", "d", [ 3, 4 ] ] + * a * #=> [ 1, 2, "c", "d", [ 3, 4 ] ] * */ @@ -900,20 +925,20 @@ VALUE rb_ary_push(VALUE ary, VALUE item) { long idx = RARRAY_LEN(ary); - - ary_ensure_room_for_push(ary, 1); - RARRAY_ASET(ary, idx, item); + VALUE target_ary = ary_ensure_room_for_push(ary, 1); + RARRAY_PTR_USE(ary, ptr, { + RB_OBJ_WRITE(target_ary, &ptr[idx], item); + }); ARY_SET_LEN(ary, idx + 1); return ary; } VALUE -rb_ary_cat(VALUE ary, const VALUE *ptr, long len) +rb_ary_cat(VALUE ary, const VALUE *argv, long len) { long oldlen = RARRAY_LEN(ary); - - ary_ensure_room_for_push(ary, len); - ary_memcpy(ary, oldlen, len, ptr); + VALUE target_ary = ary_ensure_room_for_push(ary, len); + ary_memcpy0(ary, oldlen, len, argv, target_ary); ARY_SET_LEN(ary, oldlen + len); return ary; } @@ -930,7 +955,7 @@ rb_ary_cat(VALUE ary, const VALUE *ptr, long len) * a = [ "a", "b", "c" ] * a.push("d", "e", "f") * #=> ["a", "b", "c", "d", "e", "f"] - * [1, 2, 3,].push(4).push(5) + * [1, 2, 3].push(4).push(5) * #=> [1, 2, 3, 4, 5] */ @@ -1010,11 +1035,11 @@ rb_ary_shift(VALUE ary) } assert(!ARY_EMBED_P(ary)); /* ARY_EMBED_LEN_MAX < ARY_DEFAULT_SIZE */ - RARRAY_ASET(ary, 0, Qnil); + ARY_SET(ary, 0, Qnil); ary_make_shared(ary); } else if (ARY_SHARED_OCCUPIED(ARY_SHARED(ary))) { - RARRAY_ASET(ary, 0, Qnil); + RARRAY_PTR_USE(ary, ptr, ptr[0] = Qnil); } ARY_INCREASE_PTR(ary, 1); /* shift ptr */ ARY_INCREASE_LEN(ary, -1); @@ -1060,21 +1085,28 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary) n = RARRAY_LEN(result); if (ARY_SHARED_P(ary)) { if (ARY_SHARED_OCCUPIED(ARY_SHARED(ary))) { + setup_occupied_shared: ary_mem_clear(ary, 0, n); } ARY_INCREASE_PTR(ary, n); } else { - RARRAY_PTR_USE(ary, ptr, { - MEMMOVE(ptr, ptr + n, VALUE, RARRAY_LEN(ary)-n); - }); /* WB: no new reference */ + if (RARRAY_LEN(ary) < ARY_DEFAULT_SIZE) { + RARRAY_PTR_USE(ary, ptr, { + MEMMOVE(ptr, ptr+n, VALUE, RARRAY_LEN(ary)-n); + }); /* WB: no new reference */ + } + else { + ary_make_shared(ary); + goto setup_occupied_shared; + } } ARY_INCREASE_LEN(ary, -n); return result; } -static void +static VALUE ary_ensure_room_for_unshift(VALUE ary, int argc) { long len = RARRAY_LEN(ary); @@ -1086,6 +1118,8 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) rb_raise(rb_eIndexError, "index %ld too big", new_len); } + rb_ary_modify(ary); + if (ARY_SHARED_P(ary)) { VALUE shared = ARY_SHARED(ary); capa = RARRAY_LEN(shared); @@ -1096,7 +1130,6 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) } } - rb_ary_modify(ary); capa = ARY_CAPA(ary); if (capa - (capa >> 6) <= new_len) { ary_double_capa(ary, new_len); @@ -1120,12 +1153,16 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) head = sharedp + argc + room; } ARY_SET_PTR(ary, head - argc); + assert(ARY_SHARED_OCCUPIED(ARY_SHARED(ary))); + return ARY_SHARED(ary); } else { /* sliding items */ RARRAY_PTR_USE(ary, ptr, { MEMMOVE(ptr + argc, ptr, VALUE, len); }); + + return ary; } } @@ -1145,14 +1182,15 @@ static VALUE rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary) { long len = RARRAY_LEN(ary); + VALUE target_ary; if (argc == 0) { rb_ary_modify_check(ary); return ary; } - ary_ensure_room_for_unshift(ary, argc); - ary_memcpy(ary, 0, argc, argv); + target_ary = ary_ensure_room_for_unshift(ary, argc); + ary_memcpy0(ary, 0, argc, argv, target_ary); ARY_SET_LEN(ary, len + argc); return ary; } @@ -1178,10 +1216,17 @@ rb_ary_elt(VALUE ary, long offset) VALUE rb_ary_entry(VALUE ary, long offset) { + long len = RARRAY_LEN(ary); + const VALUE *ptr = RARRAY_CONST_PTR(ary); + if (len == 0) return Qnil; if (offset < 0) { - offset += RARRAY_LEN(ary); + offset += len; + if (offset < 0) return Qnil; + } + else if (len <= offset) { + return Qnil; } - return rb_ary_elt(ary, offset); + return ptr[offset]; } VALUE @@ -1239,23 +1284,31 @@ rb_ary_subseq(VALUE ary, long beg, long len) */ VALUE -rb_ary_aref(int argc, VALUE *argv, VALUE ary) +rb_ary_aref(int argc, const VALUE *argv, VALUE ary) { - VALUE arg; - long beg, len; - + rb_check_arity(argc, 1, 2); if (argc == 2) { - beg = NUM2LONG(argv[0]); - len = NUM2LONG(argv[1]); - if (beg < 0) { - beg += RARRAY_LEN(ary); - } - return rb_ary_subseq(ary, beg, len); + return rb_ary_aref2(ary, argv[0], argv[1]); } - if (argc != 1) { - rb_scan_args(argc, argv, "11", NULL, NULL); + return rb_ary_aref1(ary, argv[0]); +} + +VALUE +rb_ary_aref2(VALUE ary, VALUE b, VALUE e) +{ + long beg = NUM2LONG(b); + long len = NUM2LONG(e); + if (beg < 0) { + beg += RARRAY_LEN(ary); } - arg = argv[0]; + return rb_ary_subseq(ary, beg, len); +} + +VALUE +rb_ary_aref1(VALUE ary, VALUE arg) +{ + long beg, len; + /* special case - speeding up */ if (FIXNUM_P(arg)) { return rb_ary_entry(ary, FIX2LONG(arg)); @@ -1285,7 +1338,7 @@ rb_ary_aref(int argc, VALUE *argv, VALUE ary) * a.at(-1) #=> "e" */ -static VALUE +VALUE rb_ary_at(VALUE ary, VALUE pos) { return rb_ary_entry(ary, NUM2LONG(pos)); @@ -1334,7 +1387,7 @@ rb_ary_first(int argc, VALUE *argv, VALUE ary) */ VALUE -rb_ary_last(int argc, VALUE *argv, VALUE ary) +rb_ary_last(int argc, const VALUE *argv, VALUE ary) { if (argc == 0) { long len = RARRAY_LEN(ary); @@ -1358,8 +1411,9 @@ rb_ary_last(int argc, VALUE *argv, VALUE ary) * +default+ value. * * Alternatively, if a block is given it will only be executed when an - * invalid +index+ is referenced. Negative values of +index+ count from the - * end of the array. + * invalid +index+ is referenced. + * + * Negative values of +index+ count from the end of the array. * * a = [ 11, 22, 33, 44 ] * a.fetch(1) #=> 22 @@ -1426,9 +1480,8 @@ rb_ary_fetch(int argc, VALUE *argv, VALUE ary) static VALUE rb_ary_index(int argc, VALUE *argv, VALUE ary) { - const VALUE *ptr; VALUE val; - long i, len; + long i; if (argc == 0) { RETURN_ENUMERATOR(ary, 0, 0); @@ -1443,20 +1496,11 @@ rb_ary_index(int argc, VALUE *argv, VALUE ary) val = argv[0]; if (rb_block_given_p()) rb_warn("given block not used"); - len = RARRAY_LEN(ary); - ptr = RARRAY_CONST_PTR(ary); - for (i=0; i<len; i++) { - VALUE e = ptr[i]; - switch (rb_equal_opt(e, val)) { - case Qundef: - if (!rb_equal(e, val)) break; - case Qtrue: + for (i=0; i<RARRAY_LEN(ary); i++) { + VALUE e = RARRAY_AREF(ary, i); + if (rb_equal(e, val)) { return LONG2NUM(i); - case Qfalse: - continue; } - len = RARRAY_LEN(ary); - ptr = RARRAY_CONST_PTR(ary); } return Qnil; } @@ -1488,7 +1532,6 @@ rb_ary_index(int argc, VALUE *argv, VALUE ary) static VALUE rb_ary_rindex(int argc, VALUE *argv, VALUE ary) { - const VALUE *ptr; VALUE val; long i = RARRAY_LEN(ary), len; @@ -1507,21 +1550,11 @@ rb_ary_rindex(int argc, VALUE *argv, VALUE ary) val = argv[0]; if (rb_block_given_p()) rb_warn("given block not used"); - ptr = RARRAY_CONST_PTR(ary); while (i--) { - VALUE e = ptr[i]; - switch (rb_equal_opt(e, val)) { - case Qundef: - if (!rb_equal(e, val)) break; - case Qtrue: + VALUE e = RARRAY_AREF(ary, i); + if (rb_equal(e, val)) { return LONG2NUM(i); - case Qfalse: - continue; } - if (i > (len = RARRAY_LEN(ary))) { - i = len; - } - ptr = RARRAY_CONST_PTR(ary); } return Qnil; } @@ -1536,10 +1569,10 @@ rb_ary_to_ary(VALUE obj) } static void -rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl) +rb_ary_splice(VALUE ary, long beg, long len, const VALUE *rptr, long rlen) { - long rlen; long olen; + long rofs; if (len < 0) rb_raise(rb_eIndexError, "negative length (%ld)", len); olen = RARRAY_LEN(ary); @@ -1554,23 +1587,22 @@ rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl) len = olen - beg; } - if (rpl == Qundef) { - rlen = 0; - } - else { - rpl = rb_ary_to_ary(rpl); - rlen = RARRAY_LEN(rpl); - olen = RARRAY_LEN(ary); /* ary may be resized in rpl.to_ary too */ + { + const VALUE *optr = RARRAY_CONST_PTR(ary); + rofs = (rptr >= optr && rptr < optr + olen) ? rptr - optr : -1; } + if (beg >= olen) { + VALUE target_ary; if (beg > ARY_MAX_SIZE - rlen) { rb_raise(rb_eIndexError, "index %ld too big", beg); } - ary_ensure_room_for_push(ary, rlen-len); /* len is 0 or negative */ + target_ary = ary_ensure_room_for_push(ary, rlen-len); /* len is 0 or negative */ len = beg + rlen; ary_mem_clear(ary, olen, beg - olen); if (rlen > 0) { - ary_memcpy(ary, beg, rlen, RARRAY_CONST_PTR(rpl)); + if (rofs != -1) rptr = RARRAY_CONST_PTR(ary) + rofs; + ary_memcpy0(ary, beg, rlen, rptr, target_ary); } ARY_SET_LEN(ary, len); } @@ -1593,10 +1625,10 @@ rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl) ARY_SET_LEN(ary, alen); } if (rlen > 0) { - MEMMOVE(RARRAY_PTR(ary) + beg, RARRAY_CONST_PTR(rpl), VALUE, rlen); + if (rofs != -1) rptr = RARRAY_CONST_PTR(ary) + rofs; + MEMMOVE(RARRAY_PTR(ary) + beg, rptr, VALUE, rlen); } } - RB_GC_GUARD(rpl); } void @@ -1699,13 +1731,13 @@ static VALUE rb_ary_aset(int argc, VALUE *argv, VALUE ary) { long offset, beg, len; + VALUE rpl; if (argc == 3) { rb_ary_modify_check(ary); beg = NUM2LONG(argv[0]); len = NUM2LONG(argv[1]); - rb_ary_splice(ary, beg, len, argv[2]); - return argv[2]; + goto range; } rb_check_arity(argc, 2, 2); rb_ary_modify_check(ary); @@ -1715,8 +1747,11 @@ rb_ary_aset(int argc, VALUE *argv, VALUE ary) } if (rb_range_beg_len(argv[0], &beg, &len, RARRAY_LEN(ary), 1)) { /* check if idx is Range */ - rb_ary_splice(ary, beg, len, argv[1]); - return argv[1]; + range: + rpl = rb_ary_to_ary(argv[argc-1]); + rb_ary_splice(ary, beg, len, RARRAY_CONST_PTR(rpl), RARRAY_LEN(rpl)); + RB_GC_GUARD(rpl); + return argv[argc-1]; } offset = NUM2LONG(argv[0]); @@ -1732,7 +1767,9 @@ fixnum: * Inserts the given values before the element with the given +index+. * * Negative indices count backwards from the end of the array, where +-1+ is - * the last element. + * the last element. If a negative index is used, the given values will be + * inserted after that element, so using an index of +-1+ will insert the + * values at the end of the array. * * a = %w{ a b c d } * a.insert(2, 99) #=> ["a", "b", 99, "c", "d"] @@ -1746,15 +1783,20 @@ rb_ary_insert(int argc, VALUE *argv, VALUE ary) rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS); rb_ary_modify_check(ary); - if (argc == 1) return ary; pos = NUM2LONG(argv[0]); + if (argc == 1) return ary; if (pos == -1) { pos = RARRAY_LEN(ary); } - if (pos < 0) { + else if (pos < 0) { + long minpos = -RARRAY_LEN(ary) - 1; + if (pos < minpos) { + rb_raise(rb_eIndexError, "index %ld too small for array; minimum: %ld", + pos, minpos); + } pos++; } - rb_ary_splice(ary, pos, 0, rb_ary_new4(argc - 1, argv + 1)); + rb_ary_splice(ary, pos, 0, argv + 1, argc - 1); return ary; } @@ -1773,9 +1815,9 @@ ary_enum_length(VALUE ary, VALUE args, VALUE eobj) * ary.each -> Enumerator * * Calls the given block once for each element in +self+, passing that element - * as a parameter. + * as a parameter. Returns the array itself. * - * An Enumerator is returned if no block is given. + * If no block is given, an Enumerator is returned. * * a = [ "a", "b", "c" ] * a.each {|x| print x, " -- " } @@ -1786,10 +1828,9 @@ ary_enum_length(VALUE ary, VALUE args, VALUE eobj) */ VALUE -rb_ary_each(VALUE array) +rb_ary_each(VALUE ary) { long i; - volatile VALUE ary = array; RETURN_SIZED_ENUMERATOR(ary, 0, 0, ary_enum_length); for (i=0; i<RARRAY_LEN(ary); i++) { @@ -1962,7 +2003,10 @@ ary_join_1(VALUE obj, VALUE ary, VALUE sep, long i, VALUE result, int *first) if (RB_TYPE_P(val, T_STRING)) { str_join: rb_str_buf_append(result, val); - *first = FALSE; + if (*first) { + rb_enc_copy(result, val); + *first = FALSE; + } } else if (RB_TYPE_P(val, T_ARRAY)) { obj = val; @@ -1973,6 +2017,7 @@ ary_join_1(VALUE obj, VALUE ary, VALUE sep, long i, VALUE result, int *first) else { VALUE args[4]; + *first = FALSE; args[0] = val; args[1] = sep; args[2] = result; @@ -1986,17 +2031,13 @@ ary_join_1(VALUE obj, VALUE ary, VALUE sep, long i, VALUE result, int *first) val = tmp; goto str_join; } - tmp = rb_check_convert_type(val, T_ARRAY, "Array", "to_ary"); + tmp = rb_check_array_type(val); if (!NIL_P(tmp)) { obj = val; val = tmp; goto ary_join; } val = rb_obj_as_string(val); - if (*first) { - rb_enc_copy(result, val); - *first = FALSE; - } goto str_join; } } @@ -2047,11 +2088,16 @@ rb_ary_join(VALUE ary, VALUE sep) * * Returns a string created by converting each element of the array to * a string, separated by the given +separator+. - * If the +separator+ is +nil+, it uses current $,. - * If both the +separator+ and $, are nil, it uses empty string. + * If the +separator+ is +nil+, it uses current <code>$,</code>. + * If both the +separator+ and <code>$,</code> are +nil+, + * it uses an empty string. * * [ "a", "b", "c" ].join #=> "abc" * [ "a", "b", "c" ].join("-") #=> "a-b-c" + * + * For nested arrays, join is applied recursively: + * + * [ "a", [1, 2, [:x, :y]], "b" ].join("-") #=> "a-1-2-x-y-b" */ static VALUE @@ -2144,12 +2190,13 @@ static VALUE rb_ary_to_h(VALUE ary) { long i; - VALUE hash = rb_hash_new(); + VALUE hash = rb_hash_new_with_size(RARRAY_LEN(ary)); for (i=0; i<RARRAY_LEN(ary); i++) { - VALUE key_value_pair = rb_check_array_type(rb_ary_elt(ary, i)); + const VALUE elt = rb_ary_elt(ary, i); + const VALUE key_value_pair = rb_check_array_type(elt); if (NIL_P(key_value_pair)) { - rb_raise(rb_eTypeError, "wrong element type %s at %ld (expected array)", - rb_builtin_class_name(rb_ary_elt(ary, i)), i); + rb_raise(rb_eTypeError, "wrong element type %"PRIsVALUE" at %ld (expected array)", + rb_obj_class(elt), i); } if (RARRAY_LEN(key_value_pair) != 2) { rb_raise(rb_eArgError, "wrong array length at %ld (expected 2, was %ld)", @@ -2344,26 +2391,9 @@ rb_ary_rotate_m(int argc, VALUE *argv, VALUE ary) struct ary_sort_data { VALUE ary; - int opt_methods; - int opt_inited; + struct cmp_opt_data cmp_opt; }; -enum { - sort_opt_Fixnum, - sort_opt_String, - sort_optimizable_count -}; - -#define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString) - -#define SORT_OPTIMIZABLE_BIT(type) (1U << TOKEN_PASTE(sort_opt_,type)) -#define SORT_OPTIMIZABLE(data, type) \ - (((data)->opt_inited & SORT_OPTIMIZABLE_BIT(type)) ? \ - ((data)->opt_methods & SORT_OPTIMIZABLE_BIT(type)) : \ - (((data)->opt_inited |= SORT_OPTIMIZABLE_BIT(type)), \ - rb_method_basic_definition_p(TOKEN_PASTE(rb_c,type), id_cmp) && \ - ((data)->opt_methods |= SORT_OPTIMIZABLE_BIT(type)))) - static VALUE sort_reentered(VALUE ary) { @@ -2379,9 +2409,12 @@ sort_1(const void *ap, const void *bp, void *dummy) struct ary_sort_data *data = dummy; VALUE retval = sort_reentered(data->ary); VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp; + VALUE args[2]; int n; - retval = rb_yield_values(2, a, b); + args[0] = a; + args[1] = b; + retval = rb_yield_values2(2, args); n = rb_cmpint(retval, a, b); sort_reentered(data->ary); return n; @@ -2395,14 +2428,17 @@ sort_2(const void *ap, const void *bp, void *dummy) VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp; int n; - if (FIXNUM_P(a) && FIXNUM_P(b) && SORT_OPTIMIZABLE(data, Fixnum)) { + if (FIXNUM_P(a) && FIXNUM_P(b) && CMP_OPTIMIZABLE(data->cmp_opt, Fixnum)) { if ((long)a > (long)b) return 1; if ((long)a < (long)b) return -1; return 0; } - if (STRING_P(a) && STRING_P(b) && SORT_OPTIMIZABLE(data, String)) { + if (STRING_P(a) && STRING_P(b) && CMP_OPTIMIZABLE(data->cmp_opt, String)) { return rb_str_cmp(a, b); } + if (RB_FLOAT_TYPE_P(a) && CMP_OPTIMIZABLE(data->cmp_opt, Float)) { + return rb_float_cmp(a, b); + } retval = rb_funcallv(a, id_cmp, 1, &b); n = rb_cmpint(retval, a, b); @@ -2421,15 +2457,18 @@ sort_2(const void *ap, const void *bp, void *dummy) * Comparisons for the sort will be done using the <code><=></code> operator * or using an optional code block. * - * The block must implement a comparison between +a+ and +b+, and return - * +-1+, when +a+ follows +b+, +0+ when +a+ and +b+ are equivalent, or ++1+ - * if +b+ follows +a+. + * The block must implement a comparison between +a+ and +b+ and return + * an integer less than 0 when +b+ follows +a+, +0+ when +a+ and +b+ + * are equivalent, or an integer greater than 0 when +a+ follows +b+. * - * See also Enumerable#sort_by. + * The result is not guaranteed to be stable. When the comparison of two + * elements returns +0+, the order of the elements is unpredictable. * - * a = [ "d", "a", "e", "c", "b" ] - * a.sort! #=> ["a", "b", "c", "d", "e"] - * a.sort! { |x,y| y <=> x } #=> ["e", "d", "c", "b", "a"] + * ary = [ "d", "a", "e", "c", "b" ] + * ary.sort! #=> ["a", "b", "c", "d", "e"] + * ary.sort! { |a, b| b <=> a } #=> ["e", "d", "c", "b", "a"] + * + * See also Enumerable#sort_by. */ VALUE @@ -2444,8 +2483,8 @@ rb_ary_sort_bang(VALUE ary) RBASIC_CLEAR_CLASS(tmp); data.ary = tmp; - data.opt_methods = 0; - data.opt_inited = 0; + data.cmp_opt.opt_methods = 0; + data.cmp_opt.opt_inited = 0; RARRAY_PTR_USE(tmp, ptr, { ruby_qsort(ptr, len, sizeof(VALUE), rb_block_given_p()?sort_1:sort_2, &data); @@ -2454,8 +2493,8 @@ rb_ary_sort_bang(VALUE ary) if (ARY_EMBED_P(tmp)) { if (ARY_SHARED_P(ary)) { /* ary might be destructively operated in the given block */ rb_ary_unshare(ary); + FL_SET_EMBED(ary); } - FL_SET_EMBED(ary); ary_memcpy(ary, 0, ARY_EMBED_LEN(tmp), ARY_EMBED_PTR(tmp)); ARY_SET_LEN(ary, ARY_EMBED_LEN(tmp)); } @@ -2502,16 +2541,18 @@ rb_ary_sort_bang(VALUE ary) * Comparisons for the sort will be done using the <code><=></code> operator * or using an optional code block. * - * The block must implement a comparison between +a+ and +b+, and return - * +-1+, when +a+ follows +b+, +0+ when +a+ and +b+ are equivalent, or ++1+ - * if +b+ follows +a+. + * The block must implement a comparison between +a+ and +b+ and return + * an integer less than 0 when +b+ follows +a+, +0+ when +a+ and +b+ + * are equivalent, or an integer greater than 0 when +a+ follows +b+. * + * The result is not guaranteed to be stable. When the comparison of two + * elements returns +0+, the order of the elements is unpredictable. * - * See also Enumerable#sort_by. + * ary = [ "d", "a", "e", "c", "b" ] + * ary.sort #=> ["a", "b", "c", "d", "e"] + * ary.sort { |a, b| b <=> a } #=> ["e", "d", "c", "b", "a"] * - * a = [ "d", "a", "e", "c", "b" ] - * a.sort #=> ["a", "b", "c", "d", "e"] - * a.sort { |x,y| y <=> x } #=> ["e", "d", "c", "b", "a"] + * See also Enumerable#sort_by. */ VALUE @@ -2522,6 +2563,8 @@ rb_ary_sort(VALUE ary) return ary; } +static VALUE rb_ary_bsearch_index(VALUE ary); + /* * call-seq: * ary.bsearch {|x| block } -> elem @@ -2529,12 +2572,12 @@ rb_ary_sort(VALUE ary) * By using binary search, finds a value from this array which meets * the given condition in O(log n) where n is the size of the array. * - * You can use this method in two use cases: a find-minimum mode and + * You can use this method in two modes: a find-minimum mode and * a find-any mode. In either case, the elements of the array must be * monotone (or sorted) with respect to the block. * - * In find-minimum mode (this is a good choice for typical use case), - * the block must return true or false, and there must be an index i + * In find-minimum mode (this is a good choice for typical use cases), + * the block must always return true or false, and there must be an index i * (0 <= i <= ary.size) so that: * * - the block returns false for any element whose index is less than @@ -2552,7 +2595,7 @@ rb_ary_sort(VALUE ary) * ary.bsearch {|x| x >= 100 } #=> nil * * In find-any mode (this behaves like libc's bsearch(3)), the block - * must return a number, and there must be two indices i and j + * must always return a number, and there must be two indices i and j * (0 <= i <= j <= ary.size) so that: * * - the block returns a positive number for ary[k] if 0 <= k < i, @@ -2578,6 +2621,30 @@ rb_ary_sort(VALUE ary) static VALUE rb_ary_bsearch(VALUE ary) { + VALUE index_result = rb_ary_bsearch_index(ary); + + if (FIXNUM_P(index_result)) { + return rb_ary_entry(ary, FIX2LONG(index_result)); + } + return index_result; +} + +/* + * call-seq: + * ary.bsearch_index {|x| block } -> int or nil + * + * By using binary search, finds an index of a value from this array which + * meets the given condition in O(log n) where n is the size of the array. + * + * It supports two modes, depending on the nature of the block. They are + * exactly the same as in the case of the #bsearch method, with the only difference + * being that this method returns the index of the element instead of the + * element itself. For more details consult the documentation for #bsearch. + */ + +static VALUE +rb_ary_bsearch_index(VALUE ary) +{ long low = 0, high = RARRAY_LEN(ary), mid; int smaller = 0, satisfied = 0; VALUE v, val; @@ -2588,8 +2655,8 @@ rb_ary_bsearch(VALUE ary) val = rb_ary_entry(ary, mid); v = rb_yield(val); if (FIXNUM_P(v)) { - if (FIX2INT(v) == 0) return val; - smaller = FIX2INT(v) < 0; + if (v == INT2FIX(0)) return INT2FIX(mid); + smaller = (SIGNED_VALUE)v < 0; /* Fixnum preserves its sign-bit */ } else if (v == Qtrue) { satisfied = 1; @@ -2600,16 +2667,16 @@ rb_ary_bsearch(VALUE ary) } else if (rb_obj_is_kind_of(v, rb_cNumeric)) { const VALUE zero = INT2FIX(0); - switch (rb_cmpint(rb_funcallv(v, id_cmp, 1, &zero), v, INT2FIX(0))) { - case 0: return val; - case 1: smaller = 1; break; - case -1: smaller = 0; + switch (rb_cmpint(rb_funcallv(v, id_cmp, 1, &zero), v, zero)) { + case 0: return INT2FIX(mid); + case 1: smaller = 1; break; + case -1: smaller = 0; } } else { - rb_raise(rb_eTypeError, "wrong argument type %s" - " (must be numeric, true, false or nil)", - rb_obj_classname(v)); + rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE + " (must be numeric, true, false or nil)", + rb_obj_class(v)); } if (smaller) { high = mid; @@ -2618,9 +2685,8 @@ rb_ary_bsearch(VALUE ary) low = mid + 1; } } - if (low == RARRAY_LEN(ary)) return Qnil; if (!satisfied) return Qnil; - return rb_ary_entry(ary, low); + return INT2FIX(low); } @@ -2638,8 +2704,12 @@ sort_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, dummy)) * Sorts +self+ in place using a set of keys generated by mapping the * values in +self+ through the given block. * + * The result is not guaranteed to be stable. When two keys are equal, + * the order of the corresponding elements is unpredictable. + * * If no block is given, an Enumerator is returned instead. * + * See also Enumerable#sort_by. */ static VALUE @@ -2671,9 +2741,9 @@ rb_ary_sort_by_bang(VALUE ary) * If no block is given, an Enumerator is returned instead. * * a = [ "a", "b", "c", "d" ] - * a.collect { |x| x + "!" } #=> ["a!", "b!", "c!", "d!"] - * a.map.with_index{ |x, i| x * i } #=> ["", "b", "cc", "ddd"] - * a #=> ["a", "b", "c", "d"] + * a.collect { |x| x + "!" } #=> ["a!", "b!", "c!", "d!"] + * a.map.with_index { |x, i| x * i } #=> ["", "b", "cc", "ddd"] + * a #=> ["a", "b", "c", "d"] */ static VALUE @@ -2685,7 +2755,7 @@ rb_ary_collect(VALUE ary) RETURN_SIZED_ENUMERATOR(ary, 0, 0, ary_enum_length); collect = rb_ary_new2(RARRAY_LEN(ary)); for (i = 0; i < RARRAY_LEN(ary); i++) { - rb_ary_push(collect, rb_yield(RARRAY_AREF(ary, i))); + rb_ary_push(collect, rb_yield_force_blockarg(RARRAY_AREF(ary, i))); } return collect; } @@ -2726,7 +2796,7 @@ rb_ary_collect_bang(VALUE ary) } VALUE -rb_get_values_at(VALUE obj, long olen, int argc, VALUE *argv, VALUE (*func) (VALUE, long)) +rb_get_values_at(VALUE obj, long olen, int argc, const VALUE *argv, VALUE (*func) (VALUE, long)) { VALUE result = rb_ary_new2(argc); long beg, len, i, j; @@ -2810,6 +2880,50 @@ rb_ary_select(VALUE ary) return result; } +struct select_bang_arg { + VALUE ary; + long len[2]; +}; + +static VALUE +select_bang_i(VALUE a) +{ + volatile struct select_bang_arg *arg = (void *)a; + VALUE ary = arg->ary; + long i1, i2; + + for (i1 = i2 = 0; i1 < RARRAY_LEN(ary); arg->len[0] = ++i1) { + VALUE v = RARRAY_AREF(ary, i1); + if (!RTEST(rb_yield(v))) continue; + if (i1 != i2) { + rb_ary_store(ary, i2, v); + } + arg->len[1] = ++i2; + } + return (i1 == i2) ? Qnil : ary; +} + +static VALUE +select_bang_ensure(VALUE a) +{ + volatile struct select_bang_arg *arg = (void *)a; + VALUE ary = arg->ary; + long len = RARRAY_LEN(ary); + long i1 = arg->len[0], i2 = arg->len[1]; + + if (i2 < len && i2 < i1) { + long tail = 0; + if (i1 < len) { + tail = len - i1; + RARRAY_PTR_USE(ary, ptr, { + MEMMOVE(ptr + i2, ptr + i1, VALUE, tail); + }); + } + ARY_SET_LEN(ary, i2 + tail); + } + return ary; +} + /* * call-seq: * ary.select! {|item| block } -> ary or nil @@ -2818,6 +2932,8 @@ rb_ary_select(VALUE ary) * Invokes the given block passing in successive elements from +self+, * deleting elements for which the block returns a +false+ value. * + * The array may not be changed instantly every time the block is called. + * * If changes were made, it will return +self+, otherwise it returns +nil+. * * See also Array#keep_if @@ -2829,23 +2945,14 @@ rb_ary_select(VALUE ary) static VALUE rb_ary_select_bang(VALUE ary) { - long i1, i2; + struct select_bang_arg args; RETURN_SIZED_ENUMERATOR(ary, 0, 0, ary_enum_length); rb_ary_modify(ary); - for (i1 = i2 = 0; i1 < RARRAY_LEN(ary); i1++) { - VALUE v = RARRAY_AREF(ary, i1); - if (!RTEST(rb_yield(v))) continue; - if (i1 != i2) { - rb_ary_store(ary, i2, v); - } - i2++; - } - if (i1 == i2) return Qnil; - if (i2 < i1) - ARY_SET_LEN(ary, i2); - return ary; + args.ary = ary; + args.len[0] = args.len[1] = 0; + return rb_ensure(select_bang_i, (VALUE)&args, select_bang_ensure, (VALUE)&args); } /* @@ -3046,7 +3153,7 @@ rb_ary_slice_bang(int argc, VALUE *argv, VALUE ary) if (len == 0) return rb_ary_new2(0); arg2 = rb_ary_new4(len, RARRAY_CONST_PTR(ary)+pos); RBASIC_SET_CLASS(arg2, rb_obj_class(ary)); - rb_ary_splice(ary, pos, len, Qundef); + rb_ary_splice(ary, pos, len, 0, 0); return arg2; } @@ -3088,23 +3195,32 @@ ary_reject(VALUE orig, VALUE result) } static VALUE -ary_reject_bang(VALUE ary) +reject_bang_i(VALUE a) { - long i; - VALUE result = Qnil; + volatile struct select_bang_arg *arg = (void *)a; + VALUE ary = arg->ary; + long i1, i2; - rb_ary_modify_check(ary); - for (i = 0; i < RARRAY_LEN(ary); ) { - VALUE v = RARRAY_AREF(ary, i); - if (RTEST(rb_yield(v))) { - rb_ary_delete_at(ary, i); - result = ary; - } - else { - i++; + for (i1 = i2 = 0; i1 < RARRAY_LEN(ary); arg->len[0] = ++i1) { + VALUE v = RARRAY_AREF(ary, i1); + if (RTEST(rb_yield(v))) continue; + if (i1 != i2) { + rb_ary_store(ary, i2, v); } + arg->len[1] = ++i2; } - return result; + return (i1 == i2) ? Qnil : ary; +} + +static VALUE +ary_reject_bang(VALUE ary) +{ + struct select_bang_arg args; + + rb_ary_modify_check(ary); + args.ary = ary; + args.len[0] = args.len[1] = 0; + return rb_ensure(reject_bang_i, (VALUE)&args, select_bang_ensure, (VALUE)&args); } /* @@ -3112,11 +3228,10 @@ ary_reject_bang(VALUE ary) * ary.reject! { |item| block } -> ary or nil * ary.reject! -> Enumerator * - * Equivalent to Array#delete_if, deleting elements from +self+ for which the - * block evaluates to +true+, but returns +nil+ if no changes were made. + * Deletes every element of +self+ for which the block evaluates to +true+, + * if no changes were made returns +nil+. * - * The array is changed instantly every time the block is called, not after - * the iteration is over. + * The array may not be changed instantly every time the block is called. * * See also Enumerable#reject and Array#delete_if. * @@ -3127,6 +3242,7 @@ static VALUE rb_ary_reject_bang(VALUE ary) { RETURN_SIZED_ENUMERATOR(ary, 0, 0, ary_enum_length); + rb_ary_modify(ary); return ary_reject_bang(ary); } @@ -3136,7 +3252,7 @@ rb_ary_reject_bang(VALUE ary) * ary.reject -> Enumerator * * Returns a new array containing the items in +self+ for which the given - * block is not +true+. + * block is not +true+. The ordering of non-rejected elements is maintained. * * See also Array#delete_if * @@ -3244,8 +3360,10 @@ rb_ary_zip(int argc, VALUE *argv, VALUE ary) if (rb_block_given_p()) { int arity = rb_block_arity(); - if (arity > 1 && argc+1 < 0x100) { - VALUE *tmp = ALLOCA_N(VALUE, argc+1); + if (arity > 1) { + VALUE work, *tmp; + + tmp = ALLOCV_N(VALUE, work, argc+1); for (i=0; i<RARRAY_LEN(ary); i++) { tmp[0] = RARRAY_AREF(ary, i); @@ -3254,6 +3372,8 @@ rb_ary_zip(int argc, VALUE *argv, VALUE ary) } rb_yield_values2(argc+1, tmp); } + + if (work) ALLOCV_END(work); } else { for (i=0; i<RARRAY_LEN(ary); i++) { @@ -3438,12 +3558,10 @@ rb_ary_clear(VALUE ary) static VALUE rb_ary_fill(int argc, VALUE *argv, VALUE ary) { - VALUE item, arg1, arg2; + VALUE item = Qundef, arg1, arg2; long beg = 0, end = 0, len = 0; - int block_p = FALSE; if (rb_block_given_p()) { - block_p = TRUE; rb_scan_args(argc, argv, "02", &arg1, &arg2); argc += 1; /* hackish */ } @@ -3485,14 +3603,14 @@ rb_ary_fill(int argc, VALUE *argv, VALUE ary) ARY_SET_LEN(ary, end); } - if (block_p) { + if (item == Qundef) { VALUE v; long i; for (i=beg; i<end; i++) { v = rb_yield(LONG2NUM(i)); if (i>=RARRAY_LEN(ary)) break; - RARRAY_ASET(ary, i, v); + ARY_SET(ary, i, v); } } else { @@ -3514,6 +3632,13 @@ rb_ary_fill(int argc, VALUE *argv, VALUE ary) * c #=> [ "a", "b", "c", "d", "e", "f" ] * a #=> [ "a", "b", "c" ] * + * Note that + * x += y + * is the same as + * x = x + y + * This means that it produces a new array. As a consequence, + * repeated use of <code>+=</code> on arrays can be quite inefficient. + * * See also Array#concat. */ @@ -3535,31 +3660,61 @@ rb_ary_plus(VALUE x, VALUE y) return z; } +static VALUE +ary_append(VALUE x, VALUE y) +{ + long n = RARRAY_LEN(y); + if (n > 0) { + rb_ary_splice(x, RARRAY_LEN(x), 0, RARRAY_CONST_PTR(y), n); + } + return x; +} + /* * call-seq: - * ary.concat(other_ary) -> ary + * ary.concat(other_ary1, other_ary2,...) -> ary * - * Appends the elements of +other_ary+ to +self+. + * Appends the elements of +other_ary+s to +self+. * * [ "a", "b" ].concat( ["c", "d"] ) #=> [ "a", "b", "c", "d" ] + * [ "a" ].concat( ["b"], ["c", "d"] ) #=> [ "a", "b", "c", "d" ] + * [ "a" ].concat #=> [ "a" ] + * * a = [ 1, 2, 3 ] * a.concat( [ 4, 5 ] ) * a #=> [ 1, 2, 3, 4, 5 ] * + * a = [ 1, 2 ] + * a.concat(a, a) #=> [1, 2, 1, 2, 1, 2] + * * See also Array#+. */ -VALUE -rb_ary_concat(VALUE x, VALUE y) +static VALUE +rb_ary_concat_multi(int argc, VALUE *argv, VALUE ary) { - rb_ary_modify_check(x); - y = to_ary(y); - if (RARRAY_LEN(y) > 0) { - rb_ary_splice(x, RARRAY_LEN(x), 0, y); + rb_ary_modify_check(ary); + + if (argc == 1) { + rb_ary_concat(ary, argv[0]); } - return x; + else if (argc > 1) { + int i; + VALUE args = rb_ary_tmp_new(argc); + for (i = 0; i < argc; i++) { + rb_ary_concat(args, argv[i]); + } + ary_append(ary, args); + } + + return ary; } +VALUE +rb_ary_concat(VALUE x, VALUE y) +{ + return ary_append(x, to_ary(y)); +} /* * call-seq: @@ -3626,7 +3781,7 @@ rb_ary_times(VALUE ary, VALUE times) /* * call-seq: - * ary.assoc(obj) -> new_ary or nil + * ary.assoc(obj) -> element_ary or nil * * Searches through an array whose elements are also arrays comparing +obj+ * with the first element of each contained array using <code>obj.==</code>. @@ -3661,7 +3816,7 @@ rb_ary_assoc(VALUE ary, VALUE key) /* * call-seq: - * ary.rassoc(obj) -> new_ary or nil + * ary.rassoc(obj) -> element_ary or nil * * Searches through the array whose elements are also arrays. * @@ -3745,7 +3900,7 @@ rb_ary_equal(VALUE ary1, VALUE ary2) { if (ary1 == ary2) return Qtrue; if (!RB_TYPE_P(ary2, T_ARRAY)) { - if (!rb_respond_to(ary2, rb_intern("to_ary"))) { + if (!rb_respond_to(ary2, idTo_ary)) { return Qfalse; } return rb_equal(ary2, ary1); @@ -3788,12 +3943,14 @@ rb_ary_eql(VALUE ary1, VALUE ary2) /* * call-seq: - * ary.hash -> fixnum + * ary.hash -> integer * * Compute a hash-code for this array. * * Two arrays with the same content will have the same hash code (and will * compare using #eql?). + * + * See also Object#hash. */ static VALUE @@ -3810,7 +3967,7 @@ rb_ary_hash(VALUE ary) h = rb_hash_uint(h, NUM2LONG(n)); } h = rb_hash_end(h); - return LONG2FIX(h); + return ST2FIX(h); } /* @@ -3829,15 +3986,31 @@ VALUE rb_ary_includes(VALUE ary, VALUE item) { long i; + VALUE e; for (i=0; i<RARRAY_LEN(ary); i++) { - if (rb_equal(RARRAY_AREF(ary, i), item)) { + e = RARRAY_AREF(ary, i); + if (rb_equal(e, item)) { return Qtrue; } } return Qfalse; } +static VALUE +rb_ary_includes_by_eql(VALUE ary, VALUE item) +{ + long i; + VALUE e; + + for (i=0; i<RARRAY_LEN(ary); i++) { + e = RARRAY_AREF(ary, i); + if (rb_eql(item, e)) { + return Qtrue; + } + } + return Qfalse; +} static VALUE recursive_cmp(VALUE ary1, VALUE ary2, int recur) @@ -3866,21 +4039,26 @@ recursive_cmp(VALUE ary1, VALUE ary2, int recur) * Comparison --- Returns an integer (+-1+, +0+, or <code>+1</code>) if this * array is less than, equal to, or greater than +other_ary+. * - * +nil+ is returned if the two values are incomparable. - * * Each object in each array is compared (using the <=> operator). * - * Arrays are compared in an "element-wise" manner; the first two elements - * that are not equal will determine the return value for the whole - * comparison. + * Arrays are compared in an "element-wise" manner; the first element of +ary+ + * is compared with the first one of +other_ary+ using the <=> operator, then + * each of the second elements, etc... + * As soon as the result of any such comparison is non zero (i.e. the two + * corresponding elements are not equal), that result is returned for the + * whole array comparison. * - * If all the values are equal, then the return is based on a comparison of + * If all the elements are equal, then the result is based on a comparison of * the array lengths. Thus, two arrays are "equal" according to Array#<=> if, * and only if, they have the same length and the value of each element is * equal to the value of the corresponding element in the other array. * + * +nil+ is returned if the +other_ary+ is not an array or if the comparison + * of two elements returned +nil+. + * * [ "a", "a", "c" ] <=> [ "a", "b", "c" ] #=> -1 * [ 1, 2, 3, 4, 5, 6 ] <=> [ 1, 2 ] #=> +1 + * [ 1, 2 ] <=> [ 1, :two ] #=> nil * */ @@ -3908,17 +4086,16 @@ ary_add_hash(VALUE hash, VALUE ary) for (i=0; i<RARRAY_LEN(ary); i++) { VALUE elt = RARRAY_AREF(ary, i); - if (rb_hash_lookup2(hash, elt, Qundef) == Qundef) { - rb_hash_aset(hash, elt, elt); - } + rb_hash_add_new_element(hash, elt, elt); } return hash; } static inline VALUE -ary_tmp_hash_new(void) +ary_tmp_hash_new(VALUE ary) { - VALUE hash = rb_hash_new(); + long size = RARRAY_LEN(ary); + VALUE hash = rb_hash_new_with_size(size); RBASIC_CLEAR_CLASS(hash); return hash; @@ -3927,7 +4104,7 @@ ary_tmp_hash_new(void) static VALUE ary_make_hash(VALUE ary) { - VALUE hash = ary_tmp_hash_new(); + VALUE hash = ary_tmp_hash_new(ary); return ary_add_hash(hash, ary); } @@ -3938,9 +4115,7 @@ ary_add_hash_by(VALUE hash, VALUE ary) for (i = 0; i < RARRAY_LEN(ary); ++i) { VALUE v = rb_ary_elt(ary, i), k = rb_yield(v); - if (rb_hash_lookup2(hash, k, Qundef) == Qundef) { - rb_hash_aset(hash, k, v); - } + rb_hash_add_new_element(hash, k, v); } return hash; } @@ -3948,19 +4123,19 @@ ary_add_hash_by(VALUE hash, VALUE ary) static VALUE ary_make_hash_by(VALUE ary) { - VALUE hash = ary_tmp_hash_new(); + VALUE hash = ary_tmp_hash_new(ary); return ary_add_hash_by(hash, ary); } static inline void ary_recycle_hash(VALUE hash) { + assert(RBASIC_CLASS(hash) == 0); if (RHASH(hash)->ntbl) { st_table *tbl = RHASH(hash)->ntbl; - RHASH(hash)->ntbl = 0; st_free_table(tbl); } - RB_GC_GUARD(hash); + rb_gc_force_recycle(hash); } /* @@ -3987,9 +4162,19 @@ rb_ary_diff(VALUE ary1, VALUE ary2) VALUE hash; long i; - hash = ary_make_hash(to_ary(ary2)); + ary2 = to_ary(ary2); ary3 = rb_ary_new(); + if (RARRAY_LEN(ary1) <= SMALL_ARRAY_LEN || RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) { + for (i=0; i<RARRAY_LEN(ary1); i++) { + VALUE elt = rb_ary_elt(ary1, i); + if (rb_ary_includes_by_eql(ary2, elt)) continue; + rb_ary_push(ary3, elt); + } + return ary3; + } + + hash = ary_make_hash(ary2); for (i=0; i<RARRAY_LEN(ary1); i++) { if (st_lookup(rb_hash_tbl_raw(hash), RARRAY_AREF(ary1, i), 0)) continue; rb_ary_push(ary3, rb_ary_elt(ary1, i)); @@ -4002,13 +4187,12 @@ rb_ary_diff(VALUE ary1, VALUE ary2) * call-seq: * ary & other_ary -> new_ary * - * Set Intersection --- Returns a new array containing elements common to the - * two arrays, excluding any duplicates. The order is preserved from the - * original array. + * Set Intersection --- Returns a new array containing unique elements common to the + * two arrays. The order is preserved from the original array. * * It compares elements using their #hash and #eql? methods for efficiency. * - * [ 1, 1, 3, 5 ] & [ 1, 2, 3 ] #=> [ 1, 3 ] + * [ 1, 1, 3, 5 ] & [ 3, 2, 1 ] #=> [ 1, 3 ] * [ 'a', 'b', 'b', 'z' ] & [ 'a', 'b', 'c' ] #=> [ 'a', 'b' ] * * See also Array#uniq. @@ -4026,6 +4210,17 @@ rb_ary_and(VALUE ary1, VALUE ary2) ary2 = to_ary(ary2); ary3 = rb_ary_new(); if (RARRAY_LEN(ary2) == 0) return ary3; + + if (RARRAY_LEN(ary1) <= SMALL_ARRAY_LEN && RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) { + for (i=0; i<RARRAY_LEN(ary1); i++) { + v = RARRAY_AREF(ary1, i); + if (!rb_ary_includes_by_eql(ary2, v)) continue; + if (rb_ary_includes_by_eql(ary3, v)) continue; + rb_ary_push(ary3, v); + } + return ary3; + } + hash = ary_make_hash(ary2); table = rb_hash_tbl_raw(hash); @@ -4054,11 +4249,12 @@ ary_hash_orset(st_data_t *key, st_data_t *value, st_data_t arg, int existing) * ary | other_ary -> new_ary * * Set Union --- Returns a new array by joining +ary+ with +other_ary+, - * excluding any duplicates and preserving the order from the original array. + * excluding any duplicates and preserving the order from the given arrays. * * It compares elements using their #hash and #eql? methods for efficiency. * * [ "a", "b", "c" ] | [ "c", "d", "a" ] #=> [ "a", "b", "c", "d" ] + * [ "c", "d", "a" ] | [ "a", "b", "c" ] #=> [ "c", "d", "a", "b" ] * * See also Array#uniq. */ @@ -4070,8 +4266,22 @@ rb_ary_or(VALUE ary1, VALUE ary2) long i; ary2 = to_ary(ary2); - hash = ary_make_hash(ary1); + if (RARRAY_LEN(ary1) + RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) { + ary3 = rb_ary_new(); + for (i=0; i<RARRAY_LEN(ary1); i++) { + VALUE elt = rb_ary_elt(ary1, i); + if (rb_ary_includes_by_eql(ary3, elt)) continue; + rb_ary_push(ary3, elt); + } + for (i=0; i<RARRAY_LEN(ary2); i++) { + VALUE elt = rb_ary_elt(ary2, i); + if (rb_ary_includes_by_eql(ary3, elt)) continue; + rb_ary_push(ary3, elt); + } + return ary3; + } + hash = ary_make_hash(ary1); for (i=0; i<RARRAY_LEN(ary2); i++) { VALUE elt = RARRAY_AREF(ary2, i); if (!st_update(RHASH_TBL_RAW(hash), (st_data_t)elt, ary_hash_orset, (st_data_t)elt)) { @@ -4083,6 +4293,116 @@ rb_ary_or(VALUE ary1, VALUE ary2) return ary3; } +/* + * call-seq: + * ary.max -> obj + * ary.max { |a, b| block } -> obj + * ary.max(n) -> array + * ary.max(n) { |a, b| block } -> array + * + * Returns the object in _ary_ with the maximum value. The + * first form assumes all objects implement <code>Comparable</code>; + * the second uses the block to return <em>a <=> b</em>. + * + * ary = %w(albatross dog horse) + * ary.max #=> "horse" + * ary.max { |a, b| a.length <=> b.length } #=> "albatross" + * + * If the +n+ argument is given, maximum +n+ elements are returned + * as an array. + * + * ary = %w[albatross dog horse] + * ary.max(2) #=> ["horse", "dog"] + * ary.max(2) {|a, b| a.length <=> b.length } #=> ["albatross", "horse"] + */ +static VALUE +rb_ary_max(int argc, VALUE *argv, VALUE ary) +{ + struct cmp_opt_data cmp_opt = { 0, 0 }; + VALUE result = Qundef, v; + VALUE num; + long i; + + rb_scan_args(argc, argv, "01", &num); + + if (!NIL_P(num)) + return rb_nmin_run(ary, num, 0, 1, 1); + + if (rb_block_given_p()) { + for (i = 0; i < RARRAY_LEN(ary); i++) { + v = RARRAY_AREF(ary, i); + if (result == Qundef || rb_cmpint(rb_yield_values(2, v, result), v, result) > 0) { + result = v; + } + } + } + else { + for (i = 0; i < RARRAY_LEN(ary); i++) { + v = RARRAY_AREF(ary, i); + if (result == Qundef || OPTIMIZED_CMP(v, result, cmp_opt) > 0) { + result = v; + } + } + } + if (result == Qundef) return Qnil; + return result; +} + +/* + * call-seq: + * ary.min -> obj + * ary.min {| a,b | block } -> obj + * ary.min(n) -> array + * ary.min(n) {| a,b | block } -> array + * + * Returns the object in _ary_ with the minimum value. The + * first form assumes all objects implement <code>Comparable</code>; + * the second uses the block to return <em>a <=> b</em>. + * + * ary = %w(albatross dog horse) + * ary.min #=> "albatross" + * ary.min { |a, b| a.length <=> b.length } #=> "dog" + * + * If the +n+ argument is given, minimum +n+ elements are returned + * as an array. + * + * ary = %w[albatross dog horse] + * ary.min(2) #=> ["albatross", "dog"] + * ary.min(2) {|a, b| a.length <=> b.length } #=> ["dog", "horse"] + */ +static VALUE +rb_ary_min(int argc, VALUE *argv, VALUE ary) +{ + struct cmp_opt_data cmp_opt = { 0, 0 }; + VALUE result = Qundef, v; + VALUE num; + long i; + + rb_scan_args(argc, argv, "01", &num); + + if (!NIL_P(num)) + return rb_nmin_run(ary, num, 0, 0, 1); + + if (rb_block_given_p()) { + for (i = 0; i < RARRAY_LEN(ary); i++) { + v = RARRAY_AREF(ary, i); + if (result == Qundef || rb_cmpint(rb_yield_values(2, v, result), v, result) < 0) { + result = v; + } + } + } + else { + for (i = 0; i < RARRAY_LEN(ary); i++) { + v = RARRAY_AREF(ary, i); + if (result == Qundef || OPTIMIZED_CMP(v, result, cmp_opt) < 0) { + result = v; + } + } + } + if (result == Qundef) return Qnil; + return result; +} + static int push_value(st_data_t key, st_data_t val, st_data_t ary) { @@ -4102,6 +4422,8 @@ push_value(st_data_t key, st_data_t val, st_data_t ary) * * It compares values using their #hash and #eql? methods for efficiency. * + * +self+ is traversed in order, and the first occurrence is kept. + * * Returns +nil+ if no changes are made (that is, no duplicates are found). * * a = [ "a", "a", "b", "b", "c" ] @@ -4157,6 +4479,8 @@ rb_ary_uniq_bang(VALUE ary) * * It compares values using their #hash and #eql? methods for efficiency. * + * +self+ is traversed in order, and the first occurrence is kept. + * * a = [ "a", "a", "b", "b", "c" ] * a.uniq # => ["a", "b", "c"] * @@ -4308,11 +4632,15 @@ flatten(VALUE ary, int level, int *modified) while (1) { while (i < RARRAY_LEN(ary)) { elt = RARRAY_AREF(ary, i++); + if (level >= 0 && RARRAY_LEN(stack) / 2 >= level) { + rb_ary_push(result, elt); + continue; + } tmp = rb_check_array_type(elt); if (RBASIC(result)->klass) { rb_raise(rb_eRuntimeError, "flatten reentered"); } - if (NIL_P(tmp) || (level >= 0 && RARRAY_LEN(stack) / 2 >= level)) { + if (NIL_P(tmp)) { rb_ary_push(result, elt); } else { @@ -4341,7 +4669,7 @@ flatten(VALUE ary, int level, int *modified) st_free_table(memo); - RBASIC_SET_CLASS(result, rb_class_of(ary)); + RBASIC_SET_CLASS(result, rb_obj_class(ary)); return result; } @@ -4439,7 +4767,13 @@ static ID id_random; * * Shuffles elements in +self+ in place. * + * a = [ 1, 2, 3 ] #=> [1, 2, 3] + * a.shuffle! #=> [2, 3, 1] + * a #=> [2, 3, 1] + * * The optional +rng+ argument will be used as the random number generator. + * + * a.shuffle!(random: Random.new(1)) #=> [1, 3, 2] */ static VALUE @@ -4486,6 +4820,7 @@ rb_ary_shuffle_bang(int argc, VALUE *argv, VALUE ary) * * a = [ 1, 2, 3 ] #=> [1, 2, 3] * a.shuffle #=> [2, 3, 1] + * a #=> [1, 2, 3] * * The optional +rng+ argument will be used as the random number generator. * @@ -4532,6 +4867,7 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary) VALUE opts, randgen = rb_cRandom; long n, len, i, j, k, idx[10]; long rnds[numberof(idx)]; + long memo_threshold; if (OPTHASH_GIVEN_P(opts)) { VALUE rnd; @@ -4591,6 +4927,11 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary) } return rb_ary_new_from_args(3, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j), RARRAY_AREF(ary, k)); } + memo_threshold = + len < 2560 ? len / 128 : + len < 5120 ? len / 64 : + len < 10240 ? len / 32 : + len / 16; if (n <= numberof(idx)) { long sorted[numberof(idx)]; sorted[0] = idx[0] = rnds[0]; @@ -4610,6 +4951,38 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary) } }); } + else if (n <= memo_threshold / 2) { + long max_idx = 0; +#undef RUBY_UNTYPED_DATA_WARNING +#define RUBY_UNTYPED_DATA_WARNING 0 + VALUE vmemo = Data_Wrap_Struct(0, 0, st_free_table, 0); + st_table *memo = st_init_numtable_with_size(n); + DATA_PTR(vmemo) = memo; + result = rb_ary_new_capa(n); + RARRAY_PTR_USE(result, ptr_result, { + for (i=0; i<n; i++) { + long r = RAND_UPTO(len-i) + i; + ptr_result[i] = r; + if (r > max_idx) max_idx = r; + } + len = RARRAY_LEN(ary); + if (len <= max_idx) n = 0; + else if (n > len) n = len; + RARRAY_PTR_USE(ary, ptr_ary, { + for (i=0; i<n; i++) { + long j2 = j = ptr_result[i]; + long i2 = i; + st_data_t value; + if (st_lookup(memo, (st_data_t)i, &value)) i2 = (long)value; + if (st_lookup(memo, (st_data_t)j, &value)) j2 = (long)value; + st_insert(memo, (st_data_t)j, (st_data_t)i2); + ptr_result[i] = ptr_ary[j2]; + } + }); + }); + DATA_PTR(vmemo) = 0; + st_free_table(memo); + } else { result = rb_ary_dup(ary); RBASIC_CLEAR_CLASS(result); @@ -4642,7 +5015,7 @@ rb_ary_cycle_size(VALUE self, VALUE args, VALUE eobj) mul = NUM2LONG(n); if (mul <= 0) return INT2FIX(0); n = LONG2FIX(mul); - return rb_funcallv(rb_ary_length(self), '*', 1, &n); + return rb_fix_mul_fix(rb_ary_length(self), n); } /* @@ -4715,37 +5088,48 @@ yield_indexed_values(const VALUE values, const long r, const long *const p) } /* - * Recursively compute permutations of +r+ elements of the set - * <code>[0..n-1]</code>. + * Compute permutations of +r+ elements of the set <code>[0..n-1]</code>. * - * When we have a complete permutation of array indexes, copy the values - * at those indexes into a new array and yield that array. + * When we have a complete permutation of array indices, copy the values + * at those indices into a new array and yield that array. * * n: the size of the set * r: the number of elements in each permutation * p: the array (of size r) that we're filling in - * index: what index we're filling in now * used: an array of booleans: whether a given index is already used * values: the Ruby array that holds the actual values to permute */ static void -permute0(long n, long r, long *p, long index, char *used, VALUE values) +permute0(const long n, const long r, long *const p, char *const used, const VALUE values) { - long i; - for (i = 0; i < n; i++) { - if (used[i] == 0) { + long i = 0, index = 0; + + for (;;) { + const char *const unused = memchr(&used[i], 0, n-i); + if (!unused) { + if (!index) break; + i = p[--index]; /* pop index */ + used[i++] = 0; /* index unused */ + } + else { + i = unused - used; p[index] = i; + used[i] = 1; /* mark index used */ + ++index; if (index < r-1) { /* if not done yet */ - used[i] = 1; /* mark index used */ - permute0(n, r, p, index+1, /* recurse */ - used, values); - used[i] = 0; /* index unused */ + p[index] = i = 0; + continue; } - else { + for (i = 0; i < n; ++i) { + if (used[i]) continue; + p[index] = i; if (!yield_indexed_values(values, r, p)) { rb_raise(rb_eRuntimeError, "permute reentered"); } } + i = p[--index]; /* pop index */ + used[i] = 0; /* index unused */ + p[index] = ++i; } } } @@ -4757,10 +5141,16 @@ permute0(long n, long r, long *p, long index, char *used, VALUE values) static VALUE descending_factorial(long from, long how_many) { - VALUE cnt = LONG2FIX(how_many >= 0); - while (how_many-- > 0) { - VALUE v = LONG2FIX(from--); - cnt = rb_funcallv(cnt, '*', 1, &v); + VALUE cnt; + if (how_many > 0) { + cnt = LONG2FIX(from); + while (--how_many > 0) { + long v = --from; + cnt = rb_int_mul(cnt, LONG2FIX(v)); + } + } + else { + cnt = LONG2FIX(how_many == 0); } return cnt; } @@ -4768,16 +5158,23 @@ descending_factorial(long from, long how_many) static VALUE binomial_coefficient(long comb, long size) { - VALUE r, v; + VALUE r; + long i; if (comb > size-comb) { comb = size-comb; } if (comb < 0) { return LONG2FIX(0); } - r = descending_factorial(size, comb); - v = descending_factorial(comb, comb); - return rb_funcallv(r, id_div, 1, &v); + else if (comb == 0) { + return LONG2FIX(1); + } + r = LONG2FIX(size); + for (i = 1; i < comb; ++i) { + r = rb_int_mul(r, LONG2FIX(size - i)); + r = rb_int_idiv(r, LONG2FIX(i + 1)); + } + return r; } static VALUE @@ -4840,23 +5237,42 @@ rb_ary_permutation(int argc, VALUE *argv, VALUE ary) } } else { /* this is the general case */ - volatile VALUE t0 = tmpbuf(r,sizeof(long)); - long *p = (long*)RSTRING_PTR(t0); - volatile VALUE t1 = tmpbuf(n,sizeof(char)); - char *used = (char*)RSTRING_PTR(t1); + volatile VALUE t0; + long *p = ALLOCV_N(long, t0, r+roomof(n, sizeof(long))); + char *used = (char*)(p + r); VALUE ary0 = ary_make_shared_copy(ary); /* private defensive copy of ary */ RBASIC_CLEAR_CLASS(ary0); MEMZERO(used, char, n); /* initialize array */ - permute0(n, r, p, 0, used, ary0); /* compute and yield permutations */ - tmpbuf_discard(t0); - tmpbuf_discard(t1); + permute0(n, r, p, used, ary0); /* compute and yield permutations */ + ALLOCV_END(t0); RBASIC_SET_CLASS_RAW(ary0, rb_cArray); } return ary; } +static void +combinate0(const long len, const long n, long *const stack, const VALUE values) +{ + long lev = 0; + + MEMZERO(stack+1, long, n); + stack[0] = -1; + for (;;) { + for (lev++; lev < n; lev++) { + stack[lev+1] = stack[lev]+1; + } + if (!yield_indexed_values(values, n, stack+1)) { + rb_raise(rb_eRuntimeError, "combination reentered"); + } + do { + if (lev == 0) return; + stack[lev--]++; + } while (stack[lev+1]+n == len+lev+1); + } +} + static VALUE rb_ary_combination_size(VALUE ary, VALUE args, VALUE eobj) { @@ -4894,7 +5310,7 @@ rb_ary_combination_size(VALUE ary, VALUE args, VALUE eobj) static VALUE rb_ary_combination(VALUE ary, VALUE num) { - long n, i, len; + long i, n, len; n = NUM2LONG(num); RETURN_SIZED_ENUMERATOR(ary, 1, &num, rb_ary_combination_size); @@ -4906,7 +5322,7 @@ rb_ary_combination(VALUE ary, VALUE num) rb_yield(rb_ary_new2(0)); } else if (n == 1) { - for (i = 0; i < len; i++) { + for (i = 0; i < RARRAY_LEN(ary); i++) { rb_yield(rb_ary_new3(1, RARRAY_AREF(ary, i))); } } @@ -4914,24 +5330,9 @@ rb_ary_combination(VALUE ary, VALUE num) VALUE ary0 = ary_make_shared_copy(ary); /* private defensive copy of ary */ volatile VALUE t0; long *stack = ALLOCV_N(long, t0, n+1); - long lev = 0; RBASIC_CLEAR_CLASS(ary0); - MEMZERO(stack+1, long, n); - stack[0] = -1; - for (;;) { - for (lev++; lev < n; lev++) { - stack[lev+1] = stack[lev]+1; - } - if (!yield_indexed_values(ary0, n, stack+1)) { - rb_raise(rb_eRuntimeError, "combination reentered"); - } - do { - if (lev == 0) goto done; - stack[lev--]++; - } while (stack[lev+1]+n == len+lev+1); - } - done: + combinate0(len, n, stack, ary0); ALLOCV_END(t0); RBASIC_SET_CLASS_RAW(ary0, rb_cArray); } @@ -4939,32 +5340,37 @@ rb_ary_combination(VALUE ary, VALUE num) } /* - * Recursively compute repeated permutations of +r+ elements of the set + * Compute repeated permutations of +r+ elements of the set * <code>[0..n-1]</code>. * - * When we have a complete repeated permutation of array indexes, copy the - * values at those indexes into a new array and yield that array. + * When we have a complete repeated permutation of array indices, copy the + * values at those indices into a new array and yield that array. * * n: the size of the set * r: the number of elements in each permutation * p: the array (of size r) that we're filling in - * index: what index we're filling in now * values: the Ruby array that holds the actual values to permute */ static void -rpermute0(long n, long r, long *p, long index, VALUE values) +rpermute0(const long n, const long r, long *const p, const VALUE values) { - long i; - for (i = 0; i < n; i++) { - p[index] = i; - if (index < r-1) { /* if not done yet */ - rpermute0(n, r, p, index+1, values); /* recurse */ + long i = 0, index = 0; + + p[index] = i; + for (;;) { + if (++index < r-1) { + p[index] = i = 0; + continue; } - else { + for (i = 0; i < n; ++i) { + p[index] = i; if (!yield_indexed_values(values, r, p)) { rb_raise(rb_eRuntimeError, "repeated permute reentered"); } } + do { + if (index <= 0) return; + } while ((i = ++p[--index]) >= n); } } @@ -4973,14 +5379,14 @@ rb_ary_repeated_permutation_size(VALUE ary, VALUE args, VALUE eobj) { long n = RARRAY_LEN(ary); long k = NUM2LONG(RARRAY_AREF(args, 0)); - VALUE v; if (k < 0) { return LONG2FIX(0); } - - v = LONG2NUM(k); - return rb_funcallv(LONG2NUM(n), id_power, 1, &v); + if (n <= 0) { + return LONG2FIX(!k); + } + return rb_int_positive_pow(n, (unsigned long)k); } /* @@ -5027,31 +5433,38 @@ rb_ary_repeated_permutation(VALUE ary, VALUE num) } } else { /* this is the general case */ - volatile VALUE t0 = tmpbuf(r, sizeof(long)); - long *p = (long*)RSTRING_PTR(t0); + volatile VALUE t0; + long *p = ALLOCV_N(long, t0, r); VALUE ary0 = ary_make_shared_copy(ary); /* private defensive copy of ary */ RBASIC_CLEAR_CLASS(ary0); - rpermute0(n, r, p, 0, ary0); /* compute and yield repeated permutations */ - tmpbuf_discard(t0); + rpermute0(n, r, p, ary0); /* compute and yield repeated permutations */ + ALLOCV_END(t0); RBASIC_SET_CLASS_RAW(ary0, rb_cArray); } return ary; } static void -rcombinate0(long n, long r, long *p, long index, long rest, VALUE values) +rcombinate0(const long n, const long r, long *const p, const long rest, const VALUE values) { - if (rest > 0) { - for (; index < n; ++index) { - p[r-rest] = index; - rcombinate0(n, r, p, index, rest-1, values); + long i = 0, index = 0; + + p[index] = i; + for (;;) { + if (++index < r-1) { + p[index] = i; + continue; } - } - else { - if (!yield_indexed_values(values, r, p)) { - rb_raise(rb_eRuntimeError, "repeated combination reentered"); + for (; i < n; ++i) { + p[index] = i; + if (!yield_indexed_values(values, r, p)) { + rb_raise(rb_eRuntimeError, "repeated combination reentered"); + } } + do { + if (index <= 0) return; + } while ((i = ++p[--index]) >= n); } } @@ -5108,7 +5521,7 @@ rb_ary_repeated_combination(VALUE ary, VALUE num) rb_yield(rb_ary_new2(0)); } else if (n == 1) { - for (i = 0; i < len; i++) { + for (i = 0; i < RARRAY_LEN(ary); i++) { rb_yield(rb_ary_new3(1, RARRAY_AREF(ary, i))); } } @@ -5116,13 +5529,13 @@ rb_ary_repeated_combination(VALUE ary, VALUE num) /* yield nothing */ } else { - volatile VALUE t0 = tmpbuf(n, sizeof(long)); - long *p = (long*)RSTRING_PTR(t0); + volatile VALUE t0; + long *p = ALLOCV_N(long, t0, n); VALUE ary0 = ary_make_shared_copy(ary); /* private defensive copy of ary */ RBASIC_CLEAR_CLASS(ary0); - rcombinate0(len, n, p, 0, n, ary0); /* compute and yield repeated combinations */ - tmpbuf_discard(t0); + rcombinate0(len, n, p, n, ary0); /* compute and yield repeated combinations */ + ALLOCV_END(t0); RBASIC_SET_CLASS_RAW(ary0, rb_cArray); } return ary; @@ -5265,7 +5678,7 @@ rb_ary_take(VALUE obj, VALUE n) /* * call-seq: - * ary.take_while { |arr| block } -> new_ary + * ary.take_while { |obj| block } -> new_ary * ary.take_while -> Enumerator * * Passes elements to the block until the block returns +nil+ or +false+, then @@ -5324,7 +5737,7 @@ rb_ary_drop(VALUE ary, VALUE n) /* * call-seq: - * ary.drop_while { |arr| block } -> new_ary + * ary.drop_while { |obj| block } -> new_ary * ary.drop_while -> Enumerator * * Drops elements up to, but not including, the first element for which the @@ -5353,6 +5766,236 @@ rb_ary_drop_while(VALUE ary) } /* + * call-seq: + * ary.any? [{ |obj| block }] -> true or false + * + * See also Enumerable#any? + */ + +static VALUE +rb_ary_any_p(int argc, VALUE *argv, VALUE ary) +{ + long i, len = RARRAY_LEN(ary); + const VALUE *ptr = RARRAY_CONST_PTR(ary); + + rb_check_arity(argc, 0, 1); + if (!len) return Qfalse; + if (argc) { + for (i = 0; i < RARRAY_LEN(ary); ++i) { + if (RTEST(rb_funcall(argv[0], idEqq, 1, RARRAY_AREF(ary, i)))) return Qtrue; + } + } + else if (!rb_block_given_p()) { + for (i = 0; i < len; ++i) if (RTEST(ptr[i])) return Qtrue; + } + else { + for (i = 0; i < RARRAY_LEN(ary); ++i) { + if (RTEST(rb_yield(RARRAY_AREF(ary, i)))) return Qtrue; + } + } + return Qfalse; +} + +/* + * call-seq: + * ary.dig(idx, ...) -> object + * + * Extracts the nested value specified by the sequence of <i>idx</i> + * objects by calling +dig+ at each step, returning +nil+ if any + * intermediate step is +nil+. + * + * a = [[1, [2, 3]]] + * + * a.dig(0, 1, 1) #=> 3 + * a.dig(1, 2, 3) #=> nil + * a.dig(0, 0, 0) #=> TypeError: Integer does not have #dig method + * [42, {foo: :bar}].dig(1, :foo) #=> :bar + */ + +VALUE +rb_ary_dig(int argc, VALUE *argv, VALUE self) +{ + rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS); + self = rb_ary_at(self, *argv); + if (!--argc) return self; + ++argv; + return rb_obj_dig(argc, argv, self, Qnil); +} + +static inline VALUE +finish_exact_sum(long n, VALUE r, VALUE v, int z) +{ + if (n != 0) + v = rb_fix_plus(LONG2FIX(n), v); + if (r != Qundef) { + /* r can be an Integer when mathn is loaded */ + if (FIXNUM_P(r)) + v = rb_fix_plus(r, v); + else if (RB_TYPE_P(r, T_BIGNUM)) + v = rb_big_plus(r, v); + else + v = rb_rational_plus(r, v); + } + else if (!n && z) { + v = rb_fix_plus(LONG2FIX(0), v); + } + return v; +} + +/* + * call-seq: + * ary.sum(init=0) -> number + * ary.sum(init=0) {|e| expr } -> number + * + * Returns the sum of elements. + * For example, [e1, e2, e3].sum returns init + e1 + e2 + e3. + * + * If a block is given, the block is applied to each element + * before addition. + * + * If <i>ary</i> is empty, it returns <i>init</i>. + * + * [].sum #=> 0 + * [].sum(0.0) #=> 0.0 + * [1, 2, 3].sum #=> 6 + * [3, 5.5].sum #=> 8.5 + * [2.5, 3.0].sum(0.0) {|e| e * e } #=> 15.25 + * [Object.new].sum #=> TypeError + * + * The (arithmetic) mean value of an array can be obtained as follows. + * + * mean = ary.sum(0.0) / ary.length + * + * This method can be used for non-numeric objects by + * explicit <i>init</i> argument. + * + * ["a", "b", "c"].sum("") #=> "abc" + * [[1], [[2]], [3]].sum([]) #=> [1, [2], 3] + * + * However, Array#join and Array#flatten is faster than Array#sum for + * array of strings and array of arrays. + * + * ["a", "b", "c"].join #=> "abc" + * [[1], [[2]], [3]].flatten(1) #=> [1, [2], 3] + * + * + * Array#sum method may not respect method redefinition of "+" methods + * such as Integer#+. + * + */ + +static VALUE +rb_ary_sum(int argc, VALUE *argv, VALUE ary) +{ + VALUE e, v, r; + long i, n; + int block_given; + + if (rb_scan_args(argc, argv, "01", &v) == 0) + v = LONG2FIX(0); + + block_given = rb_block_given_p(); + + if (RARRAY_LEN(ary) == 0) + return v; + + n = 0; + r = Qundef; + for (i = 0; i < RARRAY_LEN(ary); i++) { + e = RARRAY_AREF(ary, i); + if (block_given) + e = rb_yield(e); + if (FIXNUM_P(e)) { + n += FIX2LONG(e); /* should not overflow long type */ + if (!FIXABLE(n)) { + v = rb_big_plus(LONG2NUM(n), v); + n = 0; + } + } + else if (RB_TYPE_P(e, T_BIGNUM)) + v = rb_big_plus(e, v); + else if (RB_TYPE_P(e, T_RATIONAL)) { + if (r == Qundef) + r = e; + else + r = rb_rational_plus(r, e); + } + else + goto not_exact; + } + v = finish_exact_sum(n, r, v, argc!=0); + return v; + + not_exact: + v = finish_exact_sum(n, r, v, i!=0); + + if (RB_FLOAT_TYPE_P(e)) { + /* + * Kahan-Babuska balancing compensated summation algorithm + * See http://link.springer.com/article/10.1007/s00607-005-0139-x + */ + double f, c; + + f = NUM2DBL(v); + c = 0.0; + goto has_float_value; + for (; i < RARRAY_LEN(ary); i++) { + double x, t; + e = RARRAY_AREF(ary, i); + if (block_given) + e = rb_yield(e); + if (RB_FLOAT_TYPE_P(e)) + has_float_value: + x = RFLOAT_VALUE(e); + else if (FIXNUM_P(e)) + x = FIX2LONG(e); + else if (RB_TYPE_P(e, T_BIGNUM)) + x = rb_big2dbl(e); + else if (RB_TYPE_P(e, T_RATIONAL)) + x = rb_num2dbl(e); + else + goto not_float; + + if (isnan(f)) continue; + if (isnan(x)) { + f = x; + continue; + } + if (isinf(x)) { + if (isinf(f) && signbit(x) != signbit(f)) + f = NAN; + else + f = x; + continue; + } + if (isinf(f)) continue; + + t = f + x; + if (fabs(f) >= fabs(x)) + c += ((f - t) + x); + else + c += ((x - t) + f); + f = t; + } + f += c; + return DBL2NUM(f); + + not_float: + v = DBL2NUM(f); + } + + goto has_some_value; + for (; i < RARRAY_LEN(ary); i++) { + e = RARRAY_AREF(ary, i); + if (block_given) + e = rb_yield(e); + has_some_value: + v = rb_funcall(v, idPLUS, 1, e); + } + return v; +} + +/* * Arrays are ordered, integer-indexed collections of any object. * * Array indexing starts at 0, as in C or Java. A negative index is assumed @@ -5385,7 +6028,8 @@ rb_ary_drop_while(VALUE ary) * This method is safe to use with mutable objects such as hashes, strings or * other arrays: * - * Array.new(4) { Hash.new } #=> [{}, {}, {}, {}] + * Array.new(4) { Hash.new } #=> [{}, {}, {}, {}] + * Array.new(4) {|i| i.to_s } #=> ["0", "1", "2", "3"] * * This is also a quick way to build up multi-dimensional arrays: * @@ -5621,12 +6265,14 @@ Init_Array(void) rb_define_method(rb_cArray, "fetch", rb_ary_fetch, -1); rb_define_method(rb_cArray, "first", rb_ary_first, -1); rb_define_method(rb_cArray, "last", rb_ary_last, -1); - rb_define_method(rb_cArray, "concat", rb_ary_concat, 1); + rb_define_method(rb_cArray, "concat", rb_ary_concat_multi, -1); rb_define_method(rb_cArray, "<<", rb_ary_push, 1); rb_define_method(rb_cArray, "push", rb_ary_push_m, -1); + rb_define_alias(rb_cArray, "append", "push"); rb_define_method(rb_cArray, "pop", rb_ary_pop_m, -1); rb_define_method(rb_cArray, "shift", rb_ary_shift_m, -1); rb_define_method(rb_cArray, "unshift", rb_ary_unshift_m, -1); + rb_define_alias(rb_cArray, "prepend", "unshift"); rb_define_method(rb_cArray, "insert", rb_ary_insert, -1); rb_define_method(rb_cArray, "each", rb_ary_each, 0); rb_define_method(rb_cArray, "each_index", rb_ary_each_index, 0); @@ -5679,6 +6325,9 @@ Init_Array(void) rb_define_method(rb_cArray, "&", rb_ary_and, 1); rb_define_method(rb_cArray, "|", rb_ary_or, 1); + rb_define_method(rb_cArray, "max", rb_ary_max, -1); + rb_define_method(rb_cArray, "min", rb_ary_min, -1); + rb_define_method(rb_cArray, "uniq", rb_ary_uniq, 0); rb_define_method(rb_cArray, "uniq!", rb_ary_uniq_bang, 0); rb_define_method(rb_cArray, "compact", rb_ary_compact, 0); @@ -5701,9 +6350,10 @@ Init_Array(void) rb_define_method(rb_cArray, "drop", rb_ary_drop, 1); rb_define_method(rb_cArray, "drop_while", rb_ary_drop_while, 0); rb_define_method(rb_cArray, "bsearch", rb_ary_bsearch, 0); + rb_define_method(rb_cArray, "bsearch_index", rb_ary_bsearch_index, 0); + rb_define_method(rb_cArray, "any?", rb_ary_any_p, -1); + rb_define_method(rb_cArray, "dig", rb_ary_dig, -1); + rb_define_method(rb_cArray, "sum", rb_ary_sum, -1); - id_cmp = rb_intern("<=>"); id_random = rb_intern("random"); - id_div = rb_intern("div"); - id_power = rb_intern("**"); } |
