diff options
Diffstat (limited to 'set.c')
| -rw-r--r-- | set.c | 585 |
1 files changed, 346 insertions, 239 deletions
@@ -7,6 +7,7 @@ #include "id.h" #include "internal.h" #include "internal/bits.h" +#include "internal/error.h" #include "internal/hash.h" #include "internal/proc.h" #include "internal/sanitizers.h" @@ -101,6 +102,8 @@ static ID id_any_p; static ID id_new; static ID id_i_hash; static ID id_set_iter_lev; +static ID id_subclass_compatible; +static ID id_class_methods; #define RSET_INITIALIZED FL_USER1 #define RSET_LEV_MASK (FL_USER13 | FL_USER14 | FL_USER15 | /* FL 13..19 */ \ @@ -113,7 +116,7 @@ static ID id_set_iter_lev; #define RSET_SIZE(set) set_table_size(RSET_TABLE(set)) #define RSET_EMPTY(set) (RSET_SIZE(set) == 0) #define RSET_SIZE_NUM(set) SIZET2NUM(RSET_SIZE(set)) -#define RSET_IS_MEMBER(sobj, item) set_lookup(RSET_TABLE(set), (st_data_t)(item)) +#define RSET_IS_MEMBER(set, item) set_table_lookup(RSET_TABLE(set), (st_data_t)(item)) #define RSET_COMPARE_BY_IDENTITY(set) (RSET_TABLE(set)->type == &identhash) struct set_object { @@ -121,6 +124,14 @@ struct set_object { }; static int +mark_and_pin_key(st_data_t key, st_data_t data) +{ + rb_gc_mark((VALUE)key); + + return ST_CONTINUE; +} + +static int mark_key(st_data_t key, st_data_t data) { rb_gc_mark_movable((VALUE)key); @@ -132,22 +143,21 @@ static void set_mark(void *ptr) { struct set_object *sobj = ptr; - if (sobj->table.entries) set_foreach(&sobj->table, mark_key, 0); -} - -static void -set_free_embedded(struct set_object *sobj) -{ - free((&sobj->table)->bins); - free((&sobj->table)->entries); + if (sobj->table.entries) { + if (sobj->table.type == &identhash) { + set_table_foreach(&sobj->table, mark_and_pin_key, 0); + } + else { + set_table_foreach(&sobj->table, mark_key, 0); + } + } } static void set_free(void *ptr) { struct set_object *sobj = ptr; - set_free_embedded(sobj); - memset(&sobj->table, 0, sizeof(sobj->table)); + set_free_embedded_table(&sobj->table); } static size_t @@ -171,9 +181,7 @@ set_foreach_replace(st_data_t key, st_data_t argp, int error) static int set_replace_ref(st_data_t *key, st_data_t argp, int existing) { - if (rb_gc_location((VALUE)*key) != (VALUE)*key) { - *key = rb_gc_location((VALUE)*key); - } + rb_gc_mark_and_move((VALUE *)key); return ST_CONTINUE; } @@ -369,11 +377,10 @@ set_compact_after_delete(VALUE set) } static int -set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr) +set_table_insert_wb(set_table *tab, VALUE set, VALUE key) { if (tab->type != &identhash && rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) { key = rb_hash_key_str(key); - if (key_addr) *key_addr = key; } int ret = set_insert(tab, (st_data_t)key); if (ret == 0) RB_OBJ_WRITTEN(set, Qundef, key); @@ -381,9 +388,9 @@ set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr) } static int -set_insert_wb(VALUE set, VALUE key, VALUE *key_addr) +set_insert_wb(VALUE set, VALUE key) { - return set_table_insert_wb(RSET_TABLE(set), set, key, key_addr); + return set_table_insert_wb(RSET_TABLE(set), set, key); } static VALUE @@ -405,6 +412,13 @@ set_s_alloc(VALUE klass) return set_alloc_with_size(klass, 0); } +/* + * call-seq: + * Set[*objects] -> new_set + * + * Returns a new Set object populated with the given objects, + * See Set::new. + */ static VALUE set_s_create(int argc, VALUE *argv, VALUE klass) { @@ -413,12 +427,25 @@ set_s_create(int argc, VALUE *argv, VALUE klass) int i; for (i=0; i < argc; i++) { - set_table_insert_wb(table, set, argv[i], NULL); + set_table_insert_wb(table, set, argv[i]); } return set; } +static VALUE +set_s_inherited(VALUE klass, VALUE subclass) +{ + if (klass == rb_cSet) { + // When subclassing directly from Set, include the compatibility layer + rb_require("set/subclass_compatible.rb"); + VALUE subclass_compatible = rb_const_get(klass, id_subclass_compatible); + rb_include_module(subclass, subclass_compatible); + rb_extend_object(subclass, rb_const_get(subclass_compatible, id_class_methods)); + } + return Qnil; +} + static void check_set(VALUE arg) { @@ -451,7 +478,7 @@ static VALUE set_initialize_without_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set)) { VALUE element = i; - set_insert_wb(set, element, &element); + set_insert_wb(set, element); return element; } @@ -459,27 +486,41 @@ static VALUE set_initialize_with_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set)) { VALUE element = rb_yield(i); - set_insert_wb(set, element, &element); + set_insert_wb(set, element); return element; } /* - * call-seq: - * Set.new -> new_set - * Set.new(enum) -> new_set - * Set.new(enum) { |elem| ... } -> new_set + * call-seq: + * Set.new(object = nil) -> new_set + * Set.new(object = nil) {|element| ... } -> new_set + * + * Returns a new \Set object based on the given +object+, + * which must be an Enumerable or +nil+. * - * Creates a new set containing the elements of the given enumerable - * object. + * With argument +object+ given as +nil+, + * returns a new empty \Set object: * - * If a block is given, the elements of enum are preprocessed by the - * given block. + * Set.new # => Set[] + * Set.new { fail 'Cannot happen' } # => Set[] # Block not called. + * + * With no block given and +object+ given as an Enumerable, + * populates the new set with the elements of +object+: + * + * Set.new(%w[ a b c ]) # => Set["a", "b", "c"] + * Set.new({foo: 0, bar: 1}) # => Set[[:foo, 0], [:bar, 1]] + * Set.new(4..10) # => Set[4, 5, 6, 7, 8, 9, 10] + * Set.new(Dir.new('lib')).take(5) + * # => [".", "..", "bundled_gems.rb", "bundler", "bundler.rb"] + * Set.new(File.new('doc/NEWS/NEWS-4.0.0.md')).take(3) + * # => ["# NEWS for Ruby 4.0.0\n", "\n", "This document is a list of user-visible feature changes\n"] + * + * With a block given and +object+ given as an Enumerable, + * calls the block with each element of +object+; + * adds the block's return value to the new set: + * + * Set.new(4..10) {|i| i * 2 } # => Set[8, 10, 12, 14, 16, 18, 20] * - * Set.new([1, 2]) #=> #<Set: {1, 2}> - * Set.new([1, 2, 1]) #=> #<Set: {1, 2}> - * Set.new([1, 'c', :s]) #=> #<Set: {1, "c", :s}> - * Set.new(1..5) #=> #<Set: {1, 2, 3, 4, 5}> - * Set.new([1, 2, 3]) { |x| x * x } #=> #<Set: {1, 4, 9}> */ static VALUE set_i_initialize(int argc, VALUE *argv, VALUE set) @@ -494,18 +535,13 @@ set_i_initialize(int argc, VALUE *argv, VALUE set) if (argc > 0 && (other = argv[0]) != Qnil) { if (RB_TYPE_P(other, T_ARRAY)) { - long len = RARRAY_LEN(other); - if (RARRAY_LEN(other) != 0) { - set_table *into = RSET_TABLE(set); - VALUE key; - int block_given = rb_block_given_p(); - RARRAY_PTR_USE(other, ptr, { - for(; len > 0; len--, ptr++) { - key = *ptr; - if (block_given) key = rb_yield(key); - set_table_insert_wb(into, set, key, NULL); - } - }); + long i; + int block_given = rb_block_given_p(); + set_table *into = RSET_TABLE(set); + for (i=0; i<RARRAY_LEN(other); i++) { + VALUE key = RARRAY_AREF(other, i); + if (block_given) key = rb_yield(key); + set_table_insert_wb(into, set, key); } } else { @@ -518,6 +554,7 @@ set_i_initialize(int argc, VALUE *argv, VALUE set) return set; } +/* :nodoc: */ static VALUE set_i_initialize_copy(VALUE set, VALUE other) { @@ -530,8 +567,9 @@ set_i_initialize_copy(VALUE set, VALUE other) struct set_object *sobj; TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj); - set_free_embedded(sobj); + set_free_embedded_table(&sobj->table); set_copy(&sobj->table, RSET_TABLE(other)); + rb_gc_writebarrier_remember(set); return set; } @@ -539,10 +577,14 @@ set_i_initialize_copy(VALUE set, VALUE other) static int set_inspect_i(st_data_t key, st_data_t arg) { - VALUE str = (VALUE)arg; - if (RSTRING_LEN(str) > 8) { + VALUE *args = (VALUE*)arg; + VALUE str = args[0]; + if (args[1] == Qtrue) { rb_str_buf_cat_ascii(str, ", "); } + else { + args[1] = Qtrue; + } rb_str_buf_append(str, rb_inspect((VALUE)key)); return ST_CONTINUE; @@ -552,11 +594,17 @@ static VALUE set_inspect(VALUE set, VALUE dummy, int recur) { VALUE str; + VALUE klass_name = rb_class_path(CLASS_OF(set)); + + if (recur) { + str = rb_sprintf("%"PRIsVALUE"[...]", klass_name); + return rb_str_export_to_enc(str, rb_usascii_encoding()); + } - if (recur) return rb_usascii_str_new2("#<Set: {...}>"); - str = rb_str_buf_new2("#<Set: {"); - set_iter(set, set_inspect_i, str); - rb_str_buf_cat2(str, "}>"); + str = rb_sprintf("%"PRIsVALUE"[", klass_name); + VALUE args[2] = {str, Qfalse}; + set_iter(set, set_inspect_i, (st_data_t)args); + rb_str_buf_cat2(str, "]"); return str; } @@ -568,11 +616,11 @@ set_inspect(VALUE set, VALUE dummy, int recur) * Returns a new string containing the set entries: * * s = Set.new - * s.inspect # => "#<Set: {}>" + * s.inspect # => "Set[]" * s.add(1) - * s.inspect # => "#<Set: {1}>" + * s.inspect # => "Set[1]" * s.add(2) - * s.inspect # => "#<Set: {1, 2}>" + * s.inspect # => "Set[1, 2]" * * Related: see {Methods for Converting}[rdoc-ref:Set@Methods+for+Converting]. */ @@ -621,35 +669,19 @@ set_i_to_a(VALUE set) /* * call-seq: - * to_set(klass = Set, *args, &block) -> self or new_set + * to_set(&block) -> self or new_set * - * Returns self if receiver is an instance of +Set+ and no arguments or - * block are given. Otherwise, converts the set to another with - * <tt>klass.new(self, *args, &block)</tt>. - * - * In subclasses, returns `klass.new(self, *args, &block)` unless overridden. + * Without a block, if +self+ is an instance of +Set+, returns +self+. + * Otherwise, calls <tt>Set.new(self, &block)</tt>. */ static VALUE -set_i_to_set(int argc, VALUE *argv, VALUE set) +set_i_to_set(VALUE set) { - VALUE klass; - - if (argc == 0) { - klass = rb_cSet; - argv = &set; - argc = 1; - } - else { - klass = argv[0]; - argv[0] = set; - } - - if (klass == rb_cSet && rb_obj_is_instance_of(set, rb_cSet) && - argc == 1 && !rb_block_given_p()) { + if (rb_obj_is_instance_of(set, rb_cSet) && !rb_block_given_p()) { return set; } - return rb_funcall_passing_block(klass, id_new, argc, argv); + return rb_funcall_passing_block(rb_cSet, id_new, 1, &set); } /* @@ -669,24 +701,24 @@ set_i_join(int argc, VALUE *argv, VALUE set) * call-seq: * add(obj) -> self * - * Adds the given object to the set and returns self. Use `merge` to + * Adds the given object to the set and returns self. Use Set#merge to * add many elements at once. * - * Set[1, 2].add(3) #=> #<Set: {1, 2, 3}> - * Set[1, 2].add([3, 4]) #=> #<Set: {1, 2, [3, 4]}> - * Set[1, 2].add(2) #=> #<Set: {1, 2}> + * Set[1, 2].add(3) #=> Set[1, 2, 3] + * Set[1, 2].add([3, 4]) #=> Set[1, 2, [3, 4]] + * Set[1, 2].add(2) #=> Set[1, 2] */ static VALUE set_i_add(VALUE set, VALUE item) { rb_check_frozen(set); if (set_iterating_p(set)) { - if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) { + if (!set_table_lookup(RSET_TABLE(set), (st_data_t)item)) { no_new_item(); } } else { - set_insert_wb(set, item, NULL); + set_insert_wb(set, item); } return set; } @@ -698,8 +730,8 @@ set_i_add(VALUE set, VALUE item) * Adds the given object to the set and returns self. If the object is * already in the set, returns nil. * - * Set[1, 2].add?(3) #=> #<Set: {1, 2, 3}> - * Set[1, 2].add?([3, 4]) #=> #<Set: {1, 2, [3, 4]}> + * Set[1, 2].add?(3) #=> Set[1, 2, 3] + * Set[1, 2].add?([3, 4]) #=> Set[1, 2, [3, 4]] * Set[1, 2].add?(2) #=> nil */ static VALUE @@ -707,13 +739,13 @@ set_i_add_p(VALUE set, VALUE item) { rb_check_frozen(set); if (set_iterating_p(set)) { - if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) { + if (!set_table_lookup(RSET_TABLE(set), (st_data_t)item)) { no_new_item(); } return Qnil; } else { - return set_insert_wb(set, item, NULL) ? Qnil : set; + return set_insert_wb(set, item) ? Qnil : set; } } @@ -728,7 +760,7 @@ static VALUE set_i_delete(VALUE set, VALUE item) { rb_check_frozen(set); - if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) { + if (set_table_delete(RSET_TABLE(set), (st_data_t *)&item)) { set_compact_after_delete(set); } return set; @@ -745,7 +777,7 @@ static VALUE set_i_delete_p(VALUE set, VALUE item) { rb_check_frozen(set); - if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) { + if (set_table_delete(RSET_TABLE(set), (st_data_t *)&item)) { set_compact_after_delete(set); return set; } @@ -828,9 +860,9 @@ set_classify_i(st_data_t key, st_data_t tmp) * * files = Set.new(Dir.glob("*.rb")) * hash = files.classify { |f| File.mtime(f).year } - * hash #=> {2000 => #<Set: {"a.rb", "b.rb"}>, - * # 2001 => #<Set: {"c.rb", "d.rb", "e.rb"}>, - * # 2002 => #<Set: {"f.rb"}>} + * hash #=> {2000 => Set["a.rb", "b.rb"], + * # 2001 => Set["c.rb", "d.rb", "e.rb"], + * # 2002 => Set["f.rb"]} * * Returns an enumerator if no block is given. */ @@ -845,66 +877,72 @@ set_i_classify(VALUE set) return args[0]; } -struct set_divide_args { - VALUE self; - VALUE set_class; - VALUE final_set; - VALUE hash; - VALUE current_set; - VALUE current_item; - unsigned long ni; - unsigned long nj; -}; +// Union-find with path compression +static long +set_divide_union_find_root(long *uf_parents, long index, long *tmp_array) +{ + long root = uf_parents[index]; + long update_size = 0; + while (root != index) { + tmp_array[update_size++] = index; + index = root; + root = uf_parents[index]; + } + for (long j = 0; j < update_size; j++) { + long idx = tmp_array[j]; + uf_parents[idx] = root; + } + return root; +} -static VALUE -set_divide_block0(RB_BLOCK_CALL_FUNC_ARGLIST(j, arg)) -{ - struct set_divide_args *args = (struct set_divide_args *)arg; - if (args->nj > args->ni) { - VALUE i = args->current_item; - if (RTEST(rb_yield_values(2, i, j)) && RTEST(rb_yield_values(2, j, i))) { - VALUE hash = args->hash; - if (args->current_set == Qnil) { - VALUE set = rb_hash_aref(hash, j); - if (set == Qnil) { - VALUE both[2] = {i, j}; - set = set_s_create(2, both, args->set_class); - rb_hash_aset(hash, i, set); - rb_hash_aset(hash, j, set); - set_i_add(args->final_set, set); - } - else { - set_i_add(set, i); - rb_hash_aset(hash, i, set); - } - args->current_set = set; - } - else { - set_i_add(args->current_set, j); - rb_hash_aset(hash, j, args->current_set); +static void +set_divide_union_find_merge(long *uf_parents, long i, long j, long *tmp_array) +{ + long root_i = set_divide_union_find_root(uf_parents, i, tmp_array); + long root_j = set_divide_union_find_root(uf_parents, j, tmp_array); + if (root_i != root_j) uf_parents[root_j] = root_i; +} + +static VALUE +set_divide_arity2(VALUE set) +{ + VALUE tmp, uf; + long size, *uf_parents, *tmp_array; + VALUE set_class = rb_obj_class(set); + VALUE items = set_i_to_a(set); + rb_ary_freeze(items); + size = RARRAY_LEN(items); + tmp_array = ALLOCV_N(long, tmp, size); + uf_parents = ALLOCV_N(long, uf, size); + for (long i = 0; i < size; i++) { + uf_parents[i] = i; + } + for (long i = 0; i < size - 1; i++) { + VALUE item1 = RARRAY_AREF(items, i); + for (long j = i + 1; j < size; j++) { + VALUE item2 = RARRAY_AREF(items, j); + if (RTEST(rb_yield_values(2, item1, item2)) && + RTEST(rb_yield_values(2, item2, item1))) { + set_divide_union_find_merge(uf_parents, i, j, tmp_array); } } } - args->nj++; - return j; -} - -static VALUE -set_divide_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, arg)) -{ - struct set_divide_args *args = (struct set_divide_args *)arg; - VALUE hash = args->hash; - args->current_set = rb_hash_aref(hash, i); - args->current_item = i; - args->nj = 0; - rb_block_call(args->self, id_each, 0, 0, set_divide_block0, arg); - if (args->current_set == Qnil) { - VALUE set = set_s_create(1, &i, args->set_class); - rb_hash_aset(hash, i, set); - set_i_add(args->final_set, set); + VALUE final_set = set_s_create(0, 0, rb_cSet); + VALUE hash = rb_hash_new(); + for (long i = 0; i < size; i++) { + VALUE v = RARRAY_AREF(items, i); + long root = set_divide_union_find_root(uf_parents, i, tmp_array); + VALUE set = rb_hash_aref(hash, LONG2FIX(root)); + if (set == Qnil) { + set = set_s_create(0, 0, set_class); + rb_hash_aset(hash, LONG2FIX(root), set); + set_i_add(final_set, set); + } + set_i_add(set, v); } - args->ni++; - return i; + ALLOCV_END(tmp); + ALLOCV_END(uf); + return final_set; } static void set_merge_enum_into(VALUE set, VALUE arg); @@ -925,10 +963,10 @@ static void set_merge_enum_into(VALUE set, VALUE arg); * * numbers = Set[1, 3, 4, 6, 9, 10, 11] * set = numbers.divide { |i,j| (i - j).abs == 1 } - * set #=> #<Set: {#<Set: {1}>, - * # #<Set: {3, 4}>, - * # #<Set: {6}>}> - * # #<Set: {9, 10, 11}>, + * set #=> Set[Set[1], + * # Set[3, 4], + * # Set[6], + * # Set[9, 10, 11]] * * Returns an enumerator if no block is given. */ @@ -938,19 +976,7 @@ set_i_divide(VALUE set) RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); if (rb_block_arity() == 2) { - VALUE final_set = set_s_create(0, 0, rb_cSet); - struct set_divide_args args = { - .self = set, - .set_class = rb_obj_class(set), - .final_set = final_set, - .hash = rb_hash_new(), - .current_set = 0, - .current_item = 0, - .ni = 0, - .nj = 0 - }; - rb_block_call(set, id_each, 0, 0, set_divide_block, (VALUE)&args); - return final_set; + return set_divide_arity2(set); } VALUE values = rb_hash_values(set_i_classify(set)); @@ -971,9 +997,9 @@ set_clear_i(st_data_t key, st_data_t dummy) * * Removes all elements and returns self. * - * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}> - * set.clear #=> #<Set: {}> - * set #=> #<Set: {}> + * set = Set[1, 'c', :s] #=> Set[1, "c", :s] + * set.clear #=> Set[] + * set #=> Set[] */ static VALUE set_i_clear(VALUE set) @@ -984,7 +1010,7 @@ set_i_clear(VALUE set) set_iter(set, set_clear_i, 0); } else { - set_clear(RSET_TABLE(set)); + set_table_clear(RSET_TABLE(set)); set_compact_after_delete(set); } return set; @@ -1000,8 +1026,8 @@ static int set_intersection_i(st_data_t key, st_data_t tmp) { struct set_intersection_data *data = (struct set_intersection_data *)tmp; - if (set_lookup(data->other, key)) { - set_table_insert_wb(data->into, data->set, key, NULL); + if (set_table_lookup(data->other, key)) { + set_table_insert_wb(data->into, data->set, key); } return ST_CONTINUE; @@ -1021,8 +1047,8 @@ set_intersection_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, data)) * Returns a new set containing elements common to the set and the given * enumerable object. * - * Set[1, 3, 5] & Set[3, 2, 1] #=> #<Set: {3, 1}> - * Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> #<Set: {"a", "b"}> + * Set[1, 3, 5] & Set[3, 2, 1] #=> Set[3, 1] + * Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> Set["a", "b"] */ static VALUE set_i_intersection(VALUE set, VALUE other) @@ -1097,7 +1123,7 @@ static int set_merge_i(st_data_t key, st_data_t data) { struct set_merge_args *args = (struct set_merge_args *)data; - set_table_insert_wb(args->into, args->set, key, NULL); + set_table_insert_wb(args->into, args->set, key); return ST_CONTINUE; } @@ -1105,7 +1131,7 @@ static VALUE set_merge_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set)) { VALUE element = key; - set_insert_wb(set, element, &element); + set_insert_wb(set, element); return element; } @@ -1120,14 +1146,10 @@ set_merge_enum_into(VALUE set, VALUE arg) set_iter(arg, set_merge_i, (st_data_t)&args); } else if (RB_TYPE_P(arg, T_ARRAY)) { - long len = RARRAY_LEN(arg); - if (RARRAY_LEN(arg) != 0) { - set_table *into = RSET_TABLE(set); - RARRAY_PTR_USE(arg, ptr, { - for(; len > 0; len--, ptr++) { - set_table_insert_wb(into, set, *ptr, NULL); - } - }); + long i; + set_table *into = RSET_TABLE(set); + for (i=0; i<RARRAY_LEN(arg); i++) { + set_table_insert_wb(into, set, RARRAY_AREF(arg, i)); } } else { @@ -1148,6 +1170,11 @@ set_i_merge(int argc, VALUE *argv, VALUE set) if (rb_keyword_given_p()) { rb_raise(rb_eArgError, "no keywords accepted"); } + + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "cannot add to set during iteration"); + } + rb_check_frozen(set); int i; @@ -1176,9 +1203,9 @@ set_reset_table_with_type(VALUE set, const struct st_hash_type *type) .into = new }; set_iter(set, set_merge_i, (st_data_t)&args); - set_free_embedded(sobj); + set_free_embedded_table(&sobj->table); memcpy(&sobj->table, new, sizeof(*new)); - free(new); + SIZED_FREE(new); } else { sobj->table.type = type; @@ -1248,8 +1275,8 @@ set_xor_i(st_data_t key, st_data_t data) VALUE element = (VALUE)key; VALUE set = (VALUE)data; set_table *table = RSET_TABLE(set); - if (set_table_insert_wb(table, set, element, &element)) { - set_delete(table, &element); + if (set_table_insert_wb(table, set, element)) { + set_table_delete(table, &element); } return ST_CONTINUE; } @@ -1262,21 +1289,23 @@ set_xor_i(st_data_t key, st_data_t data) * given enumerable object. <tt>(set ^ enum)</tt> is equivalent to * <tt>((set | enum) - (set & enum))</tt>. * - * Set[1, 2] ^ Set[2, 3] #=> #<Set: {3, 1}> - * Set[1, 'b', 'c'] ^ ['b', 'd'] #=> #<Set: {"d", 1, "c"}> + * Set[1, 2] ^ Set[2, 3] #=> Set[3, 1] + * Set[1, 'b', 'c'] ^ ['b', 'd'] #=> Set["d", 1, "c"] */ static VALUE set_i_xor(VALUE set, VALUE other) { - VALUE new_set; + VALUE new_set = rb_obj_dup(set); + if (rb_obj_is_kind_of(other, rb_cSet)) { - new_set = other; + set_iter(other, set_xor_i, (st_data_t)new_set); } else { - new_set = set_s_alloc(rb_obj_class(set)); - set_merge_enum_into(new_set, other); + VALUE tmp = set_s_alloc(rb_cSet); + set_merge_enum_into(tmp, other); + set_iter(tmp, set_xor_i, (st_data_t)new_set); } - set_iter(set, set_xor_i, (st_data_t)new_set); + return new_set; } @@ -1287,8 +1316,8 @@ set_i_xor(VALUE set, VALUE other) * Returns a new set built by merging the set and the elements of the * given enumerable object. * - * Set[1, 2, 3] | Set[2, 4, 5] #=> #<Set: {1, 2, 3, 4, 5}> - * Set[1, 5, 'z'] | (1..6) #=> #<Set: {1, 5, "z", 2, 3, 4, 6}> + * Set[1, 2, 3] | Set[2, 4, 5] #=> Set[1, 2, 3, 4, 5] + * Set[1, 5, 'z'] | (1..6) #=> Set[1, 5, "z", 2, 3, 4, 6] */ static VALUE set_i_union(VALUE set, VALUE other) @@ -1301,7 +1330,7 @@ set_i_union(VALUE set, VALUE other) static int set_remove_i(st_data_t key, st_data_t from) { - set_delete((struct set_table *)from, (st_data_t *)&key); + set_table_delete((struct set_table *)from, (st_data_t *)&key); return ST_CONTINUE; } @@ -1309,7 +1338,7 @@ static VALUE set_remove_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set)) { rb_check_frozen(set); - set_delete(RSET_TABLE(set), (st_data_t *)&key); + set_table_delete(RSET_TABLE(set), (st_data_t *)&key); return key; } @@ -1346,8 +1375,8 @@ set_i_subtract(VALUE set, VALUE other) * Returns a new set built by duplicating the set, removing every * element that appears in the given enumerable object. * - * Set[1, 3, 5] - Set[1, 5] #=> #<Set: {3}> - * Set['a', 'b', 'z'] - ['a', 'c'] #=> #<Set: {"b", "z"}> + * Set[1, 3, 5] - Set[1, 5] #=> Set[3] + * Set['a', 'b', 'z'] - ['a', 'c'] #=> Set["b", "z"] */ static VALUE set_i_difference(VALUE set, VALUE other) @@ -1382,7 +1411,7 @@ set_i_each(VALUE set) static int set_collect_i(st_data_t key, st_data_t data) { - set_insert_wb((VALUE)data, rb_yield((VALUE)key), NULL); + set_insert_wb((VALUE)data, rb_yield((VALUE)key)); return ST_CONTINUE; } @@ -1411,7 +1440,7 @@ static int set_keep_if_i(st_data_t key, st_data_t into) { if (!RTEST(rb_yield((VALUE)key))) { - set_delete((set_table *)into, &key); + set_table_delete((set_table *)into, &key); } return ST_CONTINUE; } @@ -1463,9 +1492,9 @@ set_i_select(VALUE set) * Replaces the contents of the set with the contents of the given * enumerable object and returns self. * - * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}> - * set.replace([1, 2]) #=> #<Set: {1, 2}> - * set #=> #<Set: {1, 2}> + * set = Set[1, 'c', :s] #=> Set[1, "c", :s] + * set.replace([1, 2]) #=> Set[1, 2] + * set #=> Set[1, 2] */ static VALUE set_i_replace(VALUE set, VALUE other) @@ -1483,7 +1512,7 @@ set_i_replace(VALUE set, VALUE other) // make sure enum is enumerable before calling clear enum_method_id(other); - set_clear(RSET_TABLE(set)); + set_table_clear(RSET_TABLE(set)); set_merge_enum_into(set, other); } @@ -1594,7 +1623,7 @@ static int set_le_i(st_data_t key, st_data_t arg) { struct set_subset_data *data = (struct set_subset_data *)arg; - if (set_lookup(data->table, key)) return ST_CONTINUE; + if (set_table_lookup(data->table, key)) return ST_CONTINUE; data->result = Qfalse; return ST_STOP; } @@ -1670,7 +1699,7 @@ static int set_intersect_i(st_data_t key, st_data_t arg) { VALUE *args = (VALUE *)arg; - if (set_lookup((set_table *)args[0], key)) { + if (set_table_lookup((set_table *)args[0], key)) { args[1] = Qtrue; return ST_STOP; } @@ -1741,7 +1770,7 @@ set_i_disjoint(VALUE set, VALUE other) * set <=> other -> -1, 0, 1, or nil * * Returns 0 if the set are equal, -1 / 1 if the set is a - * proper subset / superset of the given set, or or nil if + * proper subset / superset of the given set, or nil if * they both have unique elements. */ static VALUE @@ -1779,7 +1808,7 @@ set_eql_i(st_data_t item, st_data_t arg) { struct set_equal_data *data = (struct set_equal_data *)arg; - if (!set_lookup(RSET_TABLE(data->set), item)) { + if (!set_table_lookup(RSET_TABLE(data->set), item)) { data->result = Qfalse; return ST_STOP; } @@ -1910,24 +1939,67 @@ compat_loader(VALUE self, VALUE a) return set_i_from_hash(self, rb_ivar_get(a, id_i_hash)); } +/* C-API functions */ + +void +rb_set_foreach(VALUE set, int (*func)(VALUE element, VALUE arg), VALUE arg) +{ + set_iter(set, func, arg); +} + +VALUE +rb_set_new(void) +{ + return set_alloc_with_size(rb_cSet, 0); +} + +VALUE +rb_set_new_capa(size_t capa) +{ + return set_alloc_with_size(rb_cSet, (st_index_t)capa); +} + +bool +rb_set_lookup(VALUE set, VALUE element) +{ + return RSET_IS_MEMBER(set, element); +} + +bool +rb_set_add(VALUE set, VALUE element) +{ + return set_i_add_p(set, element) != Qnil; +} + +VALUE +rb_set_clear(VALUE set) +{ + return set_i_clear(set); +} + +bool +rb_set_delete(VALUE set, VALUE element) +{ + return set_i_delete_p(set, element) != Qnil; +} + +size_t +rb_set_size(VALUE set) +{ + return RSET_SIZE(set); +} + /* * Document-class: Set * - * Copyright (c) 2002-2024 Akinori MUSHA <knu@iDaemons.org> - * - * Documentation by Akinori MUSHA and Gavin Sinclair. - * - * All rights reserved. You can redistribute and/or modify it under the same - * terms as Ruby. - * * The Set class implements a collection of unordered values with no * duplicates. It is a hybrid of Array's intuitive inter-operation * facilities and Hash's fast lookup. * - * Set is easy to use with Enumerable objects (implementing `each`). + * Set is easy to use with Enumerable objects (implementing #each). * Most of the initializer methods and binary operators accept generic * Enumerable objects besides sets and arrays. An Enumerable object - * can be converted to Set using the `to_set` method. + * can be converted to Set using the +to_set+ method. * * Set uses a data structure similar to Hash for storage, except that * it only has keys and no values. @@ -1951,11 +2023,11 @@ compat_loader(VALUE self, VALUE a) * * == Example * - * s1 = Set[1, 2] #=> #<Set: {1, 2}> - * s2 = [1, 2].to_set #=> #<Set: {1, 2}> + * s1 = Set[1, 2] #=> Set[1, 2] + * s2 = [1, 2].to_set #=> Set[1, 2] * s1 == s2 #=> true - * s1.add("foo") #=> #<Set: {1, 2, "foo"}> - * s1.merge([2, 6]) #=> #<Set: {1, 2, "foo", 6}> + * s1.add("foo") #=> Set[1, 2, "foo"] + * s1.merge([2, 6]) #=> Set[1, 2, "foo", 6] * s1.subset?(s2) #=> false * s2.subset?(s1) #=> true * @@ -1963,12 +2035,42 @@ compat_loader(VALUE self, VALUE a) * * - Akinori MUSHA <knu@iDaemons.org> (current maintainer) * - * == What's Here + * == Inheriting from \Set + * + * Before Ruby 4.0 (released December 2025), \Set had a different, less + * efficient implementation. It was reimplemented in C, and the behavior + * of some of the core methods were adjusted. + * + * To keep backward compatibility, when a class is inherited from \Set, + * additional module +Set::SubclassCompatible+ is included, which makes + * the inherited class behavior, as well as internal method names, + * closer to what it was before Ruby 4.0. * - * First, what's elsewhere. \Class \Set: + * It can be easily seen, for example, in the #inspect method behavior: + * + * p Set[1, 2, 3] + * # prints "Set[1, 2, 3]" + * + * class MySet < Set + * end + * p MySet[1, 2, 3] + * # prints "#<MySet: {1, 2, 3}>", like it was in Ruby 3.4 * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * For new code, if backward compatibility is not necessary, + * it is recommended to instead inherit from +Set::CoreSet+, which + * avoids including the "compatibility" layer: + * + * class MyCoreSet < Set::CoreSet + * end + * p MyCoreSet[1, 2, 3] + * # prints "MyCoreSet[1, 2, 3]" + * + * == Set's methods + * + * First, what's elsewhere. \Class \Set: + * + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * In particular, class \Set does not have many methods of its own @@ -1977,16 +2079,15 @@ compat_loader(VALUE self, VALUE a) * * Here, class \Set provides methods that are useful for: * - * - {Creating an Array}[rdoc-ref:Array@Methods+for+Creating+an+Array] - * - {Creating a Set}[rdoc-ref:Array@Methods+for+Creating+a+Set] - * - {Set Operations}[rdoc-ref:Array@Methods+for+Set+Operations] - * - {Comparing}[rdoc-ref:Array@Methods+for+Comparing] - * - {Querying}[rdoc-ref:Array@Methods+for+Querying] - * - {Assigning}[rdoc-ref:Array@Methods+for+Assigning] - * - {Deleting}[rdoc-ref:Array@Methods+for+Deleting] - * - {Converting}[rdoc-ref:Array@Methods+for+Converting] - * - {Iterating}[rdoc-ref:Array@Methods+for+Iterating] - * - {And more....}[rdoc-ref:Array@Other+Methods] + * - {Creating a Set}[rdoc-ref:Set@Methods+for+Creating+a+Set] + * - {Set Operations}[rdoc-ref:Set@Methods+for+Set+Operations] + * - {Comparing}[rdoc-ref:Set@Methods+for+Comparing] + * - {Querying}[rdoc-ref:Set@Methods+for+Querying] + * - {Assigning}[rdoc-ref:Set@Methods+for+Assigning] + * - {Deleting}[rdoc-ref:Set@Methods+for+Deleting] + * - {Converting}[rdoc-ref:Set@Methods+for+Converting] + * - {Iterating}[rdoc-ref:Set@Methods+for+Iterating] + * - {And more....}[rdoc-ref:Set@Other+Methods] * * === Methods for Creating a \Set * @@ -2129,6 +2230,8 @@ Init_Set(void) id_any_p = rb_intern_const("any?"); id_new = rb_intern_const("new"); id_i_hash = rb_intern_const("@hash"); + id_subclass_compatible = rb_intern_const("SubclassCompatible"); + id_class_methods = rb_intern_const("ClassMethods"); id_set_iter_lev = rb_make_internal_id(); rb_define_alloc_func(rb_cSet, set_s_alloc); @@ -2193,12 +2296,16 @@ Init_Set(void) rb_define_method(rb_cSet, "superset?", set_i_superset, 1); rb_define_alias(rb_cSet, ">=", "superset?"); rb_define_method(rb_cSet, "to_a", set_i_to_a, 0); - rb_define_method(rb_cSet, "to_h", set_i_to_h, 0); - rb_define_method(rb_cSet, "to_set", set_i_to_set, -1); + rb_define_method(rb_cSet, "to_set", set_i_to_set, 0); /* :nodoc: */ VALUE compat = rb_define_class_under(rb_cSet, "compatible", rb_cObject); rb_marshal_define_compat(rb_cSet, compat, compat_dumper, compat_loader); + // Create Set::CoreSet before defining inherited, so it does not include + // the backwards compatibility layer. + rb_define_class_under(rb_cSet, "CoreSet", rb_cSet); + rb_define_private_method(rb_singleton_class(rb_cSet), "inherited", set_s_inherited, 1); + rb_provide("set.rb"); } |
