summaryrefslogtreecommitdiff
path: root/set.c
diff options
context:
space:
mode:
Diffstat (limited to 'set.c')
-rw-r--r--set.c585
1 files changed, 346 insertions, 239 deletions
diff --git a/set.c b/set.c
index 221b9a07e1..8d6f33329b 100644
--- a/set.c
+++ b/set.c
@@ -7,6 +7,7 @@
#include "id.h"
#include "internal.h"
#include "internal/bits.h"
+#include "internal/error.h"
#include "internal/hash.h"
#include "internal/proc.h"
#include "internal/sanitizers.h"
@@ -101,6 +102,8 @@ static ID id_any_p;
static ID id_new;
static ID id_i_hash;
static ID id_set_iter_lev;
+static ID id_subclass_compatible;
+static ID id_class_methods;
#define RSET_INITIALIZED FL_USER1
#define RSET_LEV_MASK (FL_USER13 | FL_USER14 | FL_USER15 | /* FL 13..19 */ \
@@ -113,7 +116,7 @@ static ID id_set_iter_lev;
#define RSET_SIZE(set) set_table_size(RSET_TABLE(set))
#define RSET_EMPTY(set) (RSET_SIZE(set) == 0)
#define RSET_SIZE_NUM(set) SIZET2NUM(RSET_SIZE(set))
-#define RSET_IS_MEMBER(sobj, item) set_lookup(RSET_TABLE(set), (st_data_t)(item))
+#define RSET_IS_MEMBER(set, item) set_table_lookup(RSET_TABLE(set), (st_data_t)(item))
#define RSET_COMPARE_BY_IDENTITY(set) (RSET_TABLE(set)->type == &identhash)
struct set_object {
@@ -121,6 +124,14 @@ struct set_object {
};
static int
+mark_and_pin_key(st_data_t key, st_data_t data)
+{
+ rb_gc_mark((VALUE)key);
+
+ return ST_CONTINUE;
+}
+
+static int
mark_key(st_data_t key, st_data_t data)
{
rb_gc_mark_movable((VALUE)key);
@@ -132,22 +143,21 @@ static void
set_mark(void *ptr)
{
struct set_object *sobj = ptr;
- if (sobj->table.entries) set_foreach(&sobj->table, mark_key, 0);
-}
-
-static void
-set_free_embedded(struct set_object *sobj)
-{
- free((&sobj->table)->bins);
- free((&sobj->table)->entries);
+ if (sobj->table.entries) {
+ if (sobj->table.type == &identhash) {
+ set_table_foreach(&sobj->table, mark_and_pin_key, 0);
+ }
+ else {
+ set_table_foreach(&sobj->table, mark_key, 0);
+ }
+ }
}
static void
set_free(void *ptr)
{
struct set_object *sobj = ptr;
- set_free_embedded(sobj);
- memset(&sobj->table, 0, sizeof(sobj->table));
+ set_free_embedded_table(&sobj->table);
}
static size_t
@@ -171,9 +181,7 @@ set_foreach_replace(st_data_t key, st_data_t argp, int error)
static int
set_replace_ref(st_data_t *key, st_data_t argp, int existing)
{
- if (rb_gc_location((VALUE)*key) != (VALUE)*key) {
- *key = rb_gc_location((VALUE)*key);
- }
+ rb_gc_mark_and_move((VALUE *)key);
return ST_CONTINUE;
}
@@ -369,11 +377,10 @@ set_compact_after_delete(VALUE set)
}
static int
-set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr)
+set_table_insert_wb(set_table *tab, VALUE set, VALUE key)
{
if (tab->type != &identhash && rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) {
key = rb_hash_key_str(key);
- if (key_addr) *key_addr = key;
}
int ret = set_insert(tab, (st_data_t)key);
if (ret == 0) RB_OBJ_WRITTEN(set, Qundef, key);
@@ -381,9 +388,9 @@ set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr)
}
static int
-set_insert_wb(VALUE set, VALUE key, VALUE *key_addr)
+set_insert_wb(VALUE set, VALUE key)
{
- return set_table_insert_wb(RSET_TABLE(set), set, key, key_addr);
+ return set_table_insert_wb(RSET_TABLE(set), set, key);
}
static VALUE
@@ -405,6 +412,13 @@ set_s_alloc(VALUE klass)
return set_alloc_with_size(klass, 0);
}
+/*
+ * call-seq:
+ * Set[*objects] -> new_set
+ *
+ * Returns a new Set object populated with the given objects,
+ * See Set::new.
+ */
static VALUE
set_s_create(int argc, VALUE *argv, VALUE klass)
{
@@ -413,12 +427,25 @@ set_s_create(int argc, VALUE *argv, VALUE klass)
int i;
for (i=0; i < argc; i++) {
- set_table_insert_wb(table, set, argv[i], NULL);
+ set_table_insert_wb(table, set, argv[i]);
}
return set;
}
+static VALUE
+set_s_inherited(VALUE klass, VALUE subclass)
+{
+ if (klass == rb_cSet) {
+ // When subclassing directly from Set, include the compatibility layer
+ rb_require("set/subclass_compatible.rb");
+ VALUE subclass_compatible = rb_const_get(klass, id_subclass_compatible);
+ rb_include_module(subclass, subclass_compatible);
+ rb_extend_object(subclass, rb_const_get(subclass_compatible, id_class_methods));
+ }
+ return Qnil;
+}
+
static void
check_set(VALUE arg)
{
@@ -451,7 +478,7 @@ static VALUE
set_initialize_without_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set))
{
VALUE element = i;
- set_insert_wb(set, element, &element);
+ set_insert_wb(set, element);
return element;
}
@@ -459,27 +486,41 @@ static VALUE
set_initialize_with_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set))
{
VALUE element = rb_yield(i);
- set_insert_wb(set, element, &element);
+ set_insert_wb(set, element);
return element;
}
/*
- * call-seq:
- * Set.new -> new_set
- * Set.new(enum) -> new_set
- * Set.new(enum) { |elem| ... } -> new_set
+ * call-seq:
+ * Set.new(object = nil) -> new_set
+ * Set.new(object = nil) {|element| ... } -> new_set
+ *
+ * Returns a new \Set object based on the given +object+,
+ * which must be an Enumerable or +nil+.
*
- * Creates a new set containing the elements of the given enumerable
- * object.
+ * With argument +object+ given as +nil+,
+ * returns a new empty \Set object:
*
- * If a block is given, the elements of enum are preprocessed by the
- * given block.
+ * Set.new # => Set[]
+ * Set.new { fail 'Cannot happen' } # => Set[] # Block not called.
+ *
+ * With no block given and +object+ given as an Enumerable,
+ * populates the new set with the elements of +object+:
+ *
+ * Set.new(%w[ a b c ]) # => Set["a", "b", "c"]
+ * Set.new({foo: 0, bar: 1}) # => Set[[:foo, 0], [:bar, 1]]
+ * Set.new(4..10) # => Set[4, 5, 6, 7, 8, 9, 10]
+ * Set.new(Dir.new('lib')).take(5)
+ * # => [".", "..", "bundled_gems.rb", "bundler", "bundler.rb"]
+ * Set.new(File.new('doc/NEWS/NEWS-4.0.0.md')).take(3)
+ * # => ["# NEWS for Ruby 4.0.0\n", "\n", "This document is a list of user-visible feature changes\n"]
+ *
+ * With a block given and +object+ given as an Enumerable,
+ * calls the block with each element of +object+;
+ * adds the block's return value to the new set:
+ *
+ * Set.new(4..10) {|i| i * 2 } # => Set[8, 10, 12, 14, 16, 18, 20]
*
- * Set.new([1, 2]) #=> #<Set: {1, 2}>
- * Set.new([1, 2, 1]) #=> #<Set: {1, 2}>
- * Set.new([1, 'c', :s]) #=> #<Set: {1, "c", :s}>
- * Set.new(1..5) #=> #<Set: {1, 2, 3, 4, 5}>
- * Set.new([1, 2, 3]) { |x| x * x } #=> #<Set: {1, 4, 9}>
*/
static VALUE
set_i_initialize(int argc, VALUE *argv, VALUE set)
@@ -494,18 +535,13 @@ set_i_initialize(int argc, VALUE *argv, VALUE set)
if (argc > 0 && (other = argv[0]) != Qnil) {
if (RB_TYPE_P(other, T_ARRAY)) {
- long len = RARRAY_LEN(other);
- if (RARRAY_LEN(other) != 0) {
- set_table *into = RSET_TABLE(set);
- VALUE key;
- int block_given = rb_block_given_p();
- RARRAY_PTR_USE(other, ptr, {
- for(; len > 0; len--, ptr++) {
- key = *ptr;
- if (block_given) key = rb_yield(key);
- set_table_insert_wb(into, set, key, NULL);
- }
- });
+ long i;
+ int block_given = rb_block_given_p();
+ set_table *into = RSET_TABLE(set);
+ for (i=0; i<RARRAY_LEN(other); i++) {
+ VALUE key = RARRAY_AREF(other, i);
+ if (block_given) key = rb_yield(key);
+ set_table_insert_wb(into, set, key);
}
}
else {
@@ -518,6 +554,7 @@ set_i_initialize(int argc, VALUE *argv, VALUE set)
return set;
}
+/* :nodoc: */
static VALUE
set_i_initialize_copy(VALUE set, VALUE other)
{
@@ -530,8 +567,9 @@ set_i_initialize_copy(VALUE set, VALUE other)
struct set_object *sobj;
TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj);
- set_free_embedded(sobj);
+ set_free_embedded_table(&sobj->table);
set_copy(&sobj->table, RSET_TABLE(other));
+ rb_gc_writebarrier_remember(set);
return set;
}
@@ -539,10 +577,14 @@ set_i_initialize_copy(VALUE set, VALUE other)
static int
set_inspect_i(st_data_t key, st_data_t arg)
{
- VALUE str = (VALUE)arg;
- if (RSTRING_LEN(str) > 8) {
+ VALUE *args = (VALUE*)arg;
+ VALUE str = args[0];
+ if (args[1] == Qtrue) {
rb_str_buf_cat_ascii(str, ", ");
}
+ else {
+ args[1] = Qtrue;
+ }
rb_str_buf_append(str, rb_inspect((VALUE)key));
return ST_CONTINUE;
@@ -552,11 +594,17 @@ static VALUE
set_inspect(VALUE set, VALUE dummy, int recur)
{
VALUE str;
+ VALUE klass_name = rb_class_path(CLASS_OF(set));
+
+ if (recur) {
+ str = rb_sprintf("%"PRIsVALUE"[...]", klass_name);
+ return rb_str_export_to_enc(str, rb_usascii_encoding());
+ }
- if (recur) return rb_usascii_str_new2("#<Set: {...}>");
- str = rb_str_buf_new2("#<Set: {");
- set_iter(set, set_inspect_i, str);
- rb_str_buf_cat2(str, "}>");
+ str = rb_sprintf("%"PRIsVALUE"[", klass_name);
+ VALUE args[2] = {str, Qfalse};
+ set_iter(set, set_inspect_i, (st_data_t)args);
+ rb_str_buf_cat2(str, "]");
return str;
}
@@ -568,11 +616,11 @@ set_inspect(VALUE set, VALUE dummy, int recur)
* Returns a new string containing the set entries:
*
* s = Set.new
- * s.inspect # => "#<Set: {}>"
+ * s.inspect # => "Set[]"
* s.add(1)
- * s.inspect # => "#<Set: {1}>"
+ * s.inspect # => "Set[1]"
* s.add(2)
- * s.inspect # => "#<Set: {1, 2}>"
+ * s.inspect # => "Set[1, 2]"
*
* Related: see {Methods for Converting}[rdoc-ref:Set@Methods+for+Converting].
*/
@@ -621,35 +669,19 @@ set_i_to_a(VALUE set)
/*
* call-seq:
- * to_set(klass = Set, *args, &block) -> self or new_set
+ * to_set(&block) -> self or new_set
*
- * Returns self if receiver is an instance of +Set+ and no arguments or
- * block are given. Otherwise, converts the set to another with
- * <tt>klass.new(self, *args, &block)</tt>.
- *
- * In subclasses, returns `klass.new(self, *args, &block)` unless overridden.
+ * Without a block, if +self+ is an instance of +Set+, returns +self+.
+ * Otherwise, calls <tt>Set.new(self, &block)</tt>.
*/
static VALUE
-set_i_to_set(int argc, VALUE *argv, VALUE set)
+set_i_to_set(VALUE set)
{
- VALUE klass;
-
- if (argc == 0) {
- klass = rb_cSet;
- argv = &set;
- argc = 1;
- }
- else {
- klass = argv[0];
- argv[0] = set;
- }
-
- if (klass == rb_cSet && rb_obj_is_instance_of(set, rb_cSet) &&
- argc == 1 && !rb_block_given_p()) {
+ if (rb_obj_is_instance_of(set, rb_cSet) && !rb_block_given_p()) {
return set;
}
- return rb_funcall_passing_block(klass, id_new, argc, argv);
+ return rb_funcall_passing_block(rb_cSet, id_new, 1, &set);
}
/*
@@ -669,24 +701,24 @@ set_i_join(int argc, VALUE *argv, VALUE set)
* call-seq:
* add(obj) -> self
*
- * Adds the given object to the set and returns self. Use `merge` to
+ * Adds the given object to the set and returns self. Use Set#merge to
* add many elements at once.
*
- * Set[1, 2].add(3) #=> #<Set: {1, 2, 3}>
- * Set[1, 2].add([3, 4]) #=> #<Set: {1, 2, [3, 4]}>
- * Set[1, 2].add(2) #=> #<Set: {1, 2}>
+ * Set[1, 2].add(3) #=> Set[1, 2, 3]
+ * Set[1, 2].add([3, 4]) #=> Set[1, 2, [3, 4]]
+ * Set[1, 2].add(2) #=> Set[1, 2]
*/
static VALUE
set_i_add(VALUE set, VALUE item)
{
rb_check_frozen(set);
if (set_iterating_p(set)) {
- if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) {
+ if (!set_table_lookup(RSET_TABLE(set), (st_data_t)item)) {
no_new_item();
}
}
else {
- set_insert_wb(set, item, NULL);
+ set_insert_wb(set, item);
}
return set;
}
@@ -698,8 +730,8 @@ set_i_add(VALUE set, VALUE item)
* Adds the given object to the set and returns self. If the object is
* already in the set, returns nil.
*
- * Set[1, 2].add?(3) #=> #<Set: {1, 2, 3}>
- * Set[1, 2].add?([3, 4]) #=> #<Set: {1, 2, [3, 4]}>
+ * Set[1, 2].add?(3) #=> Set[1, 2, 3]
+ * Set[1, 2].add?([3, 4]) #=> Set[1, 2, [3, 4]]
* Set[1, 2].add?(2) #=> nil
*/
static VALUE
@@ -707,13 +739,13 @@ set_i_add_p(VALUE set, VALUE item)
{
rb_check_frozen(set);
if (set_iterating_p(set)) {
- if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) {
+ if (!set_table_lookup(RSET_TABLE(set), (st_data_t)item)) {
no_new_item();
}
return Qnil;
}
else {
- return set_insert_wb(set, item, NULL) ? Qnil : set;
+ return set_insert_wb(set, item) ? Qnil : set;
}
}
@@ -728,7 +760,7 @@ static VALUE
set_i_delete(VALUE set, VALUE item)
{
rb_check_frozen(set);
- if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) {
+ if (set_table_delete(RSET_TABLE(set), (st_data_t *)&item)) {
set_compact_after_delete(set);
}
return set;
@@ -745,7 +777,7 @@ static VALUE
set_i_delete_p(VALUE set, VALUE item)
{
rb_check_frozen(set);
- if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) {
+ if (set_table_delete(RSET_TABLE(set), (st_data_t *)&item)) {
set_compact_after_delete(set);
return set;
}
@@ -828,9 +860,9 @@ set_classify_i(st_data_t key, st_data_t tmp)
*
* files = Set.new(Dir.glob("*.rb"))
* hash = files.classify { |f| File.mtime(f).year }
- * hash #=> {2000 => #<Set: {"a.rb", "b.rb"}>,
- * # 2001 => #<Set: {"c.rb", "d.rb", "e.rb"}>,
- * # 2002 => #<Set: {"f.rb"}>}
+ * hash #=> {2000 => Set["a.rb", "b.rb"],
+ * # 2001 => Set["c.rb", "d.rb", "e.rb"],
+ * # 2002 => Set["f.rb"]}
*
* Returns an enumerator if no block is given.
*/
@@ -845,66 +877,72 @@ set_i_classify(VALUE set)
return args[0];
}
-struct set_divide_args {
- VALUE self;
- VALUE set_class;
- VALUE final_set;
- VALUE hash;
- VALUE current_set;
- VALUE current_item;
- unsigned long ni;
- unsigned long nj;
-};
+// Union-find with path compression
+static long
+set_divide_union_find_root(long *uf_parents, long index, long *tmp_array)
+{
+ long root = uf_parents[index];
+ long update_size = 0;
+ while (root != index) {
+ tmp_array[update_size++] = index;
+ index = root;
+ root = uf_parents[index];
+ }
+ for (long j = 0; j < update_size; j++) {
+ long idx = tmp_array[j];
+ uf_parents[idx] = root;
+ }
+ return root;
+}
-static VALUE
-set_divide_block0(RB_BLOCK_CALL_FUNC_ARGLIST(j, arg))
-{
- struct set_divide_args *args = (struct set_divide_args *)arg;
- if (args->nj > args->ni) {
- VALUE i = args->current_item;
- if (RTEST(rb_yield_values(2, i, j)) && RTEST(rb_yield_values(2, j, i))) {
- VALUE hash = args->hash;
- if (args->current_set == Qnil) {
- VALUE set = rb_hash_aref(hash, j);
- if (set == Qnil) {
- VALUE both[2] = {i, j};
- set = set_s_create(2, both, args->set_class);
- rb_hash_aset(hash, i, set);
- rb_hash_aset(hash, j, set);
- set_i_add(args->final_set, set);
- }
- else {
- set_i_add(set, i);
- rb_hash_aset(hash, i, set);
- }
- args->current_set = set;
- }
- else {
- set_i_add(args->current_set, j);
- rb_hash_aset(hash, j, args->current_set);
+static void
+set_divide_union_find_merge(long *uf_parents, long i, long j, long *tmp_array)
+{
+ long root_i = set_divide_union_find_root(uf_parents, i, tmp_array);
+ long root_j = set_divide_union_find_root(uf_parents, j, tmp_array);
+ if (root_i != root_j) uf_parents[root_j] = root_i;
+}
+
+static VALUE
+set_divide_arity2(VALUE set)
+{
+ VALUE tmp, uf;
+ long size, *uf_parents, *tmp_array;
+ VALUE set_class = rb_obj_class(set);
+ VALUE items = set_i_to_a(set);
+ rb_ary_freeze(items);
+ size = RARRAY_LEN(items);
+ tmp_array = ALLOCV_N(long, tmp, size);
+ uf_parents = ALLOCV_N(long, uf, size);
+ for (long i = 0; i < size; i++) {
+ uf_parents[i] = i;
+ }
+ for (long i = 0; i < size - 1; i++) {
+ VALUE item1 = RARRAY_AREF(items, i);
+ for (long j = i + 1; j < size; j++) {
+ VALUE item2 = RARRAY_AREF(items, j);
+ if (RTEST(rb_yield_values(2, item1, item2)) &&
+ RTEST(rb_yield_values(2, item2, item1))) {
+ set_divide_union_find_merge(uf_parents, i, j, tmp_array);
}
}
}
- args->nj++;
- return j;
-}
-
-static VALUE
-set_divide_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, arg))
-{
- struct set_divide_args *args = (struct set_divide_args *)arg;
- VALUE hash = args->hash;
- args->current_set = rb_hash_aref(hash, i);
- args->current_item = i;
- args->nj = 0;
- rb_block_call(args->self, id_each, 0, 0, set_divide_block0, arg);
- if (args->current_set == Qnil) {
- VALUE set = set_s_create(1, &i, args->set_class);
- rb_hash_aset(hash, i, set);
- set_i_add(args->final_set, set);
+ VALUE final_set = set_s_create(0, 0, rb_cSet);
+ VALUE hash = rb_hash_new();
+ for (long i = 0; i < size; i++) {
+ VALUE v = RARRAY_AREF(items, i);
+ long root = set_divide_union_find_root(uf_parents, i, tmp_array);
+ VALUE set = rb_hash_aref(hash, LONG2FIX(root));
+ if (set == Qnil) {
+ set = set_s_create(0, 0, set_class);
+ rb_hash_aset(hash, LONG2FIX(root), set);
+ set_i_add(final_set, set);
+ }
+ set_i_add(set, v);
}
- args->ni++;
- return i;
+ ALLOCV_END(tmp);
+ ALLOCV_END(uf);
+ return final_set;
}
static void set_merge_enum_into(VALUE set, VALUE arg);
@@ -925,10 +963,10 @@ static void set_merge_enum_into(VALUE set, VALUE arg);
*
* numbers = Set[1, 3, 4, 6, 9, 10, 11]
* set = numbers.divide { |i,j| (i - j).abs == 1 }
- * set #=> #<Set: {#<Set: {1}>,
- * # #<Set: {3, 4}>,
- * # #<Set: {6}>}>
- * # #<Set: {9, 10, 11}>,
+ * set #=> Set[Set[1],
+ * # Set[3, 4],
+ * # Set[6],
+ * # Set[9, 10, 11]]
*
* Returns an enumerator if no block is given.
*/
@@ -938,19 +976,7 @@ set_i_divide(VALUE set)
RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
if (rb_block_arity() == 2) {
- VALUE final_set = set_s_create(0, 0, rb_cSet);
- struct set_divide_args args = {
- .self = set,
- .set_class = rb_obj_class(set),
- .final_set = final_set,
- .hash = rb_hash_new(),
- .current_set = 0,
- .current_item = 0,
- .ni = 0,
- .nj = 0
- };
- rb_block_call(set, id_each, 0, 0, set_divide_block, (VALUE)&args);
- return final_set;
+ return set_divide_arity2(set);
}
VALUE values = rb_hash_values(set_i_classify(set));
@@ -971,9 +997,9 @@ set_clear_i(st_data_t key, st_data_t dummy)
*
* Removes all elements and returns self.
*
- * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}>
- * set.clear #=> #<Set: {}>
- * set #=> #<Set: {}>
+ * set = Set[1, 'c', :s] #=> Set[1, "c", :s]
+ * set.clear #=> Set[]
+ * set #=> Set[]
*/
static VALUE
set_i_clear(VALUE set)
@@ -984,7 +1010,7 @@ set_i_clear(VALUE set)
set_iter(set, set_clear_i, 0);
}
else {
- set_clear(RSET_TABLE(set));
+ set_table_clear(RSET_TABLE(set));
set_compact_after_delete(set);
}
return set;
@@ -1000,8 +1026,8 @@ static int
set_intersection_i(st_data_t key, st_data_t tmp)
{
struct set_intersection_data *data = (struct set_intersection_data *)tmp;
- if (set_lookup(data->other, key)) {
- set_table_insert_wb(data->into, data->set, key, NULL);
+ if (set_table_lookup(data->other, key)) {
+ set_table_insert_wb(data->into, data->set, key);
}
return ST_CONTINUE;
@@ -1021,8 +1047,8 @@ set_intersection_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, data))
* Returns a new set containing elements common to the set and the given
* enumerable object.
*
- * Set[1, 3, 5] & Set[3, 2, 1] #=> #<Set: {3, 1}>
- * Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> #<Set: {"a", "b"}>
+ * Set[1, 3, 5] & Set[3, 2, 1] #=> Set[3, 1]
+ * Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> Set["a", "b"]
*/
static VALUE
set_i_intersection(VALUE set, VALUE other)
@@ -1097,7 +1123,7 @@ static int
set_merge_i(st_data_t key, st_data_t data)
{
struct set_merge_args *args = (struct set_merge_args *)data;
- set_table_insert_wb(args->into, args->set, key, NULL);
+ set_table_insert_wb(args->into, args->set, key);
return ST_CONTINUE;
}
@@ -1105,7 +1131,7 @@ static VALUE
set_merge_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set))
{
VALUE element = key;
- set_insert_wb(set, element, &element);
+ set_insert_wb(set, element);
return element;
}
@@ -1120,14 +1146,10 @@ set_merge_enum_into(VALUE set, VALUE arg)
set_iter(arg, set_merge_i, (st_data_t)&args);
}
else if (RB_TYPE_P(arg, T_ARRAY)) {
- long len = RARRAY_LEN(arg);
- if (RARRAY_LEN(arg) != 0) {
- set_table *into = RSET_TABLE(set);
- RARRAY_PTR_USE(arg, ptr, {
- for(; len > 0; len--, ptr++) {
- set_table_insert_wb(into, set, *ptr, NULL);
- }
- });
+ long i;
+ set_table *into = RSET_TABLE(set);
+ for (i=0; i<RARRAY_LEN(arg); i++) {
+ set_table_insert_wb(into, set, RARRAY_AREF(arg, i));
}
}
else {
@@ -1148,6 +1170,11 @@ set_i_merge(int argc, VALUE *argv, VALUE set)
if (rb_keyword_given_p()) {
rb_raise(rb_eArgError, "no keywords accepted");
}
+
+ if (set_iterating_p(set)) {
+ rb_raise(rb_eRuntimeError, "cannot add to set during iteration");
+ }
+
rb_check_frozen(set);
int i;
@@ -1176,9 +1203,9 @@ set_reset_table_with_type(VALUE set, const struct st_hash_type *type)
.into = new
};
set_iter(set, set_merge_i, (st_data_t)&args);
- set_free_embedded(sobj);
+ set_free_embedded_table(&sobj->table);
memcpy(&sobj->table, new, sizeof(*new));
- free(new);
+ SIZED_FREE(new);
}
else {
sobj->table.type = type;
@@ -1248,8 +1275,8 @@ set_xor_i(st_data_t key, st_data_t data)
VALUE element = (VALUE)key;
VALUE set = (VALUE)data;
set_table *table = RSET_TABLE(set);
- if (set_table_insert_wb(table, set, element, &element)) {
- set_delete(table, &element);
+ if (set_table_insert_wb(table, set, element)) {
+ set_table_delete(table, &element);
}
return ST_CONTINUE;
}
@@ -1262,21 +1289,23 @@ set_xor_i(st_data_t key, st_data_t data)
* given enumerable object. <tt>(set ^ enum)</tt> is equivalent to
* <tt>((set | enum) - (set & enum))</tt>.
*
- * Set[1, 2] ^ Set[2, 3] #=> #<Set: {3, 1}>
- * Set[1, 'b', 'c'] ^ ['b', 'd'] #=> #<Set: {"d", 1, "c"}>
+ * Set[1, 2] ^ Set[2, 3] #=> Set[3, 1]
+ * Set[1, 'b', 'c'] ^ ['b', 'd'] #=> Set["d", 1, "c"]
*/
static VALUE
set_i_xor(VALUE set, VALUE other)
{
- VALUE new_set;
+ VALUE new_set = rb_obj_dup(set);
+
if (rb_obj_is_kind_of(other, rb_cSet)) {
- new_set = other;
+ set_iter(other, set_xor_i, (st_data_t)new_set);
}
else {
- new_set = set_s_alloc(rb_obj_class(set));
- set_merge_enum_into(new_set, other);
+ VALUE tmp = set_s_alloc(rb_cSet);
+ set_merge_enum_into(tmp, other);
+ set_iter(tmp, set_xor_i, (st_data_t)new_set);
}
- set_iter(set, set_xor_i, (st_data_t)new_set);
+
return new_set;
}
@@ -1287,8 +1316,8 @@ set_i_xor(VALUE set, VALUE other)
* Returns a new set built by merging the set and the elements of the
* given enumerable object.
*
- * Set[1, 2, 3] | Set[2, 4, 5] #=> #<Set: {1, 2, 3, 4, 5}>
- * Set[1, 5, 'z'] | (1..6) #=> #<Set: {1, 5, "z", 2, 3, 4, 6}>
+ * Set[1, 2, 3] | Set[2, 4, 5] #=> Set[1, 2, 3, 4, 5]
+ * Set[1, 5, 'z'] | (1..6) #=> Set[1, 5, "z", 2, 3, 4, 6]
*/
static VALUE
set_i_union(VALUE set, VALUE other)
@@ -1301,7 +1330,7 @@ set_i_union(VALUE set, VALUE other)
static int
set_remove_i(st_data_t key, st_data_t from)
{
- set_delete((struct set_table *)from, (st_data_t *)&key);
+ set_table_delete((struct set_table *)from, (st_data_t *)&key);
return ST_CONTINUE;
}
@@ -1309,7 +1338,7 @@ static VALUE
set_remove_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set))
{
rb_check_frozen(set);
- set_delete(RSET_TABLE(set), (st_data_t *)&key);
+ set_table_delete(RSET_TABLE(set), (st_data_t *)&key);
return key;
}
@@ -1346,8 +1375,8 @@ set_i_subtract(VALUE set, VALUE other)
* Returns a new set built by duplicating the set, removing every
* element that appears in the given enumerable object.
*
- * Set[1, 3, 5] - Set[1, 5] #=> #<Set: {3}>
- * Set['a', 'b', 'z'] - ['a', 'c'] #=> #<Set: {"b", "z"}>
+ * Set[1, 3, 5] - Set[1, 5] #=> Set[3]
+ * Set['a', 'b', 'z'] - ['a', 'c'] #=> Set["b", "z"]
*/
static VALUE
set_i_difference(VALUE set, VALUE other)
@@ -1382,7 +1411,7 @@ set_i_each(VALUE set)
static int
set_collect_i(st_data_t key, st_data_t data)
{
- set_insert_wb((VALUE)data, rb_yield((VALUE)key), NULL);
+ set_insert_wb((VALUE)data, rb_yield((VALUE)key));
return ST_CONTINUE;
}
@@ -1411,7 +1440,7 @@ static int
set_keep_if_i(st_data_t key, st_data_t into)
{
if (!RTEST(rb_yield((VALUE)key))) {
- set_delete((set_table *)into, &key);
+ set_table_delete((set_table *)into, &key);
}
return ST_CONTINUE;
}
@@ -1463,9 +1492,9 @@ set_i_select(VALUE set)
* Replaces the contents of the set with the contents of the given
* enumerable object and returns self.
*
- * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}>
- * set.replace([1, 2]) #=> #<Set: {1, 2}>
- * set #=> #<Set: {1, 2}>
+ * set = Set[1, 'c', :s] #=> Set[1, "c", :s]
+ * set.replace([1, 2]) #=> Set[1, 2]
+ * set #=> Set[1, 2]
*/
static VALUE
set_i_replace(VALUE set, VALUE other)
@@ -1483,7 +1512,7 @@ set_i_replace(VALUE set, VALUE other)
// make sure enum is enumerable before calling clear
enum_method_id(other);
- set_clear(RSET_TABLE(set));
+ set_table_clear(RSET_TABLE(set));
set_merge_enum_into(set, other);
}
@@ -1594,7 +1623,7 @@ static int
set_le_i(st_data_t key, st_data_t arg)
{
struct set_subset_data *data = (struct set_subset_data *)arg;
- if (set_lookup(data->table, key)) return ST_CONTINUE;
+ if (set_table_lookup(data->table, key)) return ST_CONTINUE;
data->result = Qfalse;
return ST_STOP;
}
@@ -1670,7 +1699,7 @@ static int
set_intersect_i(st_data_t key, st_data_t arg)
{
VALUE *args = (VALUE *)arg;
- if (set_lookup((set_table *)args[0], key)) {
+ if (set_table_lookup((set_table *)args[0], key)) {
args[1] = Qtrue;
return ST_STOP;
}
@@ -1741,7 +1770,7 @@ set_i_disjoint(VALUE set, VALUE other)
* set <=> other -> -1, 0, 1, or nil
*
* Returns 0 if the set are equal, -1 / 1 if the set is a
- * proper subset / superset of the given set, or or nil if
+ * proper subset / superset of the given set, or nil if
* they both have unique elements.
*/
static VALUE
@@ -1779,7 +1808,7 @@ set_eql_i(st_data_t item, st_data_t arg)
{
struct set_equal_data *data = (struct set_equal_data *)arg;
- if (!set_lookup(RSET_TABLE(data->set), item)) {
+ if (!set_table_lookup(RSET_TABLE(data->set), item)) {
data->result = Qfalse;
return ST_STOP;
}
@@ -1910,24 +1939,67 @@ compat_loader(VALUE self, VALUE a)
return set_i_from_hash(self, rb_ivar_get(a, id_i_hash));
}
+/* C-API functions */
+
+void
+rb_set_foreach(VALUE set, int (*func)(VALUE element, VALUE arg), VALUE arg)
+{
+ set_iter(set, func, arg);
+}
+
+VALUE
+rb_set_new(void)
+{
+ return set_alloc_with_size(rb_cSet, 0);
+}
+
+VALUE
+rb_set_new_capa(size_t capa)
+{
+ return set_alloc_with_size(rb_cSet, (st_index_t)capa);
+}
+
+bool
+rb_set_lookup(VALUE set, VALUE element)
+{
+ return RSET_IS_MEMBER(set, element);
+}
+
+bool
+rb_set_add(VALUE set, VALUE element)
+{
+ return set_i_add_p(set, element) != Qnil;
+}
+
+VALUE
+rb_set_clear(VALUE set)
+{
+ return set_i_clear(set);
+}
+
+bool
+rb_set_delete(VALUE set, VALUE element)
+{
+ return set_i_delete_p(set, element) != Qnil;
+}
+
+size_t
+rb_set_size(VALUE set)
+{
+ return RSET_SIZE(set);
+}
+
/*
* Document-class: Set
*
- * Copyright (c) 2002-2024 Akinori MUSHA <knu@iDaemons.org>
- *
- * Documentation by Akinori MUSHA and Gavin Sinclair.
- *
- * All rights reserved. You can redistribute and/or modify it under the same
- * terms as Ruby.
- *
* The Set class implements a collection of unordered values with no
* duplicates. It is a hybrid of Array's intuitive inter-operation
* facilities and Hash's fast lookup.
*
- * Set is easy to use with Enumerable objects (implementing `each`).
+ * Set is easy to use with Enumerable objects (implementing #each).
* Most of the initializer methods and binary operators accept generic
* Enumerable objects besides sets and arrays. An Enumerable object
- * can be converted to Set using the `to_set` method.
+ * can be converted to Set using the +to_set+ method.
*
* Set uses a data structure similar to Hash for storage, except that
* it only has keys and no values.
@@ -1951,11 +2023,11 @@ compat_loader(VALUE self, VALUE a)
*
* == Example
*
- * s1 = Set[1, 2] #=> #<Set: {1, 2}>
- * s2 = [1, 2].to_set #=> #<Set: {1, 2}>
+ * s1 = Set[1, 2] #=> Set[1, 2]
+ * s2 = [1, 2].to_set #=> Set[1, 2]
* s1 == s2 #=> true
- * s1.add("foo") #=> #<Set: {1, 2, "foo"}>
- * s1.merge([2, 6]) #=> #<Set: {1, 2, "foo", 6}>
+ * s1.add("foo") #=> Set[1, 2, "foo"]
+ * s1.merge([2, 6]) #=> Set[1, 2, "foo", 6]
* s1.subset?(s2) #=> false
* s2.subset?(s1) #=> true
*
@@ -1963,12 +2035,42 @@ compat_loader(VALUE self, VALUE a)
*
* - Akinori MUSHA <knu@iDaemons.org> (current maintainer)
*
- * == What's Here
+ * == Inheriting from \Set
+ *
+ * Before Ruby 4.0 (released December 2025), \Set had a different, less
+ * efficient implementation. It was reimplemented in C, and the behavior
+ * of some of the core methods were adjusted.
+ *
+ * To keep backward compatibility, when a class is inherited from \Set,
+ * additional module +Set::SubclassCompatible+ is included, which makes
+ * the inherited class behavior, as well as internal method names,
+ * closer to what it was before Ruby 4.0.
*
- * First, what's elsewhere. \Class \Set:
+ * It can be easily seen, for example, in the #inspect method behavior:
+ *
+ * p Set[1, 2, 3]
+ * # prints "Set[1, 2, 3]"
+ *
+ * class MySet < Set
+ * end
+ * p MySet[1, 2, 3]
+ * # prints "#<MySet: {1, 2, 3}>", like it was in Ruby 3.4
*
- * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here].
- * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here],
+ * For new code, if backward compatibility is not necessary,
+ * it is recommended to instead inherit from +Set::CoreSet+, which
+ * avoids including the "compatibility" layer:
+ *
+ * class MyCoreSet < Set::CoreSet
+ * end
+ * p MyCoreSet[1, 2, 3]
+ * # prints "MyCoreSet[1, 2, 3]"
+ *
+ * == Set's methods
+ *
+ * First, what's elsewhere. \Class \Set:
+ *
+ * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here].
+ * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here],
* which provides dozens of additional methods.
*
* In particular, class \Set does not have many methods of its own
@@ -1977,16 +2079,15 @@ compat_loader(VALUE self, VALUE a)
*
* Here, class \Set provides methods that are useful for:
*
- * - {Creating an Array}[rdoc-ref:Array@Methods+for+Creating+an+Array]
- * - {Creating a Set}[rdoc-ref:Array@Methods+for+Creating+a+Set]
- * - {Set Operations}[rdoc-ref:Array@Methods+for+Set+Operations]
- * - {Comparing}[rdoc-ref:Array@Methods+for+Comparing]
- * - {Querying}[rdoc-ref:Array@Methods+for+Querying]
- * - {Assigning}[rdoc-ref:Array@Methods+for+Assigning]
- * - {Deleting}[rdoc-ref:Array@Methods+for+Deleting]
- * - {Converting}[rdoc-ref:Array@Methods+for+Converting]
- * - {Iterating}[rdoc-ref:Array@Methods+for+Iterating]
- * - {And more....}[rdoc-ref:Array@Other+Methods]
+ * - {Creating a Set}[rdoc-ref:Set@Methods+for+Creating+a+Set]
+ * - {Set Operations}[rdoc-ref:Set@Methods+for+Set+Operations]
+ * - {Comparing}[rdoc-ref:Set@Methods+for+Comparing]
+ * - {Querying}[rdoc-ref:Set@Methods+for+Querying]
+ * - {Assigning}[rdoc-ref:Set@Methods+for+Assigning]
+ * - {Deleting}[rdoc-ref:Set@Methods+for+Deleting]
+ * - {Converting}[rdoc-ref:Set@Methods+for+Converting]
+ * - {Iterating}[rdoc-ref:Set@Methods+for+Iterating]
+ * - {And more....}[rdoc-ref:Set@Other+Methods]
*
* === Methods for Creating a \Set
*
@@ -2129,6 +2230,8 @@ Init_Set(void)
id_any_p = rb_intern_const("any?");
id_new = rb_intern_const("new");
id_i_hash = rb_intern_const("@hash");
+ id_subclass_compatible = rb_intern_const("SubclassCompatible");
+ id_class_methods = rb_intern_const("ClassMethods");
id_set_iter_lev = rb_make_internal_id();
rb_define_alloc_func(rb_cSet, set_s_alloc);
@@ -2193,12 +2296,16 @@ Init_Set(void)
rb_define_method(rb_cSet, "superset?", set_i_superset, 1);
rb_define_alias(rb_cSet, ">=", "superset?");
rb_define_method(rb_cSet, "to_a", set_i_to_a, 0);
- rb_define_method(rb_cSet, "to_h", set_i_to_h, 0);
- rb_define_method(rb_cSet, "to_set", set_i_to_set, -1);
+ rb_define_method(rb_cSet, "to_set", set_i_to_set, 0);
/* :nodoc: */
VALUE compat = rb_define_class_under(rb_cSet, "compatible", rb_cObject);
rb_marshal_define_compat(rb_cSet, compat, compat_dumper, compat_loader);
+ // Create Set::CoreSet before defining inherited, so it does not include
+ // the backwards compatibility layer.
+ rb_define_class_under(rb_cSet, "CoreSet", rb_cSet);
+ rb_define_private_method(rb_singleton_class(rb_cSet), "inherited", set_s_inherited, 1);
+
rb_provide("set.rb");
}