# frozen_string_literal: true # :markup: markdown # # set.rb - defines the Set class # # Copyright (c) 2002-2020 Akinori MUSHA # # Documentation by Akinori MUSHA and Gavin Sinclair. # # All rights reserved. You can redistribute and/or modify it under the same # terms as Ruby. ## # This library provides the Set class, which deals with a collection # of unordered values with no duplicates. It is a hybrid of Array's # intuitive inter-operation facilities and Hash's fast lookup. # # The method `to_set` is added to Enumerable for convenience. # # Set implements a collection of unordered values with no duplicates. # This is a hybrid of Array's intuitive inter-operation facilities and # Hash's fast lookup. # # Set is easy to use with Enumerable objects (implementing `each`). # Most of the initializer methods and binary operators accept generic # Enumerable objects besides sets and arrays. An Enumerable object # can be converted to Set using the `to_set` method. # # Set uses Hash as storage, so you must note the following points: # # * Equality of elements is determined according to Object#eql? and # Object#hash. Use Set#compare_by_identity to make a set compare # its elements by their identity. # * Set assumes that the identity of each element does not change # while it is stored. Modifying an element of a set will render the # set to an unreliable state. # * When a string is to be stored, a frozen copy of the string is # stored instead unless the original string is already frozen. # # ## Comparison # # The comparison operators `<`, `>`, `<=`, and `>=` are implemented as # shorthand for the {proper_,}{subset?,superset?} methods. The `<=>` # operator reflects this order, or return `nil` for sets that both # have distinct elements (`{x, y}` vs. `{x, z}` for example). # # ## Example # # ```ruby # require 'set' # s1 = Set[1, 2] #=> # # s2 = [1, 2].to_set #=> # # s1 == s2 #=> true # s1.add("foo") #=> # # s1.merge([2, 6]) #=> # # s1.subset?(s2) #=> false # s2.subset?(s1) #=> true # ``` # # ## Contact # # - Akinori MUSHA <> (current maintainer) # # ## What's Here # # First, what's elsewhere. \Set includes the module # {Enumerable}[rdoc-ref:Enumerable], # which provides dozens of additional methods. # # In particular, class \Set does not have many methods of its own # for fetching or for iterating. # Instead, it relies on those in \Enumerable. # # Here, class \Set provides methods that are useful for: # # - [Creating a Set](#class-Set-label-Methods+for+Creating+a+Set) # - [Set Operations](#class-Set-label-Methods+for+Set+Operations) # - [Comparing](#class-Set-label-Methods+for+Comparing) # - [Querying](#class-Set-label-Methods+for+Querying) # - [Assigning](#class-Set-label-Methods+for+Assigning) # - [Deleting](#class-Set-label-Methods+for+Deleting) # - [Converting](#class-Set-label-Methods+for+Converting) # - [Iterating](#class-Set-label-Methods+for+Iterating) # - [And more....](#class-Set-label-Other+Methods) # # ### Methods for Creating a \Set # # - ::[] - # Returns a new set containing the given objects. # - ::new - # Returns a new set containing either the given objects # (if no block given) or the return values from the called block # (if a block given). # # ### Methods for \Set Operations # # - [|](#method-i-7C) (aliased as #union and #+) - # Returns a new set containing all elements from +self+ # and all elements from a given enumerable (no duplicates). # - [&](#method-i-26) (aliased as #intersection) - # Returns a new set containing all elements common to +self+ # and a given enumerable. # - [-](#method-i-2D) (aliased as #difference) - # Returns a copy of +self+ with all elements # in a given enumerable removed. # - [\^](#method-i-5E) - # Returns a new set containing all elements from +self+ # and a given enumerable except those common to both. # # ### Methods for Comparing # # - [<=>](#method-i-3C-3D-3E) - # Returns -1, 0, or 1 as +self+ is less than, equal to, # or greater than a given object. # - [==](#method-i-3D-3D) - # Returns whether +self+ and a given enumerable are equal, # as determined by Object#eql?. # - \#compare_by_identity? - # Returns whether the set considers only identity # when comparing elements. # # ### Methods for Querying # # - \#length (aliased as #size) - # Returns the count of elements. # - \#empty? - # Returns whether the set has no elements. # - \#include? (aliased as #member? and #===) - # Returns whether a given object is an element in the set. # - \#subset? (aliased as [<=](#method-i-3C-3D)) - # Returns whether a given object is a subset of the set. # - \#proper_subset? (aliased as [<](#method-i-3C)) - # Returns whether a given enumerable is a proper subset of the set. # - \#superset? (aliased as [<=](#method-i-3E-3D])) - # Returns whether a given enumerable is a superset of the set. # - \#proper_superset? (aliased as [>](#method-i-3E)) - # Returns whether a given enumerable is a proper superset of the set. # - \#disjoint? - # Returns +true+ if the set and a given enumerable # have no common elements, +false+ otherwise. # - \#intersect? - # Returns +true+ if the set and a given enumerable - # have any common elements, +false+ otherwise. # - \#compare_by_identity? - # Returns whether the set considers only identity # when comparing elements. # # ### Methods for Assigning # # - \#add (aliased as #<<) - # Adds a given object to the set; returns +self+. # - \#add? - # If the given object is not an element in the set, # adds it and returns +self+; otherwise, returns +nil+. # - \#merge - # Adds each given object to the set; returns +self+. # - \#replace - # Replaces the contents of the set with the contents # of a given enumerable. # # ### Methods for Deleting # # - \#clear - # Removes all elements in the set; returns +self+. # - \#delete - # Removes a given object from the set; returns +self+. # - \#delete? - # If the given object is an element in the set, # removes it and returns +self+; otherwise, returns +nil+. # - \#subtract - # Removes each given object from the set; returns +self+. # - \#delete_if - Removes elements specified by a given block. # - \#select! (aliased as #filter!) - # Removes elements not specified by a given block. # - \#keep_if - # Removes elements not specified by a given block. # - \#reject! # Removes elements specified by a given block. # # ### Methods for Converting # # - \#classify - # Returns a hash that classifies the elements, # as determined by the given block. # - \#collect! (aliased as #map!) - # Replaces each element with a block return-value. # - \#divide - # Returns a hash that classifies the elements, # as determined by the given block; # differs from #classify in that the block may accept # either one or two arguments. # - \#flatten - # Returns a new set that is a recursive flattening of +self+. # \#flatten! - # Replaces each nested set in +self+ with the elements from that set. # - \#inspect (aliased as #to_s) - # Returns a string displaying the elements. # - \#join - # Returns a string containing all elements, converted to strings # as needed, and joined by the given record separator. # - \#to_a - # Returns an array containing all set elements. # - \#to_set - # Returns +self+ if given no arguments and no block; # with a block given, returns a new set consisting of block # return values. # # ### Methods for Iterating # # - \#each - # Calls the block with each successive element; returns +self+. # # ### Other Methods # # - \#reset - # Resets the internal state; useful if an object # has been modified while an element in the set. # class Set include Enumerable # Creates a new set containing the given objects. # # Set[1, 2] # => # # Set[1, 2, 1] # => # # Set[1, 'c', :s] # => # def self.[](*ary) new(ary) end # Creates a new set containing the elements of the given enumerable # object. # # If a block is given, the elements of enum are preprocessed by the # given block. # # Set.new([1, 2]) #=> # # Set.new([1, 2, 1]) #=> # # Set.new([1, 'c', :s]) #=> # # Set.new(1..5) #=> # # Set.new([1, 2, 3]) { |x| x * x } #=> # def initialize(enum = nil, &block) # :yields: o @hash ||= Hash.new(false) enum.nil? and return if block do_with_enum(enum) { |o| add(block[o]) } else merge(enum) end end # Makes the set compare its elements by their identity and returns # self. This method may not be supported by all subclasses of Set. def compare_by_identity if @hash.respond_to?(:compare_by_identity) @hash.compare_by_identity self else raise NotImplementedError, "#{self.class.name}\##{__method__} is not implemented" end end # Returns true if the set will compare its elements by their # identity. Also see Set#compare_by_identity. def compare_by_identity? @hash.respond_to?(:compare_by_identity?) && @hash.compare_by_identity? end def do_with_enum(enum, &block) # :nodoc: if enum.respond_to?(:each_entry) enum.each_entry(&block) if block elsif enum.respond_to?(:each) enum.each(&block) if block else raise ArgumentError, "value must be enumerable" end end private :do_with_enum # Dup internal hash. def initialize_dup(orig) super @hash = orig.instance_variable_get(:@hash).dup end if Kernel.instance_method(:initialize_clone).arity != 1 # Clone internal hash. def initialize_clone(orig, **options) super @hash = orig.instance_variable_get(:@hash).clone(**options) end else # Clone internal hash. def initialize_clone(orig) super @hash = orig.instance_variable_get(:@hash).clone end end def freeze # :nodoc: @hash.freeze super end # Returns the number of elements. def size @hash.size end alias length size # Returns true if the set contains no elements. def empty? @hash.empty? end # Removes all elements and returns self. # # set = Set[1, 'c', :s] #=> # # set.clear #=> # # set #=> # def clear @hash.clear self end # Replaces the contents of the set with the contents of the given # enumerable object and returns self. # # set = Set[1, 'c', :s] #=> # # set.replace([1, 2]) #=> # # set #=> # def replace(enum) if enum.instance_of?(self.class) @hash.replace(enum.instance_variable_get(:@hash)) self else do_with_enum(enum) # make sure enum is enumerable before calling clear clear merge(enum) end end # Converts the set to an array. The order of elements is uncertain. # # Set[1, 2].to_a #=> [1, 2] # Set[1, 'c', :s].to_a #=> [1, "c", :s] def to_a @hash.keys end # Returns self if no arguments are given. Otherwise, converts the # set to another with `klass.new(self, *args, &block)`. # # In subclasses, returns `klass.new(self, *args, &block)` unless # overridden. def to_set(klass = Set, *args, &block) return self if instance_of?(Set) && klass == Set && block.nil? && args.empty? klass.new(self, *args, &block) end def flatten_merge(set, seen = Set.new) # :nodoc: set.each { |e| if e.is_a?(Set) if seen.include?(e_id = e.object_id) raise ArgumentError, "tried to flatten recursive Set" end seen.add(e_id) flatten_merge(e, seen) seen.delete(e_id) else add(e) end } self end protected :flatten_merge # Returns a new set that is a copy of the set, flattening each # containing set recursively. def flatten self.class.new.flatten_merge(self) end # Equivalent to Set#flatten, but replaces the receiver with the # result in place. Returns nil if no modifications were made. def flatten! replace(flatten()) if any? { |e| e.is_a?(Set) } end # Returns true if the set contains the given object. # # Note that include? and member? do not test member # equality using == as do other Enumerables. # # See also Enumerable#include? def include?(o) @hash[o] end alias member? include? # Returns true if the set is a superset of the given set. def superset?(set) case when set.instance_of?(self.class) && @hash.respond_to?(:>=) @hash >= set.instance_variable_get(:@hash) when set.is_a?(Set) size >= set.size && set.all? { |o| include?(o) } else raise ArgumentError, "value must be a set" end end alias >= superset? # Returns true if the set is a proper superset of the given set. def proper_superset?(set) case when set.instance_of?(self.class) && @hash.respond_to?(:>) @hash > set.instance_variable_get(:@hash) when set.is_a?(Set) size > set.size && set.all? { |o| include?(o) } else raise ArgumentError, "value must be a set" end end alias > proper_superset? # Returns true if the set is a subset of the given set. def subset?(set) case when set.instance_of?(self.class) && @hash.respond_to?(:<=) @hash <= set.instance_variable_get(:@hash) when set.is_a?(Set) size <= set.size && all? { |o| set.include?(o) } else raise ArgumentError, "value must be a set" end end alias <= subset? # Returns true if the set is a proper subset of the given set. def proper_subset?(set) case when set.instance_of?(self.class) && @hash.respond_to?(:<) @hash < set.instance_variable_get(:@hash) when set.is_a?(Set) size < set.size && all? { |o| set.include?(o) } else raise ArgumentError, "value must be a set" end end alias < proper_subset? # Returns 0 if the set are equal, # -1 / +1 if the set is a proper subset / superset of the given set, # or nil if they both have unique elements. def <=>(set) return unless set.is_a?(Set) case size <=> set.size when -1 then -1 if proper_subset?(set) when +1 then +1 if proper_superset?(set) else 0 if self.==(set) end end # Returns true if the set and the given set have at least one # element in common. # # Set[1, 2, 3].intersect? Set[4, 5] #=> false # Set[1, 2, 3].intersect? Set[3, 4] #=> true def intersect?(set) set.is_a?(Set) or raise ArgumentError, "value must be a set" if size < set.size any? { |o| set.include?(o) } else set.any? { |o| include?(o) } end end # Returns true if the set and the given set have no element in # common. This method is the opposite of `intersect?`. # # Set[1, 2, 3].disjoint? Set[3, 4] #=> false # Set[1, 2, 3].disjoint? Set[4, 5] #=> true def disjoint?(set) !intersect?(set) end # Calls the given block once for each element in the set, passing # the element as parameter. Returns an enumerator if no block is # given. def each(&block) block or return enum_for(__method__) { size } @hash.each_key(&block) self end # Adds the given object to the set and returns self. Use `merge` to # add many elements at once. # # Set[1, 2].add(3) #=> # # Set[1, 2].add([3, 4]) #=> # # Set[1, 2].add(2) #=> # def add(o) @hash[o] = true self end alias << add # Adds the given object to the set and returns self. If the # object is already in the set, returns nil. # # Set[1, 2].add?(3) #=> # # Set[1, 2].add?([3, 4]) #=> # # Set[1, 2].add?(2) #=> nil def add?(o) add(o) unless include?(o) end # Deletes the given object from the set and returns self. Use # `subtract` to delete many items at once. def delete(o) @hash.delete(o) self end # Deletes the given object from the set and returns self. If the # object is not in the set, returns nil. def delete?(o) delete(o) if include?(o) end # Deletes every element of the set for which block evaluates to # true, and returns self. Returns an enumerator if no block is # given. def delete_if block_given? or return enum_for(__method__) { size } # @hash.delete_if should be faster, but using it breaks the order # of enumeration in subclasses. select { |o| yield o }.each { |o| @hash.delete(o) } self end # Deletes every element of the set for which block evaluates to # false, and returns self. Returns an enumerator if no block is # given. def keep_if block_given? or return enum_for(__method__) { size } # @hash.keep_if should be faster, but using it breaks the order of # enumeration in subclasses. reject { |o| yield o }.each { |o| @hash.delete(o) } self end # Replaces the elements with ones returned by `collect()`. # Returns an enumerator if no block is given. def collect! block_given? or return enum_for(__method__) { size } set = self.class.new each { |o| set << yield(o) } replace(set) end alias map! collect! # Equivalent to Set#delete_if, but returns nil if no changes were # made. Returns an enumerator if no block is given. def reject!(&block) block or return enum_for(__method__) { size } n = size delete_if(&block) self if size != n end # Equivalent to Set#keep_if, but returns nil if no changes were # made. Returns an enumerator if no block is given. def select!(&block) block or return enum_for(__method__) { size } n = size keep_if(&block) self if size != n end # Equivalent to Set#select! alias filter! select! # Merges the elements of the given enumerable object to the set and # returns self. def merge(enum) if enum.instance_of?(self.class) @hash.update(enum.instance_variable_get(:@hash)) else do_with_enum(enum) { |o| add(o) } end self end # Deletes every element that appears in the given enumerable object # and returns self. def subtract(enum) do_with_enum(enum) { |o| delete(o) } self end # Returns a new set built by merging the set and the elements of the # given enumerable object. # # Set[1, 2, 3] | Set[2, 4, 5] #=> # # Set[1, 5, 'z'] | (1..6) #=> # def |(enum) dup.merge(enum) end alias + | alias union | # Returns a new set built by duplicating the set, removing every # element that appears in the given enumerable object. # # Set[1, 3, 5] - Set[1, 5] #=> # # Set['a', 'b', 'z'] - ['a', 'c'] #=> # def -(enum) dup.subtract(enum) end alias difference - # Returns a new set containing elements common to the set and the # given enumerable object. # # Set[1, 3, 5] & Set[3, 2, 1] #=> # # Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> # def &(enum) n = self.class.new if enum.is_a?(Set) if enum.size > size each { |o| n.add(o) if enum.include?(o) } else enum.each { |o| n.add(o) if include?(o) } end else do_with_enum(enum) { |o| n.add(o) if include?(o) } end n end alias intersection & # Returns a new set containing elements exclusive between the set # and the given enumerable object. `(set ^ enum)` is equivalent to # `((set | enum) - (set & enum))`. # # Set[1, 2] ^ Set[2, 3] #=> # # Set[1, 'b', 'c'] ^ ['b', 'd'] #=> # def ^(enum) n = Set.new(enum) each { |o| n.add(o) unless n.delete?(o) } n end # Returns true if two sets are equal. The equality of each couple # of elements is defined according to Object#eql?. # # Set[1, 2] == Set[2, 1] #=> true # Set[1, 3, 5] == Set[1, 5] #=> false # Set['a', 'b', 'c'] == Set['a', 'c', 'b'] #=> true # Set['a', 'b', 'c'] == ['a', 'c', 'b'] #=> false def ==(other) if self.equal?(other) true elsif other.instance_of?(self.class) @hash == other.instance_variable_get(:@hash) elsif other.is_a?(Set) && self.size == other.size other.all? { |o| @hash.include?(o) } else false end end def hash # :nodoc: @hash.hash end def eql?(o) # :nodoc: return false unless o.is_a?(Set) @hash.eql?(o.instance_variable_get(:@hash)) end # Resets the internal state after modification to existing elements # and returns self. # # Elements will be reindexed and deduplicated. def reset if @hash.respond_to?(:rehash) @hash.rehash # This should perform frozenness check. else raise FrozenError, "can't modify frozen #{self.class.name}" if frozen? end self end # Returns true if the given object is a member of the set, # and false otherwise. # # Used in case statements: # # require 'set' # # case :apple # when Set[:potato, :carrot] # "vegetable" # when Set[:apple, :banana] # "fruit" # end # # => "fruit" # # Or by itself: # # Set[1, 2, 3] === 2 #=> true # Set[1, 2, 3] === 4 #=> false # alias === include? # Classifies the set by the return value of the given block and # returns a hash of {value => set of elements} pairs. The block is # called once for each element of the set, passing the element as # parameter. # # require 'set' # files = Set.new(Dir.glob("*.rb")) # hash = files.classify { |f| File.mtime(f).year } # hash #=> {2000=>#, # # 2001=>#, # # 2002=>#} # # Returns an enumerator if no block is given. def classify # :yields: o block_given? or return enum_for(__method__) { size } h = {} each { |i| (h[yield(i)] ||= self.class.new).add(i) } h end # Divides the set into a set of subsets according to the commonality # defined by the given block. # # If the arity of the block is 2, elements o1 and o2 are in common # if block.call(o1, o2) is true. Otherwise, elements o1 and o2 are # in common if block.call(o1) == block.call(o2). # # require 'set' # numbers = Set[1, 3, 4, 6, 9, 10, 11] # set = numbers.divide { |i,j| (i - j).abs == 1 } # set #=> #, # # #, # # #, # # #}> # # Returns an enumerator if no block is given. def divide(&func) func or return enum_for(__method__) { size } if func.arity == 2 require 'tsort' class << dig = {} # :nodoc: include TSort alias tsort_each_node each_key def tsort_each_child(node, &block) fetch(node).each(&block) end end each { |u| dig[u] = a = [] each{ |v| func.call(u, v) and a << v } } set = Set.new() dig.each_strongly_connected_component { |css| set.add(self.class.new(css)) } set else Set.new(classify(&func).values) end end # Returns a string created by converting each element of the set to a string # See also: Array#join def join(separator=nil) to_a.join(separator) end InspectKey = :__inspect_key__ # :nodoc: # Returns a string containing a human-readable representation of the # set ("#"). def inspect ids = (Thread.current[InspectKey] ||= []) if ids.include?(object_id) return sprintf('#<%s: {...}>', self.class.name) end ids << object_id begin return sprintf('#<%s: {%s}>', self.class, to_a.inspect[1..-2]) ensure ids.pop end end alias to_s inspect def pretty_print(pp) # :nodoc: pp.text sprintf('#<%s: {', self.class.name) pp.nest(1) { pp.seplist(self) { |o| pp.pp o } } pp.text "}>" end def pretty_print_cycle(pp) # :nodoc: pp.text sprintf('#<%s: {%s}>', self.class.name, empty? ? '' : '...') end end module Enumerable # Makes a set from the enumerable object with given arguments. # Needs to `require "set"` to use this method. def to_set(klass = Set, *args, &block) klass.new(self, *args, &block) end end autoload :SortedSet, "#{__dir__}/set/sorted_set"