Skip to content

Commit

Permalink
Set operations: intersection, union, difference
Browse files Browse the repository at this point in the history
  • Loading branch information
emirpasic committed Apr 12, 2022
1 parent 7cf7cea commit e438e7b
Show file tree
Hide file tree
Showing 9 changed files with 427 additions and 5 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,14 +230,19 @@ func main() {

A set is a data structure that can store elements and has no repeated values. It is a computer implementation of the mathematical concept of a finite set. Unlike most other collection types, rather than retrieving a specific element from a set, one typically tests an element for membership in a set. This structure is often used to ensure that no duplicates are present in a container.

Sets additionally allow set operations such as [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)), [union](https://en.wikipedia.org/wiki/Union_(set_theory)), [difference](https://proofwiki.org/wiki/Definition:Set_Difference), etc.

Implements [Container](#containers) interface.

```go
type Set interface {
Add(elements ...interface{})
Remove(elements ...interface{})
Contains(elements ...interface{}) bool

// Intersection(another *Set) *Set
// Union(another *Set) *Set
// Difference(another *Set) *Set

containers.Container
// Empty() bool
// Size() int
Expand Down
4 changes: 0 additions & 4 deletions containers/enumerable.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ type EnumerableWithIndex interface {

// Map invokes the given function once for each element and returns a
// container containing the values returned by the given function.
// TODO would appreciate help on how to enforce this in containers (don't want to type assert when chaining)
// Map(func(index int, value interface{}) interface{}) Container

// Select returns a new container containing all elements for which the given function returns a true value.
// TODO need help on how to enforce this in containers (don't want to type assert when chaining)
// Select(func(index int, value interface{}) bool) Container

// Any passes each element of the container to the given function and
Expand All @@ -39,11 +37,9 @@ type EnumerableWithKey interface {

// Map invokes the given function once for each element and returns a container
// containing the values returned by the given function as key/value pairs.
// TODO need help on how to enforce this in containers (don't want to type assert when chaining)
// Map(func(key interface{}, value interface{}) (interface{}, interface{})) Container

// Select returns a new container containing all elements for which the given function returns a true value.
// TODO need help on how to enforce this in containers (don't want to type assert when chaining)
// Select(func(key interface{}, value interface{}) bool) Container

// Any passes each element of the container to the given function and
Expand Down
55 changes: 55 additions & 0 deletions sets/hashset/hashset.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,58 @@ func (set *Set) String() string {
str += strings.Join(items, ", ")
return str
}

// Intersection builds a new set holding every element that is present
// in both "set" and "another".
// Ref: https://en.wikipedia.org/wiki/Intersection_(set_theory)
func (set *Set) Intersection(another *Set) *Set {
	result := New()

	// Walk the smaller operand and probe the larger one, so the loop runs
	// over the shorter side; on a tie the receiver is the one walked.
	small, large := set, another
	if set.Size() > another.Size() {
		small, large = another, set
	}

	for item := range small.items {
		if _, found := large.items[item]; found {
			result.Add(item)
		}
	}

	return result
}

// Union builds a new set holding every element that is present in "set",
// in "another", or in both.
// Ref: https://en.wikipedia.org/wiki/Union_(set_theory)
func (set *Set) Union(another *Set) *Set {
	result := New()

	// Copy the receiver's elements first, then the argument's.
	for _, source := range []*Set{set, another} {
		for item := range source.items {
			result.Add(item)
		}
	}

	return result
}

// Difference builds a new set holding every element of "set" that is
// absent from "another".
// Ref: https://proofwiki.org/wiki/Definition:Set_Difference
func (set *Set) Difference(another *Set) *Set {
	result := New()

	for item := range set.items {
		if _, excluded := another.items[item]; excluded {
			continue
		}
		result.Add(item)
	}

	return result
}
66 changes: 66 additions & 0 deletions sets/hashset/hashset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,72 @@ func TestSetSerialization(t *testing.T) {
}
}

// TestSetIntersection checks Intersection on two freshly created sets and
// then on two sets that overlap in exactly "c" and "d".
func TestSetIntersection(t *testing.T) {
	left, right := New(), New()

	// Nothing has been added yet, so the intersection is expected to be empty.
	common := left.Intersection(right)
	if got, want := common.Size(), 0; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}

	left.Add("a", "b", "c", "d")
	right.Add("c", "d", "e", "f")

	common = left.Intersection(right)

	if got, want := common.Size(), 2; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}
	if got := common.Contains("c", "d"); !got {
		t.Errorf("Got %v expected %v", got, true)
	}
}

// TestSetUnion checks Union on two freshly created sets and then on two
// sets that share "c" and "d", expecting six distinct elements overall.
func TestSetUnion(t *testing.T) {
	left, right := New(), New()

	// Nothing has been added yet, so the union is expected to be empty.
	merged := left.Union(right)
	if got, want := merged.Size(), 0; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}

	left.Add("a", "b", "c", "d")
	right.Add("c", "d", "e", "f")

	merged = left.Union(right)

	if got, want := merged.Size(), 6; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}
	if got := merged.Contains("a", "b", "c", "d", "e", "f"); !got {
		t.Errorf("Got %v expected %v", got, true)
	}
}

// TestSetDifference checks Difference on two freshly created sets and then
// on two overlapping sets, expecting only "a" and "b" to remain.
func TestSetDifference(t *testing.T) {
	left, right := New(), New()

	// Nothing has been added yet, so the difference is expected to be empty.
	remainder := left.Difference(right)
	if got, want := remainder.Size(), 0; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}

	left.Add("a", "b", "c", "d")
	right.Add("c", "d", "e", "f")

	remainder = left.Difference(right)

	if got, want := remainder.Size(), 2; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}
	if got := remainder.Contains("a", "b"); !got {
		t.Errorf("Got %v expected %v", got, true)
	}
}

func benchmarkContains(b *testing.B, set *Set, size int) {
for i := 0; i < b.N; i++ {
for n := 0; n < size; n++ {
Expand Down
55 changes: 55 additions & 0 deletions sets/linkedhashset/linkedhashset.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,58 @@ func (set *Set) String() string {
str += strings.Join(items, ", ")
return str
}

// Intersection builds a new set holding every element that is present
// in both "set" and "another".
// Ref: https://en.wikipedia.org/wiki/Intersection_(set_theory)
//
// NOTE(review): elements are added to the result in map iteration order,
// which Go leaves unspecified. If this set type is meant to preserve
// insertion order, the result's order is presumably not stable — confirm.
func (set *Set) Intersection(another *Set) *Set {
	result := New()

	// Walk the smaller operand and probe the larger one, so the loop runs
	// over the shorter side; on a tie the receiver is the one walked.
	small, large := set, another
	if set.Size() > another.Size() {
		small, large = another, set
	}

	for item := range small.table {
		if _, found := large.table[item]; found {
			result.Add(item)
		}
	}

	return result
}

// Union builds a new set holding every element that is present in "set",
// in "another", or in both.
// Ref: https://en.wikipedia.org/wiki/Union_(set_theory)
//
// NOTE(review): within each operand, elements are added in map iteration
// order, which Go leaves unspecified. If this set type is meant to preserve
// insertion order, the result's order is presumably not stable — confirm.
func (set *Set) Union(another *Set) *Set {
	result := New()

	// Copy the receiver's elements first, then the argument's.
	for _, source := range []*Set{set, another} {
		for item := range source.table {
			result.Add(item)
		}
	}

	return result
}

// Difference builds a new set holding every element of "set" that is
// absent from "another".
// Ref: https://proofwiki.org/wiki/Definition:Set_Difference
//
// NOTE(review): elements are added to the result in map iteration order,
// which Go leaves unspecified. If this set type is meant to preserve
// insertion order, the result's order is presumably not stable — confirm.
func (set *Set) Difference(another *Set) *Set {
	result := New()

	for item := range set.table {
		if _, excluded := another.table[item]; excluded {
			continue
		}
		result.Add(item)
	}

	return result
}
66 changes: 66 additions & 0 deletions sets/linkedhashset/linkedhashset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,72 @@ func TestSetSerialization(t *testing.T) {
}
}

// TestSetIntersection checks Intersection on two freshly created sets and
// then on two sets that overlap in exactly "c" and "d".
func TestSetIntersection(t *testing.T) {
	left, right := New(), New()

	// Nothing has been added yet, so the intersection is expected to be empty.
	common := left.Intersection(right)
	if got, want := common.Size(), 0; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}

	left.Add("a", "b", "c", "d")
	right.Add("c", "d", "e", "f")

	common = left.Intersection(right)

	if got, want := common.Size(), 2; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}
	if got := common.Contains("c", "d"); !got {
		t.Errorf("Got %v expected %v", got, true)
	}
}

// TestSetUnion checks Union on two freshly created sets and then on two
// sets that share "c" and "d", expecting six distinct elements overall.
func TestSetUnion(t *testing.T) {
	left, right := New(), New()

	// Nothing has been added yet, so the union is expected to be empty.
	merged := left.Union(right)
	if got, want := merged.Size(), 0; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}

	left.Add("a", "b", "c", "d")
	right.Add("c", "d", "e", "f")

	merged = left.Union(right)

	if got, want := merged.Size(), 6; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}
	if got := merged.Contains("a", "b", "c", "d", "e", "f"); !got {
		t.Errorf("Got %v expected %v", got, true)
	}
}

// TestSetDifference checks Difference on two freshly created sets and then
// on two overlapping sets, expecting only "a" and "b" to remain.
func TestSetDifference(t *testing.T) {
	left, right := New(), New()

	// Nothing has been added yet, so the difference is expected to be empty.
	remainder := left.Difference(right)
	if got, want := remainder.Size(), 0; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}

	left.Add("a", "b", "c", "d")
	right.Add("c", "d", "e", "f")

	remainder = left.Difference(right)

	if got, want := remainder.Size(), 2; got != want {
		t.Errorf("Got %v expected %v", got, want)
	}
	if got := remainder.Contains("a", "b"); !got {
		t.Errorf("Got %v expected %v", got, true)
	}
}

func benchmarkContains(b *testing.B, set *Set, size int) {
for i := 0; i < b.N; i++ {
for n := 0; n < size; n++ {
Expand Down
3 changes: 3 additions & 0 deletions sets/sets.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ type Set interface {
Add(elements ...interface{})
Remove(elements ...interface{})
Contains(elements ...interface{}) bool
// Intersection(another *Set) *Set
// Union(another *Set) *Set
// Difference(another *Set) *Set

containers.Container
// Empty() bool
Expand Down
Loading

0 comments on commit e438e7b

Please sign in to comment.