diff --git a/container/intsets/sparse.go b/container/intsets/sparse.go new file mode 100644 index 0000000000..4c77ae4cd1 --- /dev/null +++ b/container/intsets/sparse.go @@ -0,0 +1,774 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package intsets provides Sparse, a compact and fast representation +// for sparse sets of int values. +// +// The time complexity of the operations Len, Insert, Remove and Has +// is in O(n) but in practice those methods are faster and more +// space-efficient than equivalent operations on sets based on the Go +// map type. The IsEmpty, Min, Max, Clear and TakeMin operations +// require constant time. +// +package intsets + +// TODO(adonovan): +// - Add SymmetricDifference(x, y *Sparse), i.e. x ∆ y. +// - Add InsertAll(...int), RemoveAll(...int) +// - Add 'bool changed' results for {Intersection,Difference}With too. +// +// TODO(adonovan): implement Dense, a sparse bit vector with a similar +// API. The space usage would be proportional to Max(), not Len(), +// and the implementation would be based upon big.Int. + +import ( + "bytes" + "fmt" +) + +// A Sparse is a set of int values. +// Sparse operations (even queries) are not concurrency-safe. +// +// The zero value for Sparse is a valid empty set. +// +// Sparse sets must be copied using the Copy method, not by assigning +// a Sparse value. +// +type Sparse struct { + // An uninitialized Sparse represents an empty set. + // An empty set may also be represented by + // root.next == root.prev == &root. + // In a non-empty set, root.next points to the first block and + // root.prev to the last. + // root.offset and root.bits are unused. + root block +} + +type word uintptr + +const ( + _m = ^word(0) + bitsPerWord = 8 << (_m>>8&1 + _m>>16&1 + _m>>32&1) + bitsPerBlock = 128 + wordsPerBlock = bitsPerBlock / bitsPerWord +) + +// Limit values of implementation-specific int type. +const ( + MaxInt = int(^uint(0) >> 1) + MinInt = -MaxInt - 1 +) + +// -- block ------------------------------------------------------------ + +// A set is represented as a circular doubly-linked list of blocks, +// each containing an offset and a bit array of fixed size +// bitsPerBlock; the blocks are ordered by increasing offset. +// +// The set contains an element x iff the block whose offset is x - (x +// mod bitsPerBlock) has the bit (x mod bitsPerBlock) set, where mod +// is the Euclidean remainder. +// +// A block may only be empty transiently. +// +type block struct { + offset int // offset mod bitsPerBlock == 0 + bits [wordsPerBlock]word // contains at least one set bit + next, prev *block // doubly-linked list of blocks +} + +// wordMask returns the word index (in block.bits) +// and single-bit mask for the block's ith bit. +func wordMask(i uint) (w uint, mask word) { + w = i / bitsPerWord + mask = 1 << (i % bitsPerWord) + return +} + +// insert sets the block b's ith bit and +// returns true if it was not already set. +// +func (b *block) insert(i uint) bool { + w, mask := wordMask(i) + if b.bits[w]&mask == 0 { + b.bits[w] |= mask + return true + } + return false +} + +// remove clears the block's ith bit and +// returns true if the bit was previously set. +// NB: may leave the block empty. +// +func (b *block) remove(i uint) bool { + w, mask := wordMask(i) + if b.bits[w]&mask != 0 { + b.bits[w] &^= mask + return true + } + return false +} + +// has reports whether the block's ith bit is set. +func (b *block) has(i uint) bool { + w, mask := wordMask(i) + return b.bits[w]&mask != 0 +} + +// empty reports whether b.len()==0, but more efficiently. +func (b *block) empty() bool { + for _, w := range b.bits { + if w != 0 { + return false + } + } + return true +} + +// len returns the number of set bits in block b. +func (b *block) len() int { + var l int + for _, w := range b.bits { + l += int(popcount(w)) + } + return l +} + +// max returns the maximum element of the block. +// The block must not be empty. +// +func (b *block) max() int { + bi := b.offset + bitsPerBlock + // Decrement bi by number of high zeros in last.bits. + for i := len(b.bits) - 1; i >= 0; i-- { + if w := b.bits[i]; w != 0 { + return bi - int(nlz(w)) - 1 + } + bi -= bitsPerWord + } + panic("BUG: empty block") +} + +// min returns the minimum element of the block, +// and also removes it if take is set. +// The block must not be initially empty. +// NB: may leave the block empty. +// +func (b *block) min(take bool) int { + for i, w := range b.bits { + if w != 0 { + tz := ntz(w) + if take { + b.bits[i] = w &^ (1 << tz) + } + return b.offset + int(i*bitsPerWord) + int(tz) + } + } + panic("BUG: empty block") +} + +// forEach calls f for each element of block b. +// f must not mutate b's enclosing Sparse. +func (b *block) forEach(f func(int)) { + for i, w := range b.bits { + offset := b.offset + i*bitsPerWord + // TODO(adonovan): opt: uses subword + // masks to avoid testing every bit. + for bi := 0; w != 0 && bi < bitsPerWord; bi++ { + if w&1 != 0 { + f(offset) + } + offset++ + w >>= 1 + } + } +} + +// offsetAndBitIndex returns the offset of the block that would +// contain x and the bit index of x within that block. +// +func offsetAndBitIndex(x int) (int, uint) { + mod := x % bitsPerBlock + if mod < 0 { + // Euclidean (non-negative) remainder + mod += bitsPerBlock + } + return x - mod, uint(mod) +} + +// -- Sparse -------------------------------------------------------------- + +// start returns the root's next block, which is the root block +// (if s.IsEmpty()) or the first true block otherwise. +// start has the side effect of ensuring that s is properly +// initialized. +// +func (s *Sparse) start() *block { + if s.root.next == nil { + s.root.next = &s.root + s.root.prev = &s.root + } else if s.root.next.prev != &s.root { + // Copying a Sparse x leads to pernicious corruption: the + // new Sparse y shares the old linked list, but iteration + // on y will never encounter &y.root so it goes into a + // loop. Fail fast before this occurs. + panic("A Sparse has been copied without (*Sparse).Copy()") + } + + return s.root.next +} + +// IsEmpty reports whether the set s is empty. +func (s *Sparse) IsEmpty() bool { + return s.start() == &s.root +} + +// Len returns the number of elements in the set s. +func (s *Sparse) Len() int { + var l int + for b := s.start(); b != &s.root; b = b.next { + l += b.len() + } + return l +} + +// Max returns the maximum element of the set s, or MinInt if s is empty. +func (s *Sparse) Max() int { + if s.IsEmpty() { + return MinInt + } + return s.root.prev.max() +} + +// Min returns the minimum element of the set s, or MaxInt if s is empty. +func (s *Sparse) Min() int { + if s.IsEmpty() { + return MaxInt + } + return s.root.next.min(false) +} + +// block returns the block that would contain offset, +// or nil if s contains no such block. +// +func (s *Sparse) block(offset int) *block { + b := s.start() + for b != &s.root && b.offset <= offset { + if b.offset == offset { + return b + } + b = b.next + } + return nil +} + +// Insert adds x to the set s, and reports whether the set grew. +func (s *Sparse) Insert(x int) bool { + offset, i := offsetAndBitIndex(x) + b := s.start() + for b != &s.root && b.offset <= offset { + if b.offset == offset { + return b.insert(i) + } + b = b.next + } + + // Insert new block before b. + new := &block{offset: offset} + new.next = b + new.prev = b.prev + new.prev.next = new + new.next.prev = new + return new.insert(i) +} + +func (s *Sparse) removeBlock(b *block) { + b.prev.next = b.next + b.next.prev = b.prev +} + +// Remove removes x from the set s, and reports whether the set shrank. +func (s *Sparse) Remove(x int) bool { + offset, i := offsetAndBitIndex(x) + if b := s.block(offset); b != nil { + if !b.remove(i) { + return false + } + if b.empty() { + s.removeBlock(b) + } + return true + } + return false +} + +// Clear removes all elements from the set s. +func (s *Sparse) Clear() { + s.root.next = &s.root + s.root.prev = &s.root +} + +// If set s is non-empty, TakeMin sets *p to the minimum element of +// the set s, removes that element from the set and returns true. +// Otherwise, it returns false and *p is undefined. +// +// This method may be used for iteration over a worklist like so: +// +// var x int +// for worklist.TakeMin(&x) { use(x) } +// +func (s *Sparse) TakeMin(p *int) bool { + head := s.start() + if head == &s.root { + return false + } + *p = head.min(true) + if head.empty() { + s.removeBlock(head) + } + return true +} + +// Has reports whether x is an element of the set s. +func (s *Sparse) Has(x int) bool { + offset, i := offsetAndBitIndex(x) + if b := s.block(offset); b != nil { + return b.has(i) + } + return false +} + +// forEach applies function f to each element of the set s in order. +// +// f must not mutate s. Consequently, forEach is not safe to expose +// to clients. In any case, using "range s.AppendTo()" allows more +// natural control flow with continue/break/return. +// +func (s *Sparse) forEach(f func(int)) { + for b := s.start(); b != &s.root; b = b.next { + b.forEach(f) + } +} + +// Copy sets s to the value of x. +func (s *Sparse) Copy(x *Sparse) { + if s == x { + return + } + + xb := x.start() + sb := s.start() + for xb != &x.root { + if sb == &s.root { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + sb.bits = xb.bits + xb = xb.next + sb = sb.next + } + s.discardTail(sb) +} + +// insertBlockBefore returns a new block, inserting it before next. +func (s *Sparse) insertBlockBefore(next *block) *block { + b := new(block) + b.next = next + b.prev = next.prev + b.prev.next = b + next.prev = b + return b +} + +// discardTail removes block b and all its successors from s. +func (s *Sparse) discardTail(b *block) { + if b != &s.root { + b.prev.next = &s.root + s.root.prev = b.prev + } +} + +// IntersectionWith sets s to the intersection s ∩ x. +func (s *Sparse) IntersectionWith(x *Sparse) { + if s == x { + return + } + + xb := x.start() + sb := s.start() + for xb != &x.root && sb != &s.root { + switch { + case xb.offset < sb.offset: + xb = xb.next + + case xb.offset > sb.offset: + sb = sb.next + s.removeBlock(sb.prev) + + default: + var sum word + for i := range sb.bits { + r := xb.bits[i] & sb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum != 0 { + sb = sb.next + } else { + // sb will be overwritten or removed + } + + xb = xb.next + } + } + + s.discardTail(sb) +} + +// Intersection sets s to the intersection x ∩ y. +func (s *Sparse) Intersection(x, y *Sparse) { + switch { + case s == x: + s.IntersectionWith(y) + return + case s == y: + s.IntersectionWith(x) + return + case x == y: + s.Copy(x) + return + } + + xb := x.start() + yb := y.start() + sb := s.start() + for xb != &x.root && yb != &y.root { + switch { + case xb.offset < yb.offset: + xb = xb.next + continue + case xb.offset > yb.offset: + yb = yb.next + continue + } + + if sb == &s.root { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + + var sum word + for i := range sb.bits { + r := xb.bits[i] & yb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum != 0 { + sb = sb.next + } else { + // sb will be overwritten or removed + } + + xb = xb.next + yb = yb.next + } + + s.discardTail(sb) +} + +// UnionWith sets s to the union s ∪ x, and reports whether s grew. +func (s *Sparse) UnionWith(x *Sparse) bool { + if s == x { + return false + } + + var changed bool + xb := x.start() + sb := s.start() + for xb != &x.root { + if sb != &s.root && sb.offset == xb.offset { + for i := range xb.bits { + if sb.bits[i] != xb.bits[i] { + sb.bits[i] |= xb.bits[i] + changed = true + } + } + xb = xb.next + } else if sb == &s.root || sb.offset > xb.offset { + sb = s.insertBlockBefore(sb) + sb.offset = xb.offset + sb.bits = xb.bits + changed = true + + xb = xb.next + } + sb = sb.next + } + return changed +} + +// Union sets s to the union x ∪ y. +func (s *Sparse) Union(x, y *Sparse) { + switch { + case x == y: + s.Copy(x) + return + case s == x: + s.UnionWith(y) + return + case s == y: + s.UnionWith(x) + return + } + + xb := x.start() + yb := y.start() + sb := s.start() + for xb != &x.root || yb != &y.root { + if sb == &s.root { + sb = s.insertBlockBefore(sb) + } + switch { + case yb == &y.root || (xb != &x.root && xb.offset < yb.offset): + sb.offset = xb.offset + sb.bits = xb.bits + xb = xb.next + + case xb == &x.root || (yb != &y.root && yb.offset < xb.offset): + sb.offset = yb.offset + sb.bits = yb.bits + yb = yb.next + + default: + sb.offset = xb.offset + for i := range xb.bits { + sb.bits[i] = xb.bits[i] | yb.bits[i] + } + xb = xb.next + yb = yb.next + } + sb = sb.next + } + + s.discardTail(sb) +} + +// DifferenceWith sets s to the difference s ∖ x. +func (s *Sparse) DifferenceWith(x *Sparse) { + if s == x { + s.Clear() + return + } + + xb := x.start() + sb := s.start() + for xb != &x.root && sb != &s.root { + switch { + case xb.offset > sb.offset: + sb = sb.next + + case xb.offset < sb.offset: + xb = xb.next + + default: + var sum word + for i := range sb.bits { + r := sb.bits[i] & ^xb.bits[i] + sb.bits[i] = r + sum |= r + } + sb = sb.next + xb = xb.next + + if sum == 0 { + s.removeBlock(sb.prev) + } + } + } +} + +// Difference sets s to the difference x ∖ y. +func (s *Sparse) Difference(x, y *Sparse) { + switch { + case x == y: + s.Clear() + return + case s == x: + s.DifferenceWith(y) + return + case s == y: + var y2 Sparse + y2.Copy(y) + s.Difference(x, &y2) + return + } + + xb := x.start() + yb := y.start() + sb := s.start() + for xb != &x.root && yb != &y.root { + if xb.offset > yb.offset { + // y has block, x has none + yb = yb.next + continue + } + + if sb == &s.root { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + + switch { + case xb.offset < yb.offset: + // x has block, y has none + sb.bits = xb.bits + + sb = sb.next + + default: + // x and y have corresponding blocks + var sum word + for i := range sb.bits { + r := xb.bits[i] & ^yb.bits[i] + sb.bits[i] = r + sum |= r + } + if sum != 0 { + sb = sb.next + } else { + // sb will be overrwritten or removed + } + + yb = yb.next + } + xb = xb.next + } + + for xb != &x.root { + if sb == &s.root { + sb = s.insertBlockBefore(sb) + } + sb.offset = xb.offset + sb.bits = xb.bits + sb = sb.next + + xb = xb.next + } + + s.discardTail(sb) +} + +// Equals reports whether the sets s and t have the same elements. +func (s *Sparse) Equals(t *Sparse) bool { + if s == t { + return true + } + sb := s.start() + tb := t.start() + for { + switch { + case sb == &s.root && tb == &t.root: + return true + case sb == &s.root || tb == &t.root: + return false + case sb.offset != tb.offset: + return false + case sb.bits != tb.bits: + return false + } + + sb = sb.next + tb = tb.next + } +} + +// String returns a human-readable description of the set s. +func (s *Sparse) String() string { + var buf bytes.Buffer + buf.WriteByte('{') + s.forEach(func(x int) { + if buf.Len() > 1 { + buf.WriteString(", ") + } + fmt.Fprintf(&buf, "%d", x) + }) + buf.WriteByte('}') + return buf.String() +} + +// BitString returns the set s as a non-empty string of 1s and 0s. +// The ith character is 1 if the set contains i. +// +func (s *Sparse) BitString() string { + if s.IsEmpty() { + return "0" + } + b := make([]byte, s.Max()+1) + for i := range b { + b[i] = '0' + } + s.forEach(func(x int) { + b[x] = '1' + }) + return string(b) +} + +// GoString returns a string showing the internal representation of +// the set s. +// +func (s *Sparse) GoString() string { + var buf bytes.Buffer + for b := s.start(); b != &s.root; b = b.next { + fmt.Fprintf(&buf, "block %p {offset=%d next=%p prev=%p", + b, b.offset, b.next, b.prev) + for _, w := range b.bits { + fmt.Fprintf(&buf, " 0%016x", w) + } + fmt.Fprintf(&buf, "}\n") + } + return buf.String() +} + +// AppendTo returns the result of appending the elements of s to slice +// in order. +func (s *Sparse) AppendTo(slice []int) []int { + s.forEach(func(x int) { + slice = append(slice, x) + }) + return slice +} + +// -- Testing/debugging ------------------------------------------------ + +// check returns an error if the representation invariants of s are violated. +func (s *Sparse) check() error { + if !s.root.empty() { + return fmt.Errorf("non-empty root block") + } + if s.root.offset != 0 { + return fmt.Errorf("root block has non-zero offset %d", s.root.offset) + } + for b := s.start(); b != &s.root; b = b.next { + if b.offset%bitsPerBlock != 0 { + return fmt.Errorf("bad offset modulo: %d", b.offset) + } + if b.empty() { + return fmt.Errorf("empty block") + } + if b.prev.next != b { + return fmt.Errorf("bad prev.next link") + } + if b.next.prev != b { + return fmt.Errorf("bad next.prev link") + } + if b.prev != &s.root { + if b.offset <= b.prev.offset { + return fmt.Errorf("bad offset order: b.offset=%d, prev.offset=%d", + b.offset, b.prev.offset) + } + } + } + return nil +} diff --git a/container/intsets/sparse_test.go b/container/intsets/sparse_test.go new file mode 100644 index 0000000000..94d2e462fe --- /dev/null +++ b/container/intsets/sparse_test.go @@ -0,0 +1,500 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package intsets_test + +import ( + "fmt" + "log" + "math/rand" + "sort" + "strings" + "testing" + + "code.google.com/p/go.tools/container/intsets" +) + +func TestBasics(t *testing.T) { + var s intsets.Sparse + if len := s.Len(); len != 0 { + t.Errorf("Len({}): got %d, want 0", len) + } + if s := s.String(); s != "{}" { + t.Errorf("String({}): got %q, want \"{}\"", s) + } + if s := s.BitString(); s != "0" { + t.Errorf("BitString({}): got %q, want \"0\"", s) + } + if s.Has(3) { + t.Errorf("Has(3): got true, want false") + } + if err := s.Check(); err != nil { + t.Error(err) + } + + if !s.Insert(3) { + t.Errorf("Insert(3): got false, want true") + } + if max := s.Max(); max != 3 { + t.Errorf("Max: got %d, want 3", max) + } + + if !s.Insert(435) { + t.Errorf("Insert(435): got false, want true") + } + if s := s.String(); s != "{3, 435}" { + t.Errorf("String({3, 435}): got %q, want \"{3, 435}\"", s) + } + if max := s.Max(); max != 435 { + t.Errorf("Max: got %d, want 435", max) + } + if len := s.Len(); len != 2 { + t.Errorf("Len: got %d, want 2", len) + } + + if !s.Remove(435) { + t.Errorf("Remove(435): got false, want true") + } + if s := s.String(); s != "{3}" { + t.Errorf("String({3}): got %q, want \"{3}\"", s) + } +} + +// Insert, Len, IsEmpty, Hash, Clear, AppendTo. +func TestMoreBasics(t *testing.T) { + var set intsets.Sparse + set.Insert(456) + set.Insert(123) + set.Insert(789) + if set.Len() != 3 { + t.Errorf("%s.Len: got %d, want 3", set, set.Len()) + } + if set.IsEmpty() { + t.Error("%s.IsEmpty: got true", set) + } + if !set.Has(123) { + t.Error("%s.Has(123): got false", set) + } + if set.Has(1234) { + t.Error("%s.Has(1234): got true", set) + } + got := set.AppendTo([]int{-1}) + if want := []int{-1, 123, 456, 789}; fmt.Sprint(got) != fmt.Sprint(want) { + t.Error("%s.AppendTo: got %v, want %v", got, want) + } + + set.Clear() + + if set.Len() != 0 { + t.Errorf("Clear: got %d, want 0", set.Len()) + } + if !set.IsEmpty() { + t.Error("IsEmpty: got false") + } + if set.Has(123) { + t.Error("%s.Has: got false", set) + } +} + +func TestTakeMin(t *testing.T) { + var set intsets.Sparse + set.Insert(456) + set.Insert(123) + set.Insert(789) + set.Insert(-123) + var got int + for i, want := range []int{-123, 123, 456, 789} { + if !set.TakeMin(&got) || got != want { + t.Errorf("TakeMin #%d: got %d, want %d", i, got, want) + } + } + if set.TakeMin(&got) { + t.Errorf("%s.TakeMin returned true", set, got) + } + if err := set.Check(); err != nil { + t.Fatalf("check: %s: %#v", err, &set) + } +} + +func TestMinAndMax(t *testing.T) { + values := []int{0, 456, 123, 789, -123} // elt 0 => empty set + wantMax := []int{intsets.MinInt, 456, 456, 789, 789} + wantMin := []int{intsets.MaxInt, 456, 123, 123, -123} + + var set intsets.Sparse + for i, x := range values { + if i != 0 { + set.Insert(x) + } + if got, want := set.Min(), wantMin[i]; got != want { + t.Errorf("Min #%d: got %d, want %d", i, got, want) + } + if got, want := set.Max(), wantMax[i]; got != want { + t.Errorf("Max #%d: got %d, want %d", i, got, want) + } + } + + set.Insert(intsets.MinInt) + if got, want := set.Min(), intsets.MinInt; got != want { + t.Errorf("Min: got %d, want %d", got, want) + } + + set.Insert(intsets.MaxInt) + if got, want := set.Max(), intsets.MaxInt; got != want { + t.Errorf("Max: got %d, want %d", got, want) + } +} + +func TestEquals(t *testing.T) { + var setX intsets.Sparse + setX.Insert(456) + setX.Insert(123) + setX.Insert(789) + + if !setX.Equals(&setX) { + t.Errorf("Equals(%s, %s): got false", &setX, &setX) + } + + var setY intsets.Sparse + setY.Insert(789) + setY.Insert(456) + setY.Insert(123) + + if !setX.Equals(&setY) { + t.Errorf("Equals(%s, %s): got false", &setX, &setY) + } + + setY.Insert(1) + if setX.Equals(&setY) { + t.Errorf("Equals(%s, %s): got true", &setX, &setY) + } + + var empty intsets.Sparse + if setX.Equals(&empty) { + t.Errorf("Equals(%s, %s): got true", &setX, &empty) + } + + // Edge case: some block (with offset=0) appears in X but not Y. + setY.Remove(123) + if setX.Equals(&setY) { + t.Errorf("Equals(%s, %s): got true", &setX, &setY) + } +} + +// A pset is a parallel implementation of a set using both an intsets.Sparse +// and a built-in hash map. +type pset struct { + hash map[int]bool + bits intsets.Sparse +} + +func makePset() *pset { + return &pset{hash: make(map[int]bool)} +} + +func (set *pset) add(n int) { + prev := len(set.hash) + set.hash[n] = true + grewA := len(set.hash) > prev + + grewB := set.bits.Insert(n) + + if grewA != grewB { + panic(fmt.Sprintf("add(%d): grewA=%t grewB=%t", n, grewA, grewB)) + } +} + +func (set *pset) remove(n int) { + prev := len(set.hash) + delete(set.hash, n) + shrankA := len(set.hash) < prev + + shrankB := set.bits.Remove(n) + + if shrankA != shrankB { + panic(fmt.Sprintf("remove(%d): shrankA=%t shrankB=%t", n, shrankA, shrankB)) + } +} + +func (set *pset) check(t *testing.T, msg string) { + var eltsA []int + for elt := range set.hash { + eltsA = append(eltsA, int(elt)) + } + sort.Ints(eltsA) + + eltsB := set.bits.AppendTo(nil) + + if a, b := fmt.Sprint(eltsA), fmt.Sprint(eltsB); a != b { + t.Errorf("check(%s): hash=%s bits=%s (%s)", msg, a, b, &set.bits) + } + + if err := set.bits.Check(); err != nil { + t.Fatalf("Check(%s): %s: %#v", msg, err, &set.bits) + } +} + +// randomPset returns a parallel set of random size and elements. +func randomPset(prng *rand.Rand, maxSize int) *pset { + set := makePset() + size := int(prng.Int()) % maxSize + for i := 0; i < size; i++ { + // TODO(adonovan): benchmark how performance varies + // with this sparsity parameter. + n := int(prng.Int()) % 10000 + set.add(n) + } + return set +} + +// TestRandomMutations performs the same random adds/removes on two +// set implementations and ensures that they compute the same result. +func TestRandomMutations(t *testing.T) { + const debug = false + + set := makePset() + prng := rand.New(rand.NewSource(0)) + for i := 0; i < 10000; i++ { + n := int(prng.Int())%2000 - 1000 + if i%2 == 0 { + if debug { + log.Printf("add %d", n) + } + set.add(n) + } else { + if debug { + log.Printf("remove %d", n) + } + set.remove(n) + } + if debug { + set.check(t, "post mutation") + } + } + set.check(t, "final") + if debug { + log.Print(&set.bits) + } +} + +// TestSetOperations exercises classic set operations: ∩ , ∪, \. +func TestSetOperations(t *testing.T) { + prng := rand.New(rand.NewSource(0)) + + // Use random sets of sizes from 0 to about 1000. + // For each operator, we test variations such as + // Z.op(X, Y), Z.op(X, Z) and Z.op(Z, Y) to exercise + // the degenerate cases of each method implementation. + for i := uint(0); i < 12; i++ { + X := randomPset(prng, 1<, == cases in IntersectionWith that the + // TestSetOperations data is too dense to cover. + var X, Y intsets.Sparse + X.Insert(1) + X.Insert(1000) + X.Insert(8000) + Y.Insert(1) + Y.Insert(2000) + Y.Insert(4000) + X.IntersectionWith(&Y) + if got, want := X.String(), "{1}"; got != want { + t.Errorf("IntersectionWith: got %s, want %s", got, want) + } +} + +func TestBitString(t *testing.T) { + var set intsets.Sparse + set.Insert(0) + set.Insert(7) + set.Insert(177) + want := "10000001" + strings.Repeat("0", 169) + "1" + if got := set.BitString(); got != want { + t.Errorf("BitString: got %s, want %s", got, want) + } +} + +func TestFailFastOnShallowCopy(t *testing.T) { + var x intsets.Sparse + x.Insert(1) + + y := x // shallow copy (breaks representation invariants) + defer func() { + got := fmt.Sprint(recover()) + want := "A Sparse has been copied without (*Sparse).Copy()" + if got != want { + t.Error("shallow copy: recover() = %q, want %q", got, want) + } + }() + y.String() // panics + t.Error("didn't panic as expected") +} + +// -- Benchmarks ------------------------------------------------------- + +// TODO(adonovan): +// - Gather set distributions from pointer analysis. +// - Measure memory usage. + +func BenchmarkSparseBitVector(b *testing.B) { + prng := rand.New(rand.NewSource(0)) + for tries := 0; tries < b.N; tries++ { + var x, y, z intsets.Sparse + for i := 0; i < 1000; i++ { + n := int(prng.Int()) % 10000 + if i%2 == 0 { + x.Insert(n) + } else { + y.Insert(n) + } + } + z.Union(&x, &y) + z.Difference(&x, &y) + } +} + +func BenchmarkHashTable(b *testing.B) { + prng := rand.New(rand.NewSource(0)) + for tries := 0; tries < b.N; tries++ { + x, y, z := make(map[int]bool), make(map[int]bool), make(map[int]bool) + for i := 0; i < 1000; i++ { + n := int(prng.Int()) % 10000 + if i%2 == 0 { + x[n] = true + } else { + y[n] = true + } + } + // union + for n := range x { + z[n] = true + } + for n := range y { + z[n] = true + } + // difference + z = make(map[int]bool) + for n := range y { + if !x[n] { + z[n] = true + } + } + } +} diff --git a/container/intsets/util.go b/container/intsets/util.go new file mode 100644 index 0000000000..fe26f29e17 --- /dev/null +++ b/container/intsets/util.go @@ -0,0 +1,75 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package intsets + +var a [1 << 8]byte + +func init() { + for i := range a { + var n byte + for x := i; x != 0; x >>= 1 { + if x&1 != 0 { + n++ + } + } + a[i] = n + } +} + +// popcount returns the population count (number of set bits) of x. +func popcount(x word) word { + return word(a[byte(x>>(0*8))] + + a[byte(x>>(1*8))] + + a[byte(x>>(2*8))] + + a[byte(x>>(3*8))] + + a[byte(x>>(4*8))] + + a[byte(x>>(5*8))] + + a[byte(x>>(6*8))] + + a[byte(x>>(7*8))]) +} + +// nlz returns the number of leading zeros of x. +// From Hacker's Delight, fig 5.11. +func nlz(x word) word { + x |= (x >> 1) + x |= (x >> 2) + x |= (x >> 4) + x |= (x >> 8) + x |= (x >> 16) + x |= (x >> 32) + return popcount(^x) +} + +// ntz returns the number of trailing zeros of x. +// From Hacker's Delight, fig 5.13. +func ntz(x word) word { + if x == 0 { + return bitsPerWord + } + var n word = 1 + if bitsPerWord == 64 { + if (x & 0xffffffff) == 0 { + n = n + 32 + x = x >> 32 + } + } + if (x & 0x0000ffff) == 0 { + n = n + 16 + x = x >> 16 + } + if (x & 0x000000ff) == 0 { + n = n + 8 + x = x >> 8 + } + if (x & 0x0000000f) == 0 { + n = n + 4 + x = x >> 4 + } + if (x & 0x00000003) == 0 { + n = n + 2 + x = x >> 2 + } + return n - x&1 +} diff --git a/container/intsets/util_test.go b/container/intsets/util_test.go new file mode 100644 index 0000000000..900c8cb660 --- /dev/null +++ b/container/intsets/util_test.go @@ -0,0 +1,16 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package intsets + +import "testing" + +func TestNLZ(t *testing.T) { + if x := nlz(0x0000801000000000); x != 16 { + t.Errorf("bad %d", x) + } +} + +// Backdoor for testing. +func (s *Sparse) Check() error { return s.check() }