From f3d63bea42e38720834ce6589d0747982b30ea7d Mon Sep 17 00:00:00 2001 From: Michael Elkins Date: Thu, 3 Dec 2009 20:03:07 -0800 Subject: [PATCH] Add Count, Cycle, ZipWith, GroupBy, Repeat, RepeatTimes, Unique to exp/iterable. Modify iterFunc to take chan<- instead of just chan. R=rsc, dsymonds1 CC=golang-dev, r https://golang.org/cl/160064 --- src/pkg/exp/iterable/iterable.go | 183 ++++++++++++++++++++--- src/pkg/exp/iterable/iterable_test.go | 205 +++++++++++++++++++++++++- 2 files changed, 363 insertions(+), 25 deletions(-) diff --git a/src/pkg/exp/iterable/iterable.go b/src/pkg/exp/iterable/iterable.go index 4ca0e6d057a..764900c53c8 100644 --- a/src/pkg/exp/iterable/iterable.go +++ b/src/pkg/exp/iterable/iterable.go @@ -8,7 +8,10 @@ // something that would produce an infinite amount of data. package iterable -import "container/vector" +import ( + "container/list"; + "container/vector"; +) type Iterable interface { // Iter should return a fresh channel each time it is called. @@ -130,12 +133,9 @@ func Partition(iter Iterable, f func(interface{}) bool) (Iterable, Iterable) { return Filter(iter, f), Filter(iter, not(f)) } -// TODO: -// - Zip - // helper type for the Take/TakeWhile/Drop/DropWhile functions. // primarily used so that the .Iter() method can be attached -type iterFunc func(chan interface{}) +type iterFunc func(chan<- interface{}) // provide the Iterable interface func (v iterFunc) Iter() <-chan interface{} { @@ -145,26 +145,11 @@ func (v iterFunc) Iter() <-chan interface{} { } // Take returns an Iterable that contains the first n elements of iter. -func Take(iter Iterable, n int) Iterable { - return iterFunc(func(ch chan interface{}) { - defer close(ch); - if n <= 0 { - return - } - m := n; - for v := range iter.Iter() { - ch <- v; - m--; - if m == 0 { - return - } - } - }) -} +func Take(iter Iterable, n int) Iterable { return Slice(iter, 0, n) } // TakeWhile returns an Iterable that contains elements from iter while f is true. func TakeWhile(iter Iterable, f func(interface{}) bool) Iterable { - return iterFunc(func(ch chan interface{}) { + return iterFunc(func(ch chan<- interface{}) { for v := range iter.Iter() { if !f(v) { break @@ -177,7 +162,7 @@ func TakeWhile(iter Iterable, f func(interface{}) bool) Iterable { // Drop returns an Iterable that returns each element of iter after the first n elements. func Drop(iter Iterable, n int) Iterable { - return iterFunc(func(ch chan interface{}) { + return iterFunc(func(ch chan<- interface{}) { m := n; for v := range iter.Iter() { if m > 0 { @@ -192,7 +177,7 @@ func Drop(iter Iterable, n int) Iterable { // DropWhile returns an Iterable that returns each element of iter after the initial sequence for which f returns true. func DropWhile(iter Iterable, f func(interface{}) bool) Iterable { - return iterFunc(func(ch chan interface{}) { + return iterFunc(func(ch chan<- interface{}) { drop := true; for v := range iter.Iter() { if drop { @@ -206,3 +191,153 @@ func DropWhile(iter Iterable, f func(interface{}) bool) Iterable { close(ch); }) } + +// Cycle repeats the values of iter in order infinitely. +func Cycle(iter Iterable) Iterable { + return iterFunc(func(ch chan<- interface{}) { + for { + for v := range iter.Iter() { + ch <- v + } + } + }) +} + +// Chain returns an Iterable that concatentates all values from the specified Iterables. +func Chain(args []Iterable) Iterable { + return iterFunc(func(ch chan<- interface{}) { + for _, e := range args { + for v := range e.Iter() { + ch <- v + } + } + close(ch); + }) +} + +// Zip returns an Iterable of []interface{} consisting of the next element from +// each input Iterable. The length of the returned Iterable is the minimum of +// the lengths of the input Iterables. +func Zip(args []Iterable) Iterable { + return iterFunc(func(ch chan<- interface{}) { + defer close(ch); + if len(args) == 0 { + return + } + iters := make([]<-chan interface{}, len(args)); + for i := 0; i < len(iters); i++ { + iters[i] = args[i].Iter() + } + for { + out := make([]interface{}, len(args)); + for i, v := range iters { + out[i] = <-v; + if closed(v) { + return + } + } + ch <- out; + } + }) +} + +// ZipWith returns an Iterable containing the result of executing f using arguments read from a and b. +func ZipWith2(f func(c, d interface{}) interface{}, a, b Iterable) Iterable { + return Map(Zip([]Iterable{a, b}), func(a1 interface{}) interface{} { + arr := a1.([]interface{}); + return f(arr[0], arr[1]); + }) +} + +// ZipWith returns an Iterable containing the result of executing f using arguments read from a, b and c. +func ZipWith3(f func(d, e, f interface{}) interface{}, a, b, c Iterable) Iterable { + return Map(Zip([]Iterable{a, b, c}), func(a1 interface{}) interface{} { + arr := a1.([]interface{}); + return f(arr[0], arr[1], arr[2]); + }) +} + +// Slice returns an Iterable that contains the elements from iter +// with indexes in [start, stop). +func Slice(iter Iterable, start, stop int) Iterable { + return iterFunc(func(ch chan<- interface{}) { + defer close(ch); + i := 0; + for v := range iter.Iter() { + switch { + case i >= stop: + return + case i >= start: + ch <- v + } + i++; + } + }) +} + +// Repeat generates an infinite stream of v. +func Repeat(v interface{}) Iterable { + return iterFunc(func(ch chan<- interface{}) { + for { + ch <- v + } + }) +} + +// RepeatTimes generates a stream of n copies of v. +func RepeatTimes(v interface{}, n int) Iterable { + return iterFunc(func(ch chan<- interface{}) { + for i := 0; i < n; i++ { + ch <- v + } + close(ch); + }) +} + +// Group is the type for elements returned by the GroupBy function. +type Group struct { + Key interface{}; // key value for matching items + Vals Iterable; // Iterable for receiving values in the group +} + +// Key defines the interface required by the GroupBy function. +type Grouper interface { + // Return the key for the given value + Key(interface{}) interface{}; + + // Compute equality for the given keys + Equal(a, b interface{}) bool; +} + +// GroupBy combines sequences of logically identical values from iter using k +// to generate a key to compare values. Each value emitted by the returned +// Iterable is of type Group, which contains the key used for matching the +// values for the group, and an Iterable for retrieving all the values in the +// group. +func GroupBy(iter Iterable, k Grouper) Iterable { + return iterFunc(func(ch chan<- interface{}) { + var curkey interface{} + var lst *list.List; + // Basic strategy is to read one group at a time into a list prior to emitting the Group value + for v := range iter.Iter() { + kv := k.Key(v); + if lst == nil || !k.Equal(curkey, kv) { + if lst != nil { + ch <- Group{curkey, lst} + } + lst = list.New(); + curkey = kv; + } + lst.PushBack(v); + } + if lst != nil { + ch <- Group{curkey, lst} + } + close(ch); + }) +} + +// Unique removes duplicate values which occur consecutively using id to compute keys. +func Unique(iter Iterable, id Grouper) Iterable { + return Map(GroupBy(iter, id), func(v interface{}) interface{} { return v.(Group).Key }) +} diff --git a/src/pkg/exp/iterable/iterable_test.go b/src/pkg/exp/iterable/iterable_test.go index eefe222695e..242a725a925 100644 --- a/src/pkg/exp/iterable/iterable_test.go +++ b/src/pkg/exp/iterable/iterable_test.go @@ -5,6 +5,7 @@ package iterable import ( + "container/vector"; "testing"; ) @@ -28,7 +29,11 @@ func TestArrayTypes(t *testing.T) { } } -var oneToFive = IntArray{1, 2, 3, 4, 5} +var ( + oneToFive = IntArray{1, 2, 3, 4, 5}; + sixToTen = IntArray{6, 7, 8, 9, 10}; + elevenToTwenty = IntArray{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; +) func isNegative(n interface{}) bool { return n.(int) < 0 } func isPositive(n interface{}) bool { return n.(int) > 0 } @@ -182,3 +187,201 @@ func TestDropWhile(t *testing.T) { res = DropWhile(oneToFive, func(v interface{}) bool { return v.(int) > 1000 }); assertArraysAreEqual(t, Data(res), oneToFive); } + +func TestCycle(t *testing.T) { + res := Cycle(oneToFive); + exp := []int{1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4}; + + // read the first nineteen values from the iterable + out := make([]interface{}, 19); + for i, it := 0, res.Iter(); i < 19; i++ { + out[i] = <-it + } + assertArraysAreEqual(t, out, exp); + + res2 := Cycle(sixToTen); + exp2 := []int{6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 6, 7, 8, 9}; + for i, it := 0, res2.Iter(); i < 19; i++ { + out[i] = <-it + } + assertArraysAreEqual(t, out, exp2); + + // ensure first iterator was not harmed + for i, it := 0, res.Iter(); i < 19; i++ { + out[i] = <-it + } + assertArraysAreEqual(t, out, exp); +} + +func TestChain(t *testing.T) { + + exp := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; + res := Chain([]Iterable{oneToFive, sixToTen, elevenToTwenty}); + assertArraysAreEqual(t, Data(res), exp); + + // reusing the same iterator should produce the same result again + assertArraysAreEqual(t, Data(res), exp); + + // test short read from Chain + i := 0; + out := make([]interface{}, 4); + for v := range res.Iter() { + out[i] = v; + i++; + if i == len(out) { + break + } + } + assertArraysAreEqual(t, out, exp[0:4]); + + // test zero length array + res = Chain([]Iterable{}); + assertArraysAreEqual(t, Data(res), []int{}); +} + +func TestZipWith(t *testing.T) { + exp := []int{7, 9, 11, 13, 15}; + + // f with 2 args and 1 return value + f := func(a, b interface{}) interface{} { return a.(int) + b.(int) }; + res := ZipWith2(f, oneToFive, sixToTen); + assertArraysAreEqual(t, Data(res), exp); + + // test again to make sure returns new iter each time + assertArraysAreEqual(t, Data(res), exp); + + // test a function with 3 args + f2 := func(a, b, c interface{}) interface{} { return a.(int) + b.(int) + c.(int) }; + res = ZipWith3(f2, oneToFive, sixToTen, oneToFive); + exp = []int{8, 11, 14, 17, 20}; + assertArraysAreEqual(t, Data(res), exp); + + // test a function with multiple values returned + f3 := func(a, b interface{}) interface{} { return ([]interface{}{a.(int) + 1, b.(int) + 1}) }; + res = ZipWith2(f3, oneToFive, sixToTen); + + exp2 := [][]int{[]int{2, 7}, []int{3, 8}, []int{4, 9}, []int{5, 10}, []int{6, 11}}; + i := 0; + for v := range res.Iter() { + out := v.([]interface{}); + assertArraysAreEqual(t, out, exp2[i]); + i++; + } + + // test different length iterators--should stop after shortest is exhausted + res = ZipWith2(f, elevenToTwenty, oneToFive); + exp = []int{12, 14, 16, 18, 20}; + assertArraysAreEqual(t, Data(res), exp); +} + +func TestSlice(t *testing.T) { + out := Data(Slice(elevenToTwenty, 2, 6)); + exp := []int{13, 14, 15, 16}; + assertArraysAreEqual(t, out, exp); + + // entire iterable + out = Data(Slice(elevenToTwenty, 0, len(elevenToTwenty))); + exp = []int{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; + assertArraysAreEqual(t, out, exp); + + // empty slice at offset 0 + exp = []int{}; + out = Data(Slice(elevenToTwenty, 0, 0)); + assertArraysAreEqual(t, out, exp); + + // slice upper bound exceeds length of iterable + exp = []int{1, 2, 3, 4, 5}; + out = Data(Slice(oneToFive, 0, 88)); + assertArraysAreEqual(t, out, exp); + + // slice upper bounce is lower than lower bound + exp = []int{}; + out = Data(Slice(oneToFive, 93, 4)); + assertArraysAreEqual(t, out, exp); + + // slice lower bound is greater than len of iterable + exp = []int{}; + out = Data(Slice(oneToFive, 93, 108)); + assertArraysAreEqual(t, out, exp); +} + +func TestRepeat(t *testing.T) { + res := Repeat(42); + i := 0; + for v := range res.Iter() { + if v.(int) != 42 { + t.Fatal("Repeat returned the wrong value") + } + if i == 9 { + break + } + i++; + } +} + +func TestRepeatTimes(t *testing.T) { + res := RepeatTimes(84, 9); + exp := []int{84, 84, 84, 84, 84, 84, 84, 84, 84}; + assertArraysAreEqual(t, Data(res), exp); + assertArraysAreEqual(t, Data(res), exp); // second time to ensure new iter is returned + + // 0 repeat + res = RepeatTimes(7, 0); + exp = []int{}; + assertArraysAreEqual(t, Data(res), exp); + + // negative repeat + res = RepeatTimes(7, -3); + exp = []int{}; + assertArraysAreEqual(t, Data(res), exp); +} + +// a type that implements Key for ints +type intkey struct{} + +func (v intkey) Key(a interface{}) interface{} { + return a +} +func (v intkey) Equal(a, b interface{}) bool { return a.(int) == b.(int) } + +func TestGroupBy(t *testing.T) { + in := IntArray{1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5}; + exp := [][]int{[]int{1}, []int{2, 2}, []int{3, 3, 3}, []int{4, 4, 4, 4}, []int{5, 5, 5, 5, 5}}; + i := 0; + for x := range GroupBy(in, intkey{}).Iter() { + gr := x.(Group); + if gr.Key.(int) != i+1 { + t.Fatal("group key wrong; expected", i+1, "but got", gr.Key.(int)) + } + vals := Data(gr.Vals); + assertArraysAreEqual(t, vals, exp[i]); + i++; + } + if i != 5 { + t.Fatal("did not return expected number of groups") + } + + // test 0 length Iterable + for _ = range GroupBy(IntArray([]int{}), &intkey{}).Iter() { + t.Fatal("iterator should be empty") + } + + // test case with only uniques + var out vector.Vector; + for x := range GroupBy(elevenToTwenty, intkey{}).Iter() { + out.Push(x.(Group).Key) + } + assertArraysAreEqual(t, out.Data(), elevenToTwenty); +} + +func TestUnique(t *testing.T) { + in := IntArray([]int{1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5}); + exp := []int{1, 2, 3, 4, 5}; + res := Unique(in, intkey{}); + assertArraysAreEqual(t, Data(res), exp); + assertArraysAreEqual(t, Data(res), exp); // second time to ensure new iter is returned + + // test case with only uniques + res = Unique(elevenToTwenty, intkey{}); + assertArraysAreEqual(t, Data(res), elevenToTwenty); +}