1
0
mirror of https://github.com/golang/go synced 2024-11-26 05:48:05 -07:00

slices: add sorting and comparison functions

Now that the `cmp` package exists, sorting and comparison functions from
`x/exp/slices` can be ported to the standard library, using the
`cmp.Ordered` type and the `cmp.Less` and `cmp.Compare` functions.

This move also includes adjustments to the discussions in #60091 w.r.t.
NaN handling and cmp vs. less functions, and adds Min/Max functions.
The final API is taken from
https://github.com/golang/go/issues/60091#issuecomment-1553850782

Updates #60091

Change-Id: Id7e6c88035b60d4ddd0c48dd82add8e8bc4e22d3
Reviewed-on: https://go-review.googlesource.com/c/go/+/496078
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Eli Bendersky <eliben@google.com>
Run-TryBot: Eli Bendersky‎ <eliben@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Eli Bendersky 2023-05-19 10:10:21 -07:00 committed by Eli Bendersky
parent 6b7aab7750
commit 0df6812488
10 changed files with 2080 additions and 11 deletions

13
api/next/60091.txt Normal file
View File

@ -0,0 +1,13 @@
pkg slices, func BinarySearchFunc[$0 interface{}, $1 interface{}]([]$0, $1, func($0, $1) int) (int, bool) #60091
pkg slices, func BinarySearch[$0 cmp.Ordered]([]$0, $0) (int, bool) #60091
pkg slices, func CompareFunc[$0 interface{}, $1 interface{}]([]$0, []$1, func($0, $1) int) int #60091
pkg slices, func Compare[$0 cmp.Ordered]([]$0, []$0) int #60091
pkg slices, func IsSortedFunc[$0 interface{}]([]$0, func($0, $0) int) bool #60091
pkg slices, func IsSorted[$0 cmp.Ordered]([]$0) bool #60091
pkg slices, func MaxFunc[$0 interface{}]([]$0, func($0, $0) int) $0 #60091
pkg slices, func Max[$0 cmp.Ordered]([]$0) $0 #60091
pkg slices, func MinFunc[$0 interface{}]([]$0, func($0, $0) int) $0 #60091
pkg slices, func Min[$0 cmp.Ordered]([]$0) $0 #60091
pkg slices, func SortFunc[$0 interface{}]([]$0, func($0, $0) int) #60091
pkg slices, func SortStableFunc[$0 interface{}]([]$0, func($0, $0) int) #60091
pkg slices, func Sort[$0 cmp.Ordered]([]$0) #60091

View File

@ -49,10 +49,6 @@ var depsRules = `
unicode/utf8, unicode/utf16, unicode,
unsafe;
# slices depends on unsafe for overlapping check.
unsafe
< slices;
# These packages depend only on internal/goarch and unsafe.
internal/goarch, unsafe
< internal/abi;
@ -227,6 +223,11 @@ var depsRules = `
< hash
< hash/adler32, hash/crc32, hash/crc64, hash/fnv;
# slices depends on unsafe for overlapping check, cmp for comparison
# semantics, and math/bits for # calculating bitlength of numbers.
unsafe, cmp, math/bits
< slices;
# math/big
FMT, encoding/binary, math/rand
< math/big;

View File

@ -6,6 +6,7 @@
package slices
import (
"cmp"
"unsafe"
)
@ -44,6 +45,50 @@ func EqualFunc[E1, E2 any](s1 []E1, s2 []E2, eq func(E1, E2) bool) bool {
return true
}
// Compare compares the elements of s1 and s2, using [cmp.Compare] on each pair
// of elements. The elements are compared sequentially, starting at index 0,
// until one element is not equal to the other.
// The result of comparing the first non-matching elements is returned.
// If both slices are equal until one of them ends, the shorter slice is
// considered less than the longer one.
// The result is 0 if s1 == s2, -1 if s1 < s2, and +1 if s1 > s2.
func Compare[E cmp.Ordered](s1, s2 []E) int {
for i, v1 := range s1 {
if i >= len(s2) {
return +1
}
v2 := s2[i]
if c := cmp.Compare(v1, v2); c != 0 {
return c
}
}
if len(s1) < len(s2) {
return -1
}
return 0
}
// CompareFunc is like Compare but uses a custom comparison function on each
// pair of elements.
// The result is the first non-zero result of cmp; if cmp always
// returns 0 the result is 0 if len(s1) == len(s2), -1 if len(s1) < len(s2),
// and +1 if len(s1) > len(s2).
func CompareFunc[E1, E2 any](s1 []E1, s2 []E2, cmp func(E1, E2) int) int {
for i, v1 := range s1 {
if i >= len(s2) {
return +1
}
v2 := s2[i]
if c := cmp(v1, v2); c != 0 {
return c
}
}
if len(s1) < len(s2) {
return -1
}
return 0
}
// Index returns the index of the first occurrence of v in s,
// or -1 if not present.
func Index[E comparable](s []E, v E) int {

View File

@ -5,6 +5,7 @@
package slices
import (
"cmp"
"internal/race"
"internal/testenv"
"math"
@ -134,6 +135,213 @@ func BenchmarkEqualFunc_Large(b *testing.B) {
}
}
var compareIntTests = []struct {
s1, s2 []int
want int
}{
{
[]int{1},
[]int{1},
0,
},
{
[]int{1},
[]int{},
1,
},
{
[]int{},
[]int{1},
-1,
},
{
[]int{},
[]int{},
0,
},
{
[]int{1, 2, 3},
[]int{1, 2, 3},
0,
},
{
[]int{1, 2, 3},
[]int{1, 2, 3, 4},
-1,
},
{
[]int{1, 2, 3, 4},
[]int{1, 2, 3},
+1,
},
{
[]int{1, 2, 3},
[]int{1, 4, 3},
-1,
},
{
[]int{1, 4, 3},
[]int{1, 2, 3},
+1,
},
{
[]int{1, 4, 3},
[]int{1, 2, 3, 8, 9},
+1,
},
}
var compareFloatTests = []struct {
s1, s2 []float64
want int
}{
{
[]float64{},
[]float64{},
0,
},
{
[]float64{1},
[]float64{1},
0,
},
{
[]float64{math.NaN()},
[]float64{math.NaN()},
0,
},
{
[]float64{1, 2, math.NaN()},
[]float64{1, 2, math.NaN()},
0,
},
{
[]float64{1, math.NaN(), 3},
[]float64{1, math.NaN(), 4},
-1,
},
{
[]float64{1, math.NaN(), 3},
[]float64{1, 2, 4},
-1,
},
{
[]float64{1, math.NaN(), 3},
[]float64{1, 2, math.NaN()},
-1,
},
{
[]float64{1, 2, 3},
[]float64{1, 2, math.NaN()},
+1,
},
{
[]float64{1, 2, 3},
[]float64{1, math.NaN(), 3},
+1,
},
{
[]float64{1, math.NaN(), 3, 4},
[]float64{1, 2, math.NaN()},
-1,
},
}
func TestCompare(t *testing.T) {
intWant := func(want bool) string {
if want {
return "0"
}
return "!= 0"
}
for _, test := range equalIntTests {
if got := Compare(test.s1, test.s2); (got == 0) != test.want {
t.Errorf("Compare(%v, %v) = %d, want %s", test.s1, test.s2, got, intWant(test.want))
}
}
for _, test := range equalFloatTests {
if got := Compare(test.s1, test.s2); (got == 0) != test.wantEqualNaN {
t.Errorf("Compare(%v, %v) = %d, want %s", test.s1, test.s2, got, intWant(test.wantEqualNaN))
}
}
for _, test := range compareIntTests {
if got := Compare(test.s1, test.s2); got != test.want {
t.Errorf("Compare(%v, %v) = %d, want %d", test.s1, test.s2, got, test.want)
}
}
for _, test := range compareFloatTests {
if got := Compare(test.s1, test.s2); got != test.want {
t.Errorf("Compare(%v, %v) = %d, want %d", test.s1, test.s2, got, test.want)
}
}
}
func equalToCmp[T comparable](eq func(T, T) bool) func(T, T) int {
return func(v1, v2 T) int {
if eq(v1, v2) {
return 0
}
return 1
}
}
func TestCompareFunc(t *testing.T) {
intWant := func(want bool) string {
if want {
return "0"
}
return "!= 0"
}
for _, test := range equalIntTests {
if got := CompareFunc(test.s1, test.s2, equalToCmp(equal[int])); (got == 0) != test.want {
t.Errorf("CompareFunc(%v, %v, equalToCmp(equal[int])) = %d, want %s", test.s1, test.s2, got, intWant(test.want))
}
}
for _, test := range equalFloatTests {
if got := CompareFunc(test.s1, test.s2, equalToCmp(equal[float64])); (got == 0) != test.wantEqual {
t.Errorf("CompareFunc(%v, %v, equalToCmp(equal[float64])) = %d, want %s", test.s1, test.s2, got, intWant(test.wantEqual))
}
}
for _, test := range compareIntTests {
if got := CompareFunc(test.s1, test.s2, cmp.Compare[int]); got != test.want {
t.Errorf("CompareFunc(%v, %v, cmp[int]) = %d, want %d", test.s1, test.s2, got, test.want)
}
}
for _, test := range compareFloatTests {
if got := CompareFunc(test.s1, test.s2, cmp.Compare[float64]); got != test.want {
t.Errorf("CompareFunc(%v, %v, cmp[float64]) = %d, want %d", test.s1, test.s2, got, test.want)
}
}
s1 := []int{1, 2, 3}
s2 := []int{2, 3, 4}
if got := CompareFunc(s1, s2, equalToCmp(offByOne)); got != 0 {
t.Errorf("CompareFunc(%v, %v, offByOne) = %d, want 0", s1, s2, got)
}
s3 := []string{"a", "b", "c"}
s4 := []string{"A", "B", "C"}
if got := CompareFunc(s3, s4, strings.Compare); got != 1 {
t.Errorf("CompareFunc(%v, %v, strings.Compare) = %d, want 1", s3, s4, got)
}
compareLower := func(v1, v2 string) int {
return strings.Compare(strings.ToLower(v1), strings.ToLower(v2))
}
if got := CompareFunc(s3, s4, compareLower); got != 0 {
t.Errorf("CompareFunc(%v, %v, compareLower) = %d, want 0", s3, s4, got)
}
cmpIntString := func(v1 int, v2 string) int {
return strings.Compare(string(rune(v1)-1+'a'), v2)
}
if got := CompareFunc(s1, s3, cmpIntString); got != 0 {
t.Errorf("CompareFunc(%v, %v, cmpIntString) = %d, want 0", s1, s3, got)
}
}
var indexTests = []struct {
s []int
v int

190
src/slices/sort.go Normal file
View File

@ -0,0 +1,190 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slices
import (
"cmp"
"math/bits"
)
// Sort sorts a slice of any ordered type in ascending order.
// When sorting floating-point numbers, NaNs are ordered before other values.
func Sort[E cmp.Ordered](x []E) {
n := len(x)
pdqsortOrdered(x, 0, n, bits.Len(uint(n)))
}
// SortFunc sorts the slice x in ascending order as determined by the cmp
// function. This sort is not guaranteed to be stable.
// cmp(a, b) should return a negative number when a < b, a positive number when
// a > b and zero when a == b.
//
// SortFunc requires that cmp is a strict weak ordering.
// See https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings.
func SortFunc[E any](x []E, cmp func(a, b E) int) {
n := len(x)
pdqsortCmpFunc(x, 0, n, bits.Len(uint(n)), cmp)
}
// SortStableFunc sorts the slice x while keeping the original order of equal
// elements, using cmp to compare elements.
func SortStableFunc[E any](x []E, cmp func(a, b E) int) {
stableCmpFunc(x, len(x), cmp)
}
// IsSorted reports whether x is sorted in ascending order.
func IsSorted[E cmp.Ordered](x []E) bool {
for i := len(x) - 1; i > 0; i-- {
if cmp.Less(x[i], x[i-1]) {
return false
}
}
return true
}
// IsSortedFunc reports whether x is sorted in ascending order, with cmp as the
// comparison function.
func IsSortedFunc[E any](x []E, cmp func(a, b E) int) bool {
for i := len(x) - 1; i > 0; i-- {
if cmp(x[i], x[i-1]) < 0 {
return false
}
}
return true
}
// Min returns the minimal value in x. It panics if x is empty.
// For floating-point numbers, Min propagates NaNs (any NaN value in x
// forces the output to be NaN).
func Min[E cmp.Ordered](x []E) E {
if len(x) < 1 {
panic("slices.Min: empty list")
}
m := x[0]
for i := 1; i < len(x); i++ {
m = min(m, x[i])
}
return m
}
// MinFunc returns the minimal value in x, using cmp to compare elements.
// It panics if x is empty.
func MinFunc[E any](x []E, cmp func(a, b E) int) E {
if len(x) < 1 {
panic("slices.MinFunc: empty list")
}
m := x[0]
for i := 1; i < len(x); i++ {
if cmp(x[i], m) < 0 {
m = x[i]
}
}
return m
}
// Max returns the maximal value in x. It panics if x is empty.
// For floating-point E, Max propagates NaNs (any NaN value in x
// forces the output to be NaN).
func Max[E cmp.Ordered](x []E) E {
if len(x) < 1 {
panic("slices.Max: empty list")
}
m := x[0]
for i := 1; i < len(x); i++ {
m = max(m, x[i])
}
return m
}
// MaxFunc returns the maximal value in x, using cmp to compare elements.
// It panics if x is empty.
func MaxFunc[E any](x []E, cmp func(a, b E) int) E {
if len(x) < 1 {
panic("slices.MaxFunc: empty list")
}
m := x[0]
for i := 1; i < len(x); i++ {
if cmp(x[i], m) > 0 {
m = x[i]
}
}
return m
}
// BinarySearch searches for target in a sorted slice and returns the position
// where target is found, or the position where target would appear in the
// sort order; it also returns a bool saying whether the target is really found
// in the slice. The slice must be sorted in increasing order.
func BinarySearch[E cmp.Ordered](x []E, target E) (int, bool) {
// Inlining is faster than calling BinarySearchFunc with a lambda.
n := len(x)
// Define x[-1] < target and x[n] >= target.
// Invariant: x[i-1] < target, x[j] >= target.
i, j := 0, n
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
if cmp.Less(x[h], target) {
i = h + 1 // preserves x[i-1] < target
} else {
j = h // preserves x[j] >= target
}
}
// i == j, x[i-1] < target, and x[j] (= x[i]) >= target => answer is i.
return i, i < n && (x[i] == target || (isNaN(x[i]) && isNaN(target)))
}
// BinarySearchFunc works like BinarySearch, but uses a custom comparison
// function. The slice must be sorted in increasing order, where "increasing"
// is defined by cmp. cmp should return 0 if the slice element matches
// the target, a negative number if the slice element precedes the target,
// or a positive number if the slice element follows the target.
// cmp must implement the same ordering as the slice, such that if
// cmp(a, t) < 0 and cmp(b, t) >= 0, then a must precede b in the slice.
func BinarySearchFunc[E, T any](x []E, target T, cmp func(E, T) int) (int, bool) {
n := len(x)
// Define cmp(x[-1], target) < 0 and cmp(x[n], target) >= 0 .
// Invariant: cmp(x[i - 1], target) < 0, cmp(x[j], target) >= 0.
i, j := 0, n
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
if cmp(x[h], target) < 0 {
i = h + 1 // preserves cmp(x[i - 1], target) < 0
} else {
j = h // preserves cmp(x[j], target) >= 0
}
}
// i == j, cmp(x[i-1], target) < 0, and cmp(x[j], target) (= cmp(x[i], target)) >= 0 => answer is i.
return i, i < n && cmp(x[i], target) == 0
}
type sortedHint int // hint for pdqsort when choosing the pivot
const (
unknownHint sortedHint = iota
increasingHint
decreasingHint
)
// xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
type xorshift uint64
func (r *xorshift) Next() uint64 {
*r ^= *r << 13
*r ^= *r >> 17
*r ^= *r << 5
return uint64(*r)
}
func nextPowerOfTwo(length int) uint {
return 1 << bits.Len(uint(length))
}
// isNaN reports whether x is a NaN without requiring the math package.
// This will always return false if T is not floating-point.
func isNaN[T cmp.Ordered](x T) bool {
return x != x
}

View File

@ -0,0 +1,238 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slices
import (
"fmt"
"math/rand"
"sort"
"strings"
"testing"
)
// These benchmarks compare sorting a large slice of int with sort.Ints vs.
// slices.Sort
func makeRandomInts(n int) []int {
rand.Seed(42)
ints := make([]int, n)
for i := 0; i < n; i++ {
ints[i] = rand.Intn(n)
}
return ints
}
func makeSortedInts(n int) []int {
ints := make([]int, n)
for i := 0; i < n; i++ {
ints[i] = i
}
return ints
}
func makeReversedInts(n int) []int {
ints := make([]int, n)
for i := 0; i < n; i++ {
ints[i] = n - i
}
return ints
}
const N = 100_000
func BenchmarkSortInts(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
ints := makeRandomInts(N)
b.StartTimer()
sort.Ints(ints)
}
}
func BenchmarkSlicesSortInts(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
ints := makeRandomInts(N)
b.StartTimer()
Sort(ints)
}
}
func BenchmarkSlicesSortInts_Sorted(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
ints := makeSortedInts(N)
b.StartTimer()
Sort(ints)
}
}
func BenchmarkSlicesSortInts_Reversed(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
ints := makeReversedInts(N)
b.StartTimer()
Sort(ints)
}
}
// Since we're benchmarking these sorts against each other, make sure that they
// generate similar results.
func TestIntSorts(t *testing.T) {
ints := makeRandomInts(200)
ints2 := Clone(ints)
sort.Ints(ints)
Sort(ints2)
for i := range ints {
if ints[i] != ints2[i] {
t.Fatalf("ints2 mismatch at %d; %d != %d", i, ints[i], ints2[i])
}
}
}
// The following is a benchmark for sorting strings.
// makeRandomStrings generates n random strings with alphabetic runes of
// varying lengths.
func makeRandomStrings(n int) []string {
rand.Seed(42)
var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
ss := make([]string, n)
for i := 0; i < n; i++ {
var sb strings.Builder
slen := 2 + rand.Intn(50)
for j := 0; j < slen; j++ {
sb.WriteRune(letters[rand.Intn(len(letters))])
}
ss[i] = sb.String()
}
return ss
}
func TestStringSorts(t *testing.T) {
ss := makeRandomStrings(200)
ss2 := Clone(ss)
sort.Strings(ss)
Sort(ss2)
for i := range ss {
if ss[i] != ss2[i] {
t.Fatalf("ss2 mismatch at %d; %s != %s", i, ss[i], ss2[i])
}
}
}
func BenchmarkSortStrings(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
ss := makeRandomStrings(N)
b.StartTimer()
sort.Strings(ss)
}
}
func BenchmarkSlicesSortStrings(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
ss := makeRandomStrings(N)
b.StartTimer()
Sort(ss)
}
}
// These benchmarks compare sorting a slice of structs with sort.Sort vs.
// slices.SortFunc.
type myStruct struct {
a, b, c, d string
n int
}
type myStructs []*myStruct
func (s myStructs) Len() int { return len(s) }
func (s myStructs) Less(i, j int) bool { return s[i].n < s[j].n }
func (s myStructs) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func makeRandomStructs(n int) myStructs {
rand.Seed(42)
structs := make([]*myStruct, n)
for i := 0; i < n; i++ {
structs[i] = &myStruct{n: rand.Intn(n)}
}
return structs
}
func TestStructSorts(t *testing.T) {
ss := makeRandomStructs(200)
ss2 := make([]*myStruct, len(ss))
for i := range ss {
ss2[i] = &myStruct{n: ss[i].n}
}
sort.Sort(ss)
SortFunc(ss2, func(a, b *myStruct) int { return a.n - b.n })
for i := range ss {
if *ss[i] != *ss2[i] {
t.Fatalf("ints2 mismatch at %d; %v != %v", i, *ss[i], *ss2[i])
}
}
}
func BenchmarkSortStructs(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
ss := makeRandomStructs(N)
b.StartTimer()
sort.Sort(ss)
}
}
func BenchmarkSortFuncStructs(b *testing.B) {
cmpFunc := func(a, b *myStruct) int { return a.n - b.n }
for i := 0; i < b.N; i++ {
b.StopTimer()
ss := makeRandomStructs(N)
b.StartTimer()
SortFunc(ss, cmpFunc)
}
}
func BenchmarkBinarySearchFloats(b *testing.B) {
for _, size := range []int{16, 32, 64, 128, 512, 1024} {
b.Run(fmt.Sprintf("Size%d", size), func(b *testing.B) {
floats := make([]float64, size)
for i := range floats {
floats[i] = float64(i)
}
midpoint := len(floats) / 2
needle := (floats[midpoint] + floats[midpoint+1]) / 2
b.ResetTimer()
for i := 0; i < b.N; i++ {
BinarySearch(floats, needle)
}
})
}
}
func BenchmarkBinarySearchFuncStruct(b *testing.B) {
for _, size := range []int{16, 32, 64, 128, 512, 1024} {
b.Run(fmt.Sprintf("Size%d", size), func(b *testing.B) {
structs := make([]*myStruct, size)
for i := range structs {
structs[i] = &myStruct{n: i}
}
midpoint := len(structs) / 2
needle := &myStruct{n: (structs[midpoint].n + structs[midpoint+1].n) / 2}
lessFunc := func(a, b *myStruct) int { return a.n - b.n }
b.ResetTimer()
for i := 0; i < b.N; i++ {
BinarySearchFunc(structs, needle, lessFunc)
}
})
}
}

414
src/slices/sort_test.go Normal file
View File

@ -0,0 +1,414 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slices
import (
"cmp"
"fmt"
"math"
"math/rand"
"sort"
"strconv"
"strings"
"testing"
)
var ints = [...]int{74, 59, 238, -784, 9845, 959, 905, 0, 0, 42, 7586, -5467984, 7586}
var float64s = [...]float64{74.3, 59.0, math.Inf(1), 238.2, -784.0, 2.3, math.Inf(-1), 9845.768, -959.7485, 905, 7.8, 7.8, 74.3, 59.0, math.Inf(1), 238.2, -784.0, 2.3}
var float64sWithNaNs = [...]float64{74.3, 59.0, math.Inf(1), 238.2, -784.0, 2.3, math.NaN(), math.NaN(), math.Inf(-1), 9845.768, -959.7485, 905, 7.8, 7.8}
var strs = [...]string{"", "Hello", "foo", "bar", "foo", "f00", "%*&^*&^&", "***"}
func TestSortIntSlice(t *testing.T) {
data := Clone(ints[:])
Sort(data)
if !IsSorted(data) {
t.Errorf("sorted %v", ints)
t.Errorf(" got %v", data)
}
}
func TestSortFuncIntSlice(t *testing.T) {
data := Clone(ints[:])
SortFunc(data, func(a, b int) int { return a - b })
if !IsSorted(data) {
t.Errorf("sorted %v", ints)
t.Errorf(" got %v", data)
}
}
func TestSortFloat64Slice(t *testing.T) {
data := Clone(float64s[:])
Sort(data)
if !IsSorted(data) {
t.Errorf("sorted %v", float64s)
t.Errorf(" got %v", data)
}
}
func TestSortFloat64SliceWithNaNs(t *testing.T) {
data := float64sWithNaNs[:]
data2 := Clone(data)
Sort(data)
sort.Float64s(data2)
if !IsSorted(data) {
t.Error("IsSorted indicates data isn't sorted")
}
// Compare for equality using cmp.Compare, which considers NaNs equal.
if !EqualFunc(data, data2, func(a, b float64) bool { return cmp.Compare(a, b) == 0 }) {
t.Errorf("mismatch between Sort and sort.Float64: got %v, want %v", data, data2)
}
}
func TestSortStringSlice(t *testing.T) {
data := Clone(strs[:])
Sort(data)
if !IsSorted(data) {
t.Errorf("sorted %v", strs)
t.Errorf(" got %v", data)
}
}
func TestSortLarge_Random(t *testing.T) {
n := 1000000
if testing.Short() {
n /= 100
}
data := make([]int, n)
for i := 0; i < len(data); i++ {
data[i] = rand.Intn(100)
}
if IsSorted(data) {
t.Fatalf("terrible rand.rand")
}
Sort(data)
if !IsSorted(data) {
t.Errorf("sort didn't sort - 1M ints")
}
}
type intPair struct {
a, b int
}
type intPairs []intPair
// Pairs compare on a only.
func intPairCmp(x, y intPair) int {
return x.a - y.a
}
// Record initial order in B.
func (d intPairs) initB() {
for i := range d {
d[i].b = i
}
}
// InOrder checks if a-equal elements were not reordered.
func (d intPairs) inOrder() bool {
lastA, lastB := -1, 0
for i := 0; i < len(d); i++ {
if lastA != d[i].a {
lastA = d[i].a
lastB = d[i].b
continue
}
if d[i].b <= lastB {
return false
}
lastB = d[i].b
}
return true
}
func TestStability(t *testing.T) {
n, m := 100000, 1000
if testing.Short() {
n, m = 1000, 100
}
data := make(intPairs, n)
// random distribution
for i := 0; i < len(data); i++ {
data[i].a = rand.Intn(m)
}
if IsSortedFunc(data, intPairCmp) {
t.Fatalf("terrible rand.rand")
}
data.initB()
SortStableFunc(data, intPairCmp)
if !IsSortedFunc(data, intPairCmp) {
t.Errorf("Stable didn't sort %d ints", n)
}
if !data.inOrder() {
t.Errorf("Stable wasn't stable on %d ints", n)
}
// already sorted
data.initB()
SortStableFunc(data, intPairCmp)
if !IsSortedFunc(data, intPairCmp) {
t.Errorf("Stable shuffled sorted %d ints (order)", n)
}
if !data.inOrder() {
t.Errorf("Stable shuffled sorted %d ints (stability)", n)
}
// sorted reversed
for i := 0; i < len(data); i++ {
data[i].a = len(data) - i
}
data.initB()
SortStableFunc(data, intPairCmp)
if !IsSortedFunc(data, intPairCmp) {
t.Errorf("Stable didn't sort %d ints", n)
}
if !data.inOrder() {
t.Errorf("Stable wasn't stable on %d ints", n)
}
}
func TestMinMax(t *testing.T) {
intCmp := func(a, b int) int { return a - b }
tests := []struct {
data []int
wantMin int
wantMax int
}{
{[]int{7}, 7, 7},
{[]int{1, 2}, 1, 2},
{[]int{2, 1}, 1, 2},
{[]int{1, 2, 3}, 1, 3},
{[]int{3, 2, 1}, 1, 3},
{[]int{2, 1, 3}, 1, 3},
{[]int{2, 2, 3}, 2, 3},
{[]int{3, 2, 3}, 2, 3},
{[]int{0, 2, -9}, -9, 2},
}
for _, tt := range tests {
t.Run(fmt.Sprintf("%v", tt.data), func(t *testing.T) {
gotMin := Min(tt.data)
if gotMin != tt.wantMin {
t.Errorf("Min got %v, want %v", gotMin, tt.wantMin)
}
gotMinFunc := MinFunc(tt.data, intCmp)
if gotMinFunc != tt.wantMin {
t.Errorf("MinFunc got %v, want %v", gotMinFunc, tt.wantMin)
}
gotMax := Max(tt.data)
if gotMax != tt.wantMax {
t.Errorf("Max got %v, want %v", gotMax, tt.wantMax)
}
gotMaxFunc := MaxFunc(tt.data, intCmp)
if gotMaxFunc != tt.wantMax {
t.Errorf("MaxFunc got %v, want %v", gotMaxFunc, tt.wantMax)
}
})
}
}
func TestMinMaxNaNs(t *testing.T) {
fs := []float64{1.0, 999.9, 3.14, -400.4, -5.14}
if Min(fs) != -400.4 {
t.Errorf("got min %v, want -400.4", Min(fs))
}
if Max(fs) != 999.9 {
t.Errorf("got max %v, want 999.9", Max(fs))
}
// No matter which element of fs is replaced with a NaN, both Min and Max
// should propagate the NaN to their output.
for i := 0; i < len(fs); i++ {
testfs := Clone(fs)
testfs[i] = math.NaN()
fmin := Min(testfs)
if !math.IsNaN(fmin) {
t.Errorf("got min %v, want NaN", fmin)
}
fmax := Max(testfs)
if !math.IsNaN(fmax) {
t.Errorf("got max %v, want NaN", fmax)
}
}
}
func TestMinMaxPanics(t *testing.T) {
intCmp := func(a, b int) int { return a - b }
emptySlice := []int{}
if !panics(func() { Min(emptySlice) }) {
t.Errorf("Min([]): got no panic, want panic")
}
if !panics(func() { Max(emptySlice) }) {
t.Errorf("Max([]): got no panic, want panic")
}
if !panics(func() { MinFunc(emptySlice, intCmp) }) {
t.Errorf("MinFunc([]): got no panic, want panic")
}
if !panics(func() { MaxFunc(emptySlice, intCmp) }) {
t.Errorf("MaxFunc([]): got no panic, want panic")
}
}
func TestBinarySearch(t *testing.T) {
str1 := []string{"foo"}
str2 := []string{"ab", "ca"}
str3 := []string{"mo", "qo", "vo"}
str4 := []string{"ab", "ad", "ca", "xy"}
// slice with repeating elements
strRepeats := []string{"ba", "ca", "da", "da", "da", "ka", "ma", "ma", "ta"}
// slice with all element equal
strSame := []string{"xx", "xx", "xx"}
tests := []struct {
data []string
target string
wantPos int
wantFound bool
}{
{[]string{}, "foo", 0, false},
{[]string{}, "", 0, false},
{str1, "foo", 0, true},
{str1, "bar", 0, false},
{str1, "zx", 1, false},
{str2, "aa", 0, false},
{str2, "ab", 0, true},
{str2, "ad", 1, false},
{str2, "ca", 1, true},
{str2, "ra", 2, false},
{str3, "bb", 0, false},
{str3, "mo", 0, true},
{str3, "nb", 1, false},
{str3, "qo", 1, true},
{str3, "tr", 2, false},
{str3, "vo", 2, true},
{str3, "xr", 3, false},
{str4, "aa", 0, false},
{str4, "ab", 0, true},
{str4, "ac", 1, false},
{str4, "ad", 1, true},
{str4, "ax", 2, false},
{str4, "ca", 2, true},
{str4, "cc", 3, false},
{str4, "dd", 3, false},
{str4, "xy", 3, true},
{str4, "zz", 4, false},
{strRepeats, "da", 2, true},
{strRepeats, "db", 5, false},
{strRepeats, "ma", 6, true},
{strRepeats, "mb", 8, false},
{strSame, "xx", 0, true},
{strSame, "ab", 0, false},
{strSame, "zz", 3, false},
}
for _, tt := range tests {
t.Run(tt.target, func(t *testing.T) {
{
pos, found := BinarySearch(tt.data, tt.target)
if pos != tt.wantPos || found != tt.wantFound {
t.Errorf("BinarySearch got (%v, %v), want (%v, %v)", pos, found, tt.wantPos, tt.wantFound)
}
}
{
pos, found := BinarySearchFunc(tt.data, tt.target, strings.Compare)
if pos != tt.wantPos || found != tt.wantFound {
t.Errorf("BinarySearchFunc got (%v, %v), want (%v, %v)", pos, found, tt.wantPos, tt.wantFound)
}
}
})
}
}
func TestBinarySearchInts(t *testing.T) {
data := []int{20, 30, 40, 50, 60, 70, 80, 90}
tests := []struct {
target int
wantPos int
wantFound bool
}{
{20, 0, true},
{23, 1, false},
{43, 3, false},
{80, 6, true},
}
for _, tt := range tests {
t.Run(strconv.Itoa(tt.target), func(t *testing.T) {
{
pos, found := BinarySearch(data, tt.target)
if pos != tt.wantPos || found != tt.wantFound {
t.Errorf("BinarySearch got (%v, %v), want (%v, %v)", pos, found, tt.wantPos, tt.wantFound)
}
}
{
cmp := func(a, b int) int {
return a - b
}
pos, found := BinarySearchFunc(data, tt.target, cmp)
if pos != tt.wantPos || found != tt.wantFound {
t.Errorf("BinarySearchFunc got (%v, %v), want (%v, %v)", pos, found, tt.wantPos, tt.wantFound)
}
}
})
}
}
func TestBinarySearchFloats(t *testing.T) {
data := []float64{math.NaN(), -0.25, 0.0, 1.4}
tests := []struct {
target float64
wantPos int
wantFound bool
}{
{math.NaN(), 0, true},
{math.Inf(-1), 1, false},
{-0.25, 1, true},
{0.0, 2, true},
{1.4, 3, true},
{1.5, 4, false},
}
for _, tt := range tests {
t.Run(fmt.Sprintf("%v", tt.target), func(t *testing.T) {
{
pos, found := BinarySearch(data, tt.target)
if pos != tt.wantPos || found != tt.wantFound {
t.Errorf("BinarySearch got (%v, %v), want (%v, %v)", pos, found, tt.wantPos, tt.wantFound)
}
}
})
}
}
func TestBinarySearchFunc(t *testing.T) {
data := []int{1, 10, 11, 2} // sorted lexicographically
cmp := func(a int, b string) int {
return strings.Compare(strconv.Itoa(a), b)
}
pos, found := BinarySearchFunc(data, "2", cmp)
if pos != 3 || !found {
t.Errorf("BinarySearchFunc(%v, %q, cmp) = %v, %v, want %v, %v", data, "2", pos, found, 3, true)
}
}

479
src/slices/zsortanyfunc.go Normal file
View File

@ -0,0 +1,479 @@
// Code generated by gen_sort_variants.go; DO NOT EDIT.
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slices
// insertionSortCmpFunc sorts data[a:b] using insertion sort.
func insertionSortCmpFunc[E any](data []E, a, b int, cmp func(a, b E) int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && (cmp(data[j], data[j-1]) < 0); j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// siftDownCmpFunc implements the heap property on data[lo:hi].
// first is an offset into the array where the root of the heap lies.
func siftDownCmpFunc[E any](data []E, lo, hi, first int, cmp func(a, b E) int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && (cmp(data[first+child], data[first+child+1]) < 0) {
child++
}
if !(cmp(data[first+root], data[first+child]) < 0) {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func heapSortCmpFunc[E any](data []E, a, b int, cmp func(a, b E) int) {
first := a
lo := 0
hi := b - a
// Build heap with greatest element at top.
for i := (hi - 1) / 2; i >= 0; i-- {
siftDownCmpFunc(data, i, hi, first, cmp)
}
// Pop elements, largest first, into end of data.
for i := hi - 1; i >= 0; i-- {
data[first], data[first+i] = data[first+i], data[first]
siftDownCmpFunc(data, lo, i, first, cmp)
}
}
// pdqsortCmpFunc sorts data[a:b].
// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
// C++ implementation: https://github.com/orlp/pdqsort
// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
func pdqsortCmpFunc[E any](data []E, a, b, limit int, cmp func(a, b E) int) {
const maxInsertion = 12
var (
wasBalanced = true // whether the last partitioning was reasonably balanced
wasPartitioned = true // whether the slice was already partitioned
)
for {
length := b - a
if length <= maxInsertion {
insertionSortCmpFunc(data, a, b, cmp)
return
}
// Fall back to heapsort if too many bad choices were made.
if limit == 0 {
heapSortCmpFunc(data, a, b, cmp)
return
}
// If the last partitioning was imbalanced, we need to breaking patterns.
if !wasBalanced {
breakPatternsCmpFunc(data, a, b, cmp)
limit--
}
pivot, hint := choosePivotCmpFunc(data, a, b, cmp)
if hint == decreasingHint {
reverseRangeCmpFunc(data, a, b, cmp)
// The chosen pivot was pivot-a elements after the start of the array.
// After reversing it is pivot-a elements before the end of the array.
// The idea came from Rust's implementation.
pivot = (b - 1) - (pivot - a)
hint = increasingHint
}
// The slice is likely already sorted.
if wasBalanced && wasPartitioned && hint == increasingHint {
if partialInsertionSortCmpFunc(data, a, b, cmp) {
return
}
}
// Probably the slice contains many duplicate elements, partition the slice into
// elements equal to and elements greater than the pivot.
if a > 0 && !(cmp(data[a-1], data[pivot]) < 0) {
mid := partitionEqualCmpFunc(data, a, b, pivot, cmp)
a = mid
continue
}
mid, alreadyPartitioned := partitionCmpFunc(data, a, b, pivot, cmp)
wasPartitioned = alreadyPartitioned
leftLen, rightLen := mid-a, b-mid
balanceThreshold := length / 8
if leftLen < rightLen {
wasBalanced = leftLen >= balanceThreshold
pdqsortCmpFunc(data, a, mid, limit, cmp)
a = mid + 1
} else {
wasBalanced = rightLen >= balanceThreshold
pdqsortCmpFunc(data, mid+1, b, limit, cmp)
b = mid
}
}
}
// partitionCmpFunc does one quicksort partition.
// Let p = data[pivot]
// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
// On return, data[newpivot] = p
func partitionCmpFunc[E any](data []E, a, b, pivot int, cmp func(a, b E) int) (newpivot int, alreadyPartitioned bool) {
data[a], data[pivot] = data[pivot], data[a]
i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
for i <= j && (cmp(data[i], data[a]) < 0) {
i++
}
for i <= j && !(cmp(data[j], data[a]) < 0) {
j--
}
if i > j {
data[j], data[a] = data[a], data[j]
return j, true
}
data[i], data[j] = data[j], data[i]
i++
j--
for {
for i <= j && (cmp(data[i], data[a]) < 0) {
i++
}
for i <= j && !(cmp(data[j], data[a]) < 0) {
j--
}
if i > j {
break
}
data[i], data[j] = data[j], data[i]
i++
j--
}
data[j], data[a] = data[a], data[j]
return j, false
}
// partitionEqualCmpFunc partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
// It assumed that data[a:b] does not contain elements smaller than the data[pivot].
func partitionEqualCmpFunc[E any](data []E, a, b, pivot int, cmp func(a, b E) int) (newpivot int) {
data[a], data[pivot] = data[pivot], data[a]
i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
for {
for i <= j && !(cmp(data[a], data[i]) < 0) {
i++
}
for i <= j && (cmp(data[a], data[j]) < 0) {
j--
}
if i > j {
break
}
data[i], data[j] = data[j], data[i]
i++
j--
}
return i
}
// partialInsertionSortCmpFunc partially sorts a slice, returns true if the slice is sorted at the end.
func partialInsertionSortCmpFunc[E any](data []E, a, b int, cmp func(a, b E) int) bool {
const (
maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
shortestShifting = 50 // don't shift any elements on short arrays
)
i := a + 1
for j := 0; j < maxSteps; j++ {
for i < b && !(cmp(data[i], data[i-1]) < 0) {
i++
}
if i == b {
return true
}
if b-a < shortestShifting {
return false
}
data[i], data[i-1] = data[i-1], data[i]
// Shift the smaller one to the left.
if i-a >= 2 {
for j := i - 1; j >= 1; j-- {
if !(cmp(data[j], data[j-1]) < 0) {
break
}
data[j], data[j-1] = data[j-1], data[j]
}
}
// Shift the greater one to the right.
if b-i >= 2 {
for j := i + 1; j < b; j++ {
if !(cmp(data[j], data[j-1]) < 0) {
break
}
data[j], data[j-1] = data[j-1], data[j]
}
}
}
return false
}
// breakPatternsCmpFunc scatters some elements around in an attempt to break some patterns
// that might cause imbalanced partitions in quicksort.
func breakPatternsCmpFunc[E any](data []E, a, b int, cmp func(a, b E) int) {
length := b - a
if length >= 8 {
random := xorshift(length)
modulus := nextPowerOfTwo(length)
for idx := a + (length/4)*2 - 1; idx <= a+(length/4)*2+1; idx++ {
other := int(uint(random.Next()) & (modulus - 1))
if other >= length {
other -= length
}
data[idx], data[a+other] = data[a+other], data[idx]
}
}
}
// choosePivotCmpFunc chooses a pivot in data[a:b].
//
// [0,8): chooses a static pivot.
// [8,shortestNinther): uses the simple median-of-three method.
// [shortestNinther,∞): uses the Tukey ninther method.
func choosePivotCmpFunc[E any](data []E, a, b int, cmp func(a, b E) int) (pivot int, hint sortedHint) {
const (
shortestNinther = 50
maxSwaps = 4 * 3
)
l := b - a
var (
swaps int
i = a + l/4*1
j = a + l/4*2
k = a + l/4*3
)
if l >= 8 {
if l >= shortestNinther {
// Tukey ninther method, the idea came from Rust's implementation.
i = medianAdjacentCmpFunc(data, i, &swaps, cmp)
j = medianAdjacentCmpFunc(data, j, &swaps, cmp)
k = medianAdjacentCmpFunc(data, k, &swaps, cmp)
}
// Find the median among i, j, k and stores it into j.
j = medianCmpFunc(data, i, j, k, &swaps, cmp)
}
switch swaps {
case 0:
return j, increasingHint
case maxSwaps:
return j, decreasingHint
default:
return j, unknownHint
}
}
// order2CmpFunc returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
func order2CmpFunc[E any](data []E, a, b int, swaps *int, cmp func(a, b E) int) (int, int) {
if cmp(data[b], data[a]) < 0 {
*swaps++
return b, a
}
return a, b
}
// medianCmpFunc returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
func medianCmpFunc[E any](data []E, a, b, c int, swaps *int, cmp func(a, b E) int) int {
a, b = order2CmpFunc(data, a, b, swaps, cmp)
b, c = order2CmpFunc(data, b, c, swaps, cmp)
a, b = order2CmpFunc(data, a, b, swaps, cmp)
return b
}
// medianAdjacentCmpFunc finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
func medianAdjacentCmpFunc[E any](data []E, a int, swaps *int, cmp func(a, b E) int) int {
return medianCmpFunc(data, a-1, a, a+1, swaps, cmp)
}
func reverseRangeCmpFunc[E any](data []E, a, b int, cmp func(a, b E) int) {
i := a
j := b - 1
for i < j {
data[i], data[j] = data[j], data[i]
i++
j--
}
}
func swapRangeCmpFunc[E any](data []E, a, b, n int, cmp func(a, b E) int) {
for i := 0; i < n; i++ {
data[a+i], data[b+i] = data[b+i], data[a+i]
}
}
func stableCmpFunc[E any](data []E, n int, cmp func(a, b E) int) {
blockSize := 20 // must be > 0
a, b := 0, blockSize
for b <= n {
insertionSortCmpFunc(data, a, b, cmp)
a = b
b += blockSize
}
insertionSortCmpFunc(data, a, n, cmp)
for blockSize < n {
a, b = 0, 2*blockSize
for b <= n {
symMergeCmpFunc(data, a, a+blockSize, b, cmp)
a = b
b += 2 * blockSize
}
if m := a + blockSize; m < n {
symMergeCmpFunc(data, a, m, n, cmp)
}
blockSize *= 2
}
}
// symMergeCmpFunc merges the two sorted subsequences data[a:m] and data[m:b] using
// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
// Computer Science, pages 714-723. Springer, 2004.
//
// Let M = m-a and N = b-n. Wolog M < N.
// The recursion depth is bound by ceil(log(N+M)).
// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
//
// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
// in the paper carries through for Swap operations, especially as the block
// swapping rotate uses only O(M+N) Swaps.
//
// symMerge assumes non-degenerate arguments: a < m && m < b.
// Having the caller check this condition eliminates many leaf recursion calls,
// which improves performance.
func symMergeCmpFunc[E any](data []E, a, m, b int, cmp func(a, b E) int) {
// Avoid unnecessary recursions of symMerge
// by direct insertion of data[a] into data[m:b]
// if data[a:m] only contains one element.
if m-a == 1 {
// Use binary search to find the lowest index i
// such that data[i] >= data[a] for m <= i < b.
// Exit the search loop with i == b in case no such index exists.
i := m
j := b
for i < j {
h := int(uint(i+j) >> 1)
if cmp(data[h], data[a]) < 0 {
i = h + 1
} else {
j = h
}
}
// Swap values until data[a] reaches the position before i.
for k := a; k < i-1; k++ {
data[k], data[k+1] = data[k+1], data[k]
}
return
}
// Avoid unnecessary recursions of symMerge
// by direct insertion of data[m] into data[a:m]
// if data[m:b] only contains one element.
if b-m == 1 {
// Use binary search to find the lowest index i
// such that data[i] > data[m] for a <= i < m.
// Exit the search loop with i == m in case no such index exists.
i := a
j := m
for i < j {
h := int(uint(i+j) >> 1)
if !(cmp(data[m], data[h]) < 0) {
i = h + 1
} else {
j = h
}
}
// Swap values until data[m] reaches the position i.
for k := m; k > i; k-- {
data[k], data[k-1] = data[k-1], data[k]
}
return
}
mid := int(uint(a+b) >> 1)
n := mid + m
var start, r int
if m > mid {
start = n - b
r = mid
} else {
start = a
r = m
}
p := n - 1
for start < r {
c := int(uint(start+r) >> 1)
if !(cmp(data[p-c], data[c]) < 0) {
start = c + 1
} else {
r = c
}
}
end := n - start
if start < m && m < end {
rotateCmpFunc(data, start, m, end, cmp)
}
if a < start && start < mid {
symMergeCmpFunc(data, a, start, mid, cmp)
}
if mid < end && end < b {
symMergeCmpFunc(data, mid, end, b, cmp)
}
}
// rotateCmpFunc rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
// Data of the form 'x u v y' is changed to 'x v u y'.
// rotate performs at most b-a many calls to data.Swap,
// and it assumes non-degenerate arguments: a < m && m < b.
func rotateCmpFunc[E any](data []E, a, m, b int, cmp func(a, b E) int) {
i := m - a
j := b - m
for i != j {
if i > j {
swapRangeCmpFunc(data, m-i, m, j, cmp)
i -= j
} else {
swapRangeCmpFunc(data, m-i, m+j-i, i, cmp)
j -= i
}
}
// i == j
swapRangeCmpFunc(data, m-i, m, i, cmp)
}

481
src/slices/zsortordered.go Normal file
View File

@ -0,0 +1,481 @@
// Code generated by gen_sort_variants.go; DO NOT EDIT.
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slices
import "cmp"
// insertionSortOrdered sorts data[a:b] using insertion sort.
func insertionSortOrdered[E cmp.Ordered](data []E, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && cmp.Less(data[j], data[j-1]); j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// siftDownOrdered implements the heap property on data[lo:hi].
// first is an offset into the array where the root of the heap lies.
func siftDownOrdered[E cmp.Ordered](data []E, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && cmp.Less(data[first+child], data[first+child+1]) {
child++
}
if !cmp.Less(data[first+root], data[first+child]) {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func heapSortOrdered[E cmp.Ordered](data []E, a, b int) {
first := a
lo := 0
hi := b - a
// Build heap with greatest element at top.
for i := (hi - 1) / 2; i >= 0; i-- {
siftDownOrdered(data, i, hi, first)
}
// Pop elements, largest first, into end of data.
for i := hi - 1; i >= 0; i-- {
data[first], data[first+i] = data[first+i], data[first]
siftDownOrdered(data, lo, i, first)
}
}
// pdqsortOrdered sorts data[a:b].
// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
// C++ implementation: https://github.com/orlp/pdqsort
// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
func pdqsortOrdered[E cmp.Ordered](data []E, a, b, limit int) {
const maxInsertion = 12
var (
wasBalanced = true // whether the last partitioning was reasonably balanced
wasPartitioned = true // whether the slice was already partitioned
)
for {
length := b - a
if length <= maxInsertion {
insertionSortOrdered(data, a, b)
return
}
// Fall back to heapsort if too many bad choices were made.
if limit == 0 {
heapSortOrdered(data, a, b)
return
}
// If the last partitioning was imbalanced, we need to breaking patterns.
if !wasBalanced {
breakPatternsOrdered(data, a, b)
limit--
}
pivot, hint := choosePivotOrdered(data, a, b)
if hint == decreasingHint {
reverseRangeOrdered(data, a, b)
// The chosen pivot was pivot-a elements after the start of the array.
// After reversing it is pivot-a elements before the end of the array.
// The idea came from Rust's implementation.
pivot = (b - 1) - (pivot - a)
hint = increasingHint
}
// The slice is likely already sorted.
if wasBalanced && wasPartitioned && hint == increasingHint {
if partialInsertionSortOrdered(data, a, b) {
return
}
}
// Probably the slice contains many duplicate elements, partition the slice into
// elements equal to and elements greater than the pivot.
if a > 0 && !cmp.Less(data[a-1], data[pivot]) {
mid := partitionEqualOrdered(data, a, b, pivot)
a = mid
continue
}
mid, alreadyPartitioned := partitionOrdered(data, a, b, pivot)
wasPartitioned = alreadyPartitioned
leftLen, rightLen := mid-a, b-mid
balanceThreshold := length / 8
if leftLen < rightLen {
wasBalanced = leftLen >= balanceThreshold
pdqsortOrdered(data, a, mid, limit)
a = mid + 1
} else {
wasBalanced = rightLen >= balanceThreshold
pdqsortOrdered(data, mid+1, b, limit)
b = mid
}
}
}
// partitionOrdered does one quicksort partition.
// Let p = data[pivot]
// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
// On return, data[newpivot] = p
func partitionOrdered[E cmp.Ordered](data []E, a, b, pivot int) (newpivot int, alreadyPartitioned bool) {
data[a], data[pivot] = data[pivot], data[a]
i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
for i <= j && cmp.Less(data[i], data[a]) {
i++
}
for i <= j && !cmp.Less(data[j], data[a]) {
j--
}
if i > j {
data[j], data[a] = data[a], data[j]
return j, true
}
data[i], data[j] = data[j], data[i]
i++
j--
for {
for i <= j && cmp.Less(data[i], data[a]) {
i++
}
for i <= j && !cmp.Less(data[j], data[a]) {
j--
}
if i > j {
break
}
data[i], data[j] = data[j], data[i]
i++
j--
}
data[j], data[a] = data[a], data[j]
return j, false
}
// partitionEqualOrdered partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
// It assumed that data[a:b] does not contain elements smaller than the data[pivot].
func partitionEqualOrdered[E cmp.Ordered](data []E, a, b, pivot int) (newpivot int) {
data[a], data[pivot] = data[pivot], data[a]
i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
for {
for i <= j && !cmp.Less(data[a], data[i]) {
i++
}
for i <= j && cmp.Less(data[a], data[j]) {
j--
}
if i > j {
break
}
data[i], data[j] = data[j], data[i]
i++
j--
}
return i
}
// partialInsertionSortOrdered partially sorts a slice, returns true if the slice is sorted at the end.
func partialInsertionSortOrdered[E cmp.Ordered](data []E, a, b int) bool {
const (
maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
shortestShifting = 50 // don't shift any elements on short arrays
)
i := a + 1
for j := 0; j < maxSteps; j++ {
for i < b && !cmp.Less(data[i], data[i-1]) {
i++
}
if i == b {
return true
}
if b-a < shortestShifting {
return false
}
data[i], data[i-1] = data[i-1], data[i]
// Shift the smaller one to the left.
if i-a >= 2 {
for j := i - 1; j >= 1; j-- {
if !cmp.Less(data[j], data[j-1]) {
break
}
data[j], data[j-1] = data[j-1], data[j]
}
}
// Shift the greater one to the right.
if b-i >= 2 {
for j := i + 1; j < b; j++ {
if !cmp.Less(data[j], data[j-1]) {
break
}
data[j], data[j-1] = data[j-1], data[j]
}
}
}
return false
}
// breakPatternsOrdered scatters some elements around in an attempt to break some patterns
// that might cause imbalanced partitions in quicksort.
func breakPatternsOrdered[E cmp.Ordered](data []E, a, b int) {
length := b - a
if length >= 8 {
random := xorshift(length)
modulus := nextPowerOfTwo(length)
for idx := a + (length/4)*2 - 1; idx <= a+(length/4)*2+1; idx++ {
other := int(uint(random.Next()) & (modulus - 1))
if other >= length {
other -= length
}
data[idx], data[a+other] = data[a+other], data[idx]
}
}
}
// choosePivotOrdered chooses a pivot in data[a:b].
//
// [0,8): chooses a static pivot.
// [8,shortestNinther): uses the simple median-of-three method.
// [shortestNinther,∞): uses the Tukey ninther method.
func choosePivotOrdered[E cmp.Ordered](data []E, a, b int) (pivot int, hint sortedHint) {
const (
shortestNinther = 50
maxSwaps = 4 * 3
)
l := b - a
var (
swaps int
i = a + l/4*1
j = a + l/4*2
k = a + l/4*3
)
if l >= 8 {
if l >= shortestNinther {
// Tukey ninther method, the idea came from Rust's implementation.
i = medianAdjacentOrdered(data, i, &swaps)
j = medianAdjacentOrdered(data, j, &swaps)
k = medianAdjacentOrdered(data, k, &swaps)
}
// Find the median among i, j, k and stores it into j.
j = medianOrdered(data, i, j, k, &swaps)
}
switch swaps {
case 0:
return j, increasingHint
case maxSwaps:
return j, decreasingHint
default:
return j, unknownHint
}
}
// order2Ordered returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
func order2Ordered[E cmp.Ordered](data []E, a, b int, swaps *int) (int, int) {
if cmp.Less(data[b], data[a]) {
*swaps++
return b, a
}
return a, b
}
// medianOrdered returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
func medianOrdered[E cmp.Ordered](data []E, a, b, c int, swaps *int) int {
a, b = order2Ordered(data, a, b, swaps)
b, c = order2Ordered(data, b, c, swaps)
a, b = order2Ordered(data, a, b, swaps)
return b
}
// medianAdjacentOrdered finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
func medianAdjacentOrdered[E cmp.Ordered](data []E, a int, swaps *int) int {
return medianOrdered(data, a-1, a, a+1, swaps)
}
func reverseRangeOrdered[E cmp.Ordered](data []E, a, b int) {
i := a
j := b - 1
for i < j {
data[i], data[j] = data[j], data[i]
i++
j--
}
}
func swapRangeOrdered[E cmp.Ordered](data []E, a, b, n int) {
for i := 0; i < n; i++ {
data[a+i], data[b+i] = data[b+i], data[a+i]
}
}
func stableOrdered[E cmp.Ordered](data []E, n int) {
blockSize := 20 // must be > 0
a, b := 0, blockSize
for b <= n {
insertionSortOrdered(data, a, b)
a = b
b += blockSize
}
insertionSortOrdered(data, a, n)
for blockSize < n {
a, b = 0, 2*blockSize
for b <= n {
symMergeOrdered(data, a, a+blockSize, b)
a = b
b += 2 * blockSize
}
if m := a + blockSize; m < n {
symMergeOrdered(data, a, m, n)
}
blockSize *= 2
}
}
// symMergeOrdered merges the two sorted subsequences data[a:m] and data[m:b] using
// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
// Computer Science, pages 714-723. Springer, 2004.
//
// Let M = m-a and N = b-n. Wolog M < N.
// The recursion depth is bound by ceil(log(N+M)).
// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
//
// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
// in the paper carries through for Swap operations, especially as the block
// swapping rotate uses only O(M+N) Swaps.
//
// symMerge assumes non-degenerate arguments: a < m && m < b.
// Having the caller check this condition eliminates many leaf recursion calls,
// which improves performance.
func symMergeOrdered[E cmp.Ordered](data []E, a, m, b int) {
// Avoid unnecessary recursions of symMerge
// by direct insertion of data[a] into data[m:b]
// if data[a:m] only contains one element.
if m-a == 1 {
// Use binary search to find the lowest index i
// such that data[i] >= data[a] for m <= i < b.
// Exit the search loop with i == b in case no such index exists.
i := m
j := b
for i < j {
h := int(uint(i+j) >> 1)
if cmp.Less(data[h], data[a]) {
i = h + 1
} else {
j = h
}
}
// Swap values until data[a] reaches the position before i.
for k := a; k < i-1; k++ {
data[k], data[k+1] = data[k+1], data[k]
}
return
}
// Avoid unnecessary recursions of symMerge
// by direct insertion of data[m] into data[a:m]
// if data[m:b] only contains one element.
if b-m == 1 {
// Use binary search to find the lowest index i
// such that data[i] > data[m] for a <= i < m.
// Exit the search loop with i == m in case no such index exists.
i := a
j := m
for i < j {
h := int(uint(i+j) >> 1)
if !cmp.Less(data[m], data[h]) {
i = h + 1
} else {
j = h
}
}
// Swap values until data[m] reaches the position i.
for k := m; k > i; k-- {
data[k], data[k-1] = data[k-1], data[k]
}
return
}
mid := int(uint(a+b) >> 1)
n := mid + m
var start, r int
if m > mid {
start = n - b
r = mid
} else {
start = a
r = m
}
p := n - 1
for start < r {
c := int(uint(start+r) >> 1)
if !cmp.Less(data[p-c], data[c]) {
start = c + 1
} else {
r = c
}
}
end := n - start
if start < m && m < end {
rotateOrdered(data, start, m, end)
}
if a < start && start < mid {
symMergeOrdered(data, a, start, mid)
}
if mid < end && end < b {
symMergeOrdered(data, mid, end, b)
}
}
// rotateOrdered rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
// Data of the form 'x u v y' is changed to 'x v u y'.
// rotate performs at most b-a many calls to data.Swap,
// and it assumes non-degenerate arguments: a < m && m < b.
func rotateOrdered[E cmp.Ordered](data []E, a, m, b int) {
i := m - a
j := b - m
for i != j {
if i > j {
swapRangeOrdered(data, m-i, m, j)
i -= j
} else {
swapRangeOrdered(data, m-i, m+j-i, i)
j -= i
}
}
// i == j
swapRangeOrdered(data, m-i, m, i)
}

View File

@ -77,15 +77,15 @@ func main() {
Name: "generic_ordered",
Path: "zsortordered.go",
Package: "slices",
Imports: "import \"constraints\"\n",
Imports: "import \"cmp\"\n",
FuncSuffix: "Ordered",
TypeParam: "[E constraints.Ordered]",
TypeParam: "[E cmp.Ordered]",
ExtraParam: "",
ExtraArg: "",
DataType: "[]E",
Funcs: template.FuncMap{
"Less": func(name, i, j string) string {
return fmt.Sprintf("(%s[%s] < %s[%s])", name, i, name, j)
return fmt.Sprintf("cmp.Less(%s[%s], %s[%s])", name, i, name, j)
},
"Swap": func(name, i, j string) string {
return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i)
@ -97,14 +97,14 @@ func main() {
Name: "generic_func",
Path: "zsortanyfunc.go",
Package: "slices",
FuncSuffix: "LessFunc",
FuncSuffix: "CmpFunc",
TypeParam: "[E any]",
ExtraParam: ", less func(a, b E) bool",
ExtraArg: ", less",
ExtraParam: ", cmp func(a, b E) int",
ExtraArg: ", cmp",
DataType: "[]E",
Funcs: template.FuncMap{
"Less": func(name, i, j string) string {
return fmt.Sprintf("less(%s[%s], %s[%s])", name, i, name, j)
return fmt.Sprintf("(cmp(%s[%s], %s[%s]) < 0)", name, i, name, j)
},
"Swap": func(name, i, j string) string {
return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i)