mirror of
https://github.com/golang/go
synced 2024-11-06 15:36:24 -07:00
72e77a7f41
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm. - In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices). The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf This CL is inspired by both C++ implementation and Rust implementation. - C++ implementation: https://github.com/orlp/pdqsort - Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/ For #50154 name old time/op new time/op delta SearchWrappers-16 72.8ns ± 3% 75.1ns ± 2% +3.25% (p=0.000 n=20+10) SortString1K-16 85.2µs ± 3% 86.2µs ± 5% ~ (p=0.247 n=19+10) SortString1K_Slice-16 84.6µs ± 4% 86.1µs ± 4% ~ (p=0.120 n=20+10) StableString1K-16 112µs ± 5% 112µs ± 5% ~ (p=0.604 n=19+10) SortInt1K-16 44.8µs ± 3% 43.2µs ± 2% -3.68% (p=0.000 n=20+10) SortInt1K_Sorted-16 28.2µs ± 3% 3.3µs ± 3% -88.16% (p=0.000 n=19+10) SortInt1K_Reversed-16 29.4µs ± 3% 4.8µs ± 2% -83.59% (p=0.000 n=20+10) SortInt1K_Mod8-16 25.1µs ± 2% 20.0µs ± 2% -20.35% (p=0.000 n=18+10) StableInt1K-16 51.3µs ± 3% 50.9µs ± 2% ~ (p=0.562 n=20+9) StableInt1K_Slice-16 49.5µs ± 2% 50.7µs ± 4% +2.55% (p=0.009 n=19+10) SortInt64K-16 4.73ms ± 3% 4.49ms ± 4% -5.08% (p=0.000 n=20+10) SortInt64K_Slice-16 4.51ms ± 3% 4.35ms ± 1% -3.42% (p=0.000 n=20+8) StableInt64K-16 4.85ms ± 2% 4.82ms ± 2% ~ (p=0.267 n=20+10) Sort1e2-16 27.9µs ± 1% 28.1µs ± 2% ~ (p=0.198 n=20+10) Stable1e2-16 56.6µs ± 2% 55.0µs ± 2% -2.88% (p=0.000 n=20+10) Sort1e4-16 5.51ms ± 1% 5.36ms ± 1% -2.58% (p=0.000 n=19+9) Stable1e4-16 17.8ms ± 1% 17.3ms ± 1% -2.40% (p=0.000 n=20+10) Sort1e6-16 833ms ± 1% 807ms ± 1% -3.02% (p=0.000 n=20+10) Stable1e6-16 3.49s ± 2% 3.44s ± 1% -1.41% (p=0.001 n=20+10) Change-Id: Iecded047d237b9330b5a4101001a5fdc2f50646a Reviewed-on: https://go-review.googlesource.com/c/go/+/371574 Reviewed-by: Emmanuel Odeke <emmanuel@orijtech.com> Run-TryBot: Ian Lance Taylor <iant@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Keith Randall <khr@golang.org> Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Eli Bendersky <eliben@google.com>
263 lines
9.4 KiB
Go
263 lines
9.4 KiB
Go
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
//go:generate go run gen_sort_variants.go
|
|
|
|
// Package sort provides primitives for sorting slices and user-defined collections.
|
|
package sort
|
|
|
|
import "math/bits"
|
|
|
|
// An implementation of Interface can be sorted by the routines in this package.
|
|
// The methods refer to elements of the underlying collection by integer index.
|
|
type Interface interface {
|
|
// Len is the number of elements in the collection.
|
|
Len() int
|
|
|
|
// Less reports whether the element with index i
|
|
// must sort before the element with index j.
|
|
//
|
|
// If both Less(i, j) and Less(j, i) are false,
|
|
// then the elements at index i and j are considered equal.
|
|
// Sort may place equal elements in any order in the final result,
|
|
// while Stable preserves the original input order of equal elements.
|
|
//
|
|
// Less must describe a transitive ordering:
|
|
// - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well.
|
|
// - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well.
|
|
//
|
|
// Note that floating-point comparison (the < operator on float32 or float64 values)
|
|
// is not a transitive ordering when not-a-number (NaN) values are involved.
|
|
// See Float64Slice.Less for a correct implementation for floating-point values.
|
|
Less(i, j int) bool
|
|
|
|
// Swap swaps the elements with indexes i and j.
|
|
Swap(i, j int)
|
|
}
|
|
|
|
// Sort sorts data in ascending order as determined by the Less method.
|
|
// It makes one call to data.Len to determine n and O(n*log(n)) calls to
|
|
// data.Less and data.Swap. The sort is not guaranteed to be stable.
|
|
func Sort(data Interface) {
|
|
n := data.Len()
|
|
if n <= 1 {
|
|
return
|
|
}
|
|
limit := bits.Len(uint(n))
|
|
pdqsort(data, 0, n, limit)
|
|
}
|
|
|
|
type sortedHint int // hint for pdqsort when choosing the pivot
|
|
|
|
const (
|
|
unknownHint sortedHint = iota
|
|
increasingHint
|
|
decreasingHint
|
|
)
|
|
|
|
// xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
|
|
type xorshift uint64
|
|
|
|
func (r *xorshift) Next() uint64 {
|
|
*r ^= *r << 13
|
|
*r ^= *r >> 17
|
|
*r ^= *r << 5
|
|
return uint64(*r)
|
|
}
|
|
|
|
func nextPowerOfTwo(length int) uint {
|
|
shift := uint(bits.Len(uint(length)))
|
|
return uint(1 << shift)
|
|
}
|
|
|
|
// lessSwap is a pair of Less and Swap function for use with the
|
|
// auto-generated func-optimized variant of sort.go in
|
|
// zfuncversion.go.
|
|
type lessSwap struct {
|
|
Less func(i, j int) bool
|
|
Swap func(i, j int)
|
|
}
|
|
|
|
type reverse struct {
|
|
// This embedded Interface permits Reverse to use the methods of
|
|
// another Interface implementation.
|
|
Interface
|
|
}
|
|
|
|
// Less returns the opposite of the embedded implementation's Less method.
|
|
func (r reverse) Less(i, j int) bool {
|
|
return r.Interface.Less(j, i)
|
|
}
|
|
|
|
// Reverse returns the reverse order for data.
|
|
func Reverse(data Interface) Interface {
|
|
return &reverse{data}
|
|
}
|
|
|
|
// IsSorted reports whether data is sorted.
|
|
func IsSorted(data Interface) bool {
|
|
n := data.Len()
|
|
for i := n - 1; i > 0; i-- {
|
|
if data.Less(i, i-1) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Convenience types for common cases
|
|
|
|
// IntSlice attaches the methods of Interface to []int, sorting in increasing order.
|
|
type IntSlice []int
|
|
|
|
func (x IntSlice) Len() int { return len(x) }
|
|
func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] }
|
|
func (x IntSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
|
|
|
// Sort is a convenience method: x.Sort() calls Sort(x).
|
|
func (x IntSlice) Sort() { Sort(x) }
|
|
|
|
// Float64Slice implements Interface for a []float64, sorting in increasing order,
|
|
// with not-a-number (NaN) values ordered before other values.
|
|
type Float64Slice []float64
|
|
|
|
func (x Float64Slice) Len() int { return len(x) }
|
|
|
|
// Less reports whether x[i] should be ordered before x[j], as required by the sort Interface.
|
|
// Note that floating-point comparison by itself is not a transitive relation: it does not
|
|
// report a consistent ordering for not-a-number (NaN) values.
|
|
// This implementation of Less places NaN values before any others, by using:
|
|
//
|
|
// x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j]))
|
|
func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) }
|
|
func (x Float64Slice) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
|
|
|
// isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
|
|
func isNaN(f float64) bool {
|
|
return f != f
|
|
}
|
|
|
|
// Sort is a convenience method: x.Sort() calls Sort(x).
|
|
func (x Float64Slice) Sort() { Sort(x) }
|
|
|
|
// StringSlice attaches the methods of Interface to []string, sorting in increasing order.
|
|
type StringSlice []string
|
|
|
|
func (x StringSlice) Len() int { return len(x) }
|
|
func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] }
|
|
func (x StringSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
|
|
|
// Sort is a convenience method: x.Sort() calls Sort(x).
|
|
func (x StringSlice) Sort() { Sort(x) }
|
|
|
|
// Convenience wrappers for common cases
|
|
|
|
// Ints sorts a slice of ints in increasing order.
|
|
func Ints(x []int) { Sort(IntSlice(x)) }
|
|
|
|
// Float64s sorts a slice of float64s in increasing order.
|
|
// Not-a-number (NaN) values are ordered before other values.
|
|
func Float64s(x []float64) { Sort(Float64Slice(x)) }
|
|
|
|
// Strings sorts a slice of strings in increasing order.
|
|
func Strings(x []string) { Sort(StringSlice(x)) }
|
|
|
|
// IntsAreSorted reports whether the slice x is sorted in increasing order.
|
|
func IntsAreSorted(x []int) bool { return IsSorted(IntSlice(x)) }
|
|
|
|
// Float64sAreSorted reports whether the slice x is sorted in increasing order,
|
|
// with not-a-number (NaN) values before any other values.
|
|
func Float64sAreSorted(x []float64) bool { return IsSorted(Float64Slice(x)) }
|
|
|
|
// StringsAreSorted reports whether the slice x is sorted in increasing order.
|
|
func StringsAreSorted(x []string) bool { return IsSorted(StringSlice(x)) }
|
|
|
|
// Notes on stable sorting:
|
|
// The used algorithms are simple and provable correct on all input and use
|
|
// only logarithmic additional stack space. They perform well if compared
|
|
// experimentally to other stable in-place sorting algorithms.
|
|
//
|
|
// Remarks on other algorithms evaluated:
|
|
// - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
|
|
// Not faster.
|
|
// - GCC's __rotate for block rotations: Not faster.
|
|
// - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen
|
|
// and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
|
|
// The given algorithms are in-place, number of Swap and Assignments
|
|
// grow as n log n but the algorithm is not stable.
|
|
// - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
|
|
// V. Raman in Algorithmica (1996) 16, 115-160:
|
|
// This algorithm either needs additional 2n bits or works only if there
|
|
// are enough different elements available to encode some permutations
|
|
// which have to be undone later (so not stable on any input).
|
|
// - All the optimal in-place sorting/merging algorithms I found are either
|
|
// unstable or rely on enough different elements in each step to encode the
|
|
// performed block rearrangements. See also "In-Place Merging Algorithms",
|
|
// Denham Coates-Evely, Department of Computer Science, Kings College,
|
|
// January 2004 and the references in there.
|
|
// - Often "optimal" algorithms are optimal in the number of assignments
|
|
// but Interface has only Swap as operation.
|
|
|
|
// Stable sorts data in ascending order as determined by the Less method,
|
|
// while keeping the original order of equal elements.
|
|
//
|
|
// It makes one call to data.Len to determine n, O(n*log(n)) calls to
|
|
// data.Less and O(n*log(n)*log(n)) calls to data.Swap.
|
|
func Stable(data Interface) {
|
|
stable(data, data.Len())
|
|
}
|
|
|
|
/*
|
|
Complexity of Stable Sorting
|
|
|
|
|
|
Complexity of block swapping rotation
|
|
|
|
Each Swap puts one new element into its correct, final position.
|
|
Elements which reach their final position are no longer moved.
|
|
Thus block swapping rotation needs |u|+|v| calls to Swaps.
|
|
This is best possible as each element might need a move.
|
|
|
|
Pay attention when comparing to other optimal algorithms which
|
|
typically count the number of assignments instead of swaps:
|
|
E.g. the optimal algorithm of Dudzinski and Dydek for in-place
|
|
rotations uses O(u + v + gcd(u,v)) assignments which is
|
|
better than our O(3 * (u+v)) as gcd(u,v) <= u.
|
|
|
|
|
|
Stable sorting by SymMerge and BlockSwap rotations
|
|
|
|
SymMerg complexity for same size input M = N:
|
|
Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N)
|
|
Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
|
|
|
|
(The following argument does not fuzz over a missing -1 or
|
|
other stuff which does not impact the final result).
|
|
|
|
Let n = data.Len(). Assume n = 2^k.
|
|
|
|
Plain merge sort performs log(n) = k iterations.
|
|
On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
|
|
|
|
Thus iteration i of merge sort performs:
|
|
Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
|
|
Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
|
|
|
|
In total k = log(n) iterations are performed; so in total:
|
|
Calls to Less O(log(n) * n)
|
|
Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
|
|
= O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
|
|
|
|
|
|
Above results should generalize to arbitrary n = 2^k + p
|
|
and should not be influenced by the initial insertion sort phase:
|
|
Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
|
|
size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort.
|
|
Merge sort iterations start at i = log(bs). With t = log(bs) constant:
|
|
Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
|
|
= O(n * log(n))
|
|
Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
|
|
|
|
*/
|