1
0
mirror of https://github.com/golang/go synced 2024-11-19 14:34:42 -07:00
go/src/runtime/mgclarge.go

325 lines
8.8 KiB
Go
Raw Normal View History

runtime: redo insert/remove of large spans Currently for spans with up to 1 MBytes (128 pages) we maintain an array indexed by the number of pages in the span. This is efficient both in terms of space as well as time to insert or remove a span of a particular size. Unfortunately for spans larger than 1 MByte we currently place them on a separate linked list. This results in O(n) behavior. Now that we are seeing heaps approaching 100 GBytes n is large enough to be noticed in real programs. This change replaces the linked list now used with a balanced binary tree structure called a treap. A treap is a probabilistically balanced tree offering O(logN) behavior for inserting and removing spans. To verify that this approach will work we start with noting that only spans with sizes > 1MByte will be put into the treap. This means that to support 1 TByte a treap will need at most 1 million nodes and can ideally be held in a treap with a depth of 20. Experiments with adding and removing randomly sized spans from the treap seem to result in treaps with depths of about twice the ideal or 40. A petabyte would require a tree of only twice again that depth again so this algorithm should last well into the future. Fixes #19393 Go1 benchmarks indicate this is basically an overall wash. Tue Mar 28 21:29:21 EDT 2017 name old time/op new time/op delta BinaryTree17-4 2.42s ± 1% 2.42s ± 1% ~ (p=0.980 n=21+21) Fannkuch11-4 3.00s ± 1% 3.18s ± 4% +6.10% (p=0.000 n=22+24) FmtFprintfEmpty-4 40.5ns ± 1% 40.3ns ± 3% ~ (p=0.692 n=22+25) FmtFprintfString-4 65.9ns ± 3% 64.6ns ± 1% -1.98% (p=0.000 n=24+23) FmtFprintfInt-4 69.6ns ± 1% 68.0ns ± 7% -2.30% (p=0.001 n=21+22) FmtFprintfIntInt-4 102ns ± 2% 99ns ± 1% -3.07% (p=0.000 n=23+23) FmtFprintfPrefixedInt-4 126ns ± 0% 125ns ± 0% -0.79% (p=0.000 n=19+17) FmtFprintfFloat-4 206ns ± 2% 205ns ± 1% ~ (p=0.671 n=23+21) FmtManyArgs-4 441ns ± 1% 445ns ± 1% +0.88% (p=0.000 n=22+23) GobDecode-4 5.73ms ± 1% 5.86ms ± 1% +2.37% (p=0.000 n=23+22) GobEncode-4 4.51ms ± 1% 4.89ms ± 1% +8.32% (p=0.000 n=22+22) Gzip-4 197ms ± 0% 202ms ± 1% +2.75% (p=0.000 n=23+24) Gunzip-4 32.9ms ± 8% 32.7ms ± 2% ~ (p=0.466 n=23+24) HTTPClientServer-4 57.3µs ± 1% 56.7µs ± 1% -0.94% (p=0.000 n=21+22) JSONEncode-4 13.8ms ± 1% 13.9ms ± 2% +1.14% (p=0.000 n=22+23) JSONDecode-4 47.4ms ± 1% 48.1ms ± 1% +1.49% (p=0.000 n=23+23) Mandelbrot200-4 3.92ms ± 0% 3.92ms ± 1% +0.21% (p=0.000 n=22+22) GoParse-4 2.89ms ± 1% 2.87ms ± 1% -0.68% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 73.6ns ± 1% 72.0ns ± 2% -2.15% (p=0.000 n=21+22) RegexpMatchEasy0_1K-4 173ns ± 1% 173ns ± 1% ~ (p=0.847 n=22+24) RegexpMatchEasy1_32-4 71.9ns ± 1% 69.8ns ± 1% -2.99% (p=0.000 n=23+20) RegexpMatchEasy1_1K-4 314ns ± 1% 308ns ± 1% -1.91% (p=0.000 n=22+23) RegexpMatchMedium_32-4 106ns ± 0% 105ns ± 1% -0.58% (p=0.000 n=19+21) RegexpMatchMedium_1K-4 34.3µs ± 1% 34.3µs ± 1% ~ (p=0.871 n=23+22) RegexpMatchHard_32-4 1.67µs ± 1% 1.67µs ± 7% ~ (p=0.224 n=22+23) RegexpMatchHard_1K-4 51.5µs ± 1% 50.4µs ± 1% -1.99% (p=0.000 n=22+23) Revcomp-4 383ms ± 1% 415ms ± 0% +8.51% (p=0.000 n=22+22) Template-4 51.5ms ± 1% 51.5ms ± 1% ~ (p=0.555 n=20+23) TimeParse-4 279ns ± 2% 277ns ± 1% -0.95% (p=0.000 n=24+22) TimeFormat-4 294ns ± 1% 296ns ± 1% +0.58% (p=0.003 n=24+23) [Geo mean] 43.7µs 43.8µs +0.32% name old speed new speed delta GobDecode-4 134MB/s ± 1% 131MB/s ± 1% -2.32% (p=0.000 n=23+22) GobEncode-4 170MB/s ± 1% 157MB/s ± 1% -7.68% (p=0.000 n=22+22) Gzip-4 98.7MB/s ± 0% 96.1MB/s ± 1% -2.68% (p=0.000 n=23+24) Gunzip-4 590MB/s ± 7% 593MB/s ± 2% ~ (p=0.466 n=23+24) JSONEncode-4 141MB/s ± 1% 139MB/s ± 2% -1.13% (p=0.000 n=22+23) JSONDecode-4 40.9MB/s ± 1% 40.3MB/s ± 0% -1.47% (p=0.000 n=23+23) GoParse-4 20.1MB/s ± 1% 20.2MB/s ± 1% +0.69% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 435MB/s ± 1% 444MB/s ± 2% +2.21% (p=0.000 n=21+22) RegexpMatchEasy0_1K-4 5.89GB/s ± 1% 5.89GB/s ± 1% ~ (p=0.439 n=22+24) RegexpMatchEasy1_32-4 445MB/s ± 1% 459MB/s ± 1% +3.06% (p=0.000 n=23+20) RegexpMatchEasy1_1K-4 3.26GB/s ± 1% 3.32GB/s ± 1% +1.97% (p=0.000 n=22+23) RegexpMatchMedium_32-4 9.40MB/s ± 1% 9.44MB/s ± 1% +0.43% (p=0.000 n=23+21) RegexpMatchMedium_1K-4 29.8MB/s ± 1% 29.8MB/s ± 1% ~ (p=0.826 n=23+22) RegexpMatchHard_32-4 19.1MB/s ± 1% 19.1MB/s ± 7% ~ (p=0.233 n=22+23) RegexpMatchHard_1K-4 19.9MB/s ± 1% 20.3MB/s ± 1% +2.03% (p=0.000 n=22+23) Revcomp-4 664MB/s ± 1% 612MB/s ± 0% -7.85% (p=0.000 n=22+22) Template-4 37.6MB/s ± 1% 37.7MB/s ± 1% ~ (p=0.558 n=20+23) [Geo mean] 134MB/s 133MB/s -0.76% Tue Mar 28 22:16:54 EDT 2017 Change-Id: I4a4f5c2b53d3fb85ef76c98522d3ed5cf8ae5b7e Reviewed-on: https://go-review.googlesource.com/38732 Reviewed-by: Russ Cox <rsc@golang.org>
2017-03-27 12:20:35 -06:00
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Page heap.
//
// See malloc.go for the general overview.
//
// Large spans are the subject of this file. Spans consisting of less than
// _MaxMHeapLists are held in lists of like sized spans. Larger spans
// are held in a treap. See https://en.wikipedia.org/wiki/Treap or
// http://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview.
// sema.go also holds an implementation of a treap.
//
// Each treapNode holds a single span. The treap is sorted by page size
// and for spans of the same size a secondary sort based on start address
// is done.
// Spans are returned based on a best fit algorithm and for spans of the same
// size the one at the lowest address is selected.
//
// The primary routines are
// insert: adds a span to the treap
// remove: removes the span from that treap that best fits the required size
// removeSpan: which removes a specific span from the treap
//
// _mheap.lock must be held when manipulating this data structure.
runtime: redo insert/remove of large spans Currently for spans with up to 1 MBytes (128 pages) we maintain an array indexed by the number of pages in the span. This is efficient both in terms of space as well as time to insert or remove a span of a particular size. Unfortunately for spans larger than 1 MByte we currently place them on a separate linked list. This results in O(n) behavior. Now that we are seeing heaps approaching 100 GBytes n is large enough to be noticed in real programs. This change replaces the linked list now used with a balanced binary tree structure called a treap. A treap is a probabilistically balanced tree offering O(logN) behavior for inserting and removing spans. To verify that this approach will work we start with noting that only spans with sizes > 1MByte will be put into the treap. This means that to support 1 TByte a treap will need at most 1 million nodes and can ideally be held in a treap with a depth of 20. Experiments with adding and removing randomly sized spans from the treap seem to result in treaps with depths of about twice the ideal or 40. A petabyte would require a tree of only twice again that depth again so this algorithm should last well into the future. Fixes #19393 Go1 benchmarks indicate this is basically an overall wash. Tue Mar 28 21:29:21 EDT 2017 name old time/op new time/op delta BinaryTree17-4 2.42s ± 1% 2.42s ± 1% ~ (p=0.980 n=21+21) Fannkuch11-4 3.00s ± 1% 3.18s ± 4% +6.10% (p=0.000 n=22+24) FmtFprintfEmpty-4 40.5ns ± 1% 40.3ns ± 3% ~ (p=0.692 n=22+25) FmtFprintfString-4 65.9ns ± 3% 64.6ns ± 1% -1.98% (p=0.000 n=24+23) FmtFprintfInt-4 69.6ns ± 1% 68.0ns ± 7% -2.30% (p=0.001 n=21+22) FmtFprintfIntInt-4 102ns ± 2% 99ns ± 1% -3.07% (p=0.000 n=23+23) FmtFprintfPrefixedInt-4 126ns ± 0% 125ns ± 0% -0.79% (p=0.000 n=19+17) FmtFprintfFloat-4 206ns ± 2% 205ns ± 1% ~ (p=0.671 n=23+21) FmtManyArgs-4 441ns ± 1% 445ns ± 1% +0.88% (p=0.000 n=22+23) GobDecode-4 5.73ms ± 1% 5.86ms ± 1% +2.37% (p=0.000 n=23+22) GobEncode-4 4.51ms ± 1% 4.89ms ± 1% +8.32% (p=0.000 n=22+22) Gzip-4 197ms ± 0% 202ms ± 1% +2.75% (p=0.000 n=23+24) Gunzip-4 32.9ms ± 8% 32.7ms ± 2% ~ (p=0.466 n=23+24) HTTPClientServer-4 57.3µs ± 1% 56.7µs ± 1% -0.94% (p=0.000 n=21+22) JSONEncode-4 13.8ms ± 1% 13.9ms ± 2% +1.14% (p=0.000 n=22+23) JSONDecode-4 47.4ms ± 1% 48.1ms ± 1% +1.49% (p=0.000 n=23+23) Mandelbrot200-4 3.92ms ± 0% 3.92ms ± 1% +0.21% (p=0.000 n=22+22) GoParse-4 2.89ms ± 1% 2.87ms ± 1% -0.68% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 73.6ns ± 1% 72.0ns ± 2% -2.15% (p=0.000 n=21+22) RegexpMatchEasy0_1K-4 173ns ± 1% 173ns ± 1% ~ (p=0.847 n=22+24) RegexpMatchEasy1_32-4 71.9ns ± 1% 69.8ns ± 1% -2.99% (p=0.000 n=23+20) RegexpMatchEasy1_1K-4 314ns ± 1% 308ns ± 1% -1.91% (p=0.000 n=22+23) RegexpMatchMedium_32-4 106ns ± 0% 105ns ± 1% -0.58% (p=0.000 n=19+21) RegexpMatchMedium_1K-4 34.3µs ± 1% 34.3µs ± 1% ~ (p=0.871 n=23+22) RegexpMatchHard_32-4 1.67µs ± 1% 1.67µs ± 7% ~ (p=0.224 n=22+23) RegexpMatchHard_1K-4 51.5µs ± 1% 50.4µs ± 1% -1.99% (p=0.000 n=22+23) Revcomp-4 383ms ± 1% 415ms ± 0% +8.51% (p=0.000 n=22+22) Template-4 51.5ms ± 1% 51.5ms ± 1% ~ (p=0.555 n=20+23) TimeParse-4 279ns ± 2% 277ns ± 1% -0.95% (p=0.000 n=24+22) TimeFormat-4 294ns ± 1% 296ns ± 1% +0.58% (p=0.003 n=24+23) [Geo mean] 43.7µs 43.8µs +0.32% name old speed new speed delta GobDecode-4 134MB/s ± 1% 131MB/s ± 1% -2.32% (p=0.000 n=23+22) GobEncode-4 170MB/s ± 1% 157MB/s ± 1% -7.68% (p=0.000 n=22+22) Gzip-4 98.7MB/s ± 0% 96.1MB/s ± 1% -2.68% (p=0.000 n=23+24) Gunzip-4 590MB/s ± 7% 593MB/s ± 2% ~ (p=0.466 n=23+24) JSONEncode-4 141MB/s ± 1% 139MB/s ± 2% -1.13% (p=0.000 n=22+23) JSONDecode-4 40.9MB/s ± 1% 40.3MB/s ± 0% -1.47% (p=0.000 n=23+23) GoParse-4 20.1MB/s ± 1% 20.2MB/s ± 1% +0.69% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 435MB/s ± 1% 444MB/s ± 2% +2.21% (p=0.000 n=21+22) RegexpMatchEasy0_1K-4 5.89GB/s ± 1% 5.89GB/s ± 1% ~ (p=0.439 n=22+24) RegexpMatchEasy1_32-4 445MB/s ± 1% 459MB/s ± 1% +3.06% (p=0.000 n=23+20) RegexpMatchEasy1_1K-4 3.26GB/s ± 1% 3.32GB/s ± 1% +1.97% (p=0.000 n=22+23) RegexpMatchMedium_32-4 9.40MB/s ± 1% 9.44MB/s ± 1% +0.43% (p=0.000 n=23+21) RegexpMatchMedium_1K-4 29.8MB/s ± 1% 29.8MB/s ± 1% ~ (p=0.826 n=23+22) RegexpMatchHard_32-4 19.1MB/s ± 1% 19.1MB/s ± 7% ~ (p=0.233 n=22+23) RegexpMatchHard_1K-4 19.9MB/s ± 1% 20.3MB/s ± 1% +2.03% (p=0.000 n=22+23) Revcomp-4 664MB/s ± 1% 612MB/s ± 0% -7.85% (p=0.000 n=22+22) Template-4 37.6MB/s ± 1% 37.7MB/s ± 1% ~ (p=0.558 n=20+23) [Geo mean] 134MB/s 133MB/s -0.76% Tue Mar 28 22:16:54 EDT 2017 Change-Id: I4a4f5c2b53d3fb85ef76c98522d3ed5cf8ae5b7e Reviewed-on: https://go-review.googlesource.com/38732 Reviewed-by: Russ Cox <rsc@golang.org>
2017-03-27 12:20:35 -06:00
package runtime
import (
"unsafe"
)
//go:notinheap
type mTreap struct {
treap *treapNode
}
//go:notinheap
type treapNode struct {
right *treapNode // all treapNodes > this treap node
left *treapNode // all treapNodes < this treap node
parent *treapNode // direct parent of this node, nil if root
npagesKey uintptr // number of pages in spanKey, used as primary sort key
spanKey *mspan // span of size npagesKey, used as secondary sort key
priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced
runtime: redo insert/remove of large spans Currently for spans with up to 1 MBytes (128 pages) we maintain an array indexed by the number of pages in the span. This is efficient both in terms of space as well as time to insert or remove a span of a particular size. Unfortunately for spans larger than 1 MByte we currently place them on a separate linked list. This results in O(n) behavior. Now that we are seeing heaps approaching 100 GBytes n is large enough to be noticed in real programs. This change replaces the linked list now used with a balanced binary tree structure called a treap. A treap is a probabilistically balanced tree offering O(logN) behavior for inserting and removing spans. To verify that this approach will work we start with noting that only spans with sizes > 1MByte will be put into the treap. This means that to support 1 TByte a treap will need at most 1 million nodes and can ideally be held in a treap with a depth of 20. Experiments with adding and removing randomly sized spans from the treap seem to result in treaps with depths of about twice the ideal or 40. A petabyte would require a tree of only twice again that depth again so this algorithm should last well into the future. Fixes #19393 Go1 benchmarks indicate this is basically an overall wash. Tue Mar 28 21:29:21 EDT 2017 name old time/op new time/op delta BinaryTree17-4 2.42s ± 1% 2.42s ± 1% ~ (p=0.980 n=21+21) Fannkuch11-4 3.00s ± 1% 3.18s ± 4% +6.10% (p=0.000 n=22+24) FmtFprintfEmpty-4 40.5ns ± 1% 40.3ns ± 3% ~ (p=0.692 n=22+25) FmtFprintfString-4 65.9ns ± 3% 64.6ns ± 1% -1.98% (p=0.000 n=24+23) FmtFprintfInt-4 69.6ns ± 1% 68.0ns ± 7% -2.30% (p=0.001 n=21+22) FmtFprintfIntInt-4 102ns ± 2% 99ns ± 1% -3.07% (p=0.000 n=23+23) FmtFprintfPrefixedInt-4 126ns ± 0% 125ns ± 0% -0.79% (p=0.000 n=19+17) FmtFprintfFloat-4 206ns ± 2% 205ns ± 1% ~ (p=0.671 n=23+21) FmtManyArgs-4 441ns ± 1% 445ns ± 1% +0.88% (p=0.000 n=22+23) GobDecode-4 5.73ms ± 1% 5.86ms ± 1% +2.37% (p=0.000 n=23+22) GobEncode-4 4.51ms ± 1% 4.89ms ± 1% +8.32% (p=0.000 n=22+22) Gzip-4 197ms ± 0% 202ms ± 1% +2.75% (p=0.000 n=23+24) Gunzip-4 32.9ms ± 8% 32.7ms ± 2% ~ (p=0.466 n=23+24) HTTPClientServer-4 57.3µs ± 1% 56.7µs ± 1% -0.94% (p=0.000 n=21+22) JSONEncode-4 13.8ms ± 1% 13.9ms ± 2% +1.14% (p=0.000 n=22+23) JSONDecode-4 47.4ms ± 1% 48.1ms ± 1% +1.49% (p=0.000 n=23+23) Mandelbrot200-4 3.92ms ± 0% 3.92ms ± 1% +0.21% (p=0.000 n=22+22) GoParse-4 2.89ms ± 1% 2.87ms ± 1% -0.68% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 73.6ns ± 1% 72.0ns ± 2% -2.15% (p=0.000 n=21+22) RegexpMatchEasy0_1K-4 173ns ± 1% 173ns ± 1% ~ (p=0.847 n=22+24) RegexpMatchEasy1_32-4 71.9ns ± 1% 69.8ns ± 1% -2.99% (p=0.000 n=23+20) RegexpMatchEasy1_1K-4 314ns ± 1% 308ns ± 1% -1.91% (p=0.000 n=22+23) RegexpMatchMedium_32-4 106ns ± 0% 105ns ± 1% -0.58% (p=0.000 n=19+21) RegexpMatchMedium_1K-4 34.3µs ± 1% 34.3µs ± 1% ~ (p=0.871 n=23+22) RegexpMatchHard_32-4 1.67µs ± 1% 1.67µs ± 7% ~ (p=0.224 n=22+23) RegexpMatchHard_1K-4 51.5µs ± 1% 50.4µs ± 1% -1.99% (p=0.000 n=22+23) Revcomp-4 383ms ± 1% 415ms ± 0% +8.51% (p=0.000 n=22+22) Template-4 51.5ms ± 1% 51.5ms ± 1% ~ (p=0.555 n=20+23) TimeParse-4 279ns ± 2% 277ns ± 1% -0.95% (p=0.000 n=24+22) TimeFormat-4 294ns ± 1% 296ns ± 1% +0.58% (p=0.003 n=24+23) [Geo mean] 43.7µs 43.8µs +0.32% name old speed new speed delta GobDecode-4 134MB/s ± 1% 131MB/s ± 1% -2.32% (p=0.000 n=23+22) GobEncode-4 170MB/s ± 1% 157MB/s ± 1% -7.68% (p=0.000 n=22+22) Gzip-4 98.7MB/s ± 0% 96.1MB/s ± 1% -2.68% (p=0.000 n=23+24) Gunzip-4 590MB/s ± 7% 593MB/s ± 2% ~ (p=0.466 n=23+24) JSONEncode-4 141MB/s ± 1% 139MB/s ± 2% -1.13% (p=0.000 n=22+23) JSONDecode-4 40.9MB/s ± 1% 40.3MB/s ± 0% -1.47% (p=0.000 n=23+23) GoParse-4 20.1MB/s ± 1% 20.2MB/s ± 1% +0.69% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 435MB/s ± 1% 444MB/s ± 2% +2.21% (p=0.000 n=21+22) RegexpMatchEasy0_1K-4 5.89GB/s ± 1% 5.89GB/s ± 1% ~ (p=0.439 n=22+24) RegexpMatchEasy1_32-4 445MB/s ± 1% 459MB/s ± 1% +3.06% (p=0.000 n=23+20) RegexpMatchEasy1_1K-4 3.26GB/s ± 1% 3.32GB/s ± 1% +1.97% (p=0.000 n=22+23) RegexpMatchMedium_32-4 9.40MB/s ± 1% 9.44MB/s ± 1% +0.43% (p=0.000 n=23+21) RegexpMatchMedium_1K-4 29.8MB/s ± 1% 29.8MB/s ± 1% ~ (p=0.826 n=23+22) RegexpMatchHard_32-4 19.1MB/s ± 1% 19.1MB/s ± 7% ~ (p=0.233 n=22+23) RegexpMatchHard_1K-4 19.9MB/s ± 1% 20.3MB/s ± 1% +2.03% (p=0.000 n=22+23) Revcomp-4 664MB/s ± 1% 612MB/s ± 0% -7.85% (p=0.000 n=22+22) Template-4 37.6MB/s ± 1% 37.7MB/s ± 1% ~ (p=0.558 n=20+23) [Geo mean] 134MB/s 133MB/s -0.76% Tue Mar 28 22:16:54 EDT 2017 Change-Id: I4a4f5c2b53d3fb85ef76c98522d3ed5cf8ae5b7e Reviewed-on: https://go-review.googlesource.com/38732 Reviewed-by: Russ Cox <rsc@golang.org>
2017-03-27 12:20:35 -06:00
}
func (t *treapNode) init() {
t.right = nil
t.left = nil
t.parent = nil
t.spanKey = nil
t.npagesKey = 0
t.priority = 0
}
// isSpanInTreap is handy for debugging. One should hold the heap lock, usually
// mheap_.lock().
func (t *treapNode) isSpanInTreap(s *mspan) bool {
if t == nil {
return false
}
return t.spanKey == s || t.left.isSpanInTreap(s) || t.right.isSpanInTreap(s)
}
// walkTreap is handy for debugging.
// Starting at some treapnode t, for example the root, do a depth first preorder walk of
// the tree executing fn at each treap node. One should hold the heap lock, usually
// mheap_.lock().
func (t *treapNode) walkTreap(fn func(tn *treapNode)) {
if t == nil {
return
}
fn(t)
t.left.walkTreap(fn)
t.right.walkTreap(fn)
}
// checkTreapNode when used in conjunction with walkTreap can usually detect a
// poorly formed treap.
func checkTreapNode(t *treapNode) {
// lessThan is used to order the treap.
// npagesKey and npages are the primary keys.
// spanKey and span are the secondary keys.
// span == nil (0) will always be lessThan all
// spans of the same size.
lessThan := func(npages uintptr, s *mspan) bool {
if t.npagesKey != npages {
return t.npagesKey < npages
}
// t.npagesKey == npages
return uintptr(unsafe.Pointer(t.spanKey)) < uintptr(unsafe.Pointer(s))
}
if t == nil {
return
}
if t.spanKey.npages != t.npagesKey || t.spanKey.next != nil {
println("runtime: checkTreapNode treapNode t=", t, " t.npagesKey=", t.npagesKey,
"t.spanKey.npages=", t.spanKey.npages)
throw("why does span.npages and treap.ngagesKey do not match?")
}
if t.left != nil && lessThan(t.left.npagesKey, t.left.spanKey) {
throw("t.lessThan(t.left.npagesKey, t.left.spanKey) is not false")
}
if t.right != nil && !lessThan(t.right.npagesKey, t.right.spanKey) {
throw("!t.lessThan(t.left.npagesKey, t.left.spanKey) is not false")
}
}
// insert adds span to the large span treap.
func (root *mTreap) insert(span *mspan) {
npages := span.npages
var last *treapNode
pt := &root.treap
for t := *pt; t != nil; t = *pt {
last = t
if t.npagesKey < npages {
pt = &t.right
} else if t.npagesKey > npages {
pt = &t.left
} else if uintptr(unsafe.Pointer(t.spanKey)) < uintptr(unsafe.Pointer(span)) {
// t.npagesKey == npages, so sort on span addresses.
pt = &t.right
} else if uintptr(unsafe.Pointer(t.spanKey)) > uintptr(unsafe.Pointer(span)) {
pt = &t.left
} else {
throw("inserting span already in treap")
}
}
// Add t as new leaf in tree of span size and unique addrs.
// The balanced tree is a treap using priority as the random heap priority.
// That is, it is a binary tree ordered according to the npagesKey,
// but then among the space of possible binary trees respecting those
// npagesKeys, it is kept balanced on average by maintaining a heap ordering
// on the priority: s.priority <= both s.right.priority and s.right.priority.
// https://en.wikipedia.org/wiki/Treap
// http://faculty.washington.edu/aragon/pubs/rst89.pdf
t := (*treapNode)(mheap_.treapalloc.alloc())
t.init()
t.npagesKey = span.npages
t.priority = fastrand()
t.spanKey = span
t.parent = last
*pt = t // t now at a leaf.
// Rotate up into tree according to priority.
for t.parent != nil && t.parent.priority > t.priority {
if t != nil && t.spanKey.npages != t.npagesKey {
println("runtime: insert t=", t, "t.npagesKey=", t.npagesKey)
println("runtime: t.spanKey=", t.spanKey, "t.spanKey.npages=", t.spanKey.npages)
throw("span and treap sizes do not match?")
}
if t.parent.left == t {
root.rotateRight(t.parent)
} else {
if t.parent.right != t {
throw("treap insert finds a broken treap")
}
root.rotateLeft(t.parent)
}
}
}
func (root *mTreap) removeNode(t *treapNode) {
runtime: redo insert/remove of large spans Currently for spans with up to 1 MBytes (128 pages) we maintain an array indexed by the number of pages in the span. This is efficient both in terms of space as well as time to insert or remove a span of a particular size. Unfortunately for spans larger than 1 MByte we currently place them on a separate linked list. This results in O(n) behavior. Now that we are seeing heaps approaching 100 GBytes n is large enough to be noticed in real programs. This change replaces the linked list now used with a balanced binary tree structure called a treap. A treap is a probabilistically balanced tree offering O(logN) behavior for inserting and removing spans. To verify that this approach will work we start with noting that only spans with sizes > 1MByte will be put into the treap. This means that to support 1 TByte a treap will need at most 1 million nodes and can ideally be held in a treap with a depth of 20. Experiments with adding and removing randomly sized spans from the treap seem to result in treaps with depths of about twice the ideal or 40. A petabyte would require a tree of only twice again that depth again so this algorithm should last well into the future. Fixes #19393 Go1 benchmarks indicate this is basically an overall wash. Tue Mar 28 21:29:21 EDT 2017 name old time/op new time/op delta BinaryTree17-4 2.42s ± 1% 2.42s ± 1% ~ (p=0.980 n=21+21) Fannkuch11-4 3.00s ± 1% 3.18s ± 4% +6.10% (p=0.000 n=22+24) FmtFprintfEmpty-4 40.5ns ± 1% 40.3ns ± 3% ~ (p=0.692 n=22+25) FmtFprintfString-4 65.9ns ± 3% 64.6ns ± 1% -1.98% (p=0.000 n=24+23) FmtFprintfInt-4 69.6ns ± 1% 68.0ns ± 7% -2.30% (p=0.001 n=21+22) FmtFprintfIntInt-4 102ns ± 2% 99ns ± 1% -3.07% (p=0.000 n=23+23) FmtFprintfPrefixedInt-4 126ns ± 0% 125ns ± 0% -0.79% (p=0.000 n=19+17) FmtFprintfFloat-4 206ns ± 2% 205ns ± 1% ~ (p=0.671 n=23+21) FmtManyArgs-4 441ns ± 1% 445ns ± 1% +0.88% (p=0.000 n=22+23) GobDecode-4 5.73ms ± 1% 5.86ms ± 1% +2.37% (p=0.000 n=23+22) GobEncode-4 4.51ms ± 1% 4.89ms ± 1% +8.32% (p=0.000 n=22+22) Gzip-4 197ms ± 0% 202ms ± 1% +2.75% (p=0.000 n=23+24) Gunzip-4 32.9ms ± 8% 32.7ms ± 2% ~ (p=0.466 n=23+24) HTTPClientServer-4 57.3µs ± 1% 56.7µs ± 1% -0.94% (p=0.000 n=21+22) JSONEncode-4 13.8ms ± 1% 13.9ms ± 2% +1.14% (p=0.000 n=22+23) JSONDecode-4 47.4ms ± 1% 48.1ms ± 1% +1.49% (p=0.000 n=23+23) Mandelbrot200-4 3.92ms ± 0% 3.92ms ± 1% +0.21% (p=0.000 n=22+22) GoParse-4 2.89ms ± 1% 2.87ms ± 1% -0.68% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 73.6ns ± 1% 72.0ns ± 2% -2.15% (p=0.000 n=21+22) RegexpMatchEasy0_1K-4 173ns ± 1% 173ns ± 1% ~ (p=0.847 n=22+24) RegexpMatchEasy1_32-4 71.9ns ± 1% 69.8ns ± 1% -2.99% (p=0.000 n=23+20) RegexpMatchEasy1_1K-4 314ns ± 1% 308ns ± 1% -1.91% (p=0.000 n=22+23) RegexpMatchMedium_32-4 106ns ± 0% 105ns ± 1% -0.58% (p=0.000 n=19+21) RegexpMatchMedium_1K-4 34.3µs ± 1% 34.3µs ± 1% ~ (p=0.871 n=23+22) RegexpMatchHard_32-4 1.67µs ± 1% 1.67µs ± 7% ~ (p=0.224 n=22+23) RegexpMatchHard_1K-4 51.5µs ± 1% 50.4µs ± 1% -1.99% (p=0.000 n=22+23) Revcomp-4 383ms ± 1% 415ms ± 0% +8.51% (p=0.000 n=22+22) Template-4 51.5ms ± 1% 51.5ms ± 1% ~ (p=0.555 n=20+23) TimeParse-4 279ns ± 2% 277ns ± 1% -0.95% (p=0.000 n=24+22) TimeFormat-4 294ns ± 1% 296ns ± 1% +0.58% (p=0.003 n=24+23) [Geo mean] 43.7µs 43.8µs +0.32% name old speed new speed delta GobDecode-4 134MB/s ± 1% 131MB/s ± 1% -2.32% (p=0.000 n=23+22) GobEncode-4 170MB/s ± 1% 157MB/s ± 1% -7.68% (p=0.000 n=22+22) Gzip-4 98.7MB/s ± 0% 96.1MB/s ± 1% -2.68% (p=0.000 n=23+24) Gunzip-4 590MB/s ± 7% 593MB/s ± 2% ~ (p=0.466 n=23+24) JSONEncode-4 141MB/s ± 1% 139MB/s ± 2% -1.13% (p=0.000 n=22+23) JSONDecode-4 40.9MB/s ± 1% 40.3MB/s ± 0% -1.47% (p=0.000 n=23+23) GoParse-4 20.1MB/s ± 1% 20.2MB/s ± 1% +0.69% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 435MB/s ± 1% 444MB/s ± 2% +2.21% (p=0.000 n=21+22) RegexpMatchEasy0_1K-4 5.89GB/s ± 1% 5.89GB/s ± 1% ~ (p=0.439 n=22+24) RegexpMatchEasy1_32-4 445MB/s ± 1% 459MB/s ± 1% +3.06% (p=0.000 n=23+20) RegexpMatchEasy1_1K-4 3.26GB/s ± 1% 3.32GB/s ± 1% +1.97% (p=0.000 n=22+23) RegexpMatchMedium_32-4 9.40MB/s ± 1% 9.44MB/s ± 1% +0.43% (p=0.000 n=23+21) RegexpMatchMedium_1K-4 29.8MB/s ± 1% 29.8MB/s ± 1% ~ (p=0.826 n=23+22) RegexpMatchHard_32-4 19.1MB/s ± 1% 19.1MB/s ± 7% ~ (p=0.233 n=22+23) RegexpMatchHard_1K-4 19.9MB/s ± 1% 20.3MB/s ± 1% +2.03% (p=0.000 n=22+23) Revcomp-4 664MB/s ± 1% 612MB/s ± 0% -7.85% (p=0.000 n=22+22) Template-4 37.6MB/s ± 1% 37.7MB/s ± 1% ~ (p=0.558 n=20+23) [Geo mean] 134MB/s 133MB/s -0.76% Tue Mar 28 22:16:54 EDT 2017 Change-Id: I4a4f5c2b53d3fb85ef76c98522d3ed5cf8ae5b7e Reviewed-on: https://go-review.googlesource.com/38732 Reviewed-by: Russ Cox <rsc@golang.org>
2017-03-27 12:20:35 -06:00
if t.spanKey.npages != t.npagesKey {
throw("span and treap node npages do not match")
}
// Rotate t down to be leaf of tree for removal, respecting priorities.
for t.right != nil || t.left != nil {
if t.right == nil || t.left != nil && t.left.priority < t.right.priority {
root.rotateRight(t)
} else {
root.rotateLeft(t)
}
}
// Remove t, now a leaf.
if t.parent != nil {
if t.parent.left == t {
t.parent.left = nil
} else {
t.parent.right = nil
}
} else {
root.treap = nil
}
// Return the found treapNode's span after freeing the treapNode.
t.spanKey = nil
t.npagesKey = 0
mheap_.treapalloc.free(unsafe.Pointer(t))
}
// remove searches for, finds, removes from the treap, and returns the smallest
// span that can hold npages. If no span has at least npages return nil.
// This is slightly more complicated than a simple binary tree search
// since if an exact match is not found the next larger node is
// returned.
// If the last node inspected > npagesKey not holding
// a left node (a smaller npages) is the "best fit" node.
func (root *mTreap) remove(npages uintptr) *mspan {
t := root.treap
for t != nil {
if t.spanKey == nil {
throw("treap node with nil spanKey found")
}
if t.npagesKey < npages {
t = t.right
} else if t.left != nil && t.left.npagesKey >= npages {
t = t.left
} else {
result := t.spanKey
root.removeNode(t)
return result
}
}
return nil
}
// removeSpan searches for, finds, deletes span along with
// the associated treap node. If the span is not in the treap
// then t will eventually be set to nil and the t.spanKey
// will throw.
func (root *mTreap) removeSpan(span *mspan) {
npages := span.npages
t := root.treap
for t.spanKey != span {
if t.npagesKey < npages {
t = t.right
} else if t.npagesKey > npages {
t = t.left
} else if uintptr(unsafe.Pointer(t.spanKey)) < uintptr(unsafe.Pointer(span)) {
t = t.right
} else if uintptr(unsafe.Pointer(t.spanKey)) > uintptr(unsafe.Pointer(span)) {
t = t.left
}
}
root.removeNode(t)
}
// scavengetreap visits each node in the treap and scavenges the
// treapNode's span.
func scavengetreap(treap *treapNode, now, limit uint64) uintptr {
if treap == nil {
return 0
}
return scavengeTreapNode(treap, now, limit) +
scavengetreap(treap.left, now, limit) +
scavengetreap(treap.right, now, limit)
}
// rotateLeft rotates the tree rooted at node x.
// turning (x a (y b c)) into (y (x a b) c).
func (root *mTreap) rotateLeft(x *treapNode) {
// p -> (x a (y b c))
p := x.parent
a, y := x.left, x.right
b, c := y.left, y.right
y.left = x
x.parent = y
y.right = c
if c != nil {
c.parent = y
}
x.left = a
if a != nil {
a.parent = x
}
x.right = b
if b != nil {
b.parent = x
}
y.parent = p
if p == nil {
root.treap = y
} else if p.left == x {
p.left = y
} else {
if p.right != x {
throw("large span treap rotateLeft")
}
p.right = y
}
}
// rotateRight rotates the tree rooted at node y.
// turning (y (x a b) c) into (x a (y b c)).
func (root *mTreap) rotateRight(y *treapNode) {
// p -> (y (x a b) c)
p := y.parent
x, c := y.left, y.right
a, b := x.left, x.right
x.left = a
if a != nil {
a.parent = x
}
x.right = y
y.parent = x
y.left = b
if b != nil {
b.parent = y
}
y.right = c
if c != nil {
c.parent = y
}
x.parent = p
if p == nil {
root.treap = x
} else if p.left == y {
p.left = x
} else {
if p.right != y {
throw("large span treap rotateRight")
}
p.right = x
}
}