1
0
mirror of https://github.com/golang/go synced 2024-11-17 16:04:47 -07:00

cmd/compile: add cache of sizeable objects so they can be reused

We kind of have this mechanism already, just normalizing it and
using it in a bunch of places. Previously a bunch of places cached
slices only for the duration of a single function compilation. Now
we can reuse slices across a whole compiler run.

Use a set of sync.Pools, one per power-of-two size. This keeps us
from using too much memory, and from holding onto memory we're no
longer using when a GC happens.

There are a few different types we need, so generate the code for them.
Generics would be useful here, but we can't use generics in the
compiler because of bootstrapping.

Change-Id: I6cf37e7b7b2e802882aaa723a0b29770511ccd82
Reviewed-on: https://go-review.googlesource.com/c/go/+/444820
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Keith Randall 2022-10-18 16:07:36 -07:00 committed by Keith Randall
parent 7ddc45263c
commit 68bd383368
21 changed files with 656 additions and 213 deletions

View File

@ -0,0 +1,198 @@
package main
// TODO: should we share backing storage for similarly-shaped types?
// e.g. []*Value and []*Block, or even []int32 and []bool.
import (
"bytes"
"fmt"
"go/format"
"io"
"log"
"os"
)
// allocator describes one alloc/free function pair to generate.
//
// mak, capacity, resize and clear are fmt templates that genAllocator
// expands into fragments of Go source.
type allocator struct {
	// name is the suffix used for the generated alloc/free functions.
	name string
	// typ is the Go type those functions return/accept.
	typ string
	// mak is code to make a new object. Its single fmt argument is the
	// (power-of-2) size expression.
	mak string
	// capacity is code to calculate the capacity of an object; its fmt
	// argument is the object. It should always report a power of 2.
	capacity string
	// resize is code to shrink an object to a sub-power-of-two size. Its fmt
	// arguments are the object and the size. Empty means no resize is emitted.
	resize string
	// clear is code for clearing an object before putting it on the free
	// list; its fmt argument is the object.
	clear string
	// minLog and maxLog are log_2 of the minimum and maximum allocation size.
	minLog, maxLog int
}
// genAllocators generates ../allocators.go: the package ssa source holding
// one sync.Pool-backed alloc/free pair per entry in the table below. The
// text is assembled unformatted and then run through gofmt.
func genAllocators() {
	const maxLog = 32

	// sliceOf describes the allocator for []elem, whose elements are reset
	// to zero (the zero value spelled as Go source) when freed.
	sliceOf := func(name, elem, zero string, minLog int) allocator {
		return allocator{
			name:     name,
			typ:      "[]" + elem,
			capacity: "cap(%s)",
			mak:      "make([]" + elem + ", %s)",
			resize:   "%s[:%s]",
			clear:    "for i := range %[1]s {\n%[1]s[i] = " + zero + "\n}",
			minLog:   minLog,
			maxLog:   maxLog,
		}
	}
	// sparseOf describes the allocator for *typ, built by new<name>.
	sparseOf := func(name, typ string) allocator {
		return allocator{
			name:     name,
			typ:      "*" + typ,
			capacity: "%s.cap()",
			mak:      "new" + name + "(%s)",
			resize:   "", // larger-sized sparse sets/maps are ok
			clear:    "%s.clear()",
			minLog:   5,
			maxLog:   maxLog,
		}
	}

	table := []allocator{
		sliceOf("ValueSlice", "*Value", "nil", 5),
		sliceOf("BlockSlice", "*Block", "nil", 5),
		sliceOf("BoolSlice", "bool", "false", 8),
		sliceOf("IntSlice", "int", "0", 5),
		sliceOf("Int32Slice", "int32", "0", 6),
		sliceOf("Int8Slice", "int8", "0", 8),
		sliceOf("IDSlice", "ID", "0", 6),
		sparseOf("SparseSet", "sparseSet"),
		sparseOf("SparseMap", "sparseMap"),
		sparseOf("SparseMapPos", "sparseMapPos"),
	}

	var src bytes.Buffer
	src.WriteString("// Code generated from _gen/allocators.go; DO NOT EDIT.\n" +
		"\n" +
		"package ssa\n" +
		"import (\n" +
		"\"math/bits\"\n" +
		"\"sync\"\n" +
		")\n")
	for i := range table {
		genAllocator(&src, table[i])
	}

	// gofmt result
	formatted, err := format.Source(src.Bytes())
	if err != nil {
		// Print the unformatted text so the offending source can be inspected.
		fmt.Printf("%s\n", src.Bytes())
		panic(err)
	}

	if err := os.WriteFile("../allocators.go", formatted, 0666); err != nil {
		log.Fatalf("can't write output: %v\n", err)
	}
}
// genAllocator writes to w the (unformatted) Go source for one allocator:
// the poolFree<name> array of sync.Pools, the alloc<name> method and the
// free<name> method on *Cache.
func genAllocator(w io.Writer, a allocator) {
	emit := func(format string, args ...interface{}) {
		fmt.Fprintf(w, format, args...)
	}

	// One pool per power-of-two size class.
	emit("var poolFree%s [%d]sync.Pool\n", a.name, a.maxLog-a.minLog)

	// alloc<name>: round the request up to a size class and take an object
	// from that class's pool, making a new one if the pool is empty.
	emit("func (c *Cache) alloc%s(n int) %s {\n", a.name, a.typ)
	emit("var s %s\n", a.typ)
	emit("n2 := n\n")
	emit("if n2 < %d { n2 = %d }\n", 1<<a.minLog, 1<<a.minLog)
	emit("b := bits.Len(uint(n2-1))\n")
	emit("v := poolFree%s[b-%d].Get()\n", a.name, a.minLog)
	emit("if v == nil {\n")
	emit(" s = %s\n", fmt.Sprintf(a.mak, "1<<b"))
	emit("} else {\n")
	if a.typ[0] != '*' {
		// Non-pointer objects are pooled behind a pointer to a header; the
		// emptied header is kept on c.hdr<name> for free<name> to reuse.
		emit("sp := v.(*%s)\n", a.typ)
		emit("s = *sp\n")
		emit("*sp = nil\n")
		emit("c.hdr%s = append(c.hdr%s, sp)\n", a.name, a.name)
	} else {
		// Pointer-typed objects are stored in the pool directly.
		emit("s = v.(%s)\n", a.typ)
	}
	emit("}\n")
	if a.resize != "" {
		emit("s = %s\n", fmt.Sprintf(a.resize, "s", "n"))
	}
	emit("return s\n")
	emit("}\n")

	// free<name>: clear the object and put it in the pool for its capacity.
	emit("func (c *Cache) free%s(s %s) {\n", a.name, a.typ)
	emit("%s\n", fmt.Sprintf(a.clear, "s"))
	emit("b := bits.Len(uint(%s) - 1)\n", fmt.Sprintf(a.capacity, "s"))
	if a.typ[0] != '*' {
		// Take a spare header from c.hdr<name>, or allocate one if none is left.
		emit("var sp *%s\n", a.typ)
		emit("if len(c.hdr%s) == 0 {\n", a.name)
		emit(" sp = new(%s)\n", a.typ)
		emit("} else {\n")
		emit(" sp = c.hdr%s[len(c.hdr%s)-1]\n", a.name, a.name)
		emit(" c.hdr%s[len(c.hdr%s)-1] = nil\n", a.name, a.name)
		emit(" c.hdr%s = c.hdr%s[:len(c.hdr%s)-1]\n", a.name, a.name, a.name)
		emit("}\n")
		emit("*sp = s\n")
		emit("poolFree%s[b-%d].Put(sp)\n", a.name, a.minLog)
	} else {
		emit("poolFree%s[b-%d].Put(s)\n", a.name, a.minLog)
	}
	emit("}\n")
}

View File

@ -153,6 +153,7 @@ func main() {
tasks := []func(){
genOp,
genAllocators,
}
for _, a := range archs {
a := a // the funcs are ran concurrently at a later time

View File

@ -0,0 +1,343 @@
// Code generated from _gen/allocators.go; DO NOT EDIT.
package ssa
import (
"math/bits"
"sync"
)
// poolFreeValueSlice holds freed []*Value slices. allocValueSlice and
// freeValueSlice index it by bits.Len(size-1)-5, i.e. by power-of-two size class.
var poolFreeValueSlice [27]sync.Pool
// allocValueSlice returns a []*Value of length n. It reuses a pooled slice of
// the matching size class when one is available and makes a new one otherwise.
func (c *Cache) allocValueSlice(n int) []*Value {
var s []*Value
// Requests below 32 are served from the smallest size class.
n2 := n
if n2 < 32 {
n2 = 32
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeValueSlice[b-5].Get()
if v == nil {
s = make([]*Value, 1<<b)
} else {
// The pool stores *[]*Value. Take the slice out and keep the emptied
// header in c.hdrValueSlice so freeValueSlice can reuse it.
sp := v.(*[]*Value)
s = *sp
*sp = nil
c.hdrValueSlice = append(c.hdrValueSlice, sp)
}
// Reslice to the requested length.
s = s[:n]
return s
}
// freeValueSlice nils out s[:len(s)] and puts s into the pool selected by cap(s).
// NOTE(review): presumably s came from allocValueSlice, so cap(s) is a power
// of two >= 32 — confirm at call sites.
func (c *Cache) freeValueSlice(s []*Value) {
// Only the first len(s) elements are reset.
for i := range s {
s[i] = nil
}
b := bits.Len(uint(cap(s)) - 1)
// Reuse a spare header from c.hdrValueSlice if there is one; otherwise allocate one.
var sp *[]*Value
if len(c.hdrValueSlice) == 0 {
sp = new([]*Value)
} else {
sp = c.hdrValueSlice[len(c.hdrValueSlice)-1]
c.hdrValueSlice[len(c.hdrValueSlice)-1] = nil
c.hdrValueSlice = c.hdrValueSlice[:len(c.hdrValueSlice)-1]
}
*sp = s
poolFreeValueSlice[b-5].Put(sp)
}
// poolFreeBlockSlice holds freed []*Block slices. allocBlockSlice and
// freeBlockSlice index it by bits.Len(size-1)-5, i.e. by power-of-two size class.
var poolFreeBlockSlice [27]sync.Pool
// allocBlockSlice returns a []*Block of length n. It reuses a pooled slice of
// the matching size class when one is available and makes a new one otherwise.
func (c *Cache) allocBlockSlice(n int) []*Block {
var s []*Block
// Requests below 32 are served from the smallest size class.
n2 := n
if n2 < 32 {
n2 = 32
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeBlockSlice[b-5].Get()
if v == nil {
s = make([]*Block, 1<<b)
} else {
// The pool stores *[]*Block. Take the slice out and keep the emptied
// header in c.hdrBlockSlice so freeBlockSlice can reuse it.
sp := v.(*[]*Block)
s = *sp
*sp = nil
c.hdrBlockSlice = append(c.hdrBlockSlice, sp)
}
// Reslice to the requested length.
s = s[:n]
return s
}
// freeBlockSlice nils out s[:len(s)] and puts s into the pool selected by cap(s).
// NOTE(review): presumably s came from allocBlockSlice, so cap(s) is a power
// of two >= 32 — confirm at call sites.
func (c *Cache) freeBlockSlice(s []*Block) {
// Only the first len(s) elements are reset.
for i := range s {
s[i] = nil
}
b := bits.Len(uint(cap(s)) - 1)
// Reuse a spare header from c.hdrBlockSlice if there is one; otherwise allocate one.
var sp *[]*Block
if len(c.hdrBlockSlice) == 0 {
sp = new([]*Block)
} else {
sp = c.hdrBlockSlice[len(c.hdrBlockSlice)-1]
c.hdrBlockSlice[len(c.hdrBlockSlice)-1] = nil
c.hdrBlockSlice = c.hdrBlockSlice[:len(c.hdrBlockSlice)-1]
}
*sp = s
poolFreeBlockSlice[b-5].Put(sp)
}
// poolFreeBoolSlice holds freed []bool slices. allocBoolSlice and
// freeBoolSlice index it by bits.Len(size-1)-8, i.e. by power-of-two size class.
var poolFreeBoolSlice [24]sync.Pool
// allocBoolSlice returns a []bool of length n. It reuses a pooled slice of
// the matching size class when one is available and makes a new one otherwise.
func (c *Cache) allocBoolSlice(n int) []bool {
var s []bool
// Requests below 256 are served from the smallest size class.
n2 := n
if n2 < 256 {
n2 = 256
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeBoolSlice[b-8].Get()
if v == nil {
s = make([]bool, 1<<b)
} else {
// The pool stores *[]bool. Take the slice out and keep the emptied
// header in c.hdrBoolSlice so freeBoolSlice can reuse it.
sp := v.(*[]bool)
s = *sp
*sp = nil
c.hdrBoolSlice = append(c.hdrBoolSlice, sp)
}
// Reslice to the requested length.
s = s[:n]
return s
}
// freeBoolSlice resets s[:len(s)] to false and puts s into the pool selected by cap(s).
// NOTE(review): presumably s came from allocBoolSlice, so cap(s) is a power
// of two >= 256 — confirm at call sites.
func (c *Cache) freeBoolSlice(s []bool) {
// Only the first len(s) elements are reset.
for i := range s {
s[i] = false
}
b := bits.Len(uint(cap(s)) - 1)
// Reuse a spare header from c.hdrBoolSlice if there is one; otherwise allocate one.
var sp *[]bool
if len(c.hdrBoolSlice) == 0 {
sp = new([]bool)
} else {
sp = c.hdrBoolSlice[len(c.hdrBoolSlice)-1]
c.hdrBoolSlice[len(c.hdrBoolSlice)-1] = nil
c.hdrBoolSlice = c.hdrBoolSlice[:len(c.hdrBoolSlice)-1]
}
*sp = s
poolFreeBoolSlice[b-8].Put(sp)
}
// poolFreeIntSlice holds freed []int slices. allocIntSlice and
// freeIntSlice index it by bits.Len(size-1)-5, i.e. by power-of-two size class.
var poolFreeIntSlice [27]sync.Pool
// allocIntSlice returns a []int of length n. It reuses a pooled slice of
// the matching size class when one is available and makes a new one otherwise.
func (c *Cache) allocIntSlice(n int) []int {
var s []int
// Requests below 32 are served from the smallest size class.
n2 := n
if n2 < 32 {
n2 = 32
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeIntSlice[b-5].Get()
if v == nil {
s = make([]int, 1<<b)
} else {
// The pool stores *[]int. Take the slice out and keep the emptied
// header in c.hdrIntSlice so freeIntSlice can reuse it.
sp := v.(*[]int)
s = *sp
*sp = nil
c.hdrIntSlice = append(c.hdrIntSlice, sp)
}
// Reslice to the requested length.
s = s[:n]
return s
}
// freeIntSlice zeroes s[:len(s)] and puts s into the pool selected by cap(s).
// NOTE(review): presumably s came from allocIntSlice, so cap(s) is a power
// of two >= 32 — confirm at call sites.
func (c *Cache) freeIntSlice(s []int) {
// Only the first len(s) elements are reset.
for i := range s {
s[i] = 0
}
b := bits.Len(uint(cap(s)) - 1)
// Reuse a spare header from c.hdrIntSlice if there is one; otherwise allocate one.
var sp *[]int
if len(c.hdrIntSlice) == 0 {
sp = new([]int)
} else {
sp = c.hdrIntSlice[len(c.hdrIntSlice)-1]
c.hdrIntSlice[len(c.hdrIntSlice)-1] = nil
c.hdrIntSlice = c.hdrIntSlice[:len(c.hdrIntSlice)-1]
}
*sp = s
poolFreeIntSlice[b-5].Put(sp)
}
// poolFreeInt32Slice holds freed []int32 slices. allocInt32Slice and
// freeInt32Slice index it by bits.Len(size-1)-6, i.e. by power-of-two size class.
var poolFreeInt32Slice [26]sync.Pool
// allocInt32Slice returns a []int32 of length n. It reuses a pooled slice of
// the matching size class when one is available and makes a new one otherwise.
func (c *Cache) allocInt32Slice(n int) []int32 {
var s []int32
// Requests below 64 are served from the smallest size class.
n2 := n
if n2 < 64 {
n2 = 64
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeInt32Slice[b-6].Get()
if v == nil {
s = make([]int32, 1<<b)
} else {
// The pool stores *[]int32. Take the slice out and keep the emptied
// header in c.hdrInt32Slice so freeInt32Slice can reuse it.
sp := v.(*[]int32)
s = *sp
*sp = nil
c.hdrInt32Slice = append(c.hdrInt32Slice, sp)
}
// Reslice to the requested length.
s = s[:n]
return s
}
// freeInt32Slice zeroes s[:len(s)] and puts s into the pool selected by cap(s).
// NOTE(review): presumably s came from allocInt32Slice, so cap(s) is a power
// of two >= 64 — confirm at call sites.
func (c *Cache) freeInt32Slice(s []int32) {
// Only the first len(s) elements are reset.
for i := range s {
s[i] = 0
}
b := bits.Len(uint(cap(s)) - 1)
// Reuse a spare header from c.hdrInt32Slice if there is one; otherwise allocate one.
var sp *[]int32
if len(c.hdrInt32Slice) == 0 {
sp = new([]int32)
} else {
sp = c.hdrInt32Slice[len(c.hdrInt32Slice)-1]
c.hdrInt32Slice[len(c.hdrInt32Slice)-1] = nil
c.hdrInt32Slice = c.hdrInt32Slice[:len(c.hdrInt32Slice)-1]
}
*sp = s
poolFreeInt32Slice[b-6].Put(sp)
}
// poolFreeInt8Slice holds freed []int8 slices. allocInt8Slice and
// freeInt8Slice index it by bits.Len(size-1)-8, i.e. by power-of-two size class.
var poolFreeInt8Slice [24]sync.Pool
// allocInt8Slice returns a []int8 of length n. It reuses a pooled slice of
// the matching size class when one is available and makes a new one otherwise.
func (c *Cache) allocInt8Slice(n int) []int8 {
var s []int8
// Requests below 256 are served from the smallest size class.
n2 := n
if n2 < 256 {
n2 = 256
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeInt8Slice[b-8].Get()
if v == nil {
s = make([]int8, 1<<b)
} else {
// The pool stores *[]int8. Take the slice out and keep the emptied
// header in c.hdrInt8Slice so freeInt8Slice can reuse it.
sp := v.(*[]int8)
s = *sp
*sp = nil
c.hdrInt8Slice = append(c.hdrInt8Slice, sp)
}
// Reslice to the requested length.
s = s[:n]
return s
}
// freeInt8Slice zeroes s[:len(s)] and puts s into the pool selected by cap(s).
// NOTE(review): presumably s came from allocInt8Slice, so cap(s) is a power
// of two >= 256 — confirm at call sites.
func (c *Cache) freeInt8Slice(s []int8) {
// Only the first len(s) elements are reset.
for i := range s {
s[i] = 0
}
b := bits.Len(uint(cap(s)) - 1)
// Reuse a spare header from c.hdrInt8Slice if there is one; otherwise allocate one.
var sp *[]int8
if len(c.hdrInt8Slice) == 0 {
sp = new([]int8)
} else {
sp = c.hdrInt8Slice[len(c.hdrInt8Slice)-1]
c.hdrInt8Slice[len(c.hdrInt8Slice)-1] = nil
c.hdrInt8Slice = c.hdrInt8Slice[:len(c.hdrInt8Slice)-1]
}
*sp = s
poolFreeInt8Slice[b-8].Put(sp)
}
// poolFreeIDSlice holds freed []ID slices. allocIDSlice and
// freeIDSlice index it by bits.Len(size-1)-6, i.e. by power-of-two size class.
var poolFreeIDSlice [26]sync.Pool
// allocIDSlice returns a []ID of length n. It reuses a pooled slice of
// the matching size class when one is available and makes a new one otherwise.
func (c *Cache) allocIDSlice(n int) []ID {
var s []ID
// Requests below 64 are served from the smallest size class.
n2 := n
if n2 < 64 {
n2 = 64
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeIDSlice[b-6].Get()
if v == nil {
s = make([]ID, 1<<b)
} else {
// The pool stores *[]ID. Take the slice out and keep the emptied
// header in c.hdrIDSlice so freeIDSlice can reuse it.
sp := v.(*[]ID)
s = *sp
*sp = nil
c.hdrIDSlice = append(c.hdrIDSlice, sp)
}
// Reslice to the requested length.
s = s[:n]
return s
}
// freeIDSlice zeroes s[:len(s)] and puts s into the pool selected by cap(s).
// NOTE(review): presumably s came from allocIDSlice, so cap(s) is a power
// of two >= 64 — confirm at call sites.
func (c *Cache) freeIDSlice(s []ID) {
// Only the first len(s) elements are reset.
for i := range s {
s[i] = 0
}
b := bits.Len(uint(cap(s)) - 1)
// Reuse a spare header from c.hdrIDSlice if there is one; otherwise allocate one.
var sp *[]ID
if len(c.hdrIDSlice) == 0 {
sp = new([]ID)
} else {
sp = c.hdrIDSlice[len(c.hdrIDSlice)-1]
c.hdrIDSlice[len(c.hdrIDSlice)-1] = nil
c.hdrIDSlice = c.hdrIDSlice[:len(c.hdrIDSlice)-1]
}
*sp = s
poolFreeIDSlice[b-6].Put(sp)
}
// poolFreeSparseSet holds freed *sparseSet values. allocSparseSet and
// freeSparseSet index it by bits.Len(size-1)-5, i.e. by power-of-two size class.
var poolFreeSparseSet [27]sync.Pool
// allocSparseSet returns a *sparseSet from the pool for n's size class, or a
// new one built with newSparseSet(1<<b) if that pool is empty. The result is
// not resized down to n.
func (c *Cache) allocSparseSet(n int) *sparseSet {
var s *sparseSet
// Requests below 32 are served from the smallest size class.
n2 := n
if n2 < 32 {
n2 = 32
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeSparseSet[b-5].Get()
if v == nil {
s = newSparseSet(1 << b)
} else {
s = v.(*sparseSet)
}
return s
}
// freeSparseSet calls s.clear() and puts s into the pool selected by s.cap().
// NOTE(review): presumably s.cap() is a power of two >= 32, as produced by
// allocSparseSet — confirm at call sites.
func (c *Cache) freeSparseSet(s *sparseSet) {
s.clear()
b := bits.Len(uint(s.cap()) - 1)
poolFreeSparseSet[b-5].Put(s)
}
// poolFreeSparseMap holds freed *sparseMap values. allocSparseMap and
// freeSparseMap index it by bits.Len(size-1)-5, i.e. by power-of-two size class.
var poolFreeSparseMap [27]sync.Pool
// allocSparseMap returns a *sparseMap from the pool for n's size class, or a
// new one built with newSparseMap(1<<b) if that pool is empty. The result is
// not resized down to n.
func (c *Cache) allocSparseMap(n int) *sparseMap {
var s *sparseMap
// Requests below 32 are served from the smallest size class.
n2 := n
if n2 < 32 {
n2 = 32
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeSparseMap[b-5].Get()
if v == nil {
s = newSparseMap(1 << b)
} else {
s = v.(*sparseMap)
}
return s
}
// freeSparseMap calls s.clear() and puts s into the pool selected by s.cap().
// NOTE(review): presumably s.cap() is a power of two >= 32, as produced by
// allocSparseMap — confirm at call sites.
func (c *Cache) freeSparseMap(s *sparseMap) {
s.clear()
b := bits.Len(uint(s.cap()) - 1)
poolFreeSparseMap[b-5].Put(s)
}
// poolFreeSparseMapPos holds freed *sparseMapPos values. allocSparseMapPos and
// freeSparseMapPos index it by bits.Len(size-1)-5, i.e. by power-of-two size class.
var poolFreeSparseMapPos [27]sync.Pool
// allocSparseMapPos returns a *sparseMapPos from the pool for n's size class,
// or a new one built with newSparseMapPos(1<<b) if that pool is empty. The
// result is not resized down to n.
func (c *Cache) allocSparseMapPos(n int) *sparseMapPos {
var s *sparseMapPos
// Requests below 32 are served from the smallest size class.
n2 := n
if n2 < 32 {
n2 = 32
}
// 1<<b is the smallest power of two that is >= n2.
b := bits.Len(uint(n2 - 1))
v := poolFreeSparseMapPos[b-5].Get()
if v == nil {
s = newSparseMapPos(1 << b)
} else {
s = v.(*sparseMapPos)
}
return s
}
// freeSparseMapPos calls s.clear() and puts s into the pool selected by s.cap().
// NOTE(review): presumably s.cap() is a power of two >= 32, as produced by
// allocSparseMapPos — confirm at call sites.
func (c *Cache) freeSparseMapPos(s *sparseMapPos) {
s.clear()
b := bits.Len(uint(s.cap()) - 1)
poolFreeSparseMapPos[b-5].Put(s)
}

View File

@ -21,18 +21,8 @@ type Cache struct {
// See stackalloc.go's {new,put}StackAllocState.
stackAllocState *stackAllocState
domblockstore []ID // scratch space for computing dominators
scrSparseSet []*sparseSet // scratch sparse sets to be re-used.
scrSparseMap []*sparseMap // scratch sparse maps to be re-used.
scrSparseMapPos []*sparseMapPos // scratch sparse maps to be re-used.
scrPoset []*poset // scratch poset to be reused
// deadcode contains reusable slices specifically for the deadcode pass.
// It gets special treatment because of the frequency with which it is run.
deadcode struct {
liveOrderStmts []*Value
live []bool
q []*Value
}
// Reusable regalloc state.
regallocValues []valState
@ -40,6 +30,16 @@ type Cache struct {
debugState debugState
Liveness interface{} // *gc.livenessFuncCache
// Free "headers" for use by the allocators in allocators.go.
// Used to put slices in sync.Pools without allocation.
hdrValueSlice []*[]*Value
hdrBlockSlice []*[]*Block
hdrBoolSlice []*[]bool
hdrIntSlice []*[]int
hdrInt32Slice []*[]int32
hdrInt8Slice []*[]int8
hdrIDSlice []*[]ID
}
func (c *Cache) Reset() {
@ -64,19 +64,4 @@ func (c *Cache) Reset() {
for i := range c.regallocValues {
c.regallocValues[i] = valState{}
}
// liveOrderStmts gets used multiple times during compilation of a function.
// We don't know where the high water mark was, so reslice to cap and search.
c.deadcode.liveOrderStmts = c.deadcode.liveOrderStmts[:cap(c.deadcode.liveOrderStmts)]
no := sort.Search(len(c.deadcode.liveOrderStmts), func(i int) bool { return c.deadcode.liveOrderStmts[i] == nil })
xo := c.deadcode.liveOrderStmts[:no]
for i := range xo {
xo[i] = nil
}
c.deadcode.q = c.deadcode.q[:cap(c.deadcode.q)]
nq := sort.Search(len(c.deadcode.q), func(i int) bool { return c.deadcode.q[i] == nil })
xq := c.deadcode.q[:nq]
for i := range xq {
xq[i] = nil
}
}

View File

@ -9,7 +9,8 @@ package ssa
// Regalloc wants a critical-edge-free CFG so it can implement phi values.
func critical(f *Func) {
// maps from phi arg ID to the new block created for that argument
blocks := make([]*Block, f.NumValues())
blocks := f.Cache.allocBlockSlice(f.NumValues())
defer f.Cache.freeBlockSlice(blocks)
// need to iterate over f.Blocks without range, as we might
// need to split critical edges on newly constructed blocks
for j := 0; j < len(f.Blocks); j++ {

View File

@ -31,7 +31,9 @@ func cse(f *Func) {
// until it reaches a fixed point.
// Make initial coarse partitions by using a subset of the conditions above.
a := make([]*Value, 0, f.NumValues())
a := f.Cache.allocValueSlice(f.NumValues())
defer func() { f.Cache.freeValueSlice(a) }() // inside closure to use final value of a
a = a[:0]
if f.auxmap == nil {
f.auxmap = auxmap{}
}
@ -49,7 +51,8 @@ func cse(f *Func) {
partition := partitionValues(a, f.auxmap)
// map from value id back to eqclass id
valueEqClass := make([]ID, f.NumValues())
valueEqClass := f.Cache.allocIDSlice(f.NumValues())
defer f.Cache.freeIDSlice(valueEqClass)
for _, b := range f.Blocks {
for _, v := range b.Values {
// Use negative equivalence class #s for unique values.
@ -159,7 +162,8 @@ func cse(f *Func) {
// Compute substitutions we would like to do. We substitute v for w
// if v and w are in the same equivalence class and v dominates w.
rewrite := make([]*Value, f.NumValues())
rewrite := f.Cache.allocValueSlice(f.NumValues())
defer f.Cache.freeValueSlice(rewrite)
byDom := new(partitionByDom) // reusable partitionByDom to reduce allocs
for _, e := range partition {
byDom.a = e

View File

@ -9,12 +9,12 @@ import (
)
// findlive returns the reachable blocks and live values in f.
// The caller should call f.retDeadcodeLive(live) when it is done with it.
// The caller should call f.Cache.freeBoolSlice(live) when it is done with it.
func findlive(f *Func) (reachable []bool, live []bool) {
reachable = ReachableBlocks(f)
var order []*Value
live, order = liveValues(f, reachable)
f.retDeadcodeLiveOrderStmts(order)
f.Cache.freeValueSlice(order)
return
}
@ -51,21 +51,11 @@ func ReachableBlocks(f *Func) []bool {
// to be statements in reversed data flow order.
// The second result is used to help conserve statement boundaries for debugging.
// reachable is a map from block ID to whether the block is reachable.
// The caller should call f.retDeadcodeLive(live) and f.retDeadcodeLiveOrderStmts(liveOrderStmts)
// The caller should call f.Cache.freeBoolSlice(live) and f.Cache.freeValueSlice(liveOrderStmts)
// when they are done with the return values.
func liveValues(f *Func, reachable []bool) (live []bool, liveOrderStmts []*Value) {
live = f.newDeadcodeLive()
if cap(live) < f.NumValues() {
live = make([]bool, f.NumValues())
} else {
live = live[:f.NumValues()]
for i := range live {
live[i] = false
}
}
liveOrderStmts = f.newDeadcodeLiveOrderStmts()
liveOrderStmts = liveOrderStmts[:0]
live = f.Cache.allocBoolSlice(f.NumValues())
liveOrderStmts = f.Cache.allocValueSlice(f.NumValues())[:0]
// After regalloc, consider all values to be live.
// See the comment at the top of regalloc.go and in deadcode for details.
@ -101,8 +91,8 @@ func liveValues(f *Func, reachable []bool) (live []bool, liveOrderStmts []*Value
}
// Find all live values
q := f.Cache.deadcode.q[:0]
defer func() { f.Cache.deadcode.q = q }()
q := f.Cache.allocValueSlice(f.NumValues())[:0]
defer f.Cache.freeValueSlice(q)
// Starting set: all control values of reachable blocks are live.
// Calls are live (because callee can observe the memory state).
@ -149,6 +139,7 @@ func liveValues(f *Func, reachable []bool) (live []bool, liveOrderStmts []*Value
for len(q) > 0 {
// pop a reachable value
v := q[len(q)-1]
q[len(q)-1] = nil
q = q[:len(q)-1]
for i, x := range v.Args {
if v.Op == OpPhi && !reachable[v.Block.Preds[i].b.ID] {
@ -213,8 +204,8 @@ func deadcode(f *Func) {
// Find live values.
live, order := liveValues(f, reachable)
defer f.retDeadcodeLive(live)
defer f.retDeadcodeLiveOrderStmts(order)
defer func() { f.Cache.freeBoolSlice(live) }()
defer func() { f.Cache.freeValueSlice(order) }()
// Remove dead & duplicate entries from namedValues map.
s := f.newSparseSet(f.NumValues())

View File

@ -21,7 +21,8 @@ type blockAndIndex struct {
// postorderWithNumbering provides a DFS postordering.
// This seems to make loop-finding more robust.
func postorderWithNumbering(f *Func, ponums []int32) []*Block {
seen := make([]bool, f.NumBlocks())
seen := f.Cache.allocBoolSlice(f.NumBlocks())
defer f.Cache.freeBoolSlice(seen)
// result ordering
order := make([]*Block, 0, len(f.Blocks))
@ -56,44 +57,6 @@ func postorderWithNumbering(f *Func, ponums []int32) []*Block {
type linkedBlocks func(*Block) []Edge
const nscratchslices = 7
// experimentally, functions with 512 or fewer blocks account
// for 75% of memory (size) allocation for dominator computation
// in make.bash.
const minscratchblocks = 512
func (cache *Cache) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g []ID) {
tot := maxBlockID * nscratchslices
scratch := cache.domblockstore
if len(scratch) < tot {
// req = min(1.5*tot, nscratchslices*minscratchblocks)
// 50% padding allows for graph growth in later phases.
req := (tot * 3) >> 1
if req < nscratchslices*minscratchblocks {
req = nscratchslices * minscratchblocks
}
scratch = make([]ID, req)
cache.domblockstore = scratch
} else {
// Clear as much of scratch as we will (re)use
scratch = scratch[0:tot]
for i := range scratch {
scratch[i] = 0
}
}
a = scratch[0*maxBlockID : 1*maxBlockID]
b = scratch[1*maxBlockID : 2*maxBlockID]
c = scratch[2*maxBlockID : 3*maxBlockID]
d = scratch[3*maxBlockID : 4*maxBlockID]
e = scratch[4*maxBlockID : 5*maxBlockID]
f = scratch[5*maxBlockID : 6*maxBlockID]
g = scratch[6*maxBlockID : 7*maxBlockID]
return
}
func dominators(f *Func) []*Block {
preds := func(b *Block) []Edge { return b.Preds }
succs := func(b *Block) []Edge { return b.Succs }
@ -110,12 +73,21 @@ func (f *Func) dominatorsLTOrig(entry *Block, predFn linkedBlocks, succFn linked
// Adapted directly from the original TOPLAS article's "simple" algorithm
maxBlockID := entry.Func.NumBlocks()
semi, vertex, label, parent, ancestor, bucketHead, bucketLink := f.Cache.scratchBlocksForDom(maxBlockID)
scratch := f.Cache.allocIDSlice(7 * maxBlockID)
defer f.Cache.freeIDSlice(scratch)
semi := scratch[0*maxBlockID : 1*maxBlockID]
vertex := scratch[1*maxBlockID : 2*maxBlockID]
label := scratch[2*maxBlockID : 3*maxBlockID]
parent := scratch[3*maxBlockID : 4*maxBlockID]
ancestor := scratch[4*maxBlockID : 5*maxBlockID]
bucketHead := scratch[5*maxBlockID : 6*maxBlockID]
bucketLink := scratch[6*maxBlockID : 7*maxBlockID]
// This version uses integers for most of the computation,
// to make the work arrays smaller and pointer-free.
// fromID translates from ID to *Block where that is needed.
fromID := make([]*Block, maxBlockID)
fromID := f.Cache.allocBlockSlice(maxBlockID)
defer f.Cache.freeBlockSlice(fromID)
for _, v := range f.Blocks {
fromID[v.ID] = v
}
@ -243,7 +215,8 @@ func dominatorsSimple(f *Func) []*Block {
post := f.postorder()
// Make map from block id to order index (for intersect call)
postnum := make([]int, f.NumBlocks())
postnum := f.Cache.allocIntSlice(f.NumBlocks())
defer f.Cache.freeIntSlice(postnum)
for i, b := range post {
postnum[b.ID] = i
}

View File

@ -11,7 +11,8 @@ func flagalloc(f *Func) {
// Compute the in-register flag value we want at the end of
// each block. This is basically a best-effort live variable
// analysis, so it can be much simpler than a full analysis.
end := make([]*Value, f.NumBlocks())
end := f.Cache.allocValueSlice(f.NumBlocks())
defer f.Cache.freeValueSlice(end)
po := f.postorder()
for n := 0; n < 2; n++ {
for _, b := range po {

View File

@ -105,74 +105,35 @@ func (f *Func) NumValues() int {
// newSparseSet returns a sparse set that can store at least up to n integers.
func (f *Func) newSparseSet(n int) *sparseSet {
for i, scr := range f.Cache.scrSparseSet {
if scr != nil && scr.cap() >= n {
f.Cache.scrSparseSet[i] = nil
scr.clear()
return scr
}
}
return newSparseSet(n)
return f.Cache.allocSparseSet(n)
}
// retSparseSet returns a sparse set to the config's cache of sparse
// sets to be reused by f.newSparseSet.
func (f *Func) retSparseSet(ss *sparseSet) {
for i, scr := range f.Cache.scrSparseSet {
if scr == nil {
f.Cache.scrSparseSet[i] = ss
return
}
}
f.Cache.scrSparseSet = append(f.Cache.scrSparseSet, ss)
f.Cache.freeSparseSet(ss)
}
// newSparseMap returns a sparse map that can store at least up to n integers.
func (f *Func) newSparseMap(n int) *sparseMap {
for i, scr := range f.Cache.scrSparseMap {
if scr != nil && scr.cap() >= n {
f.Cache.scrSparseMap[i] = nil
scr.clear()
return scr
}
}
return newSparseMap(n)
return f.Cache.allocSparseMap(n)
}
// retSparseMap returns a sparse map to the config's cache of sparse
// sets to be reused by f.newSparseMap.
func (f *Func) retSparseMap(ss *sparseMap) {
for i, scr := range f.Cache.scrSparseMap {
if scr == nil {
f.Cache.scrSparseMap[i] = ss
return
}
}
f.Cache.scrSparseMap = append(f.Cache.scrSparseMap, ss)
f.Cache.freeSparseMap(ss)
}
// newSparseMapPos returns a sparse map that can store at least up to n integers.
func (f *Func) newSparseMapPos(n int) *sparseMapPos {
for i, scr := range f.Cache.scrSparseMapPos {
if scr != nil && scr.cap() >= n {
f.Cache.scrSparseMapPos[i] = nil
scr.clear()
return scr
}
}
return newSparseMapPos(n)
return f.Cache.allocSparseMapPos(n)
}
// retSparseMapPos returns a sparse map to the config's cache of sparse
// sets to be reused by f.newSparseMapPos.
func (f *Func) retSparseMapPos(ss *sparseMapPos) {
for i, scr := range f.Cache.scrSparseMapPos {
if scr == nil {
f.Cache.scrSparseMapPos[i] = ss
return
}
}
f.Cache.scrSparseMapPos = append(f.Cache.scrSparseMapPos, ss)
f.Cache.freeSparseMapPos(ss)
}
// newPoset returns a new poset from the internal cache
@ -190,33 +151,6 @@ func (f *Func) retPoset(po *poset) {
f.Cache.scrPoset = append(f.Cache.scrPoset, po)
}
// newDeadcodeLive returns a slice for the
// deadcode pass to use to indicate which values are live.
func (f *Func) newDeadcodeLive() []bool {
r := f.Cache.deadcode.live
f.Cache.deadcode.live = nil
return r
}
// retDeadcodeLive returns a deadcode live value slice for re-use.
func (f *Func) retDeadcodeLive(live []bool) {
f.Cache.deadcode.live = live
}
// newDeadcodeLiveOrderStmts returns a slice for the
// deadcode pass to use to indicate which values
// need special treatment for statement boundaries.
func (f *Func) newDeadcodeLiveOrderStmts() []*Value {
r := f.Cache.deadcode.liveOrderStmts
f.Cache.deadcode.liveOrderStmts = nil
return r
}
// retDeadcodeLiveOrderStmts returns a deadcode liveOrderStmts slice for re-use.
func (f *Func) retDeadcodeLiveOrderStmts(liveOrderStmts []*Value) {
f.Cache.deadcode.liveOrderStmts = liveOrderStmts
}
func (f *Func) localSlotAddr(slot LocalSlot) *LocalSlot {
a, ok := f.CanonicalLocalSlots[slot]
if !ok {

View File

@ -20,9 +20,12 @@ func layoutRegallocOrder(f *Func) []*Block {
func layoutOrder(f *Func) []*Block {
order := make([]*Block, 0, f.NumBlocks())
scheduled := make([]bool, f.NumBlocks())
idToBlock := make([]*Block, f.NumBlocks())
indegree := make([]int, f.NumBlocks())
scheduled := f.Cache.allocBoolSlice(f.NumBlocks())
defer f.Cache.freeBoolSlice(scheduled)
idToBlock := f.Cache.allocBlockSlice(f.NumBlocks())
defer f.Cache.freeBlockSlice(idToBlock)
indegree := f.Cache.allocIntSlice(f.NumBlocks())
defer f.Cache.freeIntSlice(indegree)
posdegree := f.newSparseSet(f.NumBlocks()) // blocks with positive remaining degree
defer f.retSparseSet(posdegree)
// blocks with zero remaining degree. Use slice to simulate a LIFO queue to implement

View File

@ -117,8 +117,10 @@ func likelyadjust(f *Func) {
// in their rank order. 0 is default, more positive
// is less likely. It's possible to assign a negative
// unlikeliness (though not currently the case).
certain := make([]int8, f.NumBlocks()) // In the long run, all outcomes are at least this bad. Mainly for Exit
local := make([]int8, f.NumBlocks()) // for our immediate predecessors.
certain := f.Cache.allocInt8Slice(f.NumBlocks()) // In the long run, all outcomes are at least this bad. Mainly for Exit
defer f.Cache.freeInt8Slice(certain)
local := f.Cache.allocInt8Slice(f.NumBlocks()) // for our immediate predecessors.
defer f.Cache.freeInt8Slice(local)
po := f.postorder()
nest := f.loopnest()
@ -277,7 +279,8 @@ func loopnestfor(f *Func) *loopnest {
sdom := f.Sdom()
b2l := make([]*loop, f.NumBlocks())
loops := make([]*loop, 0)
visited := make([]bool, f.NumBlocks())
visited := f.Cache.allocBoolSlice(f.NumBlocks())
defer f.Cache.freeBoolSlice(visited)
sawIrred := false
if f.pass.debug > 2 {
@ -369,7 +372,8 @@ func loopnestfor(f *Func) *loopnest {
ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred}
// Calculate containsUnavoidableCall for regalloc
dominatedByCall := make([]bool, f.NumBlocks())
dominatedByCall := f.Cache.allocBoolSlice(f.NumBlocks())
defer f.Cache.freeBoolSlice(dominatedByCall)
for _, b := range po {
if checkContainsCall(b) {
dominatedByCall[b.ID] = true

View File

@ -94,7 +94,8 @@ func insertLoopReschedChecks(f *Func) {
lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
}
memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
memDefsAtBlockEnds := f.Cache.allocValueSlice(f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
defer f.Cache.freeValueSlice(memDefsAtBlockEnds)
// Propagate last mem definitions forward through successor blocks.
for i := len(po) - 1; i >= 0; i-- {
@ -404,7 +405,8 @@ outer:
func findLastMems(f *Func) []*Value {
var stores []*Value
lastMems := make([]*Value, f.NumBlocks())
lastMems := f.Cache.allocValueSlice(f.NumBlocks())
defer f.Cache.freeValueSlice(lastMems)
storeUse := f.newSparseSet(f.NumValues())
defer f.retSparseSet(storeUse)
for _, b := range f.Blocks {

View File

@ -30,7 +30,8 @@ func loopRotate(f *Func) {
return
}
idToIdx := make([]int, f.NumBlocks())
idToIdx := f.Cache.allocIntSlice(f.NumBlocks())
defer f.Cache.freeIntSlice(idToIdx)
for i, b := range f.Blocks {
idToIdx[b.ID] = i
}
@ -92,20 +93,21 @@ func loopRotate(f *Func) {
// Some blocks that are not part of a loop may be placed
// between loop blocks. In order to avoid these blocks from
// being overwritten, use a temporary slice.
newOrder := make([]*Block, 0, f.NumBlocks())
for _, b := range f.Blocks {
oldOrder := f.Cache.allocBlockSlice(len(f.Blocks))
defer f.Cache.freeBlockSlice(oldOrder)
copy(oldOrder, f.Blocks)
for _, b := range oldOrder {
if _, ok := move[b.ID]; ok {
continue
}
newOrder = append(newOrder, b)
f.Blocks[j] = b
j++
for _, a := range after[b.ID] {
newOrder = append(newOrder, a)
f.Blocks[j] = a
j++
}
}
if j != len(f.Blocks) {
if j != len(oldOrder) {
f.Fatalf("bad reordering in looprotate")
}
f.Blocks = newOrder
}

View File

@ -41,7 +41,8 @@ func nilcheckelim(f *Func) {
// map from value ID to bool indicating if value is known to be non-nil
// in the current dominator path being walked. This slice is updated by
// walkStates to maintain the known non-nil values.
nonNilValues := make([]bool, f.NumValues())
nonNilValues := f.Cache.allocBoolSlice(f.NumValues())
defer f.Cache.freeBoolSlice(nonNilValues)
// make an initial pass identifying any non-nil values
for _, b := range f.Blocks {
@ -86,7 +87,8 @@ func nilcheckelim(f *Func) {
// allocate auxiliary data structures for computing store order
sset := f.newSparseSet(f.NumValues())
defer f.retSparseSet(sset)
storeNumber := make([]int32, f.NumValues())
storeNumber := f.Cache.allocInt32Slice(f.NumValues())
defer f.Cache.freeInt32Slice(storeNumber)
// perform a depth first walk of the dominee tree
for len(work) > 0 {

View File

@ -124,7 +124,7 @@ func (p stringFuncPrinter) named(n LocalSlot, vals []*Value) {
func fprintFunc(p funcPrinter, f *Func) {
reachable, live := findlive(f)
defer f.retDeadcodeLive(live)
defer f.Cache.freeBoolSlice(live)
p.header(f)
printed := make([]bool, f.NumValues())
for _, b := range f.Blocks {

View File

@ -146,6 +146,7 @@ func regalloc(f *Func) {
var s regAllocState
s.init(f)
s.regalloc(f)
s.close()
}
type register uint8
@ -357,6 +358,12 @@ func (s *regAllocState) clobberRegs(m regMask) {
// setOrig records that c's original value is the same as
// v's original value.
func (s *regAllocState) setOrig(c *Value, v *Value) {
if int(c.ID) >= cap(s.orig) {
x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
copy(x, s.orig)
s.f.Cache.freeValueSlice(s.orig)
s.orig = x
}
for int(c.ID) >= len(s.orig) {
s.orig = append(s.orig, nil)
}
@ -664,7 +671,7 @@ func (s *regAllocState) init(f *Func) {
s.f.Cache.regallocValues = make([]valState, nv)
}
s.values = s.f.Cache.regallocValues
s.orig = make([]*Value, nv)
s.orig = s.f.Cache.allocValueSlice(nv)
s.copies = make(map[*Value]bool)
for _, b := range s.visitOrder {
for _, v := range b.Values {
@ -728,6 +735,10 @@ func (s *regAllocState) init(f *Func) {
}
}
// close hands s.orig back to the function's Cache via freeValueSlice.
// s.orig is obtained from allocValueSlice in init (and again in setOrig when
// it has to grow), and regalloc calls close once s.regalloc has returned.
func (s *regAllocState) close() {
s.f.Cache.freeValueSlice(s.orig)
}
// Adds a use record for id at distance dist from the start of the block.
// All calls to addUse must happen with nonincreasing dist.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {

View File

@ -94,13 +94,15 @@ func (op Op) isLoweredGetClosurePtr() bool {
func schedule(f *Func) {
// For each value, the number of times it is used in the block
// by values that have not been scheduled yet.
uses := make([]int32, f.NumValues())
uses := f.Cache.allocInt32Slice(f.NumValues())
defer f.Cache.freeInt32Slice(uses)
// reusable priority queue
priq := new(ValHeap)
// "priority" for a value
score := make([]int8, f.NumValues())
score := f.Cache.allocInt8Slice(f.NumValues())
defer f.Cache.freeInt8Slice(score)
// scheduling order. We queue values in this list in reverse order.
// A constant bound allows this to be stack-allocated. 64 is
@ -108,7 +110,8 @@ func schedule(f *Func) {
order := make([]*Value, 0, 64)
// maps mem values to the next live memory value
nextMem := make([]*Value, f.NumValues())
nextMem := f.Cache.allocValueSlice(f.NumValues())
defer f.Cache.freeValueSlice(nextMem)
// additional pretend arguments for each Value. Used to enforce load/store ordering.
additionalArgs := make([][]*Value, f.NumValues())

View File

@ -25,8 +25,6 @@ type stackAllocState struct {
values []stackValState
interfere [][]ID // interfere[v.id] = values that interfere with v.
names []LocalSlot
slots []int
used []bool
nArgSlot, // Number of Values sourced to arg slot
nNotNeed, // Number of Values not needing a stack slot
@ -57,12 +55,6 @@ func putStackAllocState(s *stackAllocState) {
for i := range s.names {
s.names[i] = LocalSlot{}
}
for i := range s.slots {
s.slots[i] = 0
}
for i := range s.used {
s.used[i] = false
}
s.f.Cache.stackAllocState = s
s.f = nil
s.live = nil
@ -218,25 +210,15 @@ func (s *stackAllocState) stackalloc() {
// Each time we assign a stack slot to a value v, we remember
// the slot we used via an index into locations[v.Type].
slots := s.slots
if n := f.NumValues(); cap(slots) >= n {
slots = slots[:n]
} else {
slots = make([]int, n)
s.slots = slots
}
slots := f.Cache.allocIntSlice(f.NumValues())
defer f.Cache.freeIntSlice(slots)
for i := range slots {
slots[i] = -1
}
// Pick a stack slot for each value needing one.
var used []bool
if n := f.NumValues(); cap(s.used) >= n {
used = s.used[:n]
} else {
used = make([]bool, n)
s.used = used
}
used := f.Cache.allocBoolSlice(f.NumValues())
defer f.Cache.freeBoolSlice(used)
for _, b := range f.Blocks {
for _, v := range b.Values {
if !s.values[v.ID].needSlot {

View File

@ -10,7 +10,8 @@ package ssa
// A Value can be moved to any block that
// dominates all blocks in which it is used.
func tighten(f *Func) {
canMove := make([]bool, f.NumValues())
canMove := f.Cache.allocBoolSlice(f.NumValues())
defer f.Cache.freeBoolSlice(canMove)
for _, b := range f.Blocks {
for _, v := range b.Values {
if v.Op.isLoweredGetClosurePtr() {
@ -52,7 +53,8 @@ func tighten(f *Func) {
lca := makeLCArange(f)
// For each moveable value, record the block that dominates all uses found so far.
target := make([]*Block, f.NumValues())
target := f.Cache.allocBlockSlice(f.NumValues())
defer f.Cache.freeBlockSlice(target)
// Grab loop information.
// We use this to make sure we don't tighten a value into a (deeper) loop.

View File

@ -139,7 +139,8 @@ func writebarrier(f *Func) {
// allocate auxiliary data structures for computing store order
sset = f.newSparseSet(f.NumValues())
defer f.retSparseSet(sset)
storeNumber = make([]int32, f.NumValues())
storeNumber = f.Cache.allocInt32Slice(f.NumValues())
defer f.Cache.freeInt32Slice(storeNumber)
}
// order values in store order