1
0
mirror of https://github.com/golang/go synced 2024-11-11 18:21:40 -07:00

cmd/compile: enhance tighten pass for memory values

This CL enhances the tighten pass. Previously if a value has memory arg,
then the tighten pass won't move it, actually if the memory state is
consistent among definition and use block, we can move the value. This
CL optimizes this case. This is useful for the following situation:
b1:
  x = load(...mem)
  if(...) goto b2 else b3
b2:
  use(x)
b3:
  some_op_not_use_x

For the micro-benchmark mentioned in #56620, the performance improvement
is about 15%.
There's no noticeable performance change in the go1 benchmark.

Fixes #56620

Change-Id: I9b152754f27231f583a6995fc7cd8472aa7d390c
Reviewed-on: https://go-review.googlesource.com/c/go/+/458755
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
This commit is contained in:
erifan01 2022-12-15 16:39:07 +08:00 committed by Gopher Robot
parent c486f74eeb
commit ea8f037996
3 changed files with 127 additions and 7 deletions

View File

@ -602,6 +602,11 @@ func isLeaf(f *Func) bool {
return true
}
// needRegister reports whether v needs a register.
func (v *Value) needRegister() bool {
return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
}
func (s *regAllocState) init(f *Func) {
s.f = f
s.f.RegAlloc = s.f.Cache.locs[:0]
@ -702,7 +707,7 @@ func (s *regAllocState) init(f *Func) {
s.copies = make(map[*Value]bool)
for _, b := range s.visitOrder {
for _, v := range b.Values {
if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
if v.needRegister() {
s.values[v.ID].needReg = true
s.values[v.ID].rematerializeable = v.rematerializeable()
s.orig[v.ID] = v

View File

@ -21,6 +21,14 @@ func tighten(f *Func) {
canMove := f.Cache.allocBoolSlice(f.NumValues())
defer f.Cache.freeBoolSlice(canMove)
// Compute the memory states of each block.
startMem := f.Cache.allocValueSlice(f.NumBlocks())
defer f.Cache.freeValueSlice(startMem)
endMem := f.Cache.allocValueSlice(f.NumBlocks())
defer f.Cache.freeValueSlice(endMem)
memState(f, startMem, endMem)
for _, b := range f.Blocks {
for _, v := range b.Values {
if v.Op.isLoweredGetClosurePtr() {
@ -35,15 +43,12 @@ func tighten(f *Func) {
// SelectN is typically, ultimately, a register.
continue
}
if v.MemoryArg() != nil {
// We can't move values which have a memory arg - it might
// make two memory values live across a block boundary.
continue
}
// Count arguments which will need a register.
narg := 0
for _, a := range v.Args {
if !a.rematerializeable() {
// SP and SB are special registers and have no effect on
// the allocation of general-purpose registers.
if a.needRegister() && a.Op != OpSB && a.Op != OpSP {
narg++
}
}
@ -138,6 +143,16 @@ func tighten(f *Func) {
// v is not moveable, or is already in correct place.
continue
}
if mem := v.MemoryArg(); mem != nil {
if startMem[t.ID] != mem {
// We can't move a value with a memory arg unless the target block
// has that memory arg as its starting memory.
continue
}
}
if f.pass.debug > 0 {
b.Func.Warnl(v.Pos, "%v is moved", v.Op)
}
// Move v to the block which dominates its uses.
t.Values = append(t.Values, v)
v.Block = t
@ -174,3 +189,81 @@ func phiTighten(f *Func) {
}
}
}
// memState computes the memory state at the beginning and end of each block of
// the function. The memory state is represented by a value of mem type.
// The returned result is stored in startMem and endMem, and endMem is nil for
// blocks with no successors (Exit,Ret,RetJmp blocks). This algorithm is not
// suitable for infinite loop blocks that do not contain any mem operations.
// For example:
// b1:
//
// (some values)
//
// plain -> b2
// b2: <- b1 b2
// Plain -> b2
//
// Algorithm introduction:
// 1. The start memory state of a block is InitMem, a Phi node of type mem or
// an incoming memory value.
// 2. The start memory state of a block is consistent with the end memory state
// of its parent nodes. If the start memory state of a block is a Phi value,
// then the end memory state of its parent nodes is consistent with the
// corresponding argument value of the Phi node.
// 3. The algorithm first obtains the memory state of some blocks in the tree
// in the first step. Then floods the known memory state to other nodes in
// the second step.
func memState(f *Func, startMem, endMem []*Value) {
// This slice contains the set of blocks that have had their startMem set but this
// startMem value has not yet been propagated to the endMem of its predecessors
changed := make([]*Block, 0)
// First step, init the memory state of some blocks.
for _, b := range f.Blocks {
for _, v := range b.Values {
var mem *Value
if v.Op == OpPhi {
if v.Type.IsMemory() {
mem = v
}
} else if v.Op == OpInitMem {
mem = v // This is actually not needed.
} else if a := v.MemoryArg(); a != nil && a.Block != b {
// The only incoming memory value doesn't belong to this block.
mem = a
}
if mem != nil {
if old := startMem[b.ID]; old != nil {
if old == mem {
continue
}
f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, b, old, mem)
}
startMem[b.ID] = mem
changed = append(changed, b)
}
}
}
// Second step, floods the known memory state of some blocks to others.
for len(changed) != 0 {
top := changed[0]
changed = changed[1:]
mem := startMem[top.ID]
for i, p := range top.Preds {
pb := p.b
if endMem[pb.ID] != nil {
continue
}
if mem.Op == OpPhi && mem.Block == top {
endMem[pb.ID] = mem.Args[i]
} else {
endMem[pb.ID] = mem
}
if startMem[pb.ID] == nil {
startMem[pb.ID] = endMem[pb.ID]
changed = append(changed, pb)
}
}
}
}

22
test/tighten.go Normal file
View File

@ -0,0 +1,22 @@
// errorcheck -0 -d=ssa/tighten/debug=1
//go:build arm64
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
var (
e any
ts uint16
)
func moveValuesWithMemoryArg(len int) {
for n := 0; n < len; n++ {
// Load of e.data is lowed as a MOVDload op, which has a memory
// argument. It's moved near where it's used.
_ = e != ts // ERROR "MOVDload is moved$" "MOVDaddr is moved$"
}
}