1
0
mirror of https://github.com/golang/go synced 2024-11-26 08:17:59 -07:00

cmd/compile/ssa: optimize the derivable known branch of If block

When the control value of a If block is known for a particular inbound edge
because its value can be inferred from the control value of its predecessor,
then this inbound edge can be redirected to the known successor directly,
This CL optimizes this kind of cases to eliminate unnecessary comparision.

For example, the following piece of code comes from runtime.atoi,
if !neg && un > uint(maxInt) {
	return 0, false
}
if neg && un > uint(maxInt)+1 {
	return 0, false
}

Before this optimization, if the first "if" statement does not return, both
conditions of the second "if" statement will be checked. But obviously the
value of neg is known through the first "if" statement, and there is no need
to check neg repeatedly.

After this optimization, this redundancy check is eliminated, and the execution
logic becomes as follows.
if !neg {
	if un > uint(maxInt) {
		return 0, false
	}
} else {
	if un > uint(maxInt)+1 {
		return 0, false
	}
}

This CL does not bring significant performance changes, but it makes the code
structure look more reasonable.

Statistical data from tool compilecmp on Linux/amd64:
name                      old time/op       new time/op       delta
Template                        380ms ± 4%        385ms ± 3%  +1.16%  (p=0.000 n=50+49)
Unicode                         168ms ± 9%        169ms ± 9%    ~     (p=0.421 n=49+46)
GoTypes                         1.99s ± 4%        2.02s ± 4%  +1.48%  (p=0.000 n=49+49)
Compiler                        188ms ± 8%        188ms ± 9%    ~     (p=0.997 n=49+50)
SSA                             11.8s ± 2%        12.0s ± 2%  +1.24%  (p=0.000 n=48+50)
Flate                           242ms ± 6%        244ms ± 9%    ~     (p=0.307 n=46+49)
GoParser                        361ms ± 3%        366ms ± 4%  +1.23%  (p=0.000 n=48+49)
Reflect                         836ms ± 3%        842ms ± 3%  +0.70%  (p=0.004 n=48+48)
Tar                             335ms ± 3%        340ms ± 4%  +1.47%  (p=0.000 n=49+46)
XML                             432ms ± 4%        437ms ± 4%  +1.11%  (p=0.002 n=49+49)
LinkCompiler                    701ms ± 4%        704ms ± 5%    ~     (p=0.278 n=49+50)
ExternalLinkCompiler            1.83s ± 3%        1.84s ± 3%  +0.51%  (p=0.034 n=48+49)
LinkWithoutDebugCompiler        436ms ± 6%        438ms ± 6%    ~     (p=0.419 n=48+49)
[Geo mean]                      612ms             617ms       +0.84%

name                      old alloc/op      new alloc/op      delta
Template                       38.7MB ± 1%       39.1MB ± 1%  +1.19%  (p=0.000 n=50+50)
Unicode                        28.1MB ± 0%       28.1MB ± 0%  +0.20%  (p=0.000 n=49+45)
GoTypes                         168MB ± 1%        170MB ± 1%  +1.05%  (p=0.000 n=48+49)
Compiler                       23.0MB ± 1%       23.1MB ± 1%  +0.63%  (p=0.000 n=50+50)
SSA                            1.54GB ± 1%       1.55GB ± 1%  +0.85%  (p=0.000 n=50+50)
Flate                          23.6MB ± 1%       23.9MB ± 1%  +1.36%  (p=0.000 n=43+46)
GoParser                       35.0MB ± 1%       35.3MB ± 1%  +0.94%  (p=0.000 n=50+50)
Reflect                        84.7MB ± 1%       86.1MB ± 1%  +1.72%  (p=0.000 n=49+49)
Tar                            34.5MB ± 1%       34.9MB ± 1%  +1.07%  (p=0.000 n=47+48)
XML                            44.2MB ± 3%       44.6MB ± 3%  +0.70%  (p=0.003 n=50+49)
LinkCompiler                    128MB ± 0%        128MB ± 0%  +0.01%  (p=0.004 n=49+50)
ExternalLinkCompiler            120MB ± 0%        120MB ± 0%  +0.01%  (p=0.000 n=49+50)
LinkWithoutDebugCompiler       77.3MB ± 0%       77.3MB ± 0%  +0.02%  (p=0.000 n=50+50)
[Geo mean]                     69.1MB            69.6MB       +0.75%

file      before    after     Δ       %
addr2line 4049276   4051308   +2032   +0.050%
api       5248940   5248996   +56     +0.001%
asm       4868093   4868037   -56     -0.001%
buildid   2627666   2626026   -1640   -0.062%
cgo       4614432   4615040   +608    +0.013%
compile   23298888  23301267  +2379   +0.010%
cover     4591609   4591161   -448    -0.010%
dist      3449638   3450254   +616    +0.018%
doc       3925667   3926363   +696    +0.018%
fix       3322936   3323464   +528    +0.016%
link      6628632   6629560   +928    +0.014%
nm        3991753   3996497   +4744   +0.119%
objdump   4396119   4395615   -504    -0.011%
pack      2399719   2399535   -184    -0.008%
pprof     13616418  13622866  +6448   +0.047%
test2json 2646121   2646081   -40     -0.002%
trace     10233087  10226359  -6728   -0.066%
vet       7117994   7121066   +3072   +0.043%
total     111026988 111039495 +12507  +0.011%

On linux arm64:
name                      old time/op       new time/op       delta
Template                        284ms ± 1%        286ms ± 1%  +0.70%  (p=0.000 n=49+50)
Unicode                         125ms ± 3%        125ms ± 2%    ~     (p=0.548 n=50+50)
GoTypes                         1.69s ± 1%        1.71s ± 1%  +1.02%  (p=0.000 n=49+49)
Compiler                        125ms ± 1%        124ms ± 2%  -0.35%  (p=0.020 n=50+50)
SSA                             12.7s ± 1%        12.8s ± 1%  +1.21%  (p=0.000 n=49+49)
Flate                           172ms ± 1%        173ms ± 1%  +0.20%  (p=0.047 n=50+50)
GoParser                        265ms ± 1%        266ms ± 1%  +0.64%  (p=0.000 n=50+50)
Reflect                         651ms ± 1%        650ms ± 1%    ~     (p=0.079 n=48+48)
Tar                             246ms ± 1%        246ms ± 1%    ~     (p=0.202 n=50+46)
XML                             328ms ± 1%        332ms ± 1%  +1.28%  (p=0.000 n=50+49)
LinkCompiler                    600ms ± 1%        599ms ± 1%    ~     (p=0.264 n=50+50)
ExternalLinkCompiler            1.88s ± 1%        1.90s ± 0%  +1.36%  (p=0.000 n=50+50)
LinkWithoutDebugCompiler        365ms ± 1%        365ms ± 1%    ~     (p=0.602 n=50+46)
[Geo mean]                      490ms             492ms       +0.47%

name                      old alloc/op      new alloc/op      delta
Template                       38.8MB ± 1%       39.1MB ± 1%  +0.92%  (p=0.000 n=44+42)
Unicode                        28.4MB ± 0%       28.4MB ± 0%  +0.22%  (p=0.000 n=44+45)
GoTypes                         169MB ± 1%        171MB ± 1%  +1.12%  (p=0.000 n=50+50)
Compiler                       23.2MB ± 1%       23.3MB ± 1%  +0.56%  (p=0.000 n=42+43)
SSA                            1.55GB ± 0%       1.56GB ± 0%  +0.91%  (p=0.000 n=48+49)
Flate                          23.7MB ± 2%       24.0MB ± 1%  +1.20%  (p=0.000 n=50+50)
GoParser                       35.3MB ± 1%       35.6MB ± 1%  +0.88%  (p=0.000 n=50+50)
Reflect                        85.0MB ± 0%       86.5MB ± 0%  +1.70%  (p=0.000 n=49+48)
Tar                            34.5MB ± 1%       34.9MB ± 1%  +1.03%  (p=0.000 n=47+50)
XML                            43.8MB ± 2%       44.0MB ± 0%  +0.41%  (p=0.002 n=49+38)
LinkCompiler                    136MB ± 0%        136MB ± 0%  +0.01%  (p=0.006 n=50+49)
ExternalLinkCompiler            127MB ± 0%        127MB ± 0%  +0.02%  (p=0.000 n=49+50)
LinkWithoutDebugCompiler       84.1MB ± 0%       84.1MB ± 0%    ~     (p=0.534 n=50+50)
[Geo mean]                     70.4MB            70.9MB       +0.69%

file      before    after     Δ       %
addr2line 4006004   4004556   -1448   -0.036%
api       5029716   5028828   -888    -0.018%
asm       4936863   4934943   -1920   -0.039%
buildid   2594947   2594099   -848    -0.033%
cgo       4399702   4399502   -200    -0.005%
compile   22233139  22230486  -2653   -0.012%
cover     4443681   4443881   +200    +0.005%
dist      3365902   3365486   -416    -0.012%
doc       3776175   3776151   -24     -0.001%
fix       3218624   3218600   -24     -0.001%
link      6365001   6361409   -3592   -0.056%
nm        3923345   3923065   -280    -0.007%
objdump   4295473   4296673   +1200   +0.028%
pack      2390561   2389393   -1168   -0.049%
pprof     12866419  12865115  -1304   -0.010%
test2json 2587113   2585561   -1552   -0.060%
trace     9609814   9610846   +1032   +0.011%
vet       6790272   6789760   -512    -0.008%
total     106832751 106818354 -14397  -0.013%

Update: #37608

Change-Id: I2831238b12e3af5aef2261f64f804bf0a8b43f86
Reviewed-on: https://go-review.googlesource.com/c/go/+/244737
Reviewed-by: eric fang <eric.fang@arm.com>
Reviewed-by: Keith Randall <khr@golang.org>
Trust: eric fang <eric.fang@arm.com>
Run-TryBot: eric fang <eric.fang@arm.com>
TryBot-Result: Go Bot <gobot@golang.org>
This commit is contained in:
eric fang 2020-07-09 11:27:03 +00:00
parent 9726c78539
commit 92d1afe989
4 changed files with 321 additions and 13 deletions

View File

@ -11,8 +11,8 @@ import (
// fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange). // fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange).
func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange) } func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange) }
// fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf). // fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect).
func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf) } func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) }
type fuseType uint8 type fuseType uint8
@ -20,6 +20,7 @@ const (
fuseTypePlain fuseType = 1 << iota fuseTypePlain fuseType = 1 << iota
fuseTypeIf fuseTypeIf
fuseTypeIntInRange fuseTypeIntInRange
fuseTypeBranchRedirect
fuseTypeShortCircuit fuseTypeShortCircuit
) )
@ -43,6 +44,9 @@ func fuse(f *Func, typ fuseType) {
changed = shortcircuitBlock(b) || changed changed = shortcircuitBlock(b) || changed
} }
} }
if typ&fuseTypeBranchRedirect != 0 {
changed = fuseBranchRedirect(f) || changed
}
if changed { if changed {
f.invalidateCFG() f.invalidateCFG()
} }

View File

@ -0,0 +1,110 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// fuseBranchRedirect checks for a CFG in which the outbound branch
// of an If block can be derived from its predecessor If block, in
// some such cases, we can redirect the predecessor If block to the
// corresponding successor block directly. For example:
// p:
// v11 = Less64 <bool> v10 v8
// If v11 goto b else u
// b: <- p ...
// v17 = Leq64 <bool> v10 v8
// If v17 goto s else o
// We can redirect p to s directly.
//
// The implementation here borrows the framework of the prove pass.
// 1, Traverse all blocks of function f to find If blocks.
// 2, For any If block b, traverse all its predecessors to find If blocks.
// 3, For any If block predecessor p, update relationship p->b.
// 4, Traverse all successors of b.
// 5, For any successor s of b, try to update relationship b->s, if a
// contradiction is found then redirect p to another successor of b.
func fuseBranchRedirect(f *Func) bool {
ft := newFactsTable(f)
ft.checkpoint()
changed := false
for i := len(f.Blocks) - 1; i >= 0; i-- {
b := f.Blocks[i]
if b.Kind != BlockIf {
continue
}
// b is either empty or only contains the control value.
// TODO: if b contains only OpCopy or OpNot related to b.Controls,
// such as Copy(Not(Copy(Less64(v1, v2)))), perhaps it can be optimized.
bCtl := b.Controls[0]
if bCtl.Block != b && len(b.Values) != 0 || (len(b.Values) != 1 || bCtl.Uses != 1) && bCtl.Block == b {
continue
}
for k := 0; k < len(b.Preds); k++ {
pk := b.Preds[k]
p := pk.b
if p.Kind != BlockIf || p == b {
continue
}
pbranch := positive
if pk.i == 1 {
pbranch = negative
}
ft.checkpoint()
// Assume branch p->b is taken.
addBranchRestrictions(ft, p, pbranch)
// Check if any outgoing branch is unreachable based on the above condition.
parent := b
for j, bbranch := range [...]branch{positive, negative} {
ft.checkpoint()
// Try to update relationship b->child, and check if the contradiction occurs.
addBranchRestrictions(ft, parent, bbranch)
unsat := ft.unsat
ft.restore()
if !unsat {
continue
}
// This branch is impossible,so redirect p directly to another branch.
out := 1 ^ j
child := parent.Succs[out].b
if child == b {
continue
}
b.removePred(k)
p.Succs[pk.i] = Edge{child, len(child.Preds)}
// Fix up Phi value in b to have one less argument.
for _, v := range b.Values {
if v.Op != OpPhi {
continue
}
v.RemoveArg(k)
phielimValue(v)
}
// Fix up child to have one more predecessor.
child.Preds = append(child.Preds, Edge{p, pk.i})
ai := b.Succs[out].i
for _, v := range child.Values {
if v.Op != OpPhi {
continue
}
v.AddArg(v.Args[ai])
}
if b.Func.pass.debug > 0 {
b.Func.Warnl(b.Controls[0].Pos, "Redirect %s based on %s", b.Controls[0].Op, p.Controls[0].Op)
}
changed = true
k--
break
}
ft.restore()
}
if len(b.Preds) == 0 && b != f.Entry {
// Block is now dead.
b.Kind = BlockInvalid
}
}
ft.restore()
ft.cleanup(f)
return changed
}

View File

@ -726,6 +726,20 @@ var (
} }
) )
// cleanup returns the posets to the free list
func (ft *factsTable) cleanup(f *Func) {
for _, po := range []*poset{ft.orderS, ft.orderU} {
// Make sure it's empty as it should be. A non-empty poset
// might cause errors and miscompilations if reused.
if checkEnabled {
if err := po.CheckEmpty(); err != nil {
f.Fatalf("poset not empty after function %s: %v", f.Name, err)
}
}
f.retPoset(po)
}
}
// prove removes redundant BlockIf branches that can be inferred // prove removes redundant BlockIf branches that can be inferred
// from previous dominating comparisons. // from previous dominating comparisons.
// //
@ -917,17 +931,7 @@ func prove(f *Func) {
ft.restore() ft.restore()
// Return the posets to the free list ft.cleanup(f)
for _, po := range []*poset{ft.orderS, ft.orderU} {
// Make sure it's empty as it should be. A non-empty poset
// might cause errors and miscompilations if reused.
if checkEnabled {
if err := po.CheckEmpty(); err != nil {
f.Fatalf("prove poset not empty after function %s: %v", f.Name, err)
}
}
f.retPoset(po)
}
} }
// getBranch returns the range restrictions added by p // getBranch returns the range restrictions added by p

190
test/fuse.go Normal file
View File

@ -0,0 +1,190 @@
// +build amd64 arm64
// errorcheck -0 -d=ssa/late_fuse/debug=1
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import "strings"
const Cf2 = 2.0
func fEqEq(a int, f float64) bool {
return a == 0 && f > Cf2 || a == 0 && f < -Cf2 // ERROR "Redirect Eq64 based on Eq64$"
}
func fEqNeq(a int32, f float64) bool {
return a == 0 && f > Cf2 || a != 0 && f < -Cf2 // ERROR "Redirect Neq32 based on Eq32$"
}
func fEqLess(a int8, f float64) bool {
return a == 0 && f > Cf2 || a < 0 && f < -Cf2
}
func fEqLeq(a float64, f float64) bool {
return a == 0 && f > Cf2 || a <= 0 && f < -Cf2
}
func fEqLessU(a uint, f float64) bool {
return a == 0 && f > Cf2 || a < 0 && f < -Cf2
}
func fEqLeqU(a uint64, f float64) bool {
return a == 0 && f > Cf2 || a <= 0 && f < -Cf2 // ERROR "Redirect Leq64U based on Eq64$"
}
func fNeqEq(a int, f float64) bool {
return a != 0 && f > Cf2 || a == 0 && f < -Cf2 // ERROR "Redirect Eq64 based on Neq64$"
}
func fNeqNeq(a int32, f float64) bool {
return a != 0 && f > Cf2 || a != 0 && f < -Cf2 // ERROR "Redirect Neq32 based on Neq32$"
}
func fNeqLess(a float32, f float64) bool {
// TODO: Add support for floating point numbers in prove
return a != 0 && f > Cf2 || a < 0 && f < -Cf2
}
func fNeqLeq(a int16, f float64) bool {
return a != 0 && f > Cf2 || a <= 0 && f < -Cf2 // ERROR "Redirect Leq16 based on Neq16$"
}
func fNeqLessU(a uint, f float64) bool {
return a != 0 && f > Cf2 || a < 0 && f < -Cf2
}
func fNeqLeqU(a uint32, f float64) bool {
return a != 0 && f > Cf2 || a <= 0 && f < -Cf2 // ERROR "Redirect Leq32U based on Neq32$"
}
func fLessEq(a int, f float64) bool {
return a < 0 && f > Cf2 || a == 0 && f < -Cf2
}
func fLessNeq(a int32, f float64) bool {
return a < 0 && f > Cf2 || a != 0 && f < -Cf2
}
func fLessLess(a float32, f float64) bool {
return a < 0 && f > Cf2 || a < 0 && f < -Cf2 // ERROR "Redirect Less32F based on Less32F$"
}
func fLessLeq(a float64, f float64) bool {
return a < 0 && f > Cf2 || a <= 0 && f < -Cf2
}
func fLeqEq(a float64, f float64) bool {
return a <= 0 && f > Cf2 || a == 0 && f < -Cf2
}
func fLeqNeq(a int16, f float64) bool {
return a <= 0 && f > Cf2 || a != 0 && f < -Cf2 // ERROR "Redirect Neq16 based on Leq16$"
}
func fLeqLess(a float32, f float64) bool {
return a <= 0 && f > Cf2 || a < 0 && f < -Cf2
}
func fLeqLeq(a int8, f float64) bool {
return a <= 0 && f > Cf2 || a <= 0 && f < -Cf2 // ERROR "Redirect Leq8 based on Leq8$"
}
func fLessUEq(a uint8, f float64) bool {
return a < 0 && f > Cf2 || a == 0 && f < -Cf2
}
func fLessUNeq(a uint16, f float64) bool {
return a < 0 && f > Cf2 || a != 0 && f < -Cf2
}
func fLessULessU(a uint32, f float64) bool {
return a < 0 && f > Cf2 || a < 0 && f < -Cf2
}
func fLessULeqU(a uint64, f float64) bool {
return a < 0 && f > Cf2 || a <= 0 && f < -Cf2
}
func fLeqUEq(a uint8, f float64) bool {
return a <= 0 && f > Cf2 || a == 0 && f < -Cf2 // ERROR "Redirect Eq8 based on Leq8U$"
}
func fLeqUNeq(a uint16, f float64) bool {
return a <= 0 && f > Cf2 || a != 0 && f < -Cf2 // ERROR "Redirect Neq16 based on Leq16U$"
}
func fLeqLessU(a uint32, f float64) bool {
return a <= 0 && f > Cf2 || a < 0 && f < -Cf2
}
func fLeqLeqU(a uint64, f float64) bool {
return a <= 0 && f > Cf2 || a <= 0 && f < -Cf2 // ERROR "Redirect Leq64U based on Leq64U$"
}
// Arg tests are disabled because the op name is different on amd64 and arm64.
func fEqPtrEqPtr(a, b *int, f float64) bool {
return a == b && f > Cf2 || a == b && f < -Cf2 // ERROR "Redirect EqPtr based on EqPtr$"
}
func fEqPtrNeqPtr(a, b *int, f float64) bool {
return a == b && f > Cf2 || a != b && f < -Cf2 // ERROR "Redirect NeqPtr based on EqPtr$"
}
func fNeqPtrEqPtr(a, b *int, f float64) bool {
return a != b && f > Cf2 || a == b && f < -Cf2 // ERROR "Redirect EqPtr based on NeqPtr$"
}
func fNeqPtrNeqPtr(a, b *int, f float64) bool {
return a != b && f > Cf2 || a != b && f < -Cf2 // ERROR "Redirect NeqPtr based on NeqPtr$"
}
func fEqInterEqInter(a interface{}, f float64) bool {
return a == nil && f > Cf2 || a == nil && f < -Cf2 // ERROR "Redirect IsNonNil based on IsNonNil$"
}
func fEqInterNeqInter(a interface{}, f float64) bool {
return a == nil && f > Cf2 || a != nil && f < -Cf2
}
func fNeqInterEqInter(a interface{}, f float64) bool {
return a != nil && f > Cf2 || a == nil && f < -Cf2
}
func fNeqInterNeqInter(a interface{}, f float64) bool {
return a != nil && f > Cf2 || a != nil && f < -Cf2 // ERROR "Redirect IsNonNil based on IsNonNil$"
}
func fEqSliceEqSlice(a []int, f float64) bool {
return a == nil && f > Cf2 || a == nil && f < -Cf2 // ERROR "Redirect IsNonNil based on IsNonNil$"
}
func fEqSliceNeqSlice(a []int, f float64) bool {
return a == nil && f > Cf2 || a != nil && f < -Cf2
}
func fNeqSliceEqSlice(a []int, f float64) bool {
return a != nil && f > Cf2 || a == nil && f < -Cf2
}
func fNeqSliceNeqSlice(a []int, f float64) bool {
return a != nil && f > Cf2 || a != nil && f < -Cf2 // ERROR "Redirect IsNonNil based on IsNonNil$"
}
func fPhi(a, b string) string {
aslash := strings.HasSuffix(a, "/") // ERROR "Redirect Phi based on Phi$"
bslash := strings.HasPrefix(b, "/")
switch {
case aslash && bslash:
return a + b[1:]
case !aslash && !bslash:
return a + "/" + b
}
return a + b
}
func main() {
}