From ba6bd967d2445c4322ef6e37b3144d630109cdfc Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Tue, 28 Jul 2020 10:29:06 +0800 Subject: [PATCH] cmd/compile/internal/ssa: strengthen phiopt pass The current phiopt pass just transforms the following code x := false if b { x = true} into x = b But we find code in runtime.atoi like this: neg := false if s[0] == '-' { neg = true s = s[1:] } The current phiopt pass does not covert it into code like: neg := s[0] == '-' if neg { s = s[1:] } Therefore, this patch strengthens the phiopt pass so that the boolean Phi value "neg" can be replaced with a copy of control value "s[0] == '-'", thereby using "cmp+cset" instead of a branch. But in some cases even replacing the boolean Phis cannot eliminate this branch. In the following case, this patch replaces "d" with a copy of "a<0", but the regalloc pass will insert the "Load {c}" value into an empty block to split the live ranges, which causes the branch to not be eliminated. For example: func test(a, b, c int) (bool, int) { d := false if (a<0) { if (b<0) { c = c+1 } d = true } return d, c } The optimized assembly code: MOVD "".a(FP), R0 TBZ $63, R0, 48 MOVD "".c+16(FP), R1 ADD $1, R1, R2 MOVD "".b+8(FP), R3 CMP ZR, R3 CSEL LT, R2, R1, R1 CMP ZR, R0 CSET LT, R0 MOVB R0, "".~r3+24(FP) MOVD R1, "".~r4+32(FP) RET (R30) MOVD "".c+16(FP), R1 JMP 28 The benchmark: name old time/op new time/op delta pkg:cmd/compile/internal/ssa goos:linux goarch:arm64 PhioptPass 117783.250000ns +- 1% 117219.111111ns +- 1% ~ (p=0.074 n=8+9) Statistical data from compilecmp tool: compilecmp local/master -> HEAD local/master (a826f7dc45): debug/dwarf: support DW_FORM_rnglistx aka formRnglistx HEAD (e57e003c10): cmd/compile/internal/ssa: strengthen phiopt pass benchstat -geomean /tmp/2516644532 /tmp/1075915815 completed 50 of 50, estimated time remaining 0s (ETA 7:10PM) name old time/op new time/op delta Template 554ms _ 3% 553ms _ 3% ~ (p=0.986 n=49+48) Unicode 252ms _ 4% 249ms _ 4% -1.33% (p=0.002 n=47+49) GoTypes 3.16s _ 3% 3.18s _ 3% +0.77% (p=0.022 n=44+48) Compiler 257ms _ 4% 258ms _ 4% ~ (p=0.121 n=50+49) SSA 24.2s _ 4% 24.2s _ 5% ~ (p=0.694 n=49+50) Flate 338ms _ 4% 338ms _ 4% ~ (p=0.592 n=43+46) GoParser 506ms _ 3% 507ms _ 3% ~ (p=0.942 n=49+50) Reflect 1.37s _ 4% 1.37s _ 5% ~ (p=0.408 n=50+50) Tar 486ms _ 3% 487ms _ 4% ~ (p=0.911 n=47+50) XML 619ms _ 2% 619ms _ 3% ~ (p=0.368 n=46+48) LinkCompiler 1.29s _31% 1.32s _23% ~ (p=0.306 n=49+44) ExternalLinkCompiler 3.39s _10% 3.36s _ 6% ~ (p=0.311 n=48+46) LinkWithoutDebugCompiler 846ms _37% 793ms _24% -6.29% (p=0.040 n=50+49) [Geo mean] 974ms 971ms -0.36% name old user-time/op new user-time/op delta Template 910ms _12% 893ms _13% ~ (p=0.098 n=49+49) Unicode 495ms _28% 492ms _18% ~ (p=0.562 n=50+46) GoTypes 4.42s _15% 4.39s _13% ~ (p=0.684 n=49+50) Compiler 419ms _22% 422ms _16% ~ (p=0.579 n=48+50) SSA 36.5s _ 7% 36.6s _ 8% ~ (p=0.465 n=50+47) Flate 521ms _21% 523ms _16% ~ (p=0.889 n=50+47) GoParser 810ms _12% 792ms _15% ~ (p=0.149 n=50+50) Reflect 1.98s _13% 2.02s _13% ~ (p=0.144 n=47+50) Tar 826ms _15% 806ms _19% ~ (p=0.115 n=49+49) XML 988ms _14% 1003ms _14% ~ (p=0.179 n=50+50) LinkCompiler 1.79s _ 8% 1.84s _11% +2.81% (p=0.001 n=49+49) ExternalLinkCompiler 3.69s _ 4% 3.71s _ 3% ~ (p=0.261 n=50+50) LinkWithoutDebugCompiler 838ms _10% 827ms _11% ~ (p=0.323 n=50+48) [Geo mean] 1.44s 1.44s -0.05% name old alloc/op new alloc/op delta Template 39.0MB _ 1% 39.0MB _ 1% ~ (p=0.445 n=50+49) Unicode 28.5MB _ 0% 28.5MB _ 0% ~ (p=0.460 n=50+50) GoTypes 169MB _ 1% 169MB _ 1% ~ (p=0.092 n=48+50) Compiler 23.4MB _ 1% 23.4MB _ 1% -0.19% (p=0.032 n=50+49) SSA 1.54GB _ 0% 1.55GB _ 1% +0.14% (p=0.001 n=50+50) Flate 23.8MB _ 1% 23.8MB _ 2% ~ (p=0.702 n=49+49) GoParser 35.4MB _ 1% 35.4MB _ 1% ~ (p=0.786 n=50+50) Reflect 85.3MB _ 1% 85.3MB _ 1% ~ (p=0.298 n=50+50) Tar 34.6MB _ 2% 34.6MB _ 2% ~ (p=0.683 n=50+50) XML 44.5MB _ 3% 44.0MB _ 2% -1.05% (p=0.000 n=50+46) LinkCompiler 136MB _ 0% 136MB _ 0% +0.01% (p=0.005 n=50+50) ExternalLinkCompiler 128MB _ 0% 128MB _ 0% ~ (p=0.179 n=50+50) LinkWithoutDebugCompiler 84.3MB _ 0% 84.3MB _ 0% +0.01% (p=0.006 n=50+50) [Geo mean] 70.7MB 70.6MB -0.07% name old allocs/op new allocs/op delta Template 410k _ 0% 410k _ 0% ~ (p=0.606 n=48+49) Unicode 310k _ 0% 310k _ 0% ~ (p=0.674 n=50+50) GoTypes 1.81M _ 0% 1.81M _ 0% ~ (p=0.674 n=50+50) Compiler 202k _ 0% 202k _ 0% +0.02% (p=0.046 n=50+50) SSA 16.3M _ 0% 16.3M _ 0% +0.10% (p=0.000 n=50+50) Flate 244k _ 0% 244k _ 0% ~ (p=0.834 n=49+50) GoParser 380k _ 0% 380k _ 0% ~ (p=0.410 n=50+50) Reflect 1.08M _ 0% 1.08M _ 0% ~ (p=0.782 n=48+50) Tar 368k _ 0% 368k _ 0% ~ (p=0.585 n=50+49) XML 453k _ 0% 453k _ 0% -0.01% (p=0.025 n=49+49) LinkCompiler 713k _ 0% 713k _ 0% +0.01% (p=0.044 n=50+50) ExternalLinkCompiler 794k _ 0% 794k _ 0% +0.01% (p=0.000 n=50+49) LinkWithoutDebugCompiler 251k _ 0% 251k _ 0% ~ (p=0.092 n=47+50) [Geo mean] 615k 615k +0.01% name old maxRSS/op new maxRSS/op delta Template 37.0M _ 4% 37.2M _ 3% ~ (p=0.062 n=48+48) Unicode 36.9M _ 5% 37.3M _ 4% +1.10% (p=0.021 n=50+47) GoTypes 94.3M _ 3% 94.9M _ 4% +0.69% (p=0.022 n=45+46) Compiler 33.4M _ 3% 33.4M _ 5% ~ (p=0.964 n=49+50) SSA 741M _ 3% 738M _ 3% ~ (p=0.164 n=50+50) Flate 28.5M _ 6% 28.8M _ 4% +1.07% (p=0.009 n=50+49) GoParser 35.0M _ 3% 35.3M _ 4% +0.83% (p=0.010 n=50+48) Reflect 57.2M _ 6% 57.1M _ 4% ~ (p=0.815 n=50+49) Tar 34.9M _ 3% 35.0M _ 3% ~ (p=0.134 n=49+48) XML 39.5M _ 5% 40.0M _ 3% +1.35% (p=0.001 n=50+48) LinkCompiler 220M _ 2% 220M _ 2% ~ (p=0.547 n=49+48) ExternalLinkCompiler 235M _ 2% 236M _ 2% ~ (p=0.538 n=47+44) LinkWithoutDebugCompiler 179M _ 1% 179M _ 1% ~ (p=0.775 n=50+50) [Geo mean] 74.9M 75.2M +0.43% name old text-bytes new text-bytes delta HelloSize 784kB _ 0% 784kB _ 0% +0.01% (p=0.000 n=50+50) name old data-bytes new data-bytes delta HelloSize 13.1kB _ 0% 13.1kB _ 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 206kB _ 0% 206kB _ 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.28MB _ 0% 1.28MB _ 0% +0.00% (p=0.000 n=50+50) file before after _ % addr2line 4006300 4004484 -1816 -0.045% api 5029956 5029324 -632 -0.013% asm 4936311 4939423 +3112 +0.063% buildid 2595059 2595291 +232 +0.009% cgo 4401029 4397333 -3696 -0.084% compile 22246677 22246863 +186 +0.001% cover 4443825 4443065 -760 -0.017% dist 3366078 3365838 -240 -0.007% doc 3776391 3776615 +224 +0.006% fix 3218800 3218648 -152 -0.005% link 6365321 6365345 +24 +0.000% nm 3923625 3923857 +232 +0.006% objdump 4295569 4295041 -528 -0.012% pack 2390745 2389217 -1528 -0.064% pprof 12870094 12866942 -3152 -0.024% test2json 2587265 2587073 -192 -0.007% trace 9612629 9613981 +1352 +0.014% vet 6791008 6792072 +1064 +0.016% total 106856682 106850412 -6270 -0.006% Update #37608 Change-Id: Ic6206b22fd1faf570be9fd3c2511aa6c4ce38cdb Reviewed-on: https://go-review.googlesource.com/c/go/+/252937 Trust: fannie zhang Run-TryBot: fannie zhang TryBot-Result: Go Bot Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/bench_test.go | 32 +++++ src/cmd/compile/internal/ssa/phiopt.go | 149 ++++++++++++++++++++- src/cmd/compile/internal/ssa/sparsetree.go | 6 + test/phiopt.go | 26 +++- 4 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 src/cmd/compile/internal/ssa/bench_test.go diff --git a/src/cmd/compile/internal/ssa/bench_test.go b/src/cmd/compile/internal/ssa/bench_test.go new file mode 100644 index 0000000000..0971667507 --- /dev/null +++ b/src/cmd/compile/internal/ssa/bench_test.go @@ -0,0 +1,32 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +package ssa + +import ( + "math/rand" + "testing" +) + +var d int + +//go:noinline +func fn(a, b int) bool { + c := false + if a > 0 { + if b < 0 { + d = d + 1 + } + c = true + } + return c +} + +func BenchmarkPhioptPass(b *testing.B) { + for i := 0; i < b.N; i++ { + a := rand.Perm(i/10 + 10) + for i := 1; i < len(a)/2; i++ { + fn(a[i]-a[i-1], a[i+len(a)/2-2]-a[i+len(a)/2-1]) + } + } +} diff --git a/src/cmd/compile/internal/ssa/phiopt.go b/src/cmd/compile/internal/ssa/phiopt.go index db7b02275c..ee583d0225 100644 --- a/src/cmd/compile/internal/ssa/phiopt.go +++ b/src/cmd/compile/internal/ssa/phiopt.go @@ -46,7 +46,6 @@ func phiopt(f *Func) { continue } // b0 is the if block giving the boolean value. - // reverse is the predecessor from which the truth value comes. var reverse int if b0.Succs[0].b == pb0 && b0.Succs[1].b == pb1 { @@ -120,6 +119,141 @@ func phiopt(f *Func) { } } } + // strengthen phi optimization. + // Main use case is to transform: + // x := false + // if c { + // x = true + // ... + // } + // into + // x := c + // if x { ... } + // + // For example, in SSA code a case appears as + // b0 + // If c -> b, sb0 + // sb0 + // If d -> sd0, sd1 + // sd1 + // ... + // sd0 + // Plain -> b + // b + // x = (OpPhi (ConstBool [true]) (ConstBool [false])) + // + // In this case we can also replace x with a copy of c. + // + // The optimization idea: + // 1. block b has a phi value x, x = OpPhi (ConstBool [true]) (ConstBool [false]), + // and len(b.Preds) is equal to 2. + // 2. find the common dominator(b0) of the predecessors(pb0, pb1) of block b, and the + // dominator(b0) is a If block. + // Special case: one of the predecessors(pb0 or pb1) is the dominator(b0). + // 3. the successors(sb0, sb1) of the dominator need to dominate the predecessors(pb0, pb1) + // of block b respectively. + // 4. replace this boolean Phi based on dominator block. + // + // b0(pb0) b0(pb1) b0 + // | \ / | / \ + // | sb1 sb0 | sb0 sb1 + // | ... ... | ... ... + // | pb1 pb0 | pb0 pb1 + // | / \ | \ / + // b b b + // + var lca *lcaRange + for _, b := range f.Blocks { + if len(b.Preds) != 2 || len(b.Values) == 0 { + // TODO: handle more than 2 predecessors, e.g. a || b || c. + continue + } + + for _, v := range b.Values { + // find a phi value v = OpPhi (ConstBool [true]) (ConstBool [false]). + // TODO: v = OpPhi (ConstBool [true]) (Arg {value}) + if v.Op != OpPhi { + continue + } + if v.Args[0].Op != OpConstBool || v.Args[1].Op != OpConstBool { + continue + } + if v.Args[0].AuxInt == v.Args[1].AuxInt { + continue + } + + pb0 := b.Preds[0].b + pb1 := b.Preds[1].b + if pb0.Kind == BlockIf && pb0 == sdom.Parent(b) { + // special case: pb0 is the dominator block b0. + // b0(pb0) + // | \ + // | sb1 + // | ... + // | pb1 + // | / + // b + // if another successor sb1 of b0(pb0) dominates pb1, do replace. + ei := b.Preds[0].i + sb1 := pb0.Succs[1-ei].b + if sdom.IsAncestorEq(sb1, pb1) { + convertPhi(pb0, v, ei) + break + } + } else if pb1.Kind == BlockIf && pb1 == sdom.Parent(b) { + // special case: pb1 is the dominator block b0. + // b0(pb1) + // / | + // sb0 | + // ... | + // pb0 | + // \ | + // b + // if another successor sb0 of b0(pb0) dominates pb0, do replace. + ei := b.Preds[1].i + sb0 := pb1.Succs[1-ei].b + if sdom.IsAncestorEq(sb0, pb0) { + convertPhi(pb1, v, ei-1) + break + } + } else { + // b0 + // / \ + // sb0 sb1 + // ... ... + // pb0 pb1 + // \ / + // b + // + // Build data structure for fast least-common-ancestor queries. + if lca == nil { + lca = makeLCArange(f) + } + b0 := lca.find(pb0, pb1) + if b0.Kind != BlockIf { + break + } + sb0 := b0.Succs[0].b + sb1 := b0.Succs[1].b + var reverse int + if sdom.IsAncestorEq(sb0, pb0) && sdom.IsAncestorEq(sb1, pb1) { + reverse = 0 + } else if sdom.IsAncestorEq(sb1, pb0) && sdom.IsAncestorEq(sb0, pb1) { + reverse = 1 + } else { + break + } + if len(sb0.Preds) != 1 || len(sb1.Preds) != 1 { + // we can not replace phi value x in the following case. + // if gp == nil || sp < lo { x = true} + // if a || b { x = true } + // so the if statement can only have one condition. + break + } + convertPhi(b0, v, reverse) + } + } + } } func phioptint(v *Value, b0 *Block, reverse int) { @@ -174,3 +308,16 @@ func phioptint(v *Value, b0 *Block, reverse int) { f.Warnl(v.Block.Pos, "converted OpPhi bool -> int%d", v.Type.Size()*8) } } + +// b is the If block giving the boolean value. +// v is the phi value v = (OpPhi (ConstBool [true]) (ConstBool [false])). +// reverse is the predecessor from which the truth value comes. +func convertPhi(b *Block, v *Value, reverse int) { + f := b.Func + ops := [2]Op{OpNot, OpCopy} + v.reset(ops[v.Args[reverse].AuxInt]) + v.AddArg(b.Controls[0]) + if f.pass.debug > 0 { + f.Warnl(b.Pos, "converted OpPhi to %v", v.Op) + } +} diff --git a/src/cmd/compile/internal/ssa/sparsetree.go b/src/cmd/compile/internal/ssa/sparsetree.go index 1be20b2cda..be914c8644 100644 --- a/src/cmd/compile/internal/ssa/sparsetree.go +++ b/src/cmd/compile/internal/ssa/sparsetree.go @@ -178,6 +178,12 @@ func (t SparseTree) Child(x *Block) *Block { return t[x.ID].child } +// Parent returns the parent of x in the dominator tree, or +// nil if x is the function's entry. +func (t SparseTree) Parent(x *Block) *Block { + return t[x.ID].parent +} + // isAncestorEq reports whether x is an ancestor of or equal to y. func (t SparseTree) IsAncestorEq(x, y *Block) bool { if x == y { diff --git a/test/phiopt.go b/test/phiopt.go index 98a7b75d10..e04373eb72 100644 --- a/test/phiopt.go +++ b/test/phiopt.go @@ -1,4 +1,4 @@ -// +build amd64 s390x +// +build amd64 s390x arm64 // errorcheck -0 -d=ssa/phiopt/debug=3 // Copyright 2016 The Go Authors. All rights reserved. @@ -104,5 +104,29 @@ func f7and(a bool, b bool) bool { return a && b // ERROR "converted OpPhi to AndB$" } +//go:noinline +func f8(s string) (string, bool) { + neg := false + if s[0] == '-' { // ERROR "converted OpPhi to Copy$" + neg = true + s = s[1:] + } + return s, neg +} + +var d int + +//go:noinline +func f9(a, b int) bool { + c := false + if a < 0 { // ERROR "converted OpPhi to Copy$" + if b < 0 { + d = d + 1 + } + c = true + } + return c +} + func main() { }