1
0
mirror of https://github.com/golang/go synced 2024-11-23 21:40:05 -07:00

cmd/compile: optimize integer-in-range checks

This CL incorporates code from CL 201206 by Josh Bleecher Snyder
(thanks Josh).

This CL restores the integer-in-range optimizations in the SSA
backend. The fuse pass is enhanced to detect inequalities that
could be merged and fuse their associated blocks while the generic
rules optimize them into a single unsigned comparison.

For example, the inequality `x >= 0 && x < 10` will now be optimized
to `unsigned(x) < 10`.

Overall this change has a fairly positive impact on binary sizes.

name                      old time/op       new time/op       delta
Template                        192ms ± 1%        192ms ± 1%    ~     (p=0.757 n=17+18)
Unicode                        76.6ms ± 2%       76.5ms ± 2%    ~     (p=0.603 n=19+19)
GoTypes                         694ms ± 1%        693ms ± 1%    ~     (p=0.569 n=19+20)
Compiler                        3.26s ± 0%        3.27s ± 0%  +0.25%  (p=0.000 n=20+20)
SSA                             7.41s ± 0%        7.49s ± 0%  +1.10%  (p=0.000 n=17+19)
Flate                           120ms ± 1%        120ms ± 1%  +0.38%  (p=0.003 n=19+19)
GoParser                        152ms ± 1%        152ms ± 1%    ~     (p=0.061 n=17+19)
Reflect                         422ms ± 1%        425ms ± 2%  +0.76%  (p=0.001 n=18+20)
Tar                             167ms ± 1%        167ms ± 0%    ~     (p=0.730 n=18+19)
XML                             233ms ± 4%        231ms ± 1%    ~     (p=0.752 n=20+17)
LinkCompiler                    927ms ± 8%        928ms ± 8%    ~     (p=0.857 n=19+20)
ExternalLinkCompiler            1.81s ± 2%        1.81s ± 2%    ~     (p=0.513 n=19+20)
LinkWithoutDebugCompiler        556ms ±10%        583ms ±13%  +4.95%  (p=0.007 n=20+20)
[Geo mean]                      478ms             481ms       +0.52%

name                      old user-time/op  new user-time/op  delta
Template                        270ms ± 5%        269ms ± 7%    ~     (p=0.925 n=20+20)
Unicode                         134ms ± 7%        131ms ±14%    ~     (p=0.593 n=18+20)
GoTypes                         981ms ± 3%        987ms ± 2%  +0.63%  (p=0.049 n=19+18)
Compiler                        4.50s ± 2%        4.50s ± 1%    ~     (p=0.588 n=19+20)
SSA                             10.6s ± 2%        10.6s ± 1%    ~     (p=0.141 n=20+19)
Flate                           164ms ± 8%        165ms ±10%    ~     (p=0.738 n=20+20)
GoParser                        202ms ± 5%        203ms ± 6%    ~     (p=0.820 n=20+20)
Reflect                         587ms ± 6%        597ms ± 3%    ~     (p=0.087 n=20+18)
Tar                             230ms ± 6%        228ms ± 8%    ~     (p=0.569 n=19+20)
XML                             311ms ± 6%        314ms ± 5%    ~     (p=0.369 n=20+20)
LinkCompiler                    878ms ± 8%        887ms ± 7%    ~     (p=0.289 n=20+20)
ExternalLinkCompiler            1.60s ± 7%        1.60s ± 7%    ~     (p=0.820 n=20+20)
LinkWithoutDebugCompiler        498ms ±12%        489ms ±11%    ~     (p=0.398 n=20+20)
[Geo mean]                      611ms             611ms       +0.05%

name                      old alloc/op      new alloc/op      delta
Template                       36.1MB ± 0%       36.0MB ± 0%  -0.32%  (p=0.000 n=20+20)
Unicode                        28.3MB ± 0%       28.3MB ± 0%  -0.03%  (p=0.000 n=19+20)
GoTypes                         121MB ± 0%        121MB ± 0%    ~     (p=0.226 n=16+20)
Compiler                        563MB ± 0%        563MB ± 0%    ~     (p=0.166 n=20+19)
SSA                            1.32GB ± 0%       1.33GB ± 0%  +0.88%  (p=0.000 n=20+19)
Flate                          22.7MB ± 0%       22.7MB ± 0%  -0.02%  (p=0.033 n=19+20)
GoParser                       27.9MB ± 0%       27.9MB ± 0%  -0.02%  (p=0.001 n=20+20)
Reflect                        78.3MB ± 0%       78.2MB ± 0%  -0.01%  (p=0.019 n=20+20)
Tar                            34.0MB ± 0%       34.0MB ± 0%  -0.04%  (p=0.000 n=20+20)
XML                            43.9MB ± 0%       43.9MB ± 0%  -0.07%  (p=0.000 n=20+19)
LinkCompiler                    205MB ± 0%        205MB ± 0%  +0.44%  (p=0.000 n=20+18)
ExternalLinkCompiler            223MB ± 0%        223MB ± 0%  +0.03%  (p=0.000 n=20+20)
LinkWithoutDebugCompiler        139MB ± 0%        142MB ± 0%  +1.75%  (p=0.000 n=20+20)
[Geo mean]                     93.7MB            93.9MB       +0.20%

name                      old allocs/op     new allocs/op     delta
Template                         363k ± 0%         361k ± 0%  -0.58%  (p=0.000 n=20+19)
Unicode                          329k ± 0%         329k ± 0%  -0.06%  (p=0.000 n=19+20)
GoTypes                         1.28M ± 0%        1.28M ± 0%  -0.01%  (p=0.000 n=20+20)
Compiler                        5.40M ± 0%        5.40M ± 0%  -0.01%  (p=0.000 n=20+20)
SSA                             12.7M ± 0%        12.8M ± 0%  +0.80%  (p=0.000 n=20+20)
Flate                            228k ± 0%         228k ± 0%    ~     (p=0.194 n=20+20)
GoParser                         295k ± 0%         295k ± 0%  -0.04%  (p=0.000 n=20+20)
Reflect                          949k ± 0%         949k ± 0%  -0.01%  (p=0.000 n=20+20)
Tar                              337k ± 0%         337k ± 0%  -0.06%  (p=0.000 n=20+20)
XML                              418k ± 0%         417k ± 0%  -0.17%  (p=0.000 n=20+20)
LinkCompiler                     553k ± 0%         554k ± 0%  +0.22%  (p=0.000 n=20+19)
ExternalLinkCompiler            1.52M ± 0%        1.52M ± 0%  +0.27%  (p=0.000 n=20+20)
LinkWithoutDebugCompiler         186k ± 0%         186k ± 0%  +0.06%  (p=0.000 n=20+20)
[Geo mean]                       723k              723k       +0.03%

name                      old text-bytes    new text-bytes    delta
HelloSize                       828kB ± 0%        828kB ± 0%  -0.01%  (p=0.000 n=20+20)

name                      old data-bytes    new data-bytes    delta
HelloSize                      13.4kB ± 0%       13.4kB ± 0%    ~     (all equal)

name                      old bss-bytes     new bss-bytes     delta
HelloSize                       180kB ± 0%        180kB ± 0%    ~     (all equal)

name                      old exe-bytes     new exe-bytes     delta
HelloSize                      1.23MB ± 0%       1.23MB ± 0%  -0.33%  (p=0.000 n=20+20)

file      before    after     Δ       %
addr2line 4320075   4311883   -8192   -0.190%
asm       5191932   5187836   -4096   -0.079%
buildid   2835338   2831242   -4096   -0.144%
compile   20531717  20569099  +37382  +0.182%
cover     5322511   5318415   -4096   -0.077%
dist      3723749   3719653   -4096   -0.110%
doc       4743515   4739419   -4096   -0.086%
fix       3413960   3409864   -4096   -0.120%
link      6690119   6686023   -4096   -0.061%
nm        4269616   4265520   -4096   -0.096%
pprof     14942189  14929901  -12288  -0.082%
trace     11807164  11790780  -16384  -0.139%
vet       8384104   8388200   +4096   +0.049%
go        15339076  15334980  -4096   -0.027%
total     132258257 132226007 -32250  -0.024%

Fixes #30645.

Change-Id: If551ac5996097f3685870d083151b5843170aab0
Reviewed-on: https://go-review.googlesource.com/c/go/+/165998
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Michael Munday 2019-05-20 11:55:56 -07:00
parent c9ece81cc8
commit e37cc29863
9 changed files with 3080 additions and 27 deletions

View File

@ -148,7 +148,7 @@ func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
// the number of useless instructions executed.
const maxfuseinsts = 2
if len(simple.Values) > maxfuseinsts || !allTrivial(simple) {
if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
return false
}
@ -305,10 +305,10 @@ func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
return false
}
yes, no := b.Succs[0].Block(), b.Succs[1].Block()
if !isLeafPlain(yes) || len(yes.Values) > 1 || !allTrivial(yes) {
if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
return false
}
if !isLeafPlain(no) || len(no.Values) > 1 || !allTrivial(no) {
if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
return false
}
if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
@ -415,7 +415,15 @@ func shouldElimIfElse(no, yes, post *Block, arch string) bool {
}
}
func allTrivial(b *Block) bool {
// canSpeculativelyExecute reports whether every value in the block can
// be evaluated without causing any observable side effects (memory
// accesses, panics and so on) except for execution time changes. It
// also ensures that the block does not contain any phis which we can't
// speculatively execute.
// Warning: this function cannot currently detect values that represent
// instructions whose execution needs to be guarded with CPU
// hardware feature checks. See issue #34950.
func canSpeculativelyExecute(b *Block) bool {
// don't fuse memory ops, Phi ops, divides (can panic),
// or anything else with side-effects
for _, v := range b.Values {

View File

@ -428,7 +428,7 @@ var passes = [...]pass{
{name: "gcse deadcode", fn: deadcode, required: true}, // clean out after cse and phiopt
{name: "nilcheckelim", fn: nilcheckelim},
{name: "prove", fn: prove},
{name: "fuse plain", fn: fusePlain},
{name: "early fuse", fn: fuseEarly},
{name: "decompose builtin", fn: decomposeBuiltIn, required: true},
{name: "softfloat", fn: softfloat, required: true},
{name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
@ -436,7 +436,7 @@ var passes = [...]pass{
{name: "generic deadcode", fn: deadcode, required: true}, // remove dead stores, which otherwise mess up store chain
{name: "check bce", fn: checkbce},
{name: "branchelim", fn: branchelim},
{name: "fuse", fn: fuseAll},
{name: "late fuse", fn: fuseLate},
{name: "dse", fn: dse},
{name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
{name: "insert resched checks", fn: insertLoopReschedChecks,
@ -491,7 +491,7 @@ var passOrder = [...]constraint{
// allow deadcode to clean up after nilcheckelim
{"nilcheckelim", "generic deadcode"},
// nilcheckelim generates sequences of plain basic blocks
{"nilcheckelim", "fuse"},
{"nilcheckelim", "late fuse"},
// nilcheckelim relies on opt to rewrite user nil checks
{"opt", "nilcheckelim"},
// tighten will be most effective when as many values have been removed as possible

View File

@ -8,18 +8,18 @@ import (
"cmd/internal/src"
)
// fusePlain runs fuse(f, fuseTypePlain).
func fusePlain(f *Func) { fuse(f, fuseTypePlain) }
// fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange).
func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange) }
// fuseAll runs fuse(f, fuseTypeAll).
func fuseAll(f *Func) { fuse(f, fuseTypeAll) }
// fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf).
func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf) }
type fuseType uint8
const (
fuseTypePlain fuseType = 1 << iota
fuseTypeIf
fuseTypeAll = fuseTypePlain | fuseTypeIf
fuseTypeIntInRange
)
// fuse simplifies control flow by joining basic blocks.
@ -32,6 +32,9 @@ func fuse(f *Func, typ fuseType) {
if typ&fuseTypeIf != 0 {
changed = fuseBlockIf(b) || changed
}
if typ&fuseTypeIntInRange != 0 {
changed = fuseIntegerComparisons(b) || changed
}
if typ&fuseTypePlain != 0 {
changed = fuseBlockPlain(b) || changed
}

View File

@ -0,0 +1,157 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// fuseIntegerComparisons optimizes inequalities such as '1 <= x && x < 5',
// which can be optimized to 'unsigned(x-1) < 4'.
//
// Look for branch structure like:
//
//   p
//   |\
//   | b
//   |/ \
//   s0 s1
//
// In our example, p has control '1 <= x', b has control 'x < 5',
// and s0 and s1 are the if and else results of the comparison.
//
// This will be optimized into:
//
//   p
//    \
//     b
//    / \
//   s0 s1
//
// where b has the combined control value 'unsigned(x-1) < 4'.
// Later passes will then fuse p and b.
//
// This function itself only replaces b's control with an OrB/AndB of the
// two original controls and turns p into a plain block; reducing that
// boolean to a single unsigned comparison is left to the rewrite rules.
// It reports whether b and p were modified.
func fuseIntegerComparisons(b *Block) bool {
// b must be reachable only through p, otherwise p's condition could not
// be folded into b's control.
if len(b.Preds) != 1 {
return false
}
p := b.Preds[0].Block()
// Both blocks must end in a two-way conditional branch.
if b.Kind != BlockIf || p.Kind != BlockIf {
return false
}
// Don't merge control values if b is likely to be bypassed anyway.
if p.Likely == BranchLikely && p.Succs[0].Block() != b {
return false
}
if p.Likely == BranchUnlikely && p.Succs[1].Block() != b {
return false
}
// Check if the control values combine to make an integer inequality that
// can be further optimized later.
bc := b.Controls[0]
pc := p.Controls[0]
if !areMergeableInequalities(bc, pc) {
return false
}
// If the first (true) successors match then we have a disjunction (||).
// If the second (false) successors match then we have a conjunction (&&).
// The array index i therefore doubles as the successor index to compare
// and, on success, the index of the edge to remove from p.
for i, op := range [2]Op{OpOrB, OpAndB} {
if p.Succs[i].Block() != b.Succs[i].Block() {
continue
}
// TODO(mundaym): should we also check the cost of executing b?
// Currently we might speculatively execute b even if b contains
// a lot of instructions. We could just check that len(b.Values)
// is lower than a fixed amount. Bear in mind however that the
// other optimization passes might yet reduce the cost of b
// significantly so we shouldn't be overly conservative.
//
// After the merge p falls through to b unconditionally, so every
// value in b is evaluated even on paths that used to skip b.
if !canSpeculativelyExecute(b) {
return false
}
// Logically combine the control values for p and b.
v := b.NewValue0(bc.Pos, op, bc.Type)
v.AddArg(pc)
v.AddArg(bc)
// Set the combined control value as the control value for b.
b.SetControl(v)
// Modify p so that it jumps directly to b.
// NOTE(review): the shared successor may contain phis whose incoming
// values differ between the p edge and the b edge; confirm that
// dropping p's edge here cannot change the value such a phi observes.
p.removeEdge(i)
p.Kind = BlockPlain
p.Likely = BranchUnknown
p.ResetControls()
return true
}
// TODO: could negate condition(s) to merge controls.
return false
}
// getConstIntArgIndex scans v's arguments in order and returns the
// position of the first one whose op is an integer constant of any
// width (OpConst8, OpConst16, OpConst32 or OpConst64). It returns -1
// when v has no such argument.
func getConstIntArgIndex(v *Value) int {
	for idx := 0; idx < len(v.Args); idx++ {
		op := v.Args[idx].Op
		if op == OpConst8 || op == OpConst16 || op == OpConst32 || op == OpConst64 {
			return idx
		}
	}
	return -1
}
// isSignedInequality reports whether v is a signed integer comparison
// of the form < or ≤, at any of the 8, 16, 32 or 64 bit widths.
func isSignedInequality(v *Value) bool {
	switch v.Op {
	case OpLess8, OpLess16, OpLess32, OpLess64:
		// Strict signed comparison (<).
		return true
	case OpLeq8, OpLeq16, OpLeq32, OpLeq64:
		// Non-strict signed comparison (≤).
		return true
	default:
		return false
	}
}
// isUnsignedInequality reports whether v is an unsigned integer
// comparison of the form < or ≤, at any of the 8, 16, 32 or 64 bit widths.
func isUnsignedInequality(v *Value) bool {
	switch v.Op {
	case OpLess8U, OpLess16U, OpLess32U, OpLess64U:
		// Strict unsigned comparison (<).
		return true
	case OpLeq8U, OpLeq16U, OpLeq32U, OpLeq64U:
		// Non-strict unsigned comparison (≤).
		return true
	default:
		return false
	}
}
// areMergeableInequalities reports whether the comparisons x and y are
// candidates for being combined into a single range check. That is the
// case when all of the following hold:
//   - x and y are both signed inequalities, or both unsigned inequalities;
//   - each of them has a constant integer argument;
//   - the remaining (non-constant) argument of x is the very same value
//     as the remaining argument of y.
func areMergeableInequalities(x, y *Value) bool {
	// We need both inequalities to be either in the signed or unsigned domain.
	// TODO(mundaym): it would also be good to merge when we have an Eq op that
	// could be transformed into a Less/Leq. For example in the unsigned
	// domain 'x == 0 || 3 < x' is equivalent to 'x <= 0 || 3 < x'
	bothSigned := isSignedInequality(x) && isSignedInequality(y)
	bothUnsigned := isUnsignedInequality(x) && isUnsignedInequality(y)
	if !bothSigned && !bothUnsigned {
		return false
	}

	// Both inequalities must compare against a constant.
	xConst := getConstIntArgIndex(x)
	yConst := getConstIntArgIndex(y)
	if xConst < 0 || yConst < 0 {
		return false
	}

	// The comparisons take two operands, so flipping the low bit of the
	// constant's index selects the other operand. Those operands must be
	// the same value for the two checks to describe one range.
	xVar := x.Args[xConst^1]
	yVar := y.Args[yConst^1]
	return xVar == yVar
}

View File

@ -26,7 +26,7 @@ func TestFuseEliminatesOneBranch(t *testing.T) {
Exit("mem")))
CheckFunc(fun.f)
fuseAll(fun.f)
fuseLate(fun.f)
for _, b := range fun.f.Blocks {
if b == fun.blocks["then"] && b.Kind != BlockInvalid {
@ -56,7 +56,7 @@ func TestFuseEliminatesBothBranches(t *testing.T) {
Exit("mem")))
CheckFunc(fun.f)
fuseAll(fun.f)
fuseLate(fun.f)
for _, b := range fun.f.Blocks {
if b == fun.blocks["then"] && b.Kind != BlockInvalid {
@ -90,7 +90,7 @@ func TestFuseHandlesPhis(t *testing.T) {
Exit("mem")))
CheckFunc(fun.f)
fuseAll(fun.f)
fuseLate(fun.f)
for _, b := range fun.f.Blocks {
if b == fun.blocks["then"] && b.Kind != BlockInvalid {
@ -122,7 +122,7 @@ func TestFuseEliminatesEmptyBlocks(t *testing.T) {
))
CheckFunc(fun.f)
fuseAll(fun.f)
fuseLate(fun.f)
for k, b := range fun.blocks {
if k[:1] == "z" && b.Kind != BlockInvalid {
@ -153,7 +153,7 @@ func TestFuseSideEffects(t *testing.T) {
Goto("loop")))
CheckFunc(fun.f)
fuseAll(fun.f)
fuseLate(fun.f)
for _, b := range fun.f.Blocks {
if b == fun.blocks["then"] && b.Kind == BlockInvalid {
@ -196,7 +196,7 @@ func BenchmarkFuse(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
fun := c.Fun("entry", blocks...)
fuseAll(fun.f)
fuseLate(fun.f)
}
})
}

View File

@ -254,6 +254,54 @@
(Neq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) -> (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
(Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) -> (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
// signed integer range: ( c <= x && x (<|<=) d ) -> ( unsigned(x-c) (<|<=) unsigned(d-c) )
// The guard d >= c makes d-c non-negative, but for the narrow types it can
// exceed the signed maximum (e.g. int8: 100 - (-100) = 200), so the result is
// truncated and sign-extended to keep the constant's AuxInt in canonical form.
(AndB (Leq64 (Const64 [c]) x) ((Less|Leq)64 x (Const64 [d]))) && d >= c -> ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c])) (Const64 <x.Type> [d-c]))
(AndB (Leq32 (Const32 [c]) x) ((Less|Leq)32 x (Const32 [d]))) && d >= c -> ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c])) (Const32 <x.Type> [int64(int32(d-c))]))
(AndB (Leq16 (Const16 [c]) x) ((Less|Leq)16 x (Const16 [d]))) && d >= c -> ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c])) (Const16 <x.Type> [int64(int16(d-c))]))
(AndB (Leq8 (Const8 [c]) x) ((Less|Leq)8 x (Const8 [d]))) && d >= c -> ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c])) (Const8 <x.Type> [int64(int8(d-c))]))
// signed integer range: ( c < x && x (<|<=) d ) -> ( unsigned(x-(c+1)) (<|<=) unsigned(d-(c+1)) )
// The guard intN(c+1) > intN(c) rejects the case where c+1 overflows, so
// [c+1] needs no truncation; d-c-1 is normalized as above.
(AndB (Less64 (Const64 [c]) x) ((Less|Leq)64 x (Const64 [d]))) && d >= c+1 && int64(c+1) > int64(c) -> ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c+1])) (Const64 <x.Type> [d-c-1]))
(AndB (Less32 (Const32 [c]) x) ((Less|Leq)32 x (Const32 [d]))) && d >= c+1 && int32(c+1) > int32(c) -> ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c+1])) (Const32 <x.Type> [int64(int32(d-c-1))]))
(AndB (Less16 (Const16 [c]) x) ((Less|Leq)16 x (Const16 [d]))) && d >= c+1 && int16(c+1) > int16(c) -> ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c+1])) (Const16 <x.Type> [int64(int16(d-c-1))]))
(AndB (Less8 (Const8 [c]) x) ((Less|Leq)8 x (Const8 [d]))) && d >= c+1 && int8(c+1) > int8(c) -> ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c+1])) (Const8 <x.Type> [int64(int8(d-c-1))]))
// unsigned integer range: ( c <= x && x (<|<=) d ) -> ( x-c (<|<=) d-c )
(AndB (Leq64U (Const64 [c]) x) ((Less|Leq)64U x (Const64 [d]))) && uint64(d) >= uint64(c) -> ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c])) (Const64 <x.Type> [d-c]))
(AndB (Leq32U (Const32 [c]) x) ((Less|Leq)32U x (Const32 [d]))) && uint32(d) >= uint32(c) -> ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c])) (Const32 <x.Type> [int64(int32(d-c))]))
(AndB (Leq16U (Const16 [c]) x) ((Less|Leq)16U x (Const16 [d]))) && uint16(d) >= uint16(c) -> ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c])) (Const16 <x.Type> [int64(int16(d-c))]))
(AndB (Leq8U (Const8 [c]) x) ((Less|Leq)8U x (Const8 [d]))) && uint8(d) >= uint8(c) -> ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c])) (Const8 <x.Type> [int64(int8(d-c))]))
// unsigned integer range: ( c < x && x (<|<=) d ) -> ( x-(c+1) (<|<=) d-(c+1) )
(AndB (Less64U (Const64 [c]) x) ((Less|Leq)64U x (Const64 [d]))) && uint64(d) >= uint64(c+1) && uint64(c+1) > uint64(c) -> ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c+1])) (Const64 <x.Type> [d-c-1]))
(AndB (Less32U (Const32 [c]) x) ((Less|Leq)32U x (Const32 [d]))) && uint32(d) >= uint32(c+1) && uint32(c+1) > uint32(c) -> ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [int64(int32(c+1))])) (Const32 <x.Type> [int64(int32(d-c-1))]))
(AndB (Less16U (Const16 [c]) x) ((Less|Leq)16U x (Const16 [d]))) && uint16(d) >= uint16(c+1) && uint16(c+1) > uint16(c) -> ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [int64(int16(c+1))])) (Const16 <x.Type> [int64(int16(d-c-1))]))
(AndB (Less8U (Const8 [c]) x) ((Less|Leq)8U x (Const8 [d]))) && uint8(d) >= uint8(c+1) && uint8(c+1) > uint8(c) -> ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [int64(int8(c+1))])) (Const8 <x.Type> [int64(int8(d-c-1))]))
// signed integer range: ( c (<|<=) x || x < d ) -> ( unsigned(c-d) (<|<=) unsigned(x-d) )
// The guard c >= d makes c-d non-negative, but for the narrow types it can
// exceed the signed maximum (e.g. int16: 50 - (-32000)), so the result is
// truncated and sign-extended to keep the constant's AuxInt in canonical form.
(OrB ((Less|Leq)64 (Const64 [c]) x) (Less64 x (Const64 [d]))) && c >= d -> ((Less|Leq)64U (Const64 <x.Type> [c-d]) (Sub64 <x.Type> x (Const64 <x.Type> [d])))
(OrB ((Less|Leq)32 (Const32 [c]) x) (Less32 x (Const32 [d]))) && c >= d -> ((Less|Leq)32U (Const32 <x.Type> [int64(int32(c-d))]) (Sub32 <x.Type> x (Const32 <x.Type> [d])))
(OrB ((Less|Leq)16 (Const16 [c]) x) (Less16 x (Const16 [d]))) && c >= d -> ((Less|Leq)16U (Const16 <x.Type> [int64(int16(c-d))]) (Sub16 <x.Type> x (Const16 <x.Type> [d])))
(OrB ((Less|Leq)8 (Const8 [c]) x) (Less8 x (Const8 [d]))) && c >= d -> ((Less|Leq)8U (Const8 <x.Type> [int64(int8(c-d))]) (Sub8 <x.Type> x (Const8 <x.Type> [d])))
// signed integer range: ( c (<|<=) x || x <= d ) -> ( unsigned(c-(d+1)) (<|<=) unsigned(x-(d+1)) )
// The guard intN(d+1) > intN(d) rejects the case where d+1 overflows, so
// [d+1] needs no truncation; c-d-1 is normalized as above.
(OrB ((Less|Leq)64 (Const64 [c]) x) (Leq64 x (Const64 [d]))) && c >= d+1 && int64(d+1) > int64(d) -> ((Less|Leq)64U (Const64 <x.Type> [c-d-1]) (Sub64 <x.Type> x (Const64 <x.Type> [d+1])))
(OrB ((Less|Leq)32 (Const32 [c]) x) (Leq32 x (Const32 [d]))) && c >= d+1 && int32(d+1) > int32(d) -> ((Less|Leq)32U (Const32 <x.Type> [int64(int32(c-d-1))]) (Sub32 <x.Type> x (Const32 <x.Type> [d+1])))
(OrB ((Less|Leq)16 (Const16 [c]) x) (Leq16 x (Const16 [d]))) && c >= d+1 && int16(d+1) > int16(d) -> ((Less|Leq)16U (Const16 <x.Type> [int64(int16(c-d-1))]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1])))
(OrB ((Less|Leq)8 (Const8 [c]) x) (Leq8 x (Const8 [d]))) && c >= d+1 && int8(d+1) > int8(d) -> ((Less|Leq)8U (Const8 <x.Type> [int64(int8(c-d-1))]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1])))
// unsigned integer range: ( c (<|<=) x || x < d ) -> ( c-d (<|<=) x-d )
(OrB ((Less|Leq)64U (Const64 [c]) x) (Less64U x (Const64 [d]))) && uint64(c) >= uint64(d) -> ((Less|Leq)64U (Const64 <x.Type> [c-d]) (Sub64 <x.Type> x (Const64 <x.Type> [d])))
(OrB ((Less|Leq)32U (Const32 [c]) x) (Less32U x (Const32 [d]))) && uint32(c) >= uint32(d) -> ((Less|Leq)32U (Const32 <x.Type> [int64(int32(c-d))]) (Sub32 <x.Type> x (Const32 <x.Type> [d])))
(OrB ((Less|Leq)16U (Const16 [c]) x) (Less16U x (Const16 [d]))) && uint16(c) >= uint16(d) -> ((Less|Leq)16U (Const16 <x.Type> [int64(int16(c-d))]) (Sub16 <x.Type> x (Const16 <x.Type> [d])))
(OrB ((Less|Leq)8U (Const8 [c]) x) (Less8U x (Const8 [d]))) && uint8(c) >= uint8(d) -> ((Less|Leq)8U (Const8 <x.Type> [int64(int8(c-d))]) (Sub8 <x.Type> x (Const8 <x.Type> [d])))
// unsigned integer range: ( c (<|<=) x || x <= d ) -> ( c-(d+1) (<|<=) x-(d+1) )
(OrB ((Less|Leq)64U (Const64 [c]) x) (Leq64U x (Const64 [d]))) && uint64(c) >= uint64(d+1) && uint64(d+1) > uint64(d) -> ((Less|Leq)64U (Const64 <x.Type> [c-d-1]) (Sub64 <x.Type> x (Const64 <x.Type> [d+1])))
(OrB ((Less|Leq)32U (Const32 [c]) x) (Leq32U x (Const32 [d]))) && uint32(c) >= uint32(d+1) && uint32(d+1) > uint32(d) -> ((Less|Leq)32U (Const32 <x.Type> [int64(int32(c-d-1))]) (Sub32 <x.Type> x (Const32 <x.Type> [int64(int32(d+1))])))
(OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) -> ((Less|Leq)16U (Const16 <x.Type> [int64(int16(c-d-1))]) (Sub16 <x.Type> x (Const16 <x.Type> [int64(int16(d+1))])))
(OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) -> ((Less|Leq)8U (Const8 <x.Type> [int64(int8(c-d-1))]) (Sub8 <x.Type> x (Const8 <x.Type> [int64(int8(d+1))])))
// Canonicalize x-const to x+(-const)
(Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [-c]) x)
(Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Add32 (Const32 <t> [int64(int32(-c))]) x)

View File

@ -87,7 +87,7 @@ func TestNilcheckSimple(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)
@ -124,7 +124,7 @@ func TestNilcheckDomOrder(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)
@ -157,7 +157,7 @@ func TestNilcheckAddr(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)
@ -191,7 +191,7 @@ func TestNilcheckAddPtr(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)
@ -235,7 +235,7 @@ func TestNilcheckPhi(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)
@ -276,7 +276,7 @@ func TestNilcheckKeepRemove(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)
@ -323,7 +323,7 @@ func TestNilcheckInFalseBranch(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)
@ -374,7 +374,7 @@ func TestNilcheckUser(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)
@ -418,7 +418,7 @@ func TestNilcheckBug(t *testing.T) {
nilcheckelim(fun.f)
// clean up the removed nil check
fusePlain(fun.f)
fuse(fun.f, fuseTypePlain)
deadcode(fun.f)
CheckFunc(fun.f)

File diff suppressed because it is too large Load Diff

197
test/codegen/fuse.go Normal file
View File

@ -0,0 +1,197 @@
// asmcheck
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// Notes:
// - these examples use channels to provide a source of
// unknown values that cannot be optimized away
// - these examples use for loops to force branches
// backward (predicted taken)
// ---------------------------------- //
// signed integer range (conjunction) //
// ---------------------------------- //
// si1c covers the signed 64-bit conjunction 'x >= 0 && x < 256'. The
// directives expect a CMPQ with immediate 256 on amd64 and a CLGIJ with
// immediate 255 on s390x.
func si1c(c <-chan int64) {
// amd64:"CMPQ\t.+, [$]256"
// s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255"
for x := <-c; x >= 0 && x < 256; x = <-c {
}
}
// si2c covers the signed 32-bit conjunction 'x >= 0 && x < 256'. The
// directives expect a CMPL with immediate 256 on amd64 and a CLIJ with
// immediate 255 on s390x.
func si2c(c <-chan int32) {
// amd64:"CMPL\t.+, [$]256"
// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255"
for x := <-c; x >= 0 && x < 256; x = <-c {
}
}
// si3c covers the signed 16-bit conjunction 'x >= 0 && x < 256'. The
// directives expect a CMPW with immediate 256 on amd64 and a CLIJ with
// immediate 255 on s390x.
func si3c(c <-chan int16) {
// amd64:"CMPW\t.+, [$]256"
// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255"
for x := <-c; x >= 0 && x < 256; x = <-c {
}
}
// si4c covers the signed 8-bit conjunction 'x >= 0 && x < 10'. The
// directives expect a CMPB with immediate 10 on amd64 and a CLIJ with
// immediate 10 on s390x.
func si4c(c <-chan int8) {
// amd64:"CMPB\t.+, [$]10"
// s390x:"CLIJ\t[$]4, R[0-9]+, [$]10"
for x := <-c; x >= 0 && x < 10; x = <-c {
}
}
// si5c covers the signed 64-bit conjunction 'x < 256 && x > 4', written
// with the upper bound first. The directives expect an add of -5 and a
// compare with immediate 251 on both amd64 and s390x.
func si5c(c <-chan int64) {
// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5,"
for x := <-c; x < 256 && x > 4; x = <-c {
}
}
// si6c covers the signed 32-bit conjunction 'x > 0 && x <= 256'. The
// directives expect a decrement (DECL on amd64, ADDW of -1 on s390x) and a
// compare with immediate 255.
func si6c(c <-chan int32) {
// amd64:"CMPL\t.+, [$]255","DECL\t"
// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1,"
for x := <-c; x > 0 && x <= 256; x = <-c {
}
}
// si7c covers the signed 16-bit conjunction 'x >= -10 && x <= 50', a range
// with a negative lower bound. The directives expect an add of 10 and a
// compare with immediate 60.
func si7c(c <-chan int16) {
// amd64:"CMPW\t.+, [$]60","ADDL\t[$]10,"
// s390x:"CLIJ\t[$]12, R[0-9]+, [$]60","ADDW\t[$]10,"
for x := <-c; x >= -10 && x <= 50; x = <-c {
}
}
// si8c covers the signed 8-bit conjunction 'x >= -126 && x < 0', a range
// that lies entirely below zero. The directives expect an add of 126 and a
// compare with immediate 126.
func si8c(c <-chan int8) {
// amd64:"CMPB\t.+, [$]126","ADDL\t[$]126,"
// s390x:"CLIJ\t[$]4, R[0-9]+, [$]126","ADDW\t[$]126,"
for x := <-c; x >= -126 && x < 0; x = <-c {
}
}
// ---------------------------------- //
// signed integer range (disjunction) //
// ---------------------------------- //
// si1d covers the signed 64-bit disjunction 'x < 0 || x >= 256'. The
// directives expect a CMPQ with immediate 256 on amd64 and a CLGIJ with
// immediate 255 on s390x.
func si1d(c <-chan int64) {
// amd64:"CMPQ\t.+, [$]256"
// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255"
for x := <-c; x < 0 || x >= 256; x = <-c {
}
}
// si2d covers the signed 32-bit disjunction 'x < 0 || x >= 256'. The
// directives expect a CMPL with immediate 256 on amd64 and a CLIJ with
// immediate 255 on s390x.
func si2d(c <-chan int32) {
// amd64:"CMPL\t.+, [$]256"
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255"
for x := <-c; x < 0 || x >= 256; x = <-c {
}
}
// si3d covers the signed 16-bit disjunction 'x < 0 || x >= 256'. The
// directives expect a CMPW with immediate 256 on amd64 and a CLIJ with
// immediate 255 on s390x.
func si3d(c <-chan int16) {
// amd64:"CMPW\t.+, [$]256"
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255"
for x := <-c; x < 0 || x >= 256; x = <-c {
}
}
// si4d covers the signed 8-bit disjunction 'x < 0 || x >= 10'. The
// directives expect a CMPB with immediate 10 on amd64 and a CLIJ with
// immediate 10 on s390x.
func si4d(c <-chan int8) {
// amd64:"CMPB\t.+, [$]10"
// s390x:"CLIJ\t[$]10, R[0-9]+, [$]10"
for x := <-c; x < 0 || x >= 10; x = <-c {
}
}
// si5d covers the signed 64-bit disjunction 'x >= 256 || x <= 4', written
// with the upper bound first. The directives expect an add of -5 and a
// compare with immediate 251.
func si5d(c <-chan int64) {
// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
// s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5,"
for x := <-c; x >= 256 || x <= 4; x = <-c {
}
}
// si6d covers the signed 32-bit disjunction 'x <= 0 || x > 256'. The
// directives expect a decrement (DECL on amd64, ADDW of -1 on s390x) and a
// compare with immediate 255.
func si6d(c <-chan int32) {
// amd64:"CMPL\t.+, [$]255","DECL\t"
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255","ADDW\t[$]-1,"
for x := <-c; x <= 0 || x > 256; x = <-c {
}
}
// si7d covers the signed 16-bit disjunction 'x < -10 || x > 50', the
// complement of a range with a negative lower bound. The directives expect
// an add of 10 and a compare with immediate 60.
func si7d(c <-chan int16) {
// amd64:"CMPW\t.+, [$]60","ADDL\t[$]10,"
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]60","ADDW\t[$]10,"
for x := <-c; x < -10 || x > 50; x = <-c {
}
}
// si8d covers the signed 8-bit disjunction 'x < -126 || x >= 0', the
// complement of a range that lies entirely below zero. The directives
// expect an add of 126 and a compare with immediate 126.
func si8d(c <-chan int8) {
// amd64:"CMPB\t.+, [$]126","ADDL\t[$]126,"
// s390x:"CLIJ\t[$]10, R[0-9]+, [$]126","ADDW\t[$]126,"
for x := <-c; x < -126 || x >= 0; x = <-c {
}
}
// ------------------------------------ //
// unsigned integer range (conjunction) //
// ------------------------------------ //
// ui1c covers the unsigned 64-bit conjunction 'x < 256 && x > 4'. The
// directives expect an add of -5 and a compare with immediate 251.
func ui1c(c <-chan uint64) {
// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5,"
for x := <-c; x < 256 && x > 4; x = <-c {
}
}
// ui2c covers the unsigned 32-bit conjunction 'x > 0 && x <= 256'. The
// directives expect a decrement (DECL on amd64, ADDW of -1 on s390x) and a
// compare with immediate 255.
func ui2c(c <-chan uint32) {
// amd64:"CMPL\t.+, [$]255","DECL\t"
// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1,"
for x := <-c; x > 0 && x <= 256; x = <-c {
}
}
// ui3c covers the unsigned 16-bit conjunction 'x >= 10 && x <= 50'. The
// directives expect an add of -10 and a compare with immediate 40.
func ui3c(c <-chan uint16) {
// amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10,"
// s390x:"CLIJ\t[$]12, R[0-9]+, [$]40","ADDW\t[$]-10,"
for x := <-c; x >= 10 && x <= 50; x = <-c {
}
}
// ui4c covers the unsigned 8-bit conjunction 'x >= 126 && x < 128'. The
// directives expect an add of -126 and a compare with immediate 2.
func ui4c(c <-chan uint8) {
// amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126,"
// s390x:"CLIJ\t[$]4, R[0-9]+, [$]2","ADDW\t[$]-126,"
for x := <-c; x >= 126 && x < 128; x = <-c {
}
}
// ------------------------------------ //
// unsigned integer range (disjunction) //
// ------------------------------------ //
// ui1d covers the unsigned 64-bit disjunction 'x >= 256 || x <= 4'. The
// directives expect an add of -5 and a compare with immediate 251.
func ui1d(c <-chan uint64) {
// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
// s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5,"
for x := <-c; x >= 256 || x <= 4; x = <-c {
}
}
// ui2d covers the unsigned 32-bit disjunction 'x <= 1 || x > 256'. The
// directives expect an add of -2 and a compare with immediate 254.
func ui2d(c <-chan uint32) {
// amd64:"CMPL\t.+, [$]254","ADDL\t[$]-2,"
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]254","ADDW\t[$]-2,"
for x := <-c; x <= 1 || x > 256; x = <-c {
}
}
// ui3d covers the unsigned 16-bit disjunction 'x < 10 || x > 50'. The
// directives expect an add of -10 and a compare with immediate 40.
func ui3d(c <-chan uint16) {
// amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10,"
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]40","ADDW\t[$]-10,"
for x := <-c; x < 10 || x > 50; x = <-c {
}
}
// ui4d covers the unsigned 8-bit disjunction 'x < 126 || x >= 128'. The
// directives expect an add of -126 and a compare with immediate 2.
func ui4d(c <-chan uint8) {
// amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126,"
// s390x:"CLIJ\t[$]10, R[0-9]+, [$]2","ADDW\t[$]-126,"
for x := <-c; x < 126 || x >= 128; x = <-c {
}
}