1
0
mirror of https://github.com/golang/go synced 2024-11-23 00:50:05 -07:00

cmd/compile: implement CMOV on amd64

This builds upon the branchelim pass, activating it for amd64 and
lowering CondSelect. Special care is taken with floating-point
comparisons so that NaN (unordered) results are handled correctly.

Benchmark results on Xeon E5630 (Westmere EP):

name                      old time/op    new time/op    delta
BinaryTree17-16              4.99s ± 9%     4.66s ± 2%     ~     (p=0.095 n=5+5)
Fannkuch11-16                4.93s ± 3%     5.04s ± 2%     ~     (p=0.548 n=5+5)
FmtFprintfEmpty-16          58.8ns ± 7%    61.4ns ±14%     ~     (p=0.579 n=5+5)
FmtFprintfString-16          114ns ± 2%     114ns ± 4%     ~     (p=0.603 n=5+5)
FmtFprintfInt-16             181ns ± 4%     125ns ± 3%  -30.90%  (p=0.008 n=5+5)
FmtFprintfIntInt-16          263ns ± 2%     217ns ± 2%  -17.34%  (p=0.008 n=5+5)
FmtFprintfPrefixedInt-16     230ns ± 1%     212ns ± 1%   -7.99%  (p=0.008 n=5+5)
FmtFprintfFloat-16           411ns ± 3%     344ns ± 5%  -16.43%  (p=0.008 n=5+5)
FmtManyArgs-16               828ns ± 4%     790ns ± 2%   -4.59%  (p=0.032 n=5+5)
GobDecode-16                10.9ms ± 4%    10.8ms ± 5%     ~     (p=0.548 n=5+5)
GobEncode-16                9.52ms ± 5%    9.46ms ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                      334ms ± 2%     337ms ± 2%     ~     (p=0.548 n=5+5)
Gunzip-16                   64.4ms ± 1%    65.0ms ± 1%   +1.00%  (p=0.008 n=5+5)
HTTPClientServer-16          156µs ± 3%     155µs ± 3%     ~     (p=0.690 n=5+5)
JSONEncode-16               21.0ms ± 1%    21.8ms ± 0%   +3.76%  (p=0.016 n=5+4)
JSONDecode-16               95.1ms ± 0%    95.7ms ± 1%     ~     (p=0.151 n=5+5)
Mandelbrot200-16            6.38ms ± 1%    6.42ms ± 1%     ~     (p=0.095 n=5+5)
GoParse-16                  5.47ms ± 2%    5.36ms ± 1%   -1.95%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16       111ns ± 1%     111ns ± 1%     ~     (p=0.635 n=5+4)
RegexpMatchEasy0_1K-16       408ns ± 1%     411ns ± 2%     ~     (p=0.087 n=5+5)
RegexpMatchEasy1_32-16       103ns ± 1%     104ns ± 1%     ~     (p=0.484 n=5+5)
RegexpMatchEasy1_1K-16       659ns ± 2%     652ns ± 1%     ~     (p=0.571 n=5+5)
RegexpMatchMedium_32-16      176ns ± 2%     174ns ± 1%     ~     (p=0.476 n=5+5)
RegexpMatchMedium_1K-16     58.6µs ± 4%    57.7µs ± 4%     ~     (p=0.548 n=5+5)
RegexpMatchHard_32-16       3.07µs ± 3%    3.04µs ± 4%     ~     (p=0.421 n=5+5)
RegexpMatchHard_1K-16       89.2µs ± 1%    87.9µs ± 2%   -1.52%  (p=0.032 n=5+5)
Revcomp-16                   575ms ± 0%     587ms ± 2%   +2.12%  (p=0.032 n=4+5)
Template-16                  110ms ± 1%     107ms ± 3%   -3.00%  (p=0.032 n=5+5)
TimeParse-16                 463ns ± 0%     462ns ± 0%     ~     (p=0.810 n=5+4)
TimeFormat-16                538ns ± 0%     535ns ± 0%   -0.63%  (p=0.024 n=5+5)

name                      old speed      new speed      delta
GobDecode-16              70.7MB/s ± 4%  71.4MB/s ± 5%     ~     (p=0.452 n=5+5)
GobEncode-16              80.7MB/s ± 5%  81.2MB/s ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                   58.2MB/s ± 2%  57.7MB/s ± 2%     ~     (p=0.452 n=5+5)
Gunzip-16                  302MB/s ± 1%   299MB/s ± 1%   -0.99%  (p=0.008 n=5+5)
JSONEncode-16             92.4MB/s ± 1%  89.1MB/s ± 0%   -3.63%  (p=0.016 n=5+4)
JSONDecode-16             20.4MB/s ± 0%  20.3MB/s ± 1%     ~     (p=0.135 n=5+5)
GoParse-16                10.6MB/s ± 2%  10.8MB/s ± 1%   +2.00%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16     286MB/s ± 1%   285MB/s ± 3%     ~     (p=1.000 n=5+5)
RegexpMatchEasy0_1K-16    2.51GB/s ± 1%  2.49GB/s ± 2%     ~     (p=0.095 n=5+5)
RegexpMatchEasy1_32-16     309MB/s ± 1%   307MB/s ± 1%     ~     (p=0.548 n=5+5)
RegexpMatchEasy1_1K-16    1.55GB/s ± 2%  1.57GB/s ± 1%     ~     (p=0.690 n=5+5)
RegexpMatchMedium_32-16   5.68MB/s ± 2%  5.73MB/s ± 1%     ~     (p=0.579 n=5+5)
RegexpMatchMedium_1K-16   17.5MB/s ± 4%  17.8MB/s ± 4%     ~     (p=0.500 n=5+5)
RegexpMatchHard_32-16     10.4MB/s ± 3%  10.5MB/s ± 4%     ~     (p=0.460 n=5+5)
RegexpMatchHard_1K-16     11.5MB/s ± 1%  11.7MB/s ± 2%   +1.57%  (p=0.032 n=5+5)
Revcomp-16                 442MB/s ± 0%   433MB/s ± 2%   -2.05%  (p=0.032 n=4+5)
Template-16               17.7MB/s ± 1%  18.2MB/s ± 3%   +3.12%  (p=0.032 n=5+5)

Change-Id: I6972e8f35f2b31f9a42ac473a6bf419a18022558
Reviewed-on: https://go-review.googlesource.com/100935
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Giovanni Bajo 2018-03-05 20:59:40 +01:00
parent 423111081b
commit a35ec9a59e
9 changed files with 5238 additions and 117 deletions

View File

@ -398,7 +398,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
@ -409,6 +420,71 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
// Flag condition: ^ZERO || PARITY
// Generate:
// CMOV*NE SRC,DST
// CMOV*PS SRC,DST
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = r
var q *obj.Prog
if v.Op == ssa.OpAMD64CMOVQNEF {
q = s.Prog(x86.ACMOVQPS)
} else if v.Op == ssa.OpAMD64CMOVLNEF {
q = s.Prog(x86.ACMOVLPS)
} else {
q = s.Prog(x86.ACMOVWPS)
}
q.From.Type = obj.TYPE_REG
q.From.Reg = v.Args[1].Reg()
q.To.Type = obj.TYPE_REG
q.To.Reg = r
case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
// Flag condition: ZERO && !PARITY
// Generate:
// MOV SRC,AX
// CMOV*NE DST,AX
// CMOV*PC AX,DST
//
// TODO(rasky): we could generate:
// CMOV*NE DST,SRC
// CMOV*PC SRC,DST
// But this requires a way for regalloc to know that SRC might be
// clobbered by this instruction.
if v.Args[1].Reg() != x86.REG_AX {
opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
}
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX
var q *obj.Prog
if v.Op == ssa.OpAMD64CMOVQEQF {
q = s.Prog(x86.ACMOVQPC)
} else if v.Op == ssa.OpAMD64CMOVLEQF {
q = s.Prog(x86.ACMOVLPC)
} else {
q = s.Prog(x86.ACMOVWPC)
}
q.From.Type = obj.TYPE_REG
q.From.Reg = x86.REG_AX
q.To.Type = obj.TYPE_REG
q.To.Reg = r
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
r := v.Reg()
p := s.Prog(v.Op.Asm())

View File

@ -19,7 +19,10 @@ package ssa
// rewrite Phis in the postdominator as CondSelects.
func branchelim(f *Func) {
// FIXME: add support for lowering CondSelects on more architectures
if f.Config.arch != "arm64" {
switch f.Config.arch {
case "arm64", "amd64":
// implemented
default:
return
}
@ -32,10 +35,22 @@ func branchelim(f *Func) {
}
}
func canCondSelect(v *Value) bool {
func canCondSelect(v *Value, arch string) bool {
// For now, stick to simple scalars that fit in registers
sz := v.Type.Size()
return sz <= v.Block.Func.Config.RegSize && (v.Type.IsInteger() || v.Type.IsPtrShaped())
switch {
case v.Type.Size() > v.Block.Func.Config.RegSize:
return false
case v.Type.IsPtrShaped():
return true
case v.Type.IsInteger():
if arch == "amd64" && v.Type.Size() < 2 {
// amd64 doesn't support CMOV with byte registers
return false
}
return true
default:
return false
}
}
func elimIf(f *Func, dom *Block) bool {
@ -68,7 +83,7 @@ func elimIf(f *Func, dom *Block) bool {
for _, v := range post.Values {
if v.Op == OpPhi {
hasphis = true
if !canCondSelect(v) {
if !canCondSelect(v, f.Config.arch) {
return false
}
}
@ -169,7 +184,7 @@ func elimIfElse(f *Func, b *Block) bool {
for _, v := range post.Values {
if v.Op == OpPhi {
hasphis = true
if !canCondSelect(v) {
if !canCondSelect(v, f.Config.arch) {
return false
}
}

View File

@ -11,128 +11,162 @@ import (
// Test that a trivial 'if' is eliminated
func TestBranchElimIf(t *testing.T) {
c := testConfig(t)
c.config.arch = "arm64" // FIXME
boolType := types.New(types.TBOOL)
intType := types.New(types.TINT32)
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b3")),
Bloc("b3",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
var testData = []struct {
arch string
intType string
ok bool
}{
{"arm64", "int32", true},
{"amd64", "int32", true},
{"amd64", "int8", false},
}
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
Deadcode(fun.f)
CheckFunc(fun.f)
for _, data := range testData {
t.Run(data.arch+"/"+data.intType, func(t *testing.T) {
c := testConfigArch(t, data.arch)
boolType := c.config.Types.Bool
var intType *types.Type
switch data.intType {
case "int32":
intType = c.config.Types.Int32
case "int8":
intType = c.config.Types.Int8
default:
t.Fatal("invalid integer type:", data.intType)
}
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b3")),
Bloc("b3",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
if len(fun.f.Blocks) != 1 {
t.Errorf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpCondSelect {
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
Deadcode(fun.f)
CheckFunc(fun.f)
if data.ok {
if len(fun.f.Blocks) != 1 {
t.Fatalf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpCondSelect {
t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
}
} else {
if len(fun.f.Blocks) != 3 {
t.Fatalf("expected 3 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
}
}
})
}
}
// Test that a trivial if/else is eliminated
func TestBranchElimIfElse(t *testing.T) {
c := testConfig(t)
c.config.arch = "arm64" // FIXME
boolType := types.New(types.TBOOL)
intType := types.New(types.TINT32)
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b4")),
Bloc("b3",
Goto("b4")),
Bloc("b4",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
for _, arch := range []string{"arm64", "amd64"} {
t.Run(arch, func(t *testing.T) {
c := testConfigArch(t, arch)
boolType := c.config.Types.Bool
intType := c.config.Types.Int32
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const1", OpConst32, intType, 1, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
If("cond", "b2", "b3")),
Bloc("b2",
Goto("b4")),
Bloc("b3",
Goto("b4")),
Bloc("b4",
Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
Exit("retstore")))
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
Deadcode(fun.f)
CheckFunc(fun.f)
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
Deadcode(fun.f)
CheckFunc(fun.f)
if len(fun.f.Blocks) != 1 {
t.Errorf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpCondSelect {
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
if len(fun.f.Blocks) != 1 {
t.Fatalf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpCondSelect {
t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
}
if fun.values["phi"].Args[2] != fun.values["cond"] {
t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
}
if fun.blocks["entry"].Kind != BlockExit {
t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
}
})
}
}
// Test that an if/else CFG that loops back
// into itself does *not* get eliminated.
func TestNoBranchElimLoop(t *testing.T) {
c := testConfig(t)
c.config.arch = "arm64" // FIXME
boolType := types.New(types.TBOOL)
intType := types.New(types.TINT32)
for _, arch := range []string{"arm64", "amd64"} {
t.Run(arch, func(t *testing.T) {
c := testConfigArch(t, arch)
boolType := c.config.Types.Bool
intType := c.config.Types.Int32
// The control flow here is totally bogus,
// but a dead cycle seems like the only plausible
// way to arrive at a diamond CFG that is also a loop.
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("const3", OpConst32, intType, 3, nil),
Goto("b5")),
Bloc("b2",
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
If("cond", "b3", "b4")),
Bloc("b3",
Goto("b2")),
Bloc("b4",
Goto("b2")),
Bloc("b5",
Exit("start")))
// The control flow here is totally bogus,
// but a dead cycle seems like the only plausible
// way to arrive at a diamond CFG that is also a loop.
fun := c.Fun("entry",
Bloc("entry",
Valu("start", OpInitMem, types.TypeMem, 0, nil),
Valu("sb", OpSB, types.TypeInvalid, 0, nil),
Valu("const2", OpConst32, intType, 2, nil),
Valu("const3", OpConst32, intType, 3, nil),
Goto("b5")),
Bloc("b2",
Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
If("cond", "b3", "b4")),
Bloc("b3",
Goto("b2")),
Bloc("b4",
Goto("b2")),
Bloc("b5",
Exit("start")))
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
CheckFunc(fun.f)
branchelim(fun.f)
CheckFunc(fun.f)
if len(fun.f.Blocks) != 5 {
t.Errorf("expected 5 block after branchelim; found %d", len(fun.f.Blocks))
}
if fun.values["phi"].Op != OpPhi {
t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
if len(fun.f.Blocks) != 5 {
t.Errorf("expected 5 block after branchelim; found %d", len(fun.f.Blocks))
}
// The looping diamond must not be rewritten, so the phi has to still be a Phi.
if fun.values["phi"].Op != OpPhi {
t.Errorf("expected phi op to be Phi; found op %s", fun.values["phi"].Op)
}
})
}
}

View File

@ -7,6 +7,7 @@ package ssa
import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/arm64"
"cmd/internal/obj/s390x"
"cmd/internal/obj/x86"
"cmd/internal/src"
@ -22,6 +23,7 @@ var Copyelim = copyelim
var testCtxts = map[string]*obj.Link{
"amd64": obj.Linknew(&x86.Linkamd64),
"s390x": obj.Linknew(&s390x.Links390x),
"arm64": obj.Linknew(&arm64.Linkarm64),
}
func testConfig(tb testing.TB) *Conf { return testConfigArch(tb, "amd64") }

View File

@ -475,6 +475,52 @@
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
// Lowering conditional moves
// If the condition is a SETxx, we can just run a CMOV from the comparison that was
// setting the flags.
// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
-> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
-> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
-> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
// If the condition does not set the flags, we need to generate a comparison.
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
-> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
-> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
-> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
-> (CMOVQNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
-> (CMOVLNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
-> (CMOVWNE y x (CMPQconst [0] check))
// Absorb InvertFlags
(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
// Absorb constants generated during lower
(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) -> x
(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) -> y
(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) -> x
(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) -> y
(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) -> x
(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) -> y
(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) -> x
(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) -> y
(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) -> x
(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) -> y
// Miscellaneous
(Convert <t> x mem) && config.PtrSize == 8 -> (MOVQconvert <t> x mem)
(Convert <t> x mem) && config.PtrSize == 4 -> (MOVLconvert <t> x mem)
@ -1350,6 +1396,10 @@
(CMPLconst x [0]) -> (TESTL x x)
(CMPWconst x [0]) -> (TESTW x x)
(CMPBconst x [0]) -> (TESTB x x)
(TESTQconst [-1] x) -> (TESTQ x x)
(TESTLconst [-1] x) -> (TESTL x x)
(TESTWconst [-1] x) -> (TESTW x x)
(TESTBconst [-1] x) -> (TESTB x x)
// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is

View File

@ -132,6 +132,7 @@ func init() {
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
@ -340,10 +341,57 @@ func init() {
{name: "BSRQ", argLength: 1, reg: gp11flags, asm: "BSRQ", typ: "(UInt64,Flags)"}, // # of high-order zeroes in 64-bit arg
{name: "BSRL", argLength: 1, reg: gp11flags, asm: "BSRL", typ: "(UInt32,Flags)"}, // # of high-order zeroes in 32-bit arg
// Note ASM for ops moves whole register
//
{name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
// CMOV instructions: 64, 32 and 16-bit sizes.
// if arg2 encodes a true result, return arg1, else arg0
{name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true},
{name: "CMOVQNE", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQLT", argLength: 3, reg: gp21, asm: "CMOVQLT", resultInArg0: true},
{name: "CMOVQGT", argLength: 3, reg: gp21, asm: "CMOVQGT", resultInArg0: true},
{name: "CMOVQLE", argLength: 3, reg: gp21, asm: "CMOVQLE", resultInArg0: true},
{name: "CMOVQGE", argLength: 3, reg: gp21, asm: "CMOVQGE", resultInArg0: true},
{name: "CMOVQLS", argLength: 3, reg: gp21, asm: "CMOVQLS", resultInArg0: true},
{name: "CMOVQHI", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
{name: "CMOVQCC", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
{name: "CMOVQCS", argLength: 3, reg: gp21, asm: "CMOVQCS", resultInArg0: true},
{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true},
{name: "CMOVLNE", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLLT", argLength: 3, reg: gp21, asm: "CMOVLLT", resultInArg0: true},
{name: "CMOVLGT", argLength: 3, reg: gp21, asm: "CMOVLGT", resultInArg0: true},
{name: "CMOVLLE", argLength: 3, reg: gp21, asm: "CMOVLLE", resultInArg0: true},
{name: "CMOVLGE", argLength: 3, reg: gp21, asm: "CMOVLGE", resultInArg0: true},
{name: "CMOVLLS", argLength: 3, reg: gp21, asm: "CMOVLLS", resultInArg0: true},
{name: "CMOVLHI", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
{name: "CMOVLCC", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
{name: "CMOVLCS", argLength: 3, reg: gp21, asm: "CMOVLCS", resultInArg0: true},
{name: "CMOVWEQ", argLength: 3, reg: gp21, asm: "CMOVWEQ", resultInArg0: true},
{name: "CMOVWNE", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWLT", argLength: 3, reg: gp21, asm: "CMOVWLT", resultInArg0: true},
{name: "CMOVWGT", argLength: 3, reg: gp21, asm: "CMOVWGT", resultInArg0: true},
{name: "CMOVWLE", argLength: 3, reg: gp21, asm: "CMOVWLE", resultInArg0: true},
{name: "CMOVWGE", argLength: 3, reg: gp21, asm: "CMOVWGE", resultInArg0: true},
{name: "CMOVWLS", argLength: 3, reg: gp21, asm: "CMOVWLS", resultInArg0: true},
{name: "CMOVWHI", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
{name: "CMOVWCC", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
{name: "CMOVWCS", argLength: 3, reg: gp21, asm: "CMOVWCS", resultInArg0: true},
// CMOV with floating point instructions. We need separate pseudo-op to handle
// InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
// NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
// code generation in amd64/ssa.go.
// The code generated for every CMOV*EQF size (Q, L and W) uses AX as a scratch
// register, so all three must use gp21pax: it keeps arg0/result out of AX and
// tells the register allocator that AX is clobbered.
{name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
{name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
{name: "CMOVLEQF", argLength: 3, reg: gp21pax, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
{name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
{name: "CMOVWEQF", argLength: 3, reg: gp21pax, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
{name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
@ -578,7 +626,6 @@ func init() {
{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
//arg0=ptr,arg1=mem, returns void. Faults if ptr is nil.
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary, but may clobber others.
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), ax}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},

View File

@ -560,7 +560,47 @@ const (
OpAMD64BSRQ
OpAMD64BSRL
OpAMD64CMOVQEQ
OpAMD64CMOVQNE
OpAMD64CMOVQLT
OpAMD64CMOVQGT
OpAMD64CMOVQLE
OpAMD64CMOVQGE
OpAMD64CMOVQLS
OpAMD64CMOVQHI
OpAMD64CMOVQCC
OpAMD64CMOVQCS
OpAMD64CMOVLEQ
OpAMD64CMOVLNE
OpAMD64CMOVLLT
OpAMD64CMOVLGT
OpAMD64CMOVLLE
OpAMD64CMOVLGE
OpAMD64CMOVLLS
OpAMD64CMOVLHI
OpAMD64CMOVLCC
OpAMD64CMOVLCS
OpAMD64CMOVWEQ
OpAMD64CMOVWNE
OpAMD64CMOVWLT
OpAMD64CMOVWGT
OpAMD64CMOVWLE
OpAMD64CMOVWGE
OpAMD64CMOVWLS
OpAMD64CMOVWHI
OpAMD64CMOVWCC
OpAMD64CMOVWCS
OpAMD64CMOVQEQF
OpAMD64CMOVQNEF
OpAMD64CMOVQGTF
OpAMD64CMOVQGEF
OpAMD64CMOVLEQF
OpAMD64CMOVLNEF
OpAMD64CMOVLGTF
OpAMD64CMOVLGEF
OpAMD64CMOVWEQF
OpAMD64CMOVWNEF
OpAMD64CMOVWGTF
OpAMD64CMOVWGEF
OpAMD64BSWAPQ
OpAMD64BSWAPL
OpAMD64POPCNTQ
@ -6808,6 +6848,141 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "CMOVQNE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQLT",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQLT,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQGT",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQGT,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQLE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQLE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQGE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQGE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQLS",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQLS,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQHI",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQHI,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQCC",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQCC,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQCS",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQCS,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLEQ",
argLen: 3,
@@ -6823,6 +6998,472 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "CMOVLNE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLLT",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLLT,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLGT",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLGT,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLLE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLLE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLGE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLGE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLLS",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLLS,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLHI",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLHI,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLCC",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLCC,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLCS",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLCS,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWEQ",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWEQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWNE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWLT",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWLT,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWGT",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWGT,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWLE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWLE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWGE",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWGE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWLS",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWLS,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWHI",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWHI,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWCC",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWCC,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWCS",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWCS,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQEQF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
clobbers: 1, // AX
outputs: []outputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQNEF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQGTF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQHI,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQGEF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVQCC,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLEQF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLNEF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLGTF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLHI,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLGEF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVLCC,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWEQF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWNEF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWGTF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWHI,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWGEF",
argLen: 3,
resultInArg0: true,
asm: x86.ACMOVWCC,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BSWAPQ",
argLen: 1,

File diff suppressed because it is too large Load Diff

178
test/codegen/condmove.go Normal file
View File

@@ -0,0 +1,178 @@
// asmcheck
package codegen
// cmovint adds 4 to c and replaces a negative sum with the constant 182.
// The directives ahead of the return expect the conditional assignment to
// appear as CMOVQLT on amd64 and as a CSEL with the LT condition on arm64.
func cmovint(c int) int {
x := c + 4
if x < 0 {
x = 182
}
// amd64:"CMOVQLT"
// arm64:"CSEL\tLT"
return x
}
// cmovchan overwrites x with y when the two channel values differ and
// returns x. It exercises a conditional select on a channel-typed value.
// The directives expect CMOVQNE on amd64 and a CSEL with the NE condition
// on arm64.
func cmovchan(x, y chan int) chan int {
if x != y {
x = y
}
// amd64:"CMOVQNE"
// arm64:"CSEL\tNE"
return x
}
// cmovuintptr replaces x with the (wrapping) negation of y when x < y under
// an unsigned comparison, and returns x. The directives expect the 64-bit
// CMOVQCS on amd64 and a CSEL with the LO condition on arm64.
func cmovuintptr(x, y uintptr) uintptr {
if x < y {
x = -y
}
// amd64:"CMOVQCS"
// arm64:"CSEL\tLO"
return x
}
// cmov32bit is the uint32 variant of the unsigned "x < y" select: x becomes
// the wrapping negation of y when x < y. The directives expect the 32-bit
// form CMOVLCS on amd64 and a CSEL with the LO condition on arm64.
func cmov32bit(x, y uint32) uint32 {
if x < y {
x = -y
}
// amd64:"CMOVLCS"
// arm64:"CSEL\tLO"
return x
}
// cmov16bit is the uint16 variant of the unsigned "x < y" select: x becomes
// the wrapping negation of y when x < y. The directives expect the 16-bit
// form CMOVWCS on amd64 and a CSEL with the LO condition on arm64.
func cmov16bit(x, y uint16) uint16 {
if x < y {
x = -y
}
// amd64:"CMOVWCS"
// arm64:"CSEL\tLO"
return x
}
// Floating point comparison. For EQ/NE, we must
// generate special code to handle NaNs.
//
// cmovfloateq returns 256 when x == y and 128 otherwise; the selected value
// is an integer while the condition is a float64 comparison. On amd64 the
// directive lists two patterns, CMOVQNE and CMOVQPC, so both instructions
// are expected; on arm64 a single CSEL with the EQ condition is expected.
func cmovfloateq(x, y float64) int {
a := 128
if x == y {
a = 256
}
// amd64:"CMOVQNE","CMOVQPC"
// arm64:"CSEL\tEQ"
return a
}
// cmovfloatne returns 256 when x != y and 128 otherwise, using a float64
// comparison to select an integer. On amd64 the directive lists two
// patterns, CMOVQNE and CMOVQPS, so both instructions are expected; on
// arm64 a single CSEL with the NE condition is expected.
func cmovfloatne(x, y float64) int {
a := 128
if x != y {
a = 256
}
// amd64:"CMOVQNE","CMOVQPS"
// arm64:"CSEL\tNE"
return a
}
// frexp is a stub used by cmovfloatint2: it ignores f and always returns
// the pair (1.0, 4). It is marked noinline, presumably so the call stays
// opaque to the optimizer and its results are not constant-folded
// (NOTE(review): intent inferred, confirm).
//go:noinline
func frexp(f float64) (frac float64, exp int) {
return 1.0, 4
}
// ldexp is a stub used by cmovfloatint2: it ignores both arguments and
// always returns 1.0. It is marked noinline, presumably so the call stays
// opaque to the optimizer (NOTE(review): intent inferred, confirm).
//go:noinline
func ldexp(frac float64, exp int) float64 {
return 1.0
}
// Generate a CMOV with a floating comparison and integer move.
//
// cmovfloatint2 loops while r >= y. Each iteration splits r with the frexp
// stub, decrements the integer exponent when the returned fraction is below
// the constant 4.0, and subtracts ldexp(y, rexp-yexp) from r. The float
// comparison "rfr < yfr" guards an integer update, which is the select
// under test. The directives sit inside the loop and expect CMOVQHI on
// amd64 and a CSEL with the GT condition on arm64.
func cmovfloatint2(x, y float64) float64 {
yfr, yexp := 4.0, 5
r := x
for r >= y {
rfr, rexp := frexp(r)
if rfr < yfr {
rexp = rexp - 1
}
// amd64:"CMOVQHI"
// arm64:"CSEL\tGT"
r = r - ldexp(y, (rexp-yexp))
}
return r
}
// cmovloaded returns x[2] when that element is nonzero and y>>2 otherwise.
// Both arms of the if/else assign y, and one of the candidate values is an
// array element. The directives expect CMOVQNE on amd64 and a CSEL with
// the NE condition on arm64.
func cmovloaded(x [4]int, y int) int {
if x[2] != 0 {
y = x[2]
} else {
y = y >> 2
}
// amd64:"CMOVQNE"
// arm64:"CSEL\tNE"
return y
}
// cmovuintptr2 computes a = x*2 and substitutes 256 when the product is
// zero; the y parameter is unused. The directives expect CMOVQEQ on amd64
// and a CSEL with the EQ condition on arm64.
func cmovuintptr2(x, y uintptr) uintptr {
a := x * 2
if a == 0 {
a = 256
}
// amd64:"CMOVQEQ"
// arm64:"CSEL\tEQ"
return a
}
// Floating point CMOVs are not supported by amd64/arm64.
//
// cmovfloatmove returns 2.0 when x <= y and 1.0 otherwise: an integer
// comparison selecting a float64 value. The patterns below carry a leading
// '-', which marks them as negative checks, so no CMOV (amd64) or CSEL
// (arm64) should be emitted for this function
// (NOTE(review): '-' semantics taken from the asmcheck convention, confirm
// against test/codegen/README).
func cmovfloatmove(x, y int) float64 {
a := 1.0
if x <= y {
a = 2.0
}
// amd64:-"CMOV"
// arm64:-"CSEL"
return a
}
// On amd64, the following patterns trigger comparison inversion.
// Test that we correctly invert the CMOV condition.
//
// gsink and gusink are package-level variables used as the right-hand
// operand of the comparisons in the cmovinvert functions (signed and
// unsigned 64-bit respectively).
var gsink int64
var gusink uint64
// cmovinvert1 negates y when x < gsink (signed) and returns y. The amd64
// directive expects CMOVQGT, the operand-swapped form of the source's
// less-than comparison.
func cmovinvert1(x, y int64) int64 {
if x < gsink {
y = -y
}
// amd64:"CMOVQGT"
return y
}
// cmovinvert2 negates y when x <= gsink (signed) and returns y. The amd64
// directive expects CMOVQGE, the operand-swapped form of the source's
// less-or-equal comparison.
func cmovinvert2(x, y int64) int64 {
if x <= gsink {
y = -y
}
// amd64:"CMOVQGE"
return y
}
// cmovinvert3 negates y when x == gsink and returns y. Equality is
// symmetric in its operands, and the amd64 directive accordingly expects
// the condition to stay CMOVQEQ.
func cmovinvert3(x, y int64) int64 {
if x == gsink {
y = -y
}
// amd64:"CMOVQEQ"
return y
}
// cmovinvert4 negates y when x != gsink and returns y. Inequality is
// symmetric in its operands, and the amd64 directive accordingly expects
// the condition to stay CMOVQNE.
func cmovinvert4(x, y int64) int64 {
if x != gsink {
y = -y
}
// amd64:"CMOVQNE"
return y
}
// cmovinvert5 negates y (wrapping) when x > gusink under an unsigned
// comparison and returns y. The amd64 directive expects CMOVQCS, the
// operand-swapped form of the source's unsigned greater-than comparison.
func cmovinvert5(x, y uint64) uint64 {
if x > gusink {
y = -y
}
// amd64:"CMOVQCS"
return y
}
// cmovinvert6 negates y (wrapping) when x >= gusink under an unsigned
// comparison and returns y. The amd64 directive expects CMOVQLS, the
// operand-swapped form of the source's unsigned greater-or-equal
// comparison.
func cmovinvert6(x, y uint64) uint64 {
if x >= gusink {
y = -y
}
// amd64:"CMOVQLS"
return y
}