mirror of
https://github.com/golang/go
synced 2024-11-18 15:54:42 -07:00
cmd/compile: ppc64x intrinsics for math/bits
This adds math/bits intrinsics for OnesCount, Len, TrailingZeros on ppc64x.

benchmark                       old ns/op     new ns/op     delta
BenchmarkLeadingZeros-16        4.26          1.71          -59.86%
BenchmarkLeadingZeros16-16      3.04          1.83          -39.80%
BenchmarkLeadingZeros32-16      3.31          1.82          -45.02%
BenchmarkLeadingZeros64-16      3.69          1.71          -53.66%
BenchmarkTrailingZeros-16       2.55          1.62          -36.47%
BenchmarkTrailingZeros32-16     2.55          1.77          -30.59%
BenchmarkTrailingZeros64-16     2.78          1.62          -41.73%
BenchmarkOnesCount-16           3.19          0.93          -70.85%
BenchmarkOnesCount32-16         2.55          1.18          -53.73%
BenchmarkOnesCount64-16         3.22          0.93          -71.12%

Update #18616

I also made a change to bits_test.go because, when debugging some failures, the test output did not show the correct argument value.

Change-Id: Ia58d31d1777cf4582a4505f85b11a1202ca07d3e
Reviewed-on: https://go-review.googlesource.com/41630
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
a4864094f0
commit
8304d10763
@ -2730,12 +2730,12 @@ func init() {
|
|||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpCtz64, types.Types[TINT], args[0])
|
return s.newValue1(ssa.OpCtz64, types.Types[TINT], args[0])
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
|
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
||||||
addF("math/bits", "TrailingZeros32",
|
addF("math/bits", "TrailingZeros32",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpCtz32, types.Types[TINT], args[0])
|
return s.newValue1(ssa.OpCtz32, types.Types[TINT], args[0])
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
|
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
||||||
addF("math/bits", "TrailingZeros16",
|
addF("math/bits", "TrailingZeros16",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
x := s.newValue1(ssa.OpZeroExt16to32, types.Types[TUINT32], args[0])
|
x := s.newValue1(ssa.OpZeroExt16to32, types.Types[TUINT32], args[0])
|
||||||
@ -2776,7 +2776,7 @@ func init() {
|
|||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
|
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
|
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
||||||
addF("math/bits", "Len32",
|
addF("math/bits", "Len32",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
if s.config.PtrSize == 4 {
|
if s.config.PtrSize == 4 {
|
||||||
@ -2785,7 +2785,7 @@ func init() {
|
|||||||
x := s.newValue1(ssa.OpZeroExt32to64, types.Types[TUINT64], args[0])
|
x := s.newValue1(ssa.OpZeroExt32to64, types.Types[TUINT64], args[0])
|
||||||
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
|
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
|
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
||||||
addF("math/bits", "Len16",
|
addF("math/bits", "Len16",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
if s.config.PtrSize == 4 {
|
if s.config.PtrSize == 4 {
|
||||||
@ -2795,7 +2795,7 @@ func init() {
|
|||||||
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
|
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
|
||||||
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
|
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
|
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
||||||
// Note: disabled on AMD64 because the Go code is faster!
|
// Note: disabled on AMD64 because the Go code is faster!
|
||||||
addF("math/bits", "Len8",
|
addF("math/bits", "Len8",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
@ -2806,7 +2806,7 @@ func init() {
|
|||||||
x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
|
x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
|
||||||
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
|
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
|
||||||
},
|
},
|
||||||
sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
|
sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
||||||
|
|
||||||
addF("math/bits", "Len",
|
addF("math/bits", "Len",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
@ -2815,7 +2815,7 @@ func init() {
|
|||||||
}
|
}
|
||||||
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
|
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
|
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
|
||||||
// LeadingZeros is handled because it trivially calls Len.
|
// LeadingZeros is handled because it trivially calls Len.
|
||||||
addF("math/bits", "Reverse64",
|
addF("math/bits", "Reverse64",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
@ -2845,7 +2845,7 @@ func init() {
|
|||||||
return s.newValue1(ssa.OpBitRev64, types.Types[TINT], args[0])
|
return s.newValue1(ssa.OpBitRev64, types.Types[TINT], args[0])
|
||||||
},
|
},
|
||||||
sys.ARM64)
|
sys.ARM64)
|
||||||
makeOnesCount := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
makeOnesCountAMD64 := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
aux := s.lookupSymbol(n, &ssa.ExternSymbol{Sym: syslook("support_popcnt").Sym.Linksym()})
|
aux := s.lookupSymbol(n, &ssa.ExternSymbol{Sym: syslook("support_popcnt").Sym.Linksym()})
|
||||||
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), aux, s.sb)
|
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), aux, s.sb)
|
||||||
@ -2881,17 +2881,27 @@ func init() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
addF("math/bits", "OnesCount64",
|
addF("math/bits", "OnesCount64",
|
||||||
makeOnesCount(ssa.OpPopCount64, ssa.OpPopCount64),
|
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount64),
|
||||||
|
sys.AMD64)
|
||||||
|
addF("math/bits", "OnesCount64",
|
||||||
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
|
return s.newValue1(ssa.OpPopCount64, types.Types[TINT], args[0])
|
||||||
|
},
|
||||||
|
sys.PPC64)
|
||||||
|
addF("math/bits", "OnesCount32",
|
||||||
|
makeOnesCountAMD64(ssa.OpPopCount32, ssa.OpPopCount32),
|
||||||
sys.AMD64)
|
sys.AMD64)
|
||||||
addF("math/bits", "OnesCount32",
|
addF("math/bits", "OnesCount32",
|
||||||
makeOnesCount(ssa.OpPopCount32, ssa.OpPopCount32),
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
sys.AMD64)
|
return s.newValue1(ssa.OpPopCount32, types.Types[TINT], args[0])
|
||||||
|
},
|
||||||
|
sys.PPC64)
|
||||||
addF("math/bits", "OnesCount16",
|
addF("math/bits", "OnesCount16",
|
||||||
makeOnesCount(ssa.OpPopCount16, ssa.OpPopCount16),
|
makeOnesCountAMD64(ssa.OpPopCount16, ssa.OpPopCount16),
|
||||||
sys.AMD64)
|
sys.AMD64)
|
||||||
// Note: no OnesCount8, the Go implementation is faster - just a table load.
|
// Note: no OnesCount8, the Go implementation is faster - just a table load.
|
||||||
addF("math/bits", "OnesCount",
|
addF("math/bits", "OnesCount",
|
||||||
makeOnesCount(ssa.OpPopCount64, ssa.OpPopCount32),
|
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
|
||||||
sys.AMD64)
|
sys.AMD64)
|
||||||
|
|
||||||
/******** sync/atomic ********/
|
/******** sync/atomic ********/
|
||||||
|
@ -596,7 +596,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
|
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
|
||||||
|
|
||||||
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP:
|
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
|
||||||
r := v.Reg()
|
r := v.Reg()
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
|
@ -244,6 +244,17 @@
|
|||||||
// (Addr {sym} base) -> (ADDconst {sym} base)
|
// (Addr {sym} base) -> (ADDconst {sym} base)
|
||||||
(OffPtr [off] ptr) -> (ADD (MOVDconst <typ.Int64> [off]) ptr)
|
(OffPtr [off] ptr) -> (ADD (MOVDconst <typ.Int64> [off]) ptr)
|
||||||
|
|
||||||
|
(Ctz64 x) -> (POPCNTD (ANDN <types.Int64> (ADDconst <types.Int64> [-1] x) x))
|
||||||
|
(Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN <types.Int> (ADDconst <types.Int> [-1] x) x)))
|
||||||
|
|
||||||
|
(BitLen64 x) -> (SUB (MOVDconst [64]) (CNTLZD <types.Int> x))
|
||||||
|
(BitLen32 x) -> (SUB (MOVDconst [32]) (CNTLZW <types.Int> x))
|
||||||
|
|
||||||
|
(PopCount64 x) -> (POPCNTD x)
|
||||||
|
(PopCount32 x) -> (POPCNTW (MOVWZreg x))
|
||||||
|
(PopCount16 x) -> (POPCNTW (MOVHZreg x))
|
||||||
|
(PopCount8 x) -> (POPCNTB (MOVBreg x))
|
||||||
|
|
||||||
(And64 x y) -> (AND x y)
|
(And64 x y) -> (AND x y)
|
||||||
(And32 x y) -> (AND x y)
|
(And32 x y) -> (AND x y)
|
||||||
(And16 x y) -> (AND x y)
|
(And16 x y) -> (AND x y)
|
||||||
|
@ -198,6 +198,13 @@ func init() {
|
|||||||
{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits
|
{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits
|
||||||
{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
|
{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
|
||||||
|
|
||||||
|
{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
|
||||||
|
{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
|
||||||
|
|
||||||
|
{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
|
||||||
|
{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
|
||||||
|
{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
|
||||||
|
|
||||||
{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"}, // arg0/arg1
|
{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"}, // arg0/arg1
|
||||||
{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
|
{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
|
||||||
|
|
||||||
|
@ -1294,6 +1294,11 @@ const (
|
|||||||
OpPPC64SLWconst
|
OpPPC64SLWconst
|
||||||
OpPPC64ROTLconst
|
OpPPC64ROTLconst
|
||||||
OpPPC64ROTLWconst
|
OpPPC64ROTLWconst
|
||||||
|
OpPPC64CNTLZD
|
||||||
|
OpPPC64CNTLZW
|
||||||
|
OpPPC64POPCNTD
|
||||||
|
OpPPC64POPCNTW
|
||||||
|
OpPPC64POPCNTB
|
||||||
OpPPC64FDIV
|
OpPPC64FDIV
|
||||||
OpPPC64FDIVS
|
OpPPC64FDIVS
|
||||||
OpPPC64DIVD
|
OpPPC64DIVD
|
||||||
@ -16568,6 +16573,73 @@ var opcodeTable = [...]opInfo{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "CNTLZD",
|
||||||
|
argLen: 1,
|
||||||
|
clobberFlags: true,
|
||||||
|
asm: ppc64.ACNTLZD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "CNTLZW",
|
||||||
|
argLen: 1,
|
||||||
|
clobberFlags: true,
|
||||||
|
asm: ppc64.ACNTLZW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "POPCNTD",
|
||||||
|
argLen: 1,
|
||||||
|
asm: ppc64.APOPCNTD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "POPCNTW",
|
||||||
|
argLen: 1,
|
||||||
|
asm: ppc64.APOPCNTW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "POPCNTB",
|
||||||
|
argLen: 1,
|
||||||
|
asm: ppc64.APOPCNTB,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "FDIV",
|
name: "FDIV",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
@ -69,6 +69,10 @@ func rewriteValuePPC64(v *Value) bool {
|
|||||||
return rewriteValuePPC64_OpAtomicStore64_0(v)
|
return rewriteValuePPC64_OpAtomicStore64_0(v)
|
||||||
case OpAvg64u:
|
case OpAvg64u:
|
||||||
return rewriteValuePPC64_OpAvg64u_0(v)
|
return rewriteValuePPC64_OpAvg64u_0(v)
|
||||||
|
case OpBitLen32:
|
||||||
|
return rewriteValuePPC64_OpBitLen32_0(v)
|
||||||
|
case OpBitLen64:
|
||||||
|
return rewriteValuePPC64_OpBitLen64_0(v)
|
||||||
case OpClosureCall:
|
case OpClosureCall:
|
||||||
return rewriteValuePPC64_OpClosureCall_0(v)
|
return rewriteValuePPC64_OpClosureCall_0(v)
|
||||||
case OpCom16:
|
case OpCom16:
|
||||||
@ -97,6 +101,10 @@ func rewriteValuePPC64(v *Value) bool {
|
|||||||
return rewriteValuePPC64_OpConstNil_0(v)
|
return rewriteValuePPC64_OpConstNil_0(v)
|
||||||
case OpConvert:
|
case OpConvert:
|
||||||
return rewriteValuePPC64_OpConvert_0(v)
|
return rewriteValuePPC64_OpConvert_0(v)
|
||||||
|
case OpCtz32:
|
||||||
|
return rewriteValuePPC64_OpCtz32_0(v)
|
||||||
|
case OpCtz64:
|
||||||
|
return rewriteValuePPC64_OpCtz64_0(v)
|
||||||
case OpCvt32Fto32:
|
case OpCvt32Fto32:
|
||||||
return rewriteValuePPC64_OpCvt32Fto32_0(v)
|
return rewriteValuePPC64_OpCvt32Fto32_0(v)
|
||||||
case OpCvt32Fto64:
|
case OpCvt32Fto64:
|
||||||
@ -465,6 +473,14 @@ func rewriteValuePPC64(v *Value) bool {
|
|||||||
return rewriteValuePPC64_OpPPC64XOR_0(v)
|
return rewriteValuePPC64_OpPPC64XOR_0(v)
|
||||||
case OpPPC64XORconst:
|
case OpPPC64XORconst:
|
||||||
return rewriteValuePPC64_OpPPC64XORconst_0(v)
|
return rewriteValuePPC64_OpPPC64XORconst_0(v)
|
||||||
|
case OpPopCount16:
|
||||||
|
return rewriteValuePPC64_OpPopCount16_0(v)
|
||||||
|
case OpPopCount32:
|
||||||
|
return rewriteValuePPC64_OpPopCount32_0(v)
|
||||||
|
case OpPopCount64:
|
||||||
|
return rewriteValuePPC64_OpPopCount64_0(v)
|
||||||
|
case OpPopCount8:
|
||||||
|
return rewriteValuePPC64_OpPopCount8_0(v)
|
||||||
case OpRound32F:
|
case OpRound32F:
|
||||||
return rewriteValuePPC64_OpRound32F_0(v)
|
return rewriteValuePPC64_OpRound32F_0(v)
|
||||||
case OpRound64F:
|
case OpRound64F:
|
||||||
@ -988,6 +1004,46 @@ func rewriteValuePPC64_OpAvg64u_0(v *Value) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValuePPC64_OpBitLen32_0(v *Value) bool {
|
||||||
|
b := v.Block
|
||||||
|
_ = b
|
||||||
|
types := &b.Func.Config.Types
|
||||||
|
_ = types
|
||||||
|
// match: (BitLen32 x)
|
||||||
|
// cond:
|
||||||
|
// result: (SUB (MOVDconst [32]) (CNTLZW <types.Int> x))
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpPPC64SUB)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, types.Int64)
|
||||||
|
v0.AuxInt = 32
|
||||||
|
v.AddArg(v0)
|
||||||
|
v1 := b.NewValue0(v.Pos, OpPPC64CNTLZW, types.Int)
|
||||||
|
v1.AddArg(x)
|
||||||
|
v.AddArg(v1)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValuePPC64_OpBitLen64_0(v *Value) bool {
|
||||||
|
b := v.Block
|
||||||
|
_ = b
|
||||||
|
types := &b.Func.Config.Types
|
||||||
|
_ = types
|
||||||
|
// match: (BitLen64 x)
|
||||||
|
// cond:
|
||||||
|
// result: (SUB (MOVDconst [64]) (CNTLZD <types.Int> x))
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpPPC64SUB)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, types.Int64)
|
||||||
|
v0.AuxInt = 64
|
||||||
|
v.AddArg(v0)
|
||||||
|
v1 := b.NewValue0(v.Pos, OpPPC64CNTLZD, types.Int)
|
||||||
|
v1.AddArg(x)
|
||||||
|
v.AddArg(v1)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValuePPC64_OpClosureCall_0(v *Value) bool {
|
func rewriteValuePPC64_OpClosureCall_0(v *Value) bool {
|
||||||
// match: (ClosureCall [argwid] entry closure mem)
|
// match: (ClosureCall [argwid] entry closure mem)
|
||||||
// cond:
|
// cond:
|
||||||
@ -1155,6 +1211,50 @@ func rewriteValuePPC64_OpConvert_0(v *Value) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValuePPC64_OpCtz32_0(v *Value) bool {
|
||||||
|
b := v.Block
|
||||||
|
_ = b
|
||||||
|
types := &b.Func.Config.Types
|
||||||
|
_ = types
|
||||||
|
// match: (Ctz32 x)
|
||||||
|
// cond:
|
||||||
|
// result: (POPCNTW (MOVWZreg (ANDN <types.Int> (ADDconst <types.Int> [-1] x) x)))
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpPPC64POPCNTW)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, types.Int64)
|
||||||
|
v1 := b.NewValue0(v.Pos, OpPPC64ANDN, types.Int)
|
||||||
|
v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, types.Int)
|
||||||
|
v2.AuxInt = -1
|
||||||
|
v2.AddArg(x)
|
||||||
|
v1.AddArg(v2)
|
||||||
|
v1.AddArg(x)
|
||||||
|
v0.AddArg(v1)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValuePPC64_OpCtz64_0(v *Value) bool {
|
||||||
|
b := v.Block
|
||||||
|
_ = b
|
||||||
|
types := &b.Func.Config.Types
|
||||||
|
_ = types
|
||||||
|
// match: (Ctz64 x)
|
||||||
|
// cond:
|
||||||
|
// result: (POPCNTD (ANDN <types.Int64> (ADDconst <types.Int64> [-1] x) x))
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpPPC64POPCNTD)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpPPC64ANDN, types.Int64)
|
||||||
|
v1 := b.NewValue0(v.Pos, OpPPC64ADDconst, types.Int64)
|
||||||
|
v1.AuxInt = -1
|
||||||
|
v1.AddArg(x)
|
||||||
|
v0.AddArg(v1)
|
||||||
|
v0.AddArg(x)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
|
func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
|
||||||
b := v.Block
|
b := v.Block
|
||||||
_ = b
|
_ = b
|
||||||
@ -7944,6 +8044,68 @@ func rewriteValuePPC64_OpPPC64XORconst_0(v *Value) bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
func rewriteValuePPC64_OpPopCount16_0(v *Value) bool {
|
||||||
|
b := v.Block
|
||||||
|
_ = b
|
||||||
|
types := &b.Func.Config.Types
|
||||||
|
_ = types
|
||||||
|
// match: (PopCount16 x)
|
||||||
|
// cond:
|
||||||
|
// result: (POPCNTW (MOVHZreg x))
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpPPC64POPCNTW)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, types.Int64)
|
||||||
|
v0.AddArg(x)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValuePPC64_OpPopCount32_0(v *Value) bool {
|
||||||
|
b := v.Block
|
||||||
|
_ = b
|
||||||
|
types := &b.Func.Config.Types
|
||||||
|
_ = types
|
||||||
|
// match: (PopCount32 x)
|
||||||
|
// cond:
|
||||||
|
// result: (POPCNTW (MOVWZreg x))
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpPPC64POPCNTW)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, types.Int64)
|
||||||
|
v0.AddArg(x)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValuePPC64_OpPopCount64_0(v *Value) bool {
|
||||||
|
// match: (PopCount64 x)
|
||||||
|
// cond:
|
||||||
|
// result: (POPCNTD x)
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpPPC64POPCNTD)
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValuePPC64_OpPopCount8_0(v *Value) bool {
|
||||||
|
b := v.Block
|
||||||
|
_ = b
|
||||||
|
types := &b.Func.Config.Types
|
||||||
|
_ = types
|
||||||
|
// match: (PopCount8 x)
|
||||||
|
// cond:
|
||||||
|
// result: (POPCNTB (MOVBreg x))
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpPPC64POPCNTB)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, types.Int64)
|
||||||
|
v0.AddArg(x)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValuePPC64_OpRound32F_0(v *Value) bool {
|
func rewriteValuePPC64_OpRound32F_0(v *Value) bool {
|
||||||
// match: (Round32F x)
|
// match: (Round32F x)
|
||||||
// cond:
|
// cond:
|
||||||
|
@ -254,26 +254,26 @@ func testOnesCount(t *testing.T, x uint64, want int) {
|
|||||||
if x <= 1<<8-1 {
|
if x <= 1<<8-1 {
|
||||||
got := OnesCount8(uint8(x))
|
got := OnesCount8(uint8(x))
|
||||||
if got != want {
|
if got != want {
|
||||||
t.Fatalf("OnesCount8(%#02x) == %d; want %d", x, got, want)
|
t.Fatalf("OnesCount8(%#02x) == %d; want %d", uint8(x), got, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if x <= 1<<16-1 {
|
if x <= 1<<16-1 {
|
||||||
got := OnesCount16(uint16(x))
|
got := OnesCount16(uint16(x))
|
||||||
if got != want {
|
if got != want {
|
||||||
t.Fatalf("OnesCount16(%#04x) == %d; want %d", x, got, want)
|
t.Fatalf("OnesCount16(%#04x) == %d; want %d", uint16(x), got, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if x <= 1<<32-1 {
|
if x <= 1<<32-1 {
|
||||||
got := OnesCount32(uint32(x))
|
got := OnesCount32(uint32(x))
|
||||||
if got != want {
|
if got != want {
|
||||||
t.Fatalf("OnesCount32(%#08x) == %d; want %d", x, got, want)
|
t.Fatalf("OnesCount32(%#08x) == %d; want %d", uint32(x), got, want)
|
||||||
}
|
}
|
||||||
if UintSize == 32 {
|
if UintSize == 32 {
|
||||||
got = OnesCount(uint(x))
|
got = OnesCount(uint(x))
|
||||||
if got != want {
|
if got != want {
|
||||||
t.Fatalf("OnesCount(%#08x) == %d; want %d", x, got, want)
|
t.Fatalf("OnesCount(%#08x) == %d; want %d", uint32(x), got, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user