1
0
mirror of https://github.com/golang/go synced 2024-11-11 20:50:23 -07:00

cmd/compile: intrinsify math/bits/ReverseBytes{16|32|64} for ppc64/power10

This change intrinsifies ReverseBytes{16|32|64} by generating the
corresponding new instructions in Power10: brh, brd and brw and
adds a verification test for the same.
On Power 9 and 8, the .go code performs optimally as it is.

Performance improvement seen on Power10:
ReverseBytes32  1.38ns ± 0%  1.18ns ± 0%  -14.2
ReverseBytes64  1.52ns ± 0%  1.11ns ± 0%  -26.87
ReverseBytes16  1.41ns ± 1%  1.18ns ± 0%  -16.47

Change-Id: I88f127f3ab9ba24a772becc21ad90acfba324b37
Reviewed-on: https://go-review.googlesource.com/c/go/+/446675
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Archana R 2022-10-31 11:47:17 -05:00 committed by Lynn Boger
parent a96487613e
commit cd1fc87156
9 changed files with 132 additions and 6 deletions

View File

@ -670,7 +670,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
r := v.Reg()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG

View File

@ -1273,3 +1273,5 @@
(PrefetchCache ptr mem) => (DCBT ptr mem [0])
(PrefetchCacheStreamed ptr mem) => (DCBT ptr mem [16])
// Use byte reverse instructions on Power10
(Bswap(16|32|64) x) && buildcfg.GOPPC64>=10 => (BR(H|W|D) x)

View File

@ -295,6 +295,9 @@ func init() {
{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"}, // arg0^arg1 sets CC
{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 (integer)
{name: "BRD", argLength: 1, reg: gp11, asm: "BRD"}, // reversebytes64(arg0)
{name: "BRW", argLength: 1, reg: gp11, asm: "BRW"}, // reversebytes32(arg0)
{name: "BRH", argLength: 1, reg: gp11, asm: "BRH"}, // reversebytes16(arg0)
{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point)
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point)
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision)

View File

@ -238,6 +238,7 @@ var genericOps = []opData{
{name: "BitLen32", argLength: 1}, // Number of bits in arg[0] (returns 0-32)
{name: "BitLen64", argLength: 1}, // Number of bits in arg[0] (returns 0-64)
{name: "Bswap16", argLength: 1}, // Swap bytes
{name: "Bswap32", argLength: 1}, // Swap bytes
{name: "Bswap64", argLength: 1}, // Swap bytes

View File

@ -2161,6 +2161,9 @@ const (
OpPPC64XORCC
OpPPC64EQV
OpPPC64NEG
OpPPC64BRD
OpPPC64BRW
OpPPC64BRH
OpPPC64FNEG
OpPPC64FSQRT
OpPPC64FSQRTS
@ -2962,6 +2965,7 @@ const (
OpBitLen16
OpBitLen32
OpBitLen64
OpBswap16
OpBswap32
OpBswap64
OpBitRev8
@ -29013,6 +29017,45 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BRD",
argLen: 1,
asm: ppc64.ABRD,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "BRW",
argLen: 1,
asm: ppc64.ABRW,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "BRH",
argLen: 1,
asm: ppc64.ABRH,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "FNEG",
argLen: 1,
@ -38564,6 +38607,11 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
{
name: "Bswap16",
argLen: 1,
generic: true,
},
{
name: "Bswap32",
argLen: 1,

View File

@ -107,6 +107,12 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpBitLen32(v)
case OpBitLen64:
return rewriteValuePPC64_OpBitLen64(v)
case OpBswap16:
return rewriteValuePPC64_OpBswap16(v)
case OpBswap32:
return rewriteValuePPC64_OpBswap32(v)
case OpBswap64:
return rewriteValuePPC64_OpBswap64(v)
case OpCeil:
v.Op = OpPPC64FCEIL
return true
@ -1122,6 +1128,54 @@ func rewriteValuePPC64_OpBitLen64(v *Value) bool {
return true
}
}
func rewriteValuePPC64_OpBswap16(v *Value) bool {
v_0 := v.Args[0]
// match: (Bswap16 x)
// cond: buildcfg.GOPPC64>=10
// result: (BRH x)
for {
x := v_0
if !(buildcfg.GOPPC64 >= 10) {
break
}
v.reset(OpPPC64BRH)
v.AddArg(x)
return true
}
return false
}
func rewriteValuePPC64_OpBswap32(v *Value) bool {
v_0 := v.Args[0]
// match: (Bswap32 x)
// cond: buildcfg.GOPPC64>=10
// result: (BRW x)
for {
x := v_0
if !(buildcfg.GOPPC64 >= 10) {
break
}
v.reset(OpPPC64BRW)
v.AddArg(x)
return true
}
return false
}
func rewriteValuePPC64_OpBswap64(v *Value) bool {
v_0 := v.Args[0]
// match: (Bswap64 x)
// cond: buildcfg.GOPPC64>=10
// result: (BRD x)
for {
x := v_0
if !(buildcfg.GOPPC64 >= 10) {
break
}
v.reset(OpPPC64BRD)
v.AddArg(x)
return true
}
return false
}
func rewriteValuePPC64_OpCom16(v *Value) bool {
v_0 := v.Args[0]
// match: (Com16 x)

View File

@ -4000,17 +4000,23 @@ func InitTables() {
},
sys.ARM64, sys.PPC64)
/* Use only on Power10 as the new byte reverse instructions that Power10 provide
make it worthwhile as an intrinsic */
brev_arch := []sys.ArchFamily{sys.AMD64, sys.ARM64, sys.ARM, sys.S390X}
if buildcfg.GOPPC64 >= 10 {
brev_arch = append(brev_arch, sys.PPC64)
}
/******** runtime/internal/sys ********/
addF("runtime/internal/sys", "Bswap32",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
},
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X)
brev_arch...)
addF("runtime/internal/sys", "Bswap64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
},
sys.AMD64, sys.ARM64, sys.ARM, sys.S390X)
brev_arch...)
/****** Prefetch ******/
makePrefetchFunc := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
@ -4537,7 +4543,16 @@ func InitTables() {
alias("math/bits", "ReverseBytes64", "runtime/internal/sys", "Bswap64", all...)
alias("math/bits", "ReverseBytes32", "runtime/internal/sys", "Bswap32", all...)
// ReverseBytes inlines correctly, no need to intrinsify it.
// ReverseBytes16 lowers to a rotate, no need for anything special here.
// Nothing special is needed for targets where ReverseBytes16 lowers to a rotate
// On Power10, 16-bit rotate is not available so use BRH instruction
if buildcfg.GOPPC64 >= 10 {
addF("math/bits", "ReverseBytes16",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT], args[0])
},
sys.PPC64)
}
addF("math/bits", "Len64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])

View File

@ -198,6 +198,7 @@ func ReverseBytes64(n uint64) uint64 {
// amd64:"BSWAPQ"
// s390x:"MOVDBR"
// arm64:"REV"
// ppc64x/power10: "BRD"
return bits.ReverseBytes64(n)
}
@ -205,6 +206,7 @@ func ReverseBytes32(n uint32) uint32 {
// amd64:"BSWAPL"
// s390x:"MOVWBR"
// arm64:"REVW"
// ppc64x/power10: "BRW"
return bits.ReverseBytes32(n)
}
@ -214,6 +216,7 @@ func ReverseBytes16(n uint16) uint16 {
// arm/5:"SLL","SRL","ORR"
// arm/6:"REV16"
// arm/7:"REV16"
// ppc64x/power10: "BRH"
return bits.ReverseBytes16(n)
}

View File

@ -1649,8 +1649,8 @@ var (
"loong64": {},
"mips": {"GOMIPS", "hardfloat", "softfloat"},
"mips64": {"GOMIPS64", "hardfloat", "softfloat"},
"ppc64": {"GOPPC64", "power8", "power9"},
"ppc64le": {"GOPPC64", "power8", "power9"},
"ppc64": {"GOPPC64", "power8", "power9", "power10"},
"ppc64le": {"GOPPC64", "power8", "power9", "power10"},
"ppc64x": {}, // A pseudo-arch representing both ppc64 and ppc64le
"s390x": {},
"wasm": {},