mirror of
https://github.com/golang/go
synced 2024-11-25 10:17:57 -07:00
cmd/compile: wire up Bswap/ReverseBytes intrinsics for loong64
Micro-benchmark results on Loongson 3A5000 and 3A6000:

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
               |  bench.old   |              bench.new              |
               |    sec/op    |    sec/op     vs base               |
ReverseBytes     2.0020n ± 0%   0.4040n ± 0%  -79.82% (p=0.000 n=20)
ReverseBytes16   0.8866n ± 1%   0.8007n ± 0%   -9.69% (p=0.000 n=20)
ReverseBytes32   1.2195n ± 0%   0.8007n ± 0%  -34.34% (p=0.000 n=20)
ReverseBytes64   2.0705n ± 0%   0.8008n ± 0%  -61.32% (p=0.000 n=20)
geomean          1.455n         0.6749n       -53.62%

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
               |  bench.old   |              bench.new              |
               |    sec/op    |    sec/op     vs base               |
ReverseBytes     2.8040n ± 0%   0.5205n ± 0%  -81.44% (p=0.000 n=20)
ReverseBytes16   0.7066n ± 0%   0.8011n ± 0%  +13.37% (p=0.000 n=20)
ReverseBytes32   1.5500n ± 0%   0.8010n ± 0%  -48.32% (p=0.000 n=20)
ReverseBytes64   2.7665n ± 0%   0.8010n ± 0%  -71.05% (p=0.000 n=20)
geomean          1.707n         0.7192n       -57.87%

Updates #59120

This patch is a copy of CL 483357.
Co-authored-by: WANG Xuerui <git@xen0n.name>

Change-Id: If355354cd031533df91991fcc3392e5a6c314295
Reviewed-on: https://go-review.googlesource.com/c/go/+/624576
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
This commit is contained in:
parent
d98c51809d
commit
d6fb0ab2c7
@ -487,6 +487,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
ssa.OpLOONG64CLZV,
|
||||
ssa.OpLOONG64SQRTD,
|
||||
ssa.OpLOONG64SQRTF,
|
||||
ssa.OpLOONG64REVB2H,
|
||||
ssa.OpLOONG64REVB2W,
|
||||
ssa.OpLOONG64REVBV,
|
||||
ssa.OpLOONG64ABSD:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
|
@ -147,6 +147,7 @@
|
||||
|
||||
(BitLen64 <t> x) => (NEGV <t> (SUBVconst <t> [64] (CLZV <t> x)))
|
||||
(BitLen32 <t> x) => (NEGV <t> (SUBVconst <t> [32] (CLZW <t> x)))
|
||||
(Bswap(16|32|64) ...) => (REVB(2H|2W|V) ...)
|
||||
|
||||
// math package intrinsics
|
||||
(Sqrt ...) => (SQRTD ...)
|
||||
|
@ -202,6 +202,10 @@ func init() {
|
||||
{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // Count leading (high order) zeroes (returns 0-32)
|
||||
{name: "CLZV", argLength: 1, reg: gp11, asm: "CLZV"}, // Count leading (high order) zeroes (returns 0-64)
|
||||
|
||||
{name: "REVB2H", argLength: 1, reg: gp11, asm: "REVB2H"}, // Swap bytes: 0x11223344 -> 0x22114433 (sign extends to 64 bits)
|
||||
{name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655
|
||||
{name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"}, // Swap bytes: 0x1122334455667788 -> 0x8877665544332211
|
||||
|
||||
{name: "FMINF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMINF", commutative: true, typ: "Float32"}, // min(arg0, arg1), float32
|
||||
{name: "FMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1), float64
|
||||
{name: "FMAXF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXF", commutative: true, typ: "Float32"}, // max(arg0, arg1), float32
|
||||
|
@ -1790,6 +1790,9 @@ const (
|
||||
OpLOONG64SQRTF
|
||||
OpLOONG64CLZW
|
||||
OpLOONG64CLZV
|
||||
OpLOONG64REVB2H
|
||||
OpLOONG64REVB2W
|
||||
OpLOONG64REVBV
|
||||
OpLOONG64FMINF
|
||||
OpLOONG64FMIND
|
||||
OpLOONG64FMAXF
|
||||
@ -24012,6 +24015,45 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVB2H",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVB2H,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVB2W",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVB2W,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVBV",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVBV,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMINF",
|
||||
argLen: 2,
|
||||
|
@ -94,6 +94,15 @@ func rewriteValueLOONG64(v *Value) bool {
|
||||
return rewriteValueLOONG64_OpBitLen32(v)
|
||||
case OpBitLen64:
|
||||
return rewriteValueLOONG64_OpBitLen64(v)
|
||||
case OpBswap16:
|
||||
v.Op = OpLOONG64REVB2H
|
||||
return true
|
||||
case OpBswap32:
|
||||
v.Op = OpLOONG64REVB2W
|
||||
return true
|
||||
case OpBswap64:
|
||||
v.Op = OpLOONG64REVBV
|
||||
return true
|
||||
case OpClosureCall:
|
||||
v.Op = OpLOONG64CALLclosure
|
||||
return true
|
||||
|
@ -183,7 +183,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
||||
},
|
||||
all...)
|
||||
|
||||
brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X}
|
||||
brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X}
|
||||
if cfg.goppc64 >= 10 {
|
||||
// Use only on Power10 as the new byte reverse instructions that Power10 provides
|
||||
// make it worthwhile as an intrinsic
|
||||
@ -804,6 +804,11 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
||||
sys.S390X)
|
||||
alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...)
|
||||
alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...)
|
||||
addF("math/bits", "ReverseBytes16",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
|
||||
},
|
||||
sys.Loong64)
|
||||
// ReverseBytes inlines correctly, no need to intrinsify it.
|
||||
// Nothing special is needed for targets where ReverseBytes16 lowers to a rotate
|
||||
// On Power10, 16-bit rotate is not available so use BRH instruction
|
||||
|
@ -390,6 +390,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
||||
{"loong64", "internal/runtime/math", "Add64"}: struct{}{},
|
||||
{"loong64", "internal/runtime/math", "Mul64"}: struct{}{},
|
||||
{"loong64", "internal/runtime/math", "MulUintptr"}: struct{}{},
|
||||
{"loong64", "internal/runtime/sys", "Bswap32"}: struct{}{},
|
||||
{"loong64", "internal/runtime/sys", "Bswap64"}: struct{}{},
|
||||
{"loong64", "internal/runtime/sys", "GetCallerPC"}: struct{}{},
|
||||
{"loong64", "internal/runtime/sys", "GetCallerSP"}: struct{}{},
|
||||
{"loong64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{},
|
||||
@ -411,6 +413,9 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
||||
{"loong64", "math/bits", "RotateLeft"}: struct{}{},
|
||||
{"loong64", "math/bits", "RotateLeft32"}: struct{}{},
|
||||
{"loong64", "math/bits", "RotateLeft64"}: struct{}{},
|
||||
{"loong64", "math/bits", "ReverseBytes16"}: struct{}{},
|
||||
{"loong64", "math/bits", "ReverseBytes32"}: struct{}{},
|
||||
{"loong64", "math/bits", "ReverseBytes64"}: struct{}{},
|
||||
{"loong64", "math/bits", "Sub"}: struct{}{},
|
||||
{"loong64", "math/bits", "Sub64"}: struct{}{},
|
||||
{"loong64", "runtime", "KeepAlive"}: struct{}{},
|
||||
|
@ -208,6 +208,7 @@ func ReverseBytes(n uint) uint {
|
||||
// 386:"BSWAPL"
|
||||
// s390x:"MOVDBR"
|
||||
// arm64:"REV"
|
||||
// loong64:"REVBV"
|
||||
return bits.ReverseBytes(n)
|
||||
}
|
||||
|
||||
@ -217,6 +218,7 @@ func ReverseBytes64(n uint64) uint64 {
|
||||
// s390x:"MOVDBR"
|
||||
// arm64:"REV"
|
||||
// ppc64x/power10: "BRD"
|
||||
// loong64:"REVBV"
|
||||
return bits.ReverseBytes64(n)
|
||||
}
|
||||
|
||||
@ -225,6 +227,7 @@ func ReverseBytes32(n uint32) uint32 {
|
||||
// 386:"BSWAPL"
|
||||
// s390x:"MOVWBR"
|
||||
// arm64:"REVW"
|
||||
// loong64:"REVB2W"
|
||||
// ppc64x/power10: "BRW"
|
||||
return bits.ReverseBytes32(n)
|
||||
}
|
||||
@ -235,6 +238,7 @@ func ReverseBytes16(n uint16) uint16 {
|
||||
// arm/5:"SLL","SRL","ORR"
|
||||
// arm/6:"REV16"
|
||||
// arm/7:"REV16"
|
||||
// loong64:"REVB2H"
|
||||
// ppc64x/power10: "BRH"
|
||||
return bits.ReverseBytes16(n)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user