From 583d750fa119d504686c737be6a898994b674b69 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhao Date: Sat, 2 Nov 2024 15:40:13 +0800 Subject: [PATCH] cmd/compile: wire up bits.Reverse intrinsics for loong64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Micro-benchmark results on Loongson 3A5000 and 3A6000: goos: linux goarch: loong64 pkg: math/bits cpu: Loongson-3A6000 @ 2500.00MHz | CL 624576 | this CL | | sec/op | sec/op vs base | Reverse 2.8130n ± 0% 0.8008n ± 0% -71.53% (p=0.000 n=20) Reverse8 0.7014n ± 0% 0.4040n ± 0% -42.40% (p=0.000 n=20) Reverse16 1.2975n ± 0% 0.6632n ± 1% -48.89% (p=0.000 n=20) Reverse32 2.7520n ± 0% 0.4042n ± 0% -85.31% (p=0.000 n=20) Reverse64 2.8970n ± 0% 0.4041n ± 0% -86.05% (p=0.000 n=20) geomean 1.828n 0.5116n -72.01% goos: linux goarch: loong64 pkg: math/bits cpu: Loongson-3A5000 @ 2500.00MHz | CL 624576 | this CL | | sec/op | sec/op vs base | Reverse 4.0050n ± 0% 0.8011n ± 0% -80.00% (p=0.000 n=20) Reverse8 0.8010n ± 0% 0.5210n ± 1% -34.96% (p=0.000 n=20) Reverse16 1.6160n ± 0% 0.6008n ± 0% -62.82% (p=0.000 n=20) Reverse32 3.8550n ± 0% 0.5179n ± 0% -86.57% (p=0.000 n=20) Reverse64 3.8050n ± 0% 0.5177n ± 0% -86.40% (p=0.000 n=20) geomean 2.378n 0.5828n -75.49% Updates #59120 This patch is a copy of CL 483656. Co-authored-by: WANG Xuerui Change-Id: I98681091763279279c8404bd0295785f13ea1c8e Reviewed-on: https://go-review.googlesource.com/c/go/+/624276 Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Reviewed-by: David Chase --- src/cmd/compile/internal/loong64/ssa.go | 3 ++ .../compile/internal/ssa/_gen/LOONG64.rules | 4 ++ .../compile/internal/ssa/_gen/LOONG64Ops.go | 4 ++ src/cmd/compile/internal/ssa/opGen.go | 42 +++++++++++++++++++ .../compile/internal/ssa/rewriteLOONG64.go | 26 ++++++++++++ src/cmd/compile/internal/ssagen/intrinsics.go | 10 ++--- .../internal/ssagen/intrinsics_test.go | 5 +++ test/codegen/mathbits.go | 29 +++++++++++++ 8 files changed, 118 insertions(+), 5 deletions(-) diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index e7cb82a280a..4c9bcfe46ed 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -516,6 +516,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpLOONG64REVB2H, ssa.OpLOONG64REVB2W, ssa.OpLOONG64REVBV, + ssa.OpLOONG64BITREV4B, + ssa.OpLOONG64BITREVW, + ssa.OpLOONG64BITREVV, ssa.OpLOONG64ABSD: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index bac1f27b1dc..6ff98a46f75 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -148,6 +148,10 @@ (BitLen64 x) => (NEGV (SUBVconst [64] (CLZV x))) (BitLen32 x) => (NEGV (SUBVconst [32] (CLZW x))) (Bswap(16|32|64) ...) => (REVB(2H|2W|V) ...) +(BitRev8 ...) => (BITREV4B ...) +(BitRev16 x) => (REVB2H (BITREV4B x)) +(BitRev32 ...) => (BITREVW ...) +(BitRev64 ...) => (BITREVV ...) // math package intrinsics (Sqrt ...) => (SQRTD ...) diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go index 0da0bb82279..a8a38ee7b8c 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go @@ -216,6 +216,10 @@ func init() { {name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655 {name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"}, // Swap bytes: 0x1122334455667788 -> 0x8877665544332211 + {name: "BITREV4B", argLength: 1, reg: gp11, asm: "BITREV4B"}, // Reverse the bits of each byte inside a 32-bit arg[0] + {name: "BITREVW", argLength: 1, reg: gp11, asm: "BITREVW"}, // Reverse the bits in a 32-bit arg[0] + {name: "BITREVV", argLength: 1, reg: gp11, asm: "BITREVV"}, // Reverse the bits in a 64-bit arg[0] + {name: "FMINF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMINF", commutative: true, typ: "Float32"}, // min(arg0, arg1), float32 {name: "FMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1), float64 {name: "FMAXF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXF", commutative: true, typ: "Float32"}, // max(arg0, arg1), float32 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index f1006a3f3ca..ae0e87702aa 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1801,6 +1801,9 @@ const ( OpLOONG64REVB2H OpLOONG64REVB2W OpLOONG64REVBV + OpLOONG64BITREV4B + OpLOONG64BITREVW + OpLOONG64BITREVV OpLOONG64FMINF OpLOONG64FMIND OpLOONG64FMAXF @@ -24201,6 +24204,45 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "BITREV4B", + argLen: 1, + asm: loong64.ABITREV4B, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { + name: "BITREVW", + argLen: 1, + asm: loong64.ABITREVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { + name: "BITREVV", + argLen: 1, + asm: loong64.ABITREVV, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "FMINF", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index 88c5036b541..779ec89134e 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -125,6 +125,17 @@ func rewriteValueLOONG64(v *Value) bool { return rewriteValueLOONG64_OpBitLen32(v) case OpBitLen64: return rewriteValueLOONG64_OpBitLen64(v) + case OpBitRev16: + return rewriteValueLOONG64_OpBitRev16(v) + case OpBitRev32: + v.Op = OpLOONG64BITREVW + return true + case OpBitRev64: + v.Op = OpLOONG64BITREVV + return true + case OpBitRev8: + v.Op = OpLOONG64BITREV4B + return true case OpBswap16: v.Op = OpLOONG64REVB2H return true @@ -975,6 +986,21 @@ func rewriteValueLOONG64_OpBitLen64(v *Value) bool { return true } } +func rewriteValueLOONG64_OpBitRev16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (BitRev16 x) + // result: (REVB2H (BITREV4B x)) + for { + t := v.Type + x := v_0 + v.reset(OpLOONG64REVB2H) + v0 := b.NewValue0(v.Pos, OpLOONG64BITREV4B, t) + v0.AddArg(x) + v.AddArg(v0) + return true + } +} func rewriteValueLOONG64_OpCom16(v *Value) bool { v_0 := v.Args[0] b := v.Block diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index 33345e9296e..db335ee8b38 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -946,27 +946,27 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0]) }, - sys.ARM64) + sys.ARM64, sys.Loong64) addF("math/bits", "Reverse32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpBitRev32, types.Types[types.TINT], args[0]) }, - sys.ARM64) + sys.ARM64, sys.Loong64) addF("math/bits", "Reverse16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpBitRev16, types.Types[types.TINT], args[0]) }, - sys.ARM64) + sys.ARM64, sys.Loong64) addF("math/bits", "Reverse8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpBitRev8, types.Types[types.TINT], args[0]) }, - sys.ARM64) + sys.ARM64, sys.Loong64) addF("math/bits", "Reverse", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0]) }, - sys.ARM64) + sys.ARM64, sys.Loong64) addF("math/bits", "RotateLeft8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue2(ssa.OpRotateLeft8, types.Types[types.TUINT8], args[0], args[1]) diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go index a8656cc3d4b..ca9e1c9e0ac 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics_test.go +++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go @@ -421,6 +421,11 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"loong64", "math/bits", "Len16"}: struct{}{}, {"loong64", "math/bits", "Len32"}: struct{}{}, {"loong64", "math/bits", "Len64"}: struct{}{}, + {"loong64", "math/bits", "Reverse"}: struct{}{}, + {"loong64", "math/bits", "Reverse8"}: struct{}{}, + {"loong64", "math/bits", "Reverse16"}: struct{}{}, + {"loong64", "math/bits", "Reverse32"}: struct{}{}, + {"loong64", "math/bits", "Reverse64"}: struct{}{}, {"loong64", "math/bits", "RotateLeft"}: struct{}{}, {"loong64", "math/bits", "RotateLeft32"}: struct{}{}, {"loong64", "math/bits", "RotateLeft64"}: struct{}{}, diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 715f67a3c8b..a3d1143424b 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -199,6 +199,35 @@ func OnesCount8(n uint8) int { return bits.OnesCount8(n) } +// ------------------ // +// bits.Reverse // +// ------------------ // + +func Reverse(n uint) uint { + // loong64:"BITREVV" + return bits.Reverse(n) +} + +func Reverse64(n uint64) uint64 { + // loong64:"BITREVV" + return bits.Reverse64(n) +} + +func Reverse32(n uint32) uint32 { + // loong64:"BITREVW" + return bits.Reverse32(n) +} + +func Reverse16(n uint16) uint16 { + // loong64:"BITREV4B","REVB2H" + return bits.Reverse16(n) +} + +func Reverse8(n uint8) uint8 { + // loong64:"BITREV4B" + return bits.Reverse8(n) +} + // ----------------------- // // bits.ReverseBytes // // ----------------------- //