1
0
mirror of https://github.com/golang/go synced 2024-09-30 13:18:34 -06:00

cmd/compile/internal: intrinsify publicationBarrier on riscv64

This enables publicationBarrier to be used as an intrinsic
on riscv64, optimizing the required function call and return
instructions for invoking the "runtime.publicationBarrier"
function.

This function is called by mallocgc. The benchmark results for malloc tested on Lichee-Pi-4A(TH1520, RISC-V 2.0G C910 x4) are as follows.

goos: linux
goarch: riscv64
pkg: runtime
                    │   old.txt   │              new.txt               │
                    │   sec/op    │   sec/op     vs base               │
Malloc8-4             92.78n ± 1%   90.77n ± 1%  -2.17% (p=0.001 n=10)
Malloc16-4            156.5n ± 1%   151.7n ± 2%  -3.10% (p=0.000 n=10)
MallocTypeInfo8-4     131.7n ± 1%   130.6n ± 2%       ~ (p=0.165 n=10)
MallocTypeInfo16-4    186.5n ± 2%   186.2n ± 1%       ~ (p=0.956 n=10)
MallocLargeStruct-4   1.345µ ± 1%   1.355µ ± 1%       ~ (p=0.093 n=10)
geomean               216.9n        214.5n       -1.10%


Change-Id: Ieab6c02309614bac5c1b12b5ee3311f988ff644d
Reviewed-on: https://go-review.googlesource.com/c/go/+/531719
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: M Zhuo <mzh@golangcn.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Joel Sing <joel@sing.id.au>
This commit is contained in:
Xianmiao Qu 2023-09-30 16:12:34 +08:00 committed by Gopher Robot
parent 1b5cfc6ca6
commit d98f74b31e
6 changed files with 22 additions and 1 deletions

View File

@ -695,6 +695,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Sym = ir.Syms.Duffcopy
p.To.Offset = v.AuxInt
case ssa.OpRISCV64LoweredPubBarrier:
// FENCE
s.Prog(v.Op.Asm())
case ssa.OpRISCV64LoweredRound32F, ssa.OpRISCV64LoweredRound64F:
// input is already rounded

View File

@ -412,6 +412,9 @@
// Write barrier.
(WB ...) => (LoweredWB ...)
// Publication barrier as intrinsic
(PubBarrier ...) => (LoweredPubBarrier ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)

View File

@ -399,6 +399,9 @@ func init() {
// Returns a pointer to a write barrier buffer in X24.
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ (gpMask | regNamed["g"])) | regNamed["X1"], outputs: []regMask{regNamed["X24"]}}, clobberFlags: true, aux: "Int64"},
// Do data barrier. arg0=memorys
{name: "LoweredPubBarrier", argLength: 1, asm: "FENCE", hasSideEffects: true},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.

View File

@ -2429,6 +2429,7 @@ const (
OpRISCV64LoweredGetCallerSP
OpRISCV64LoweredGetCallerPC
OpRISCV64LoweredWB
OpRISCV64LoweredPubBarrier
OpRISCV64LoweredPanicBoundsA
OpRISCV64LoweredPanicBoundsB
OpRISCV64LoweredPanicBoundsC
@ -32583,6 +32584,13 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "LoweredPubBarrier",
argLen: 1,
hasSideEffects: true,
asm: riscv.AFENCE,
reg: regInfo{},
},
{
name: "LoweredPanicBoundsA",
auxType: auxInt64,

View File

@ -432,6 +432,9 @@ func rewriteValueRISCV64(v *Value) bool {
return true
case OpPanicBounds:
return rewriteValueRISCV64_OpPanicBounds(v)
case OpPubBarrier:
v.Op = OpRISCV64LoweredPubBarrier
return true
case OpRISCV64ADD:
return rewriteValueRISCV64_OpRISCV64ADD(v)
case OpRISCV64ADDI:

View File

@ -4147,7 +4147,7 @@ func InitTables() {
s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem())
return nil
},
sys.ARM64, sys.PPC64)
sys.ARM64, sys.PPC64, sys.RISCV64)
brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X}
if buildcfg.GOPPC64 >= 10 {