mirror of
https://github.com/golang/go
synced 2024-11-11 18:21:40 -07:00
cmd/compiler,internal/runtime/atomic: optimize Load{64,32,8} on loong64
The LoadAcquire barrier on Loong64 is "dbar 0x14", using the correct barrier in Load{8,32,64} implementation can improve performance. goos: linux goarch: loong64 pkg: internal/runtime/atomic cpu: Loongson-3A6000-HV @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | AtomicLoad64 17.210n ± 0% 4.402n ± 0% -74.42% (p=0.000 n=20) AtomicLoad64-2 17.210n ± 0% 4.402n ± 0% -74.42% (p=0.000 n=20) AtomicLoad64-4 17.210n ± 0% 4.402n ± 0% -74.42% (p=0.000 n=20) AtomicLoad 17.220n ± 0% 4.402n ± 0% -74.44% (p=0.000 n=20) AtomicLoad-2 17.210n ± 0% 4.402n ± 0% -74.42% (p=0.000 n=20) AtomicLoad-4 17.210n ± 0% 4.402n ± 0% -74.42% (p=0.000 n=20) AtomicLoad8 17.210n ± 0% 4.402n ± 0% -74.42% (p=0.000 n=20) AtomicLoad8-2 17.210n ± 0% 4.402n ± 0% -74.42% (p=0.000 n=20) AtomicLoad8-4 17.210n ± 0% 4.402n ± 0% -74.42% (p=0.000 n=20) geomean 17.21n 4.402n -74.42% goos: linux goarch: loong64 pkg: internal/runtime/atomic cpu: Loongson-3A5000 @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | AtomicLoad64 18.82n ± 0% 10.41n ± 0% -44.69% (p=0.000 n=20) AtomicLoad64-2 18.81n ± 0% 10.41n ± 0% -44.66% (p=0.000 n=20) AtomicLoad64-4 18.82n ± 0% 10.41n ± 0% -44.69% (p=0.000 n=20) AtomicLoad 18.81n ± 0% 10.41n ± 0% -44.66% (p=0.000 n=20) AtomicLoad-2 18.82n ± 0% 10.41n ± 0% -44.69% (p=0.000 n=20) AtomicLoad-4 18.81n ± 0% 10.42n ± 0% -44.63% (p=0.000 n=20) AtomicLoad8 18.82n ± 0% 10.41n ± 0% -44.69% (p=0.000 n=20) AtomicLoad8-2 18.82n ± 0% 10.41n ± 0% -44.70% (p=0.000 n=20) AtomicLoad8-4 18.82n ± 0% 10.41n ± 0% -44.69% (p=0.000 n=20) geomean 18.82n 10.41n -44.68% Change-Id: I9d47c9d6f359c4f2e41035ca656429aade2e7847 Reviewed-on: https://go-review.googlesource.com/c/go/+/581357 Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
01ab9a016a
commit
3214129a83
@ -468,6 +468,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
|
||||
s.UseArgs(16) // space used in callee args area by assembly stubs
|
||||
case ssa.OpLOONG64LoweredAtomicLoad8, ssa.OpLOONG64LoweredAtomicLoad32, ssa.OpLOONG64LoweredAtomicLoad64:
|
||||
// MOVB (Rarg0), Rout
|
||||
// DBAR 0x14
|
||||
as := loong64.AMOVV
|
||||
switch v.Op {
|
||||
case ssa.OpLOONG64LoweredAtomicLoad8:
|
||||
@ -475,13 +477,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
case ssa.OpLOONG64LoweredAtomicLoad32:
|
||||
as = loong64.AMOVW
|
||||
}
|
||||
s.Prog(loong64.ADBAR)
|
||||
p := s.Prog(as)
|
||||
p.From.Type = obj.TYPE_MEM
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg0()
|
||||
s.Prog(loong64.ADBAR)
|
||||
p1 := s.Prog(loong64.ADBAR)
|
||||
p1.From.Type = obj.TYPE_CONST
|
||||
p1.From.Offset = 0x14
|
||||
|
||||
case ssa.OpLOONG64LoweredAtomicStore8, ssa.OpLOONG64LoweredAtomicStore32, ssa.OpLOONG64LoweredAtomicStore64:
|
||||
as := loong64.AMOVV
|
||||
switch v.Op {
|
||||
|
@ -319,38 +319,30 @@ TEXT ·Oruintptr(SB), NOSPLIT, $0-24
|
||||
// uint32 internal∕runtime∕atomic·Load(uint32 volatile* ptr)
|
||||
TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
|
||||
MOVV ptr+0(FP), R19
|
||||
DBAR
|
||||
MOVWU 0(R19), R19
|
||||
DBAR
|
||||
DBAR $0x14 // LoadAcquire barrier
|
||||
MOVW R19, ret+8(FP)
|
||||
RET
|
||||
|
||||
// uint8 internal∕runtime∕atomic·Load8(uint8 volatile* ptr)
|
||||
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
|
||||
MOVV ptr+0(FP), R19
|
||||
DBAR
|
||||
MOVBU 0(R19), R19
|
||||
DBAR
|
||||
DBAR $0x14
|
||||
MOVB R19, ret+8(FP)
|
||||
RET
|
||||
|
||||
// uint64 internal∕runtime∕atomic·Load64(uint64 volatile* ptr)
|
||||
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVV ptr+0(FP), R19
|
||||
DBAR
|
||||
MOVV 0(R19), R19
|
||||
DBAR
|
||||
DBAR $0x14
|
||||
MOVV R19, ret+8(FP)
|
||||
RET
|
||||
|
||||
// void *internal∕runtime∕atomic·Loadp(void *volatile *ptr)
|
||||
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVV ptr+0(FP), R19
|
||||
DBAR
|
||||
MOVV 0(R19), R19
|
||||
DBAR
|
||||
MOVV R19, ret+8(FP)
|
||||
RET
|
||||
JMP ·Load64(SB)
|
||||
|
||||
// uint32 internal∕runtime∕atomic·LoadAcq(uint32 volatile* ptr)
|
||||
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
|
||||
|
@ -43,6 +43,14 @@ func BenchmarkAtomicStore(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAtomicLoad8(b *testing.B) {
|
||||
var x uint8
|
||||
sink = &x
|
||||
for i := 0; i < b.N; i++ {
|
||||
atomic.Load8(&x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAnd8(b *testing.B) {
|
||||
var x [512]uint8 // give byte its own cache line
|
||||
sink = &x
|
||||
|
Loading…
Reference in New Issue
Block a user