1
0
mirror of https://github.com/golang/go synced 2024-11-25 00:57:59 -07:00

cmd/compile,cmd/go,cmd/internal,runtime: remove dynamic checks for atomics for ARM64 targets that support LSE

Remove dynamic checks for atomic instructions for ARM64 targets that support LSE extension.

For #66131

Change-Id: I0ec1b183a3f4ea4c8a537430646e6bc4b4f64271
Reviewed-on: https://go-review.googlesource.com/c/go/+/569536
Reviewed-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Fannie Zhang <Fannie.Zhang@arm.com>
Reviewed-by: Shu-Chun Weng <scw@google.com>
This commit is contained in:
Andrey Bokhanko 2024-03-06 17:44:03 +03:00 committed by Cherry Mui
parent aa1b50e179
commit 0ae8468b20
8 changed files with 143 additions and 32 deletions

View File

@ -4387,31 +4387,35 @@ func InitTables() {
makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ, rtyp types.Kind, emit atomicOpEmitter) intrinsicBuilder {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
// Target Atomic feature is identified by dynamic detection
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
v := s.load(types.Types[types.TBOOL], addr)
b := s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(v)
bTrue := s.f.NewBlock(ssa.BlockPlain)
bFalse := s.f.NewBlock(ssa.BlockPlain)
bEnd := s.f.NewBlock(ssa.BlockPlain)
b.AddEdgeTo(bTrue)
b.AddEdgeTo(bFalse)
b.Likely = ssa.BranchLikely
if buildcfg.GOARM64.LSE {
emit(s, n, args, op1, typ)
} else {
// Target Atomic feature is identified by dynamic detection
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
v := s.load(types.Types[types.TBOOL], addr)
b := s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(v)
bTrue := s.f.NewBlock(ssa.BlockPlain)
bFalse := s.f.NewBlock(ssa.BlockPlain)
bEnd := s.f.NewBlock(ssa.BlockPlain)
b.AddEdgeTo(bTrue)
b.AddEdgeTo(bFalse)
b.Likely = ssa.BranchLikely
// We have atomic instructions - use it directly.
s.startBlock(bTrue)
emit(s, n, args, op1, typ)
s.endBlock().AddEdgeTo(bEnd)
// We have atomic instructions - use it directly.
s.startBlock(bTrue)
emit(s, n, args, op1, typ)
s.endBlock().AddEdgeTo(bEnd)
// Use original instruction sequence.
s.startBlock(bFalse)
emit(s, n, args, op0, typ)
s.endBlock().AddEdgeTo(bEnd)
// Use original instruction sequence.
s.startBlock(bFalse)
emit(s, n, args, op0, typ)
s.endBlock().AddEdgeTo(bEnd)
// Merge results.
s.startBlock(bEnd)
// Merge results.
s.startBlock(bEnd)
}
if rtyp == types.TNIL {
return nil
} else {

View File

@ -402,6 +402,7 @@ var (
// Used in envcmd.MkEnv and build ID computations.
GOARM = envOr("GOARM", fmt.Sprint(buildcfg.GOARM))
GOARM64 = envOr("GOARM64", fmt.Sprint(buildcfg.GOARM64))
GO386 = envOr("GO386", buildcfg.GO386)
GOAMD64 = envOr("GOAMD64", fmt.Sprintf("%s%d", "v", buildcfg.GOAMD64))
GOMIPS = envOr("GOMIPS", buildcfg.GOMIPS)
@ -429,6 +430,8 @@ func GetArchEnv() (key, val string) {
switch Goarch {
case "arm":
return "GOARM", GOARM
case "arm64":
return "GOARM64", GOARM64
case "386":
return "GO386", GO386
case "amd64":

View File

@ -8,6 +8,7 @@ import (
"bufio"
"bytes"
"fmt"
"internal/buildcfg"
"internal/platform"
"io"
"log"
@ -378,6 +379,13 @@ func asmArgs(a *Action, p *load.Package) []any {
}
}
if cfg.Goarch == "arm64" {
g, err := buildcfg.ParseGoarm64(cfg.GOARM64)
if err == nil && g.LSE {
args = append(args, "-D", "GOARM64_LSE")
}
}
return args
}

View File

@ -1459,7 +1459,19 @@ var (
// Regexp to extract an architecture check: architecture name (or triplet),
// followed by semi-colon, followed by a comma-separated list of opcode checks.
// Extraneous spaces are ignored.
rxAsmPlatform = regexp.MustCompile(`(\w+)(/\w+)?(/\w*)?\s*:\s*(` + reMatchCheck + `(?:\s*,\s*` + reMatchCheck + `)*)`)
//
// An example: arm64/v8.1 : -`ADD` , `SUB`
// "(\w+)" matches "arm64" (architecture name)
// "(/[\w.]+)?" matches "v8.1" (architecture version)
// "(/\w*)?" doesn't match anything here (it's an optional part of the triplet)
// "\s*:\s*" matches " : " (semi-colon)
// "(" starts a capturing group
// first reMatchCheck matches "-`ADD`"
// `(?:" starts a non-capturing group
// "\s*,\s*` matches " , "
// second reMatchCheck matches "`SUB`"
// ")*)" closes started groups; "*" means that there might be other elements in the comma-separated list
rxAsmPlatform = regexp.MustCompile(`(\w+)(/[\w.]+)?(/\w*)?\s*:\s*(` + reMatchCheck + `(?:\s*,\s*` + reMatchCheck + `)*)`)
// Regexp to extract a single opcoded check
rxAsmCheck = regexp.MustCompile(reMatchCheck)
@ -1471,7 +1483,7 @@ var (
"386": {"GO386", "sse2", "softfloat"},
"amd64": {"GOAMD64", "v1", "v2", "v3", "v4"},
"arm": {"GOARM", "5", "6", "7", "7,softfloat"},
"arm64": {},
"arm64": {"GOARM64", "v8.0", "v8.1"},
"loong64": {},
"mips": {"GOMIPS", "hardfloat", "softfloat"},
"mips64": {"GOMIPS64", "hardfloat", "softfloat"},

View File

@ -127,7 +127,7 @@ func goarm() (g goarmFeatures) {
return
}
type goarm64Features struct {
type Goarm64Features struct {
Version string
// Large Systems Extension
LSE bool
@ -139,7 +139,7 @@ type goarm64Features struct {
Crypto bool
}
func (g goarm64Features) String() string {
func (g Goarm64Features) String() string {
arm64Str := g.Version
if g.LSE {
arm64Str += ",lse"
@ -150,7 +150,7 @@ func (g goarm64Features) String() string {
return arm64Str
}
func parseGoarm64(v string) (g goarm64Features) {
func ParseGoarm64(v string) (g Goarm64Features, e error) {
const (
lseOpt = ",lse"
cryptoOpt = ",crypto"
@ -184,7 +184,7 @@ func parseGoarm64(v string) (g goarm64Features) {
// LSE extension is mandatory starting from 8.1
g.LSE = true
default:
Error = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
lseOpt, cryptoOpt)
g.Version = defaultGOARM64
}
@ -192,13 +192,14 @@ func parseGoarm64(v string) (g goarm64Features) {
return
}
func goarm64() goarm64Features {
return parseGoarm64(envOr("GOARM64", defaultGOARM64))
func goarm64() (g Goarm64Features) {
g, Error = ParseGoarm64(envOr("GOARM64", defaultGOARM64))
return
}
// Returns true if g supports giving ARM64 ISA
// Note that this function doesn't accept / test suffixes (like ",lse" or ",crypto")
func (g goarm64Features) Supports(s string) bool {
func (g Goarm64Features) Supports(s string) bool {
// We only accept "v{8-9}.{0-9}. Everything else is malformed.
if len(s) != 4 {
return false

View File

@ -75,7 +75,7 @@ func TestConfigFlags(t *testing.T) {
}
func TestGoarm64FeaturesSupports(t *testing.T) {
g := parseGoarm64("v9.3")
g, _ := ParseGoarm64("v9.3")
if !g.Supports("v9.3") {
t.Errorf("Wrong goarm64Features.Supports for v9.3, v9.3")

View File

@ -128,17 +128,21 @@ TEXT ·Store64(SB), NOSPLIT, $0-16
TEXT ·Xchg(SB), NOSPLIT, $0-20
MOVD ptr+0(FP), R0
MOVW new+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
SWPALW R1, (R0), R2
MOVW R2, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
STLXRW R1, (R0), R3
CBNZ R3, load_store_loop
MOVW R2, ret+16(FP)
RET
#endif
// uint64 Xchg64(ptr *uint64, new uint64)
// Atomically:
@ -148,17 +152,21 @@ load_store_loop:
TEXT ·Xchg64(SB), NOSPLIT, $0-24
MOVD ptr+0(FP), R0
MOVD new+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
SWPALD R1, (R0), R2
MOVD R2, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R2
STLXR R1, (R0), R3
CBNZ R3, load_store_loop
MOVD R2, ret+16(FP)
RET
#endif
// bool Cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
@ -171,14 +179,17 @@ TEXT ·Cas(SB), NOSPLIT, $0-17
MOVD ptr+0(FP), R0
MOVW old+8(FP), R1
MOVW new+12(FP), R2
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
MOVD R1, R3
CASALW R3, (R0), R2
CMP R1, R3
CSET EQ, R0
MOVB R0, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R3
CMPW R1, R3
@ -189,6 +200,7 @@ ok:
CSET EQ, R0
MOVB R0, ret+16(FP)
RET
#endif
// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
@ -202,14 +214,17 @@ TEXT ·Cas64(SB), NOSPLIT, $0-25
MOVD ptr+0(FP), R0
MOVD old+8(FP), R1
MOVD new+16(FP), R2
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
MOVD R1, R3
CASALD R3, (R0), R2
CMP R1, R3
CSET EQ, R0
MOVB R0, ret+24(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R3
CMP R1, R3
@ -220,6 +235,7 @@ ok:
CSET EQ, R0
MOVB R0, ret+24(FP)
RET
#endif
// uint32 xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
@ -228,12 +244,15 @@ ok:
TEXT ·Xadd(SB), NOSPLIT, $0-20
MOVD ptr+0(FP), R0
MOVW delta+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
LDADDALW R1, (R0), R2
ADD R1, R2
MOVW R2, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
ADDW R2, R1, R2
@ -241,6 +260,7 @@ load_store_loop:
CBNZ R3, load_store_loop
MOVW R2, ret+16(FP)
RET
#endif
// uint64 Xadd64(uint64 volatile *ptr, int64 delta)
// Atomically:
@ -249,12 +269,15 @@ load_store_loop:
TEXT ·Xadd64(SB), NOSPLIT, $0-24
MOVD ptr+0(FP), R0
MOVD delta+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
LDADDALD R1, (R0), R2
ADD R1, R2
MOVD R2, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R2
ADD R2, R1, R2
@ -262,6 +285,7 @@ load_store_loop:
CBNZ R3, load_store_loop
MOVD R2, ret+16(FP)
RET
#endif
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
B ·Xchg(SB)
@ -275,72 +299,91 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
TEXT ·And8(SB), NOSPLIT, $0-9
MOVD ptr+0(FP), R0
MOVB val+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
MVN R1, R2
LDCLRALB R2, (R0), R3
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRB (R0), R2
AND R1, R2
STLXRB R2, (R0), R3
CBNZ R3, load_store_loop
RET
#endif
TEXT ·Or8(SB), NOSPLIT, $0-9
MOVD ptr+0(FP), R0
MOVB val+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
LDORALB R1, (R0), R2
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRB (R0), R2
ORR R1, R2
STLXRB R2, (R0), R3
CBNZ R3, load_store_loop
RET
#endif
// func And(addr *uint32, v uint32)
TEXT ·And(SB), NOSPLIT, $0-12
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
MVN R1, R2
LDCLRALW R2, (R0), R3
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
AND R1, R2
STLXRW R2, (R0), R3
CBNZ R3, load_store_loop
RET
#endif
// func Or(addr *uint32, v uint32)
TEXT ·Or(SB), NOSPLIT, $0-12
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
LDORALW R1, (R0), R2
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
ORR R1, R2
STLXRW R2, (R0), R3
CBNZ R3, load_store_loop
RET
#endif
// func Or32(addr *uint32, v uint32) old uint32
TEXT ·Or32(SB), NOSPLIT, $0-20
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
LDORALW R1, (R0), R2
MOVD R2, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
ORR R1, R2, R3
@ -348,17 +391,21 @@ load_store_loop:
CBNZ R4, load_store_loop
MOVD R2, ret+16(FP)
RET
#endif
// func And32(addr *uint32, v uint32) old uint32
TEXT ·And32(SB), NOSPLIT, $0-20
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
MVN R1, R2
LDCLRALW R2, (R0), R3
MOVD R3, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
AND R1, R2, R3
@ -366,16 +413,20 @@ load_store_loop:
CBNZ R4, load_store_loop
MOVD R2, ret+16(FP)
RET
#endif
// func Or64(addr *uint64, v uint64) old uint64
TEXT ·Or64(SB), NOSPLIT, $0-24
MOVD ptr+0(FP), R0
MOVD val+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
LDORALD R1, (R0), R2
MOVD R2, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R2
ORR R1, R2, R3
@ -383,17 +434,21 @@ load_store_loop:
CBNZ R4, load_store_loop
MOVD R2, ret+16(FP)
RET
#endif
// func And64(addr *uint64, v uint64) old uint64
TEXT ·And64(SB), NOSPLIT, $0-24
MOVD ptr+0(FP), R0
MOVD val+8(FP), R1
#ifndef GOARM64_LSE
MOVBU internalcpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
#endif
MVN R1, R2
LDCLRALD R2, (R0), R3
MOVD R3, ret+16(FP)
RET
#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R2
AND R1, R2, R3
@ -401,6 +456,7 @@ load_store_loop:
CBNZ R4, load_store_loop
MOVD R2, ret+16(FP)
RET
#endif
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
TEXT ·Anduintptr(SB), NOSPLIT, $0-24

27
test/codegen/atomics.go Normal file
View File

@ -0,0 +1,27 @@
// asmcheck
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// These tests check that atomic instructions without dynamic checks are
// generated for architectures that support them
package codegen
import "sync/atomic"
type Counter struct {
count int32
}
func (c *Counter) Increment() {
// Check that ARm64 v8.0 has both atomic instruction (LDADDALW) and a dynamic check
// (for arm64HasATOMICS), while ARM64 v8.1 has only atomic and no dynamic check.
// arm64/v8.0:"LDADDALW"
// arm64/v8.1:"LDADDALW"
// arm64/v8.0:".*arm64HasATOMICS"
// arm64/v8.1:-".*arm64HasATOMICS"
atomic.AddInt32(&c.count, 1)
}