1
0
mirror of https://github.com/golang/go synced 2024-11-23 07:10:05 -07:00

[dev.regabi] cmd/compile, runtime: reserve R14 as g registers on AMD64

This is a proof-of-concept change for using the g register on
AMD64. getg is now lowered to R14 in the new ABI. The g register
is not yet used in all places where it can be used (e.g. stack
bounds check, runtime assembly code).

Change-Id: I10123ddf38e31782cf58bafcdff170aee0ff0d1b
Reviewed-on: https://go-review.googlesource.com/c/go/+/289196
Trust: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Cherry Zhang 2021-02-02 18:20:16 -05:00
parent a21de9ec73
commit 5d7dc53888
10 changed files with 1113 additions and 1075 deletions

View File

@ -166,6 +166,34 @@ func duff(size int64) (int64, int64) {
return off, adj
}
func getgFromTLS(s *ssagen.State, r int16) {
// See the comments in cmd/internal/obj/x86/obj6.go
// near CanUse1InsnTLS for a detailed explanation of these instructions.
if x86.CanUse1InsnTLS(base.Ctxt) {
// MOVQ (TLS), r
p := s.Prog(x86.AMOVQ)
p.From.Type = obj.TYPE_MEM
p.From.Reg = x86.REG_TLS
p.To.Type = obj.TYPE_REG
p.To.Reg = r
} else {
// MOVQ TLS, r
// MOVQ (r)(TLS*1), r
p := s.Prog(x86.AMOVQ)
p.From.Type = obj.TYPE_REG
p.From.Reg = x86.REG_TLS
p.To.Type = obj.TYPE_REG
p.To.Reg = r
q := s.Prog(x86.AMOVQ)
q.From.Type = obj.TYPE_MEM
q.From.Reg = r
q.From.Index = x86.REG_TLS
q.From.Scale = 1
q.To.Type = obj.TYPE_REG
q.To.Reg = r
}
}
func ssaGenValue(s *ssagen.State, v *ssa.Value) {
switch v.Op {
case ssa.OpAMD64VFMADD231SD:
@ -989,41 +1017,24 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// Closure pointer is DX.
ssagen.CheckLoweredGetClosurePtr(v)
case ssa.OpAMD64LoweredGetG:
r := v.Reg()
// See the comments in cmd/internal/obj/x86/obj6.go
// near CanUse1InsnTLS for a detailed explanation of these instructions.
if x86.CanUse1InsnTLS(base.Ctxt) {
// MOVQ (TLS), r
p := s.Prog(x86.AMOVQ)
p.From.Type = obj.TYPE_MEM
p.From.Reg = x86.REG_TLS
p.To.Type = obj.TYPE_REG
p.To.Reg = r
} else {
// MOVQ TLS, r
// MOVQ (r)(TLS*1), r
p := s.Prog(x86.AMOVQ)
p.From.Type = obj.TYPE_REG
p.From.Reg = x86.REG_TLS
p.To.Type = obj.TYPE_REG
p.To.Reg = r
q := s.Prog(x86.AMOVQ)
q.From.Type = obj.TYPE_MEM
q.From.Reg = r
q.From.Index = x86.REG_TLS
q.From.Scale = 1
q.To.Type = obj.TYPE_REG
q.To.Reg = r
if base.Flag.ABIWrap {
v.Fatalf("LoweredGetG should not appear in new ABI")
}
r := v.Reg()
getgFromTLS(s, r)
case ssa.OpAMD64CALLstatic:
if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
// zeroing X15 when entering ABIInternal from ABI0
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
// set G register from TLS
getgFromTLS(s, x86.REG_R14)
}
s.Call(v)
if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
// zeroing X15 when entering ABIInternal from ABI0
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
// set G register from TLS
getgFromTLS(s, x86.REG_R14)
}
case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
s.Call(v)
@ -1325,6 +1336,8 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
if s.ABI == obj.ABI0 && b.Aux.(*obj.LSym).ABI() == obj.ABIInternal {
// zeroing X15 when entering ABIInternal from ABI0
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
// set G register from TLS
getgFromTLS(s, x86.REG_R14)
}
p := s.Prog(obj.ARET)
p.To.Type = obj.TYPE_MEM

View File

@ -5,6 +5,7 @@
package ssa
import (
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/types"
"cmd/internal/obj"
@ -197,7 +198,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config
c.specialRegMask = specialRegMaskAMD64
c.FPReg = framepointerRegAMD64
c.LinkReg = linkRegAMD64
c.hasGReg = false
c.hasGReg = base.Flag.ABIWrap
case "386":
c.PtrSize = 4
c.RegSize = 4

View File

@ -459,7 +459,7 @@
(IsInBounds idx len) => (SETB (CMPQ idx len))
(IsSliceInBounds idx len) => (SETBE (CMPQ idx len))
(NilCheck ...) => (LoweredNilCheck ...)
(GetG ...) => (LoweredGetG ...)
(GetG mem) && !base.Flag.ABIWrap => (LoweredGetG mem) // only lower in old ABI. in new ABI we have a G register.
(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
(GetCallerPC ...) => (LoweredGetCallerPC ...)
(GetCallerSP ...) => (LoweredGetCallerSP ...)

View File

@ -44,7 +44,7 @@ var regNamesAMD64 = []string{
"R11",
"R12",
"R13",
"R14",
"g", // a.k.a. R14
"R15",
"X0",
"X1",
@ -96,12 +96,14 @@ func init() {
cx = buildReg("CX")
dx = buildReg("DX")
bx = buildReg("BX")
gp = buildReg("AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15")
gp = buildReg("AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15")
g = buildReg("g")
fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14")
x15 = buildReg("X15")
gpsp = gp | buildReg("SP")
gpspsb = gpsp | buildReg("SB")
callerSave = gp | fp
gpspsbg = gpspsb | g
callerSave = gp | fp | g // runtime.setg (and anything calling it) may clobber g
)
// Common slices of register masks
var (
@ -114,10 +116,10 @@ func init() {
gp01 = regInfo{inputs: nil, outputs: gponly}
gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly}
gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
gp11sb = regInfo{inputs: []regMask{gpspsbg}, outputs: gponly}
gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
gp21sb = regInfo{inputs: []regMask{gpspsbg, gpsp}, outputs: gponly}
gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax, dx}}
gp21hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
@ -126,9 +128,9 @@ func init() {
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}}
gp1flags = regInfo{inputs: []regMask{gpsp}}
gp0flagsLoad = regInfo{inputs: []regMask{gpspsb, 0}}
gp1flagsLoad = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gp2flagsLoad = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
gp0flagsLoad = regInfo{inputs: []regMask{gpspsbg, 0}}
gp1flagsLoad = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}}
gp2flagsLoad = regInfo{inputs: []regMask{gpspsbg, gpsp, gpsp, 0}}
flagsgp = regInfo{inputs: nil, outputs: gponly}
gp11flags = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
@ -137,24 +139,24 @@ func init() {
readflags = regInfo{inputs: nil, outputs: gponly}
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsb, gpsp, 0}, outputs: gponly}
gpload = regInfo{inputs: []regMask{gpspsbg, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}, outputs: gponly}
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsbg, gpsp, 0}, outputs: gponly}
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
gpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstorexchg = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: []regMask{gp}}
gpstore = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsbg, 0}}
gpstoreidx = regInfo{inputs: []regMask{gpspsbg, gpsp, gpsp, 0}}
gpstoreconstidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}}
gpstorexchg = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: []regMask{gp}}
cmpxchg = regInfo{inputs: []regMask{gp, ax, gp, 0}, outputs: []regMask{gp, 0}, clobbers: ax}
fp01 = regInfo{inputs: nil, outputs: fponly}
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
fp21load = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly}
fp21loadidx = regInfo{inputs: []regMask{fp, gpspsb, gpspsb, 0}, outputs: fponly}
fp21load = regInfo{inputs: []regMask{fp, gpspsbg, 0}, outputs: fponly}
fp21loadidx = regInfo{inputs: []regMask{fp, gpspsbg, gpspsb, 0}, outputs: fponly}
fpgp = regInfo{inputs: fponly, outputs: gponly}
gpfp = regInfo{inputs: gponly, outputs: fponly}
fp11 = regInfo{inputs: fponly, outputs: fponly}
@ -830,7 +832,7 @@ func init() {
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary, but may clobber others.
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), buildReg("AX CX DX BX BP SI R8 R9")}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), buildReg("AX CX DX BX BP SI R8 R9")}, clobbers: callerSave &^ (gp | g)}, clobberFlags: true, aux: "Sym", symEffect: "None"},
{name: "LoweredHasCPUFeature", argLength: 0, reg: gp01, rematerializeable: true, typ: "UInt64", aux: "Sym", symEffect: "None"},

View File

@ -582,6 +582,7 @@ func fprint(w io.Writer, n Node) {
"math",
"cmd/internal/obj",
"cmd/internal/objabi",
"cmd/compile/internal/base",
"cmd/compile/internal/types",
}, n.Arch.imports...) {
fmt.Fprintf(w, "import %q\n", path)

File diff suppressed because it is too large Load Diff

View File

@ -4,6 +4,7 @@
package ssa
import "math"
import "cmd/compile/internal/base"
import "cmd/compile/internal/types"
func rewriteValueAMD64(v *Value) bool {
@ -767,8 +768,7 @@ func rewriteValueAMD64(v *Value) bool {
v.Op = OpAMD64LoweredGetClosurePtr
return true
case OpGetG:
v.Op = OpAMD64LoweredGetG
return true
return rewriteValueAMD64_OpGetG(v)
case OpHasCPUFeature:
return rewriteValueAMD64_OpHasCPUFeature(v)
case OpHmul32:
@ -30126,6 +30126,22 @@ func rewriteValueAMD64_OpFloor(v *Value) bool {
return true
}
}
func rewriteValueAMD64_OpGetG(v *Value) bool {
v_0 := v.Args[0]
// match: (GetG mem)
// cond: !base.Flag.ABIWrap
// result: (LoweredGetG mem)
for {
mem := v_0
if !(!base.Flag.ABIWrap) {
break
}
v.reset(OpAMD64LoweredGetG)
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types

View File

@ -262,6 +262,7 @@ TEXT runtime·gogo(SB), NOSPLIT, $16-8
MOVQ 0(DX), CX // make sure g != nil
get_tls(CX)
MOVQ DX, g(CX)
MOVQ DX, R14 // set the g register
MOVQ gobuf_sp(BX), SP // restore SP
MOVQ gobuf_ret(BX), AX
MOVQ gobuf_ctxt(BX), DX
@ -298,6 +299,7 @@ TEXT runtime·mcall(SB), NOSPLIT, $0-8
MOVQ $runtime·badmcall(SB), AX
JMP AX
MOVQ SI, g(CX) // g = m->g0
MOVQ SI, R14 // set the g register
MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
PUSHQ AX
MOVQ DI, DX
@ -344,6 +346,7 @@ TEXT runtime·systemstack(SB), NOSPLIT, $0-8
// switch to g0
MOVQ DX, g(CX)
MOVQ DX, R14 // set the g register
MOVQ (g_sched+gobuf_sp)(DX), BX
// make it look like mstart called systemstack on g0, to stop traceback
SUBQ $8, BX
@ -824,6 +827,7 @@ settls:
TEXT setg_gcc<>(SB),NOSPLIT,$0
get_tls(AX)
MOVQ DI, g(AX)
MOVQ DI, R14 // set the g register
RET
TEXT runtime·abort(SB),NOSPLIT,$0-0
@ -1368,24 +1372,24 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Defined as ABIInternal since it does not use the stack-based Go ABI.
TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$120
TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
// Save the registers clobbered by the fast path. This is slightly
// faster than having the caller spill these.
MOVQ R14, 104(SP)
MOVQ R13, 112(SP)
MOVQ R12, 96(SP)
MOVQ R13, 104(SP)
// TODO: Consider passing g.m.p in as an argument so they can be shared
// across a sequence of write barriers.
get_tls(R13)
MOVQ g(R13), R13
MOVQ g_m(R13), R13
MOVQ m_p(R13), R13
MOVQ (p_wbBuf+wbBuf_next)(R13), R14
MOVQ (p_wbBuf+wbBuf_next)(R13), R12
// Increment wbBuf.next position.
LEAQ 16(R14), R14
MOVQ R14, (p_wbBuf+wbBuf_next)(R13)
CMPQ R14, (p_wbBuf+wbBuf_end)(R13)
LEAQ 16(R12), R12
MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
CMPQ R12, (p_wbBuf+wbBuf_end)(R13)
// Record the write.
MOVQ AX, -16(R14) // Record value
MOVQ AX, -16(R12) // Record value
// Note: This turns bad pointer writes into bad
// pointer reads, which could be confusing. We could avoid
// reading from obviously bad pointers, which would
@ -1393,12 +1397,12 @@ TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$120
// patch this up in the signal handler, or use XCHG to
// combine the read and the write.
MOVQ (DI), R13
MOVQ R13, -8(R14) // Record *slot
MOVQ R13, -8(R12) // Record *slot
// Is the buffer full? (flags set in CMPQ above)
JEQ flush
ret:
MOVQ 104(SP), R14
MOVQ 112(SP), R13
MOVQ 96(SP), R12
MOVQ 104(SP), R13
// Do the write.
MOVQ AX, (DI)
RET
@ -1428,10 +1432,10 @@ flush:
MOVQ R9, 64(SP)
MOVQ R10, 72(SP)
MOVQ R11, 80(SP)
MOVQ R12, 88(SP)
// R12 already saved
// R13 already saved
// R14 already saved
MOVQ R15, 96(SP)
// R14 is g
MOVQ R15, 88(SP)
// This takes arguments DI and AX
CALL runtime·wbBufFlush(SB)
@ -1447,8 +1451,7 @@ flush:
MOVQ 64(SP), R9
MOVQ 72(SP), R10
MOVQ 80(SP), R11
MOVQ 88(SP), R12
MOVQ 96(SP), R15
MOVQ 88(SP), R15
JMP ret
// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.

View File

@ -452,12 +452,13 @@ rest:
PUSHQ R15
// Set g = g0.
get_tls(R12)
MOVQ g(R12), R13
MOVQ g_m(R13), R14
MOVQ m_g0(R14), R15
MOVQ g(R12), R14
MOVQ g_m(R14), R13
MOVQ m_g0(R13), R15
CMPQ R13, R15
JEQ noswitch // branch if already on g0
MOVQ R15, g(R12) // g = m->g0
MOVQ R15, R14 // set g register
PUSHQ RARG1 // func arg
PUSHQ RARG0 // func arg
CALL runtime·racecallback(SB)

View File

@ -632,6 +632,7 @@ nog1:
get_tls(CX)
MOVQ R13, g_m(R9)
MOVQ R9, g(CX)
MOVQ R9, R14 // set g register
CALL runtime·stackcheck(SB)
nog2: