mirror of
https://github.com/golang/go
synced 2024-11-26 04:58:00 -07:00
cmd/compile/internal/amd64: follow-on regabi fix for amd64 zerorange
This patch provides a better long-term fix for the compiler's zerorange() helper function to make it generate code friendly to the register ABI. CL 305829 did part of the work, but didn't properly handle the case where the compiler emits a REP.STOSQ sequence; this patch changes the REP code to make sure it doesn't clobber any incoming register parameter values. Also included is a test that is specifically written to trigger the REP emit code in the compiler (prior to this, this code was not being hit on linux/amd64 all.bash). Updates #45372. Updates #40724. Change-Id: Iaf1c4e709e98eda45cd6f3aeebda0fe9160f1f42 Reviewed-on: https://go-review.googlesource.com/c/go/+/307829 Trust: Than McIntosh <thanm@google.com> Run-TryBot: Than McIntosh <thanm@google.com> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
16cd770e06
commit
c26f954a54
@ -58,7 +58,6 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
|
||||
const (
|
||||
r13 = 1 << iota // if R13 is already zeroed.
|
||||
x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero.
|
||||
rax // if RAX is already zeroed.
|
||||
)
|
||||
|
||||
if cnt == 0 {
|
||||
@ -117,18 +116,33 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
|
||||
|
||||
} else {
|
||||
// Note: here we have to use RAX since it is an implicit input
|
||||
// for the REPSTOSQ below. This is going to be problematic when
|
||||
// regabi is in effect; this will be fixed in a forthcoming CL.
|
||||
if *state&rax == 0 {
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
|
||||
*state |= rax
|
||||
}
|
||||
// When the register ABI is in effect, at this point in the
|
||||
// prolog we may have live values in all of RAX,RDI,RCX. Save
|
||||
// them off to registers before the REPSTOSQ below, then
|
||||
// restore. Note that R12 and R13 are always available as
|
||||
// scratch regs; here we also use R15 (this is safe to do
|
||||
// since there won't be any globals accessed in the prolog).
|
||||
// See rewriteToUseGot() in obj6.go for more on r15 use.
|
||||
|
||||
// Save rax/rdi/rcx
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)
|
||||
|
||||
// Set up the REPSTOSQ and kick it off.
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
|
||||
p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
|
||||
p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
|
||||
p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
|
||||
|
||||
// Restore rax/rdi/rcx
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
|
||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)
|
||||
|
||||
// Record the fact that r13 is no longer zero.
|
||||
*state &= ^uint32(r13)
|
||||
}
|
||||
|
||||
return p
|
||||
|
@ -4,7 +4,9 @@
|
||||
|
||||
package test
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
var glob = 3
|
||||
var globp *int64
|
||||
@ -94,3 +96,90 @@ func testZeroRange136(t *testing.T) (r, s, t2, u, v, w, x, y, r1, s1, t1, u1, v1
|
||||
globp = &z1
|
||||
return
|
||||
}
|
||||
|
||||
type S struct {
|
||||
x [2]uint64
|
||||
p *uint64
|
||||
y [2]uint64
|
||||
q uint64
|
||||
}
|
||||
|
||||
type M struct {
|
||||
x [8]uint64
|
||||
p *uint64
|
||||
y [8]uint64
|
||||
q uint64
|
||||
}
|
||||
|
||||
type L struct {
|
||||
x [4096]uint64
|
||||
p *uint64
|
||||
y [4096]uint64
|
||||
q uint64
|
||||
}
|
||||
|
||||
//go:noinline
|
||||
func triggerZerorangeLarge(f, g, h uint64) (rv0 uint64) {
|
||||
ll := L{p: &f}
|
||||
da := f
|
||||
rv0 = f + g + h
|
||||
defer func(dl L, i uint64) {
|
||||
rv0 += dl.q + i
|
||||
}(ll, da)
|
||||
return rv0
|
||||
}
|
||||
|
||||
//go:noinline
|
||||
func triggerZerorangeMedium(f, g, h uint64) (rv0 uint64) {
|
||||
ll := M{p: &f}
|
||||
rv0 = f + g + h
|
||||
defer func(dm M, i uint64) {
|
||||
rv0 += dm.q + i
|
||||
}(ll, f)
|
||||
return rv0
|
||||
}
|
||||
|
||||
//go:noinline
|
||||
func triggerZerorangeSmall(f, g, h uint64) (rv0 uint64) {
|
||||
ll := S{p: &f}
|
||||
rv0 = f + g + h
|
||||
defer func(ds S, i uint64) {
|
||||
rv0 += ds.q + i
|
||||
}(ll, f)
|
||||
return rv0
|
||||
}
|
||||
|
||||
// This test was created as a follow up to issue #45372, to help
|
||||
// improve coverage of the compiler's arch-specific "zerorange"
|
||||
// function, which is invoked to zero out ambiguously live portions of
|
||||
// the stack frame in certain specific circumstances.
|
||||
//
|
||||
// In the current compiler implementation, for zerorange to be
|
||||
// invoked, we need to have an ambiguously live variable that needs
|
||||
// zeroing. One way to trigger this is to have a function with an
|
||||
// open-coded defer, where the opendefer function has an argument that
|
||||
// contains a pointer (this is what's used below).
|
||||
//
|
||||
// At the moment this test doesn't do any specific checking for
|
||||
// code sequence, or verification that things were properly set to zero,
|
||||
// this seems as though it would be too tricky and would result
|
||||
// in a "brittle" test.
|
||||
//
|
||||
// The small/medium/large scenarios below are inspired by the amd64
|
||||
// implementation of zerorange, which generates different code
|
||||
// depending on the size of the thing that needs to be zeroed out
|
||||
// (I've verified at the time of the writing of this test that it
|
||||
// exercises the various cases).
|
||||
//
|
||||
func TestZerorange45372(t *testing.T) {
|
||||
if r := triggerZerorangeLarge(101, 303, 505); r != 1010 {
|
||||
t.Errorf("large: wanted %d got %d", 1010, r)
|
||||
}
|
||||
if r := triggerZerorangeMedium(101, 303, 505); r != 1010 {
|
||||
t.Errorf("medium: wanted %d got %d", 1010, r)
|
||||
}
|
||||
if r := triggerZerorangeSmall(101, 303, 505); r != 1010 {
|
||||
t.Errorf("small: wanted %d got %d", 1010, r)
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user