1
0
mirror of https://github.com/golang/go synced 2024-11-26 04:58:00 -07:00

cmd/compile/internal/amd64: follow-on regabi fix for amd64 zerorange

This patch provides a better long-term fix for the compiler's
zerorange() helper function to make it generate code friendly to the
register ABI.

CL 305829 did part of the work, but didn't properly handle the case
where the compiler emits a REP.STOSQ sequence; this patch changes the
REP code to make sure it doesn't clobber any incoming register
parameter values.

Also included is a test that is specifically written to trigger
the REP emit code in the compiler (prior to this, this code was
not being hit on linux/amd64 all.bash).

Updates #45372.
Updates #40724.

Change-Id: Iaf1c4e709e98eda45cd6f3aeebda0fe9160f1f42
Reviewed-on: https://go-review.googlesource.com/c/go/+/307829
Trust: Than McIntosh <thanm@google.com>
Run-TryBot: Than McIntosh <thanm@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
Than McIntosh 2021-04-06 16:18:50 -04:00
parent 16cd770e06
commit c26f954a54
2 changed files with 112 additions and 9 deletions

View File

@ -58,7 +58,6 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
const (
r13 = 1 << iota // if R13 is already zeroed.
x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero.
rax // if RAX is already zeroed.
)
if cnt == 0 {
@ -117,18 +116,33 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
} else {
// Note: here we have to use RAX since it is an implicit input
// for the REPSTOSQ below. This is going to be problematic when
// regabi is in effect; this will be fixed in a forthcoming CL.
if *state&rax == 0 {
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
*state |= rax
}
// When the register ABI is in effect, at this point in the
// prolog we may have live values in all of RAX,RDI,RCX. Save
// them off to registers before the REPSTOSQ below, then
// restore. Note that R12 and R13 are always available as
// scratch regs; here we also use R15 (this is safe to do
// since there won't be any globals accessed in the prolog).
// See rewriteToUseGot() in obj6.go for more on r15 use.
// Save rax/rdi/rcx
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)
// Set up the REPSTOSQ and kick it off.
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
// Restore rax/rdi/rcx
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)
// Record the fact that r13 is no longer zero.
*state &= ^uint32(r13)
}
return p

View File

@ -4,7 +4,9 @@
package test
import "testing"
import (
"testing"
)
var glob = 3
var globp *int64
@ -94,3 +96,90 @@ func testZeroRange136(t *testing.T) (r, s, t2, u, v, w, x, y, r1, s1, t1, u1, v1
globp = &z1
return
}
type S struct {
x [2]uint64
p *uint64
y [2]uint64
q uint64
}
type M struct {
x [8]uint64
p *uint64
y [8]uint64
q uint64
}
type L struct {
x [4096]uint64
p *uint64
y [4096]uint64
q uint64
}
//go:noinline
func triggerZerorangeLarge(f, g, h uint64) (rv0 uint64) {
ll := L{p: &f}
da := f
rv0 = f + g + h
defer func(dl L, i uint64) {
rv0 += dl.q + i
}(ll, da)
return rv0
}
//go:noinline
func triggerZerorangeMedium(f, g, h uint64) (rv0 uint64) {
ll := M{p: &f}
rv0 = f + g + h
defer func(dm M, i uint64) {
rv0 += dm.q + i
}(ll, f)
return rv0
}
//go:noinline
func triggerZerorangeSmall(f, g, h uint64) (rv0 uint64) {
ll := S{p: &f}
rv0 = f + g + h
defer func(ds S, i uint64) {
rv0 += ds.q + i
}(ll, f)
return rv0
}
// This test was created as a follow up to issue #45372, to help
// improve coverage of the compiler's arch-specific "zerorange"
// function, which is invoked to zero out ambiguously live portions of
// the stack frame in certain specific circumstances.
//
// In the current compiler implementation, for zerorange to be
// invoked, we need to have an ambiguously live variable that needs
// zeroing. One way to trigger this is to have a function with an
// open-coded defer, where the opendefer function has an argument that
// contains a pointer (this is what's used below).
//
// At the moment this test doesn't do any specific checking for
// code sequence, or verification that things were properly set to zero,
// this seems as though it would be too tricky and would result
// in a "brittle" test.
//
// The small/medium/large scenarios below are inspired by the amd64
// implementation of zerorange, which generates different code
// depending on the size of the thing that needs to be zeroed out
// (I've verified at the time of the writing of this test that it
// exercises the various cases).
//
func TestZerorange45372(t *testing.T) {
if r := triggerZerorangeLarge(101, 303, 505); r != 1010 {
t.Errorf("large: wanted %d got %d", 1010, r)
}
if r := triggerZerorangeMedium(101, 303, 505); r != 1010 {
t.Errorf("medium: wanted %d got %d", 1010, r)
}
if r := triggerZerorangeSmall(101, 303, 505); r != 1010 {
t.Errorf("small: wanted %d got %d", 1010, r)
}
}