From 1678829d9555761c0fa6571fd3bcaec016add3d2 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Mon, 22 Feb 2021 17:43:08 -0500 Subject: [PATCH] cmd/compile: correctly use X15 to zero frame In CL 288093 we reserve X15 as the zero register and use that to zero values. It only covered zeroing generated in SSA but missed zeroing the frame generated late in the compilation. The latter still uses X0, but now DUFFZERO expects X15, so it doesn't actually zero the frame. Change it to use X15. Should fix #44333. Change-Id: I239d2b78a5f6468bc86b70aecdd294045311759f Reviewed-on: https://go-review.googlesource.com/c/go/+/295210 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: David Chase --- src/cmd/compile/internal/amd64/ggen.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/cmd/compile/internal/amd64/ggen.go b/src/cmd/compile/internal/amd64/ggen.go index aefdb14a69..14c3bd1129 100644 --- a/src/cmd/compile/internal/amd64/ggen.go +++ b/src/cmd/compile/internal/amd64/ggen.go @@ -56,8 +56,8 @@ func dzDI(b int64) int64 { func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog { const ( - ax = 1 << iota - x0 + ax = 1 << iota // if AX is already zeroed. + x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero. ) if cnt == 0 { @@ -85,29 +85,29 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj. } p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off) } else if !isPlan9 && cnt <= int64(8*types.RegSize) { - if *state&x0 == 0 { - p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0) - *state |= x0 + if objabi.Regabi_enabled == 0 && *state&x15 == 0 { + p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0) + *state |= x15 } for i := int64(0); i < cnt/16; i++ { - p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16) + p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16) } if cnt%16 != 0 { - p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16)) + p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16)) } } else if !isPlan9 && (cnt <= int64(128*types.RegSize)) { - if *state&x0 == 0 { - p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0) - *state |= x0 + if objabi.Regabi_enabled == 0 && *state&x15 == 0 { + p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0) + *state |= x15 } p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0) p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt)) p.To.Sym = ir.Syms.Duffzero if cnt%16 != 0 { - p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8)) + p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8)) } } else { if *state&ax == 0 {