diff --git a/src/cmd/compile/internal/amd64/ggen.go b/src/cmd/compile/internal/amd64/ggen.go index 259a083f75..e56ec90dc8 100644 --- a/src/cmd/compile/internal/amd64/ggen.go +++ b/src/cmd/compile/internal/amd64/ggen.go @@ -56,7 +56,7 @@ func dzDI(b int64) int64 { func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog { const ( - ax = 1 << iota // if AX is already zeroed. + r13 = 1 << iota // if R13 is already zeroed. x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero. ) @@ -69,21 +69,21 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj. if cnt%int64(types.PtrSize) != 0 { base.Fatalf("zerorange count not a multiple of widthptr %d", cnt) } - if *state&ax == 0 { - p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) - *state |= ax + if *state&r13 == 0 { + p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0) + *state |= r13 } - p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off) + p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off) off += int64(types.PtrSize) cnt -= int64(types.PtrSize) } if cnt == 8 { - if *state&ax == 0 { - p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) - *state |= ax + if *state&r13 == 0 { + p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0) + *state |= r13 } - p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off) } else if !isPlan9 && cnt <= int64(8*types.RegSize) { if !objabi.Experiment.RegabiG && *state&x15 == 0 { p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0) @@ -102,17 +102,23 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj. p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0) *state |= x15 } + // Save DI to r12. With the amd64 Go register abi, DI can contain + // an incoming parameter, whereas R12 is always scratch. + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0) + // Emit duffzero call p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0) p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt)) p.To.Sym = ir.Syms.Duffzero - if cnt%16 != 0 { p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8)) } + // Restore DI from r12 + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) + } else { - if *state&ax == 0 { - p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) - *state |= ax + if *state&r13 == 0 { + p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0) + *state |= r13 } p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)