1
0
mirror of https://github.com/golang/go synced 2024-11-17 19:15:21 -07:00

runtime, cmd/compile: optimize open-coded defers

This CL optimizes open-coded defers in two ways:

1. It modifies local variable sorting to place all open-coded defer
closure slots in order, so that rather than requiring the metadata to
contain each offset individually, we just need a single offset to the
first slot.

2. Because the slots are in ascending order and can be directly
indexed, we can get rid of the count of how many defers are in the
frame. Instead, we just find the top set bit in the active defers
bitmask, and load the corresponding closure.

Change-Id: I6f912295a492211023a9efe12c94a14f449d86ad
Reviewed-on: https://go-review.googlesource.com/c/go/+/516199
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Matthew Dempsky 2023-08-04 14:10:59 -07:00
parent 65d4723b49
commit bb5974e0cb
4 changed files with 77 additions and 54 deletions

View File

@ -64,6 +64,22 @@ func cmpstackvarlt(a, b *ir.Name) bool {
return a.Type().Alignment() > b.Type().Alignment()
}
// Sort normal variables before open-coded-defer slots, so that the
// latter are grouped together and near the top of the frame (to
// minimize varint encoding of their varp offset).
if a.OpenDeferSlot() != b.OpenDeferSlot() {
return a.OpenDeferSlot()
}
// If a and b are both open-coded defer slots, then order them by
// index in descending order, so they'll be laid out in the frame in
// ascending order.
//
// Their index was saved in FrameOffset in state.openDeferSave.
if a.OpenDeferSlot() {
return a.FrameOffset() > b.FrameOffset()
}
// Tie breaker for stable results.
return a.Sym().Name < b.Sym().Name
}

View File

@ -282,22 +282,26 @@ func dvarint(x *obj.LSym, off int, v int64) int {
// top of the local variables) for their starting address. The format is:
//
// - Offset of the deferBits variable
// - Number of defers in the function
// - Information about each defer call, in reverse order of appearance in the function:
// - Offset of the closure value to call
// - Offset of the first closure slot (the rest are laid out consecutively).
func (s *state) emitOpenDeferInfo() {
firstOffset := s.openDefers[0].closureNode.FrameOffset()
// Verify that cmpstackvarlt laid out the slots in order.
for i, r := range s.openDefers {
have := r.closureNode.FrameOffset()
want := firstOffset + int64(i)*int64(types.PtrSize)
if have != want {
base.FatalfAt(s.curfn.Pos(), "unexpected frame offset for open-coded defer slot #%v: have %v, want %v", i, have, want)
}
}
x := base.Ctxt.Lookup(s.curfn.LSym.Name + ".opendefer")
x.Set(obj.AttrContentAddressable, true)
s.curfn.LSym.Func().OpenCodedDeferInfo = x
off := 0
off = dvarint(x, off, -s.deferBitsTemp.FrameOffset())
off = dvarint(x, off, int64(len(s.openDefers)))
// Write in reverse-order, for ease of running in that order at runtime
for i := len(s.openDefers) - 1; i >= 0; i-- {
r := s.openDefers[i]
off = dvarint(x, off, -r.closureNode.FrameOffset())
}
off = dvarint(x, off, -firstOffset)
}
func okOffset(offset int64) int64 {
@ -567,7 +571,7 @@ func buildssa(fn *ir.Func, worker int) *ssa.Func {
// Main call to ssa package to compile function
ssa.Compile(s.f)
if s.hasOpenDefers {
if len(s.openDefers) != 0 {
s.emitOpenDeferInfo()
}
@ -5053,6 +5057,7 @@ func (s *state) openDeferSave(t *types.Type, val *ssa.Value) *ssa.Value {
pos := val.Pos
temp := typecheck.TempAt(pos.WithNotStmt(), s.curfn, t)
temp.SetOpenDeferSlot(true)
temp.SetFrameOffset(int64(len(s.openDefers))) // so cmpstackvarlt can order them
var addrTemp *ssa.Value
// Use OpVarLive to make sure stack slot for the closure is not removed by
// dead-store elimination

View File

@ -682,24 +682,21 @@ func (p *_panic) nextDefer() (func(), bool) {
p.argp = add(p.startSP, sys.MinFrameSize)
for {
for p.openDefers > 0 {
p.openDefers--
// Find the closure offset for the next deferred call.
var closureOffset uint32
closureOffset, p.closureOffsets = readvarintUnsafe(p.closureOffsets)
bit := uint8(1 << p.openDefers)
if *p.deferBitsPtr&bit == 0 {
continue
}
*p.deferBitsPtr &^= bit
if *p.deferBitsPtr == 0 {
p.openDefers = 0 // short circuit: no more active defers
for p.deferBitsPtr != nil {
bits := *p.deferBitsPtr
if bits == 0 {
p.deferBitsPtr = nil
break
}
return *(*func())(add(p.varp, -uintptr(closureOffset))), true
// Find index of top bit set.
i := 7 - uintptr(sys.LeadingZeros8(bits))
// Clear bit and store it back.
bits &^= 1 << i
*p.deferBitsPtr = bits
return *(*func())(add(p.slotsPtr, i*goarch.PtrSize)), true
}
if d := gp._defer; d != nil && d.sp == uintptr(p.sp) {
@ -752,25 +749,8 @@ func (p *_panic) nextFrame() (ok bool) {
// then we can simply loop until we find the next frame where
// it's non-zero.
if fd := funcdata(u.frame.fn, abi.FUNCDATA_OpenCodedDeferInfo); fd != nil {
if u.frame.fn.deferreturn == 0 {
throw("missing deferreturn")
}
p.retpc = u.frame.fn.entry() + uintptr(u.frame.fn.deferreturn)
var deferBitsOffset uint32
deferBitsOffset, fd = readvarintUnsafe(fd)
deferBitsPtr := (*uint8)(add(unsafe.Pointer(u.frame.varp), -uintptr(deferBitsOffset)))
if *deferBitsPtr != 0 {
var openDefers uint32
openDefers, fd = readvarintUnsafe(fd)
p.openDefers = uint8(openDefers)
p.deferBitsPtr = deferBitsPtr
p.closureOffsets = fd
break // found a frame with open-coded defers
}
if p.initOpenCodedDefers(u.frame.fn, unsafe.Pointer(u.frame.varp)) {
break // found a frame with open-coded defers
}
if u.frame.sp == limit {
@ -787,7 +767,6 @@ func (p *_panic) nextFrame() (ok bool) {
}
p.sp = unsafe.Pointer(u.frame.sp)
p.fp = unsafe.Pointer(u.frame.fp)
p.varp = unsafe.Pointer(u.frame.varp)
ok = true
})
@ -795,6 +774,31 @@ func (p *_panic) nextFrame() (ok bool) {
return
}
func (p *_panic) initOpenCodedDefers(fn funcInfo, varp unsafe.Pointer) bool {
fd := funcdata(fn, abi.FUNCDATA_OpenCodedDeferInfo)
if fd == nil {
return false
}
if fn.deferreturn == 0 {
throw("missing deferreturn")
}
deferBitsOffset, fd := readvarintUnsafe(fd)
deferBitsPtr := (*uint8)(add(varp, -uintptr(deferBitsOffset)))
if *deferBitsPtr == 0 {
return false // has open-coded defers, but none pending
}
slotsOffset, fd := readvarintUnsafe(fd)
p.retpc = fn.entry() + uintptr(fn.deferreturn)
p.deferBitsPtr = deferBitsPtr
p.slotsPtr = add(varp, -uintptr(slotsOffset))
return true
}
// The implementation of the predeclared function recover.
// Cannot split the stack because it needs to reliably
// find the stack segment of its caller.

View File

@ -1024,19 +1024,17 @@ type _panic struct {
startSP unsafe.Pointer
// The current stack frame that we're running deferred calls for.
sp unsafe.Pointer
lr uintptr
fp unsafe.Pointer
varp unsafe.Pointer
sp unsafe.Pointer
lr uintptr
fp unsafe.Pointer
// retpc stores the PC where the panic should jump back to, if the
// function last returned by _panic.next() recovers the panic.
retpc uintptr
// Extra state for handling open-coded defers.
deferBitsPtr *uint8
closureOffsets unsafe.Pointer
openDefers uint8 // count of pending open-coded defers
deferBitsPtr *uint8
slotsPtr unsafe.Pointer
recovered bool // whether this panic has been recovered
goexit bool