1
0
mirror of https://github.com/golang/go synced 2024-11-12 09:50:21 -07:00

runtime: iterate over set bits in adjustpointers

There are several things combined in this change.

First, eliminate the gobitvector type in favor
of adding a ptrbit method to bitvector.
In non-performance-critical code, use that method.
In performance critical code, though, load the bitvector data
one byte at a time and iterate only over set bits.
To support that, add and use sys.Ctz8.

name                old time/op  new time/op  delta
StackCopyPtr-8      81.8ms ± 5%  78.9ms ± 3%   -3.58%  (p=0.000 n=97+96)
StackCopy-8         65.9ms ± 3%  62.8ms ± 3%   -4.67%  (p=0.000 n=96+92)
StackCopyNoCache-8   105ms ± 3%   102ms ± 3%   -3.38%  (p=0.000 n=96+95)

Change-Id: I00b80f45612708bd440b1a411a57fa6dfa24aa74
Reviewed-on: https://go-review.googlesource.com/109716
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
This commit is contained in:
Josh Bleecher Snyder 2018-04-01 11:01:36 -07:00
parent 13cd006139
commit 5af0b28a73
7 changed files with 76 additions and 47 deletions

View File

@ -2980,6 +2980,8 @@ func init() {
alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas", p4...) alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas", p4...)
alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas64", p8...) alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas64", p8...)
alias("runtime/internal/sys", "Ctz8", "math/bits", "TrailingZeros8", all...)
/******** math ********/ /******** math ********/
addF("math", "Sqrt", addF("math", "Sqrt",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {

View File

@ -233,9 +233,8 @@ type childInfo struct {
// dump kinds & offsets of interesting fields in bv // dump kinds & offsets of interesting fields in bv
func dumpbv(cbv *bitvector, offset uintptr) { func dumpbv(cbv *bitvector, offset uintptr) {
bv := gobv(*cbv) for i := uintptr(0); i < uintptr(cbv.n); i++ {
for i := uintptr(0); i < bv.n; i++ { if cbv.ptrbit(i) == 1 {
if bv.bytedata[i/8]>>(i%8)&1 == 1 {
dumpint(fieldKindPtr) dumpint(fieldKindPtr)
dumpint(uint64(offset + i*sys.PtrSize)) dumpint(uint64(offset + i*sys.PtrSize))
} }

View File

@ -50,6 +50,30 @@ func Ctz32(x uint32) int {
return i + z return i + z
} }
// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
func Ctz8(x uint8) int {
return int(ntz8tab[x])
}
var ntz8tab = [256]uint8{
0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
}
// Bswap64 returns its input with byte order reversed // Bswap64 returns its input with byte order reversed
// 0x0102030405060708 -> 0x0807060504030201 // 0x0102030405060708 -> 0x0807060504030201
func Bswap64(x uint64) uint64 { func Bswap64(x uint64) uint64 {

View File

@ -34,6 +34,14 @@ TEXT runtimeinternalsys·Ctz32(SB), NOSPLIT, $0-8
MOVL AX, ret+4(FP) MOVL AX, ret+4(FP)
RET RET
TEXT runtimeinternalsys·Ctz8(SB), NOSPLIT, $0-8
MOVBLZX x+0(FP), AX
BSFL AX, AX
JNZ 2(PC)
MOVL $8, AX
MOVL AX, ret+4(FP)
RET
TEXT runtimeinternalsys·Bswap64(SB), NOSPLIT, $0-16 TEXT runtimeinternalsys·Bswap64(SB), NOSPLIT, $0-16
MOVL x_lo+0(FP), AX MOVL x_lo+0(FP), AX
MOVL x_hi+4(FP), BX MOVL x_hi+4(FP), BX

View File

@ -8,5 +8,6 @@ package sys
func Ctz64(x uint64) int func Ctz64(x uint64) int
func Ctz32(x uint32) int func Ctz32(x uint32) int
func Ctz8(x uint8) int
func Bswap64(x uint64) uint64 func Bswap64(x uint64) uint64
func Bswap32(x uint32) uint32 func Bswap32(x uint32) uint32

View File

@ -2021,9 +2021,8 @@ func getgcmask(ep interface{}) (mask []byte) {
n := (*ptrtype)(unsafe.Pointer(t)).elem.size n := (*ptrtype)(unsafe.Pointer(t)).elem.size
mask = make([]byte, n/sys.PtrSize) mask = make([]byte, n/sys.PtrSize)
for i := uintptr(0); i < n; i += sys.PtrSize { for i := uintptr(0); i < n; i += sys.PtrSize {
bitmap := bv.bytedata
off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1 mask[i/sys.PtrSize] = bv.ptrbit(off)
} }
} }
return return

View File

@ -544,64 +544,60 @@ type bitvector struct {
bytedata *uint8 bytedata *uint8
} }
type gobitvector struct { // ptrbit returns the i'th bit in bv.
n uintptr // ptrbit is less efficient than iterating directly over bitvector bits,
bytedata []uint8 // and should only be used in non-performance-critical code.
} // See adjustpointers for an example of a high-efficiency walk of a bitvector.
func (bv *bitvector) ptrbit(i uintptr) uint8 {
func gobv(bv bitvector) gobitvector { b := *(addb(bv.bytedata, i/8))
return gobitvector{ return (b >> (i % 8)) & 1
uintptr(bv.n),
(*[1 << 30]byte)(unsafe.Pointer(bv.bytedata))[:(bv.n+7)/8],
}
}
func ptrbit(bv *gobitvector, i uintptr) uint8 {
return (bv.bytedata[i/8] >> (i % 8)) & 1
} }
// bv describes the memory starting at address scanp. // bv describes the memory starting at address scanp.
// Adjust any pointers contained therein. // Adjust any pointers contained therein.
func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f funcInfo) { func adjustpointers(scanp unsafe.Pointer, bv *bitvector, adjinfo *adjustinfo, f funcInfo) {
bv := gobv(*cbv)
minp := adjinfo.old.lo minp := adjinfo.old.lo
maxp := adjinfo.old.hi maxp := adjinfo.old.hi
delta := adjinfo.delta delta := adjinfo.delta
num := bv.n num := uintptr(bv.n)
// If this frame might contain channel receive slots, use CAS // If this frame might contain channel receive slots, use CAS
// to adjust pointers. If the slot hasn't been received into // to adjust pointers. If the slot hasn't been received into
// yet, it may contain stack pointers and a concurrent send // yet, it may contain stack pointers and a concurrent send
// could race with adjusting those pointers. (The sent value // could race with adjusting those pointers. (The sent value
// itself can never contain stack pointers.) // itself can never contain stack pointers.)
useCAS := uintptr(scanp) < adjinfo.sghi useCAS := uintptr(scanp) < adjinfo.sghi
for i := uintptr(0); i < num; i++ { for i := uintptr(0); i < num; i += 8 {
if stackDebug >= 4 { if stackDebug >= 4 {
print(" ", add(scanp, i*sys.PtrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*sys.PtrSize))), " # ", i, " ", bv.bytedata[i/8], "\n") for j := uintptr(0); j < 8; j++ {
} print(" ", add(scanp, (i+j)*sys.PtrSize), ":", ptrnames[bv.ptrbit(i+j)], ":", hex(*(*uintptr)(add(scanp, (i+j)*sys.PtrSize))), " # ", i, " ", *addb(bv.bytedata, i/8), "\n")
if ptrbit(&bv, i) != 1 {
continue
}
pp := (*uintptr)(add(scanp, i*sys.PtrSize))
retry:
p := *pp
if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
// Looks like a junk value in a pointer slot.
// Live analysis wrong?
getg().m.traceback = 2
print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
throw("invalid pointer found on stack")
}
if minp <= p && p < maxp {
if stackDebug >= 3 {
print("adjust ptr ", hex(p), " ", funcname(f), "\n")
} }
if useCAS { }
ppu := (*unsafe.Pointer)(unsafe.Pointer(pp)) b := *(addb(bv.bytedata, i/8))
if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) { for b != 0 {
goto retry j := uintptr(sys.Ctz8(b))
b &= b - 1
pp := (*uintptr)(add(scanp, (i+j)*sys.PtrSize))
retry:
p := *pp
if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
// Looks like a junk value in a pointer slot.
// Live analysis wrong?
getg().m.traceback = 2
print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
throw("invalid pointer found on stack")
}
if minp <= p && p < maxp {
if stackDebug >= 3 {
print("adjust ptr ", hex(p), " ", funcname(f), "\n")
}
if useCAS {
ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
goto retry
}
} else {
*pp = p + delta
} }
} else {
*pp = p + delta
} }
} }
} }