mirror of
https://github.com/golang/go
synced 2024-11-26 20:41:24 -07:00
runtime: iterate over set bits in adjustpointers
There are several things combined in this change. First, eliminate the gobitvector type in favor of adding a ptrbit method to bitvector. In non-performance-critical code, use that method. In performance critical code, though, load the bitvector data one byte at a time and iterate only over set bits. To support that, add and use sys.Ctz8. name old time/op new time/op delta StackCopyPtr-8 81.8ms ± 5% 78.9ms ± 3% -3.58% (p=0.000 n=97+96) StackCopy-8 65.9ms ± 3% 62.8ms ± 3% -4.67% (p=0.000 n=96+92) StackCopyNoCache-8 105ms ± 3% 102ms ± 3% -3.38% (p=0.000 n=96+95) Change-Id: I00b80f45612708bd440b1a411a57fa6dfa24aa74 Reviewed-on: https://go-review.googlesource.com/109716 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
This commit is contained in:
parent
13cd006139
commit
5af0b28a73
@ -2980,6 +2980,8 @@ func init() {
|
||||
alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas", p4...)
|
||||
alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas64", p8...)
|
||||
|
||||
alias("runtime/internal/sys", "Ctz8", "math/bits", "TrailingZeros8", all...)
|
||||
|
||||
/******** math ********/
|
||||
addF("math", "Sqrt",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
|
@ -233,9 +233,8 @@ type childInfo struct {
|
||||
|
||||
// dump kinds & offsets of interesting fields in bv
|
||||
func dumpbv(cbv *bitvector, offset uintptr) {
|
||||
bv := gobv(*cbv)
|
||||
for i := uintptr(0); i < bv.n; i++ {
|
||||
if bv.bytedata[i/8]>>(i%8)&1 == 1 {
|
||||
for i := uintptr(0); i < uintptr(cbv.n); i++ {
|
||||
if cbv.ptrbit(i) == 1 {
|
||||
dumpint(fieldKindPtr)
|
||||
dumpint(uint64(offset + i*sys.PtrSize))
|
||||
}
|
||||
|
@ -50,6 +50,30 @@ func Ctz32(x uint32) int {
|
||||
return i + z
|
||||
}
|
||||
|
||||
// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
|
||||
func Ctz8(x uint8) int {
|
||||
return int(ntz8tab[x])
|
||||
}
|
||||
|
||||
var ntz8tab = [256]uint8{
|
||||
0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
|
||||
}
|
||||
|
||||
// Bswap64 returns its input with byte order reversed
|
||||
// 0x0102030405060708 -> 0x0807060504030201
|
||||
func Bswap64(x uint64) uint64 {
|
||||
|
@ -34,6 +34,14 @@ TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8
|
||||
MOVL AX, ret+4(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime∕internal∕sys·Ctz8(SB), NOSPLIT, $0-8
|
||||
MOVBLZX x+0(FP), AX
|
||||
BSFL AX, AX
|
||||
JNZ 2(PC)
|
||||
MOVL $8, AX
|
||||
MOVL AX, ret+4(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime∕internal∕sys·Bswap64(SB), NOSPLIT, $0-16
|
||||
MOVL x_lo+0(FP), AX
|
||||
MOVL x_hi+4(FP), BX
|
||||
|
@ -8,5 +8,6 @@ package sys
|
||||
|
||||
func Ctz64(x uint64) int
|
||||
func Ctz32(x uint32) int
|
||||
func Ctz8(x uint8) int
|
||||
func Bswap64(x uint64) uint64
|
||||
func Bswap32(x uint32) uint32
|
||||
|
@ -2021,9 +2021,8 @@ func getgcmask(ep interface{}) (mask []byte) {
|
||||
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
|
||||
mask = make([]byte, n/sys.PtrSize)
|
||||
for i := uintptr(0); i < n; i += sys.PtrSize {
|
||||
bitmap := bv.bytedata
|
||||
off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
|
||||
mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
|
||||
mask[i/sys.PtrSize] = bv.ptrbit(off)
|
||||
}
|
||||
}
|
||||
return
|
||||
|
@ -544,64 +544,60 @@ type bitvector struct {
|
||||
bytedata *uint8
|
||||
}
|
||||
|
||||
type gobitvector struct {
|
||||
n uintptr
|
||||
bytedata []uint8
|
||||
}
|
||||
|
||||
func gobv(bv bitvector) gobitvector {
|
||||
return gobitvector{
|
||||
uintptr(bv.n),
|
||||
(*[1 << 30]byte)(unsafe.Pointer(bv.bytedata))[:(bv.n+7)/8],
|
||||
}
|
||||
}
|
||||
|
||||
func ptrbit(bv *gobitvector, i uintptr) uint8 {
|
||||
return (bv.bytedata[i/8] >> (i % 8)) & 1
|
||||
// ptrbit returns the i'th bit in bv.
|
||||
// ptrbit is less efficient than iterating directly over bitvector bits,
|
||||
// and should only be used in non-performance-critical code.
|
||||
// See adjustpointers for an example of a high-efficiency walk of a bitvector.
|
||||
func (bv *bitvector) ptrbit(i uintptr) uint8 {
|
||||
b := *(addb(bv.bytedata, i/8))
|
||||
return (b >> (i % 8)) & 1
|
||||
}
|
||||
|
||||
// bv describes the memory starting at address scanp.
|
||||
// Adjust any pointers contained therein.
|
||||
func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f funcInfo) {
|
||||
bv := gobv(*cbv)
|
||||
func adjustpointers(scanp unsafe.Pointer, bv *bitvector, adjinfo *adjustinfo, f funcInfo) {
|
||||
minp := adjinfo.old.lo
|
||||
maxp := adjinfo.old.hi
|
||||
delta := adjinfo.delta
|
||||
num := bv.n
|
||||
num := uintptr(bv.n)
|
||||
// If this frame might contain channel receive slots, use CAS
|
||||
// to adjust pointers. If the slot hasn't been received into
|
||||
// yet, it may contain stack pointers and a concurrent send
|
||||
// could race with adjusting those pointers. (The sent value
|
||||
// itself can never contain stack pointers.)
|
||||
useCAS := uintptr(scanp) < adjinfo.sghi
|
||||
for i := uintptr(0); i < num; i++ {
|
||||
for i := uintptr(0); i < num; i += 8 {
|
||||
if stackDebug >= 4 {
|
||||
print(" ", add(scanp, i*sys.PtrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*sys.PtrSize))), " # ", i, " ", bv.bytedata[i/8], "\n")
|
||||
}
|
||||
if ptrbit(&bv, i) != 1 {
|
||||
continue
|
||||
}
|
||||
pp := (*uintptr)(add(scanp, i*sys.PtrSize))
|
||||
retry:
|
||||
p := *pp
|
||||
if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
|
||||
// Looks like a junk value in a pointer slot.
|
||||
// Live analysis wrong?
|
||||
getg().m.traceback = 2
|
||||
print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
|
||||
throw("invalid pointer found on stack")
|
||||
}
|
||||
if minp <= p && p < maxp {
|
||||
if stackDebug >= 3 {
|
||||
print("adjust ptr ", hex(p), " ", funcname(f), "\n")
|
||||
for j := uintptr(0); j < 8; j++ {
|
||||
print(" ", add(scanp, (i+j)*sys.PtrSize), ":", ptrnames[bv.ptrbit(i+j)], ":", hex(*(*uintptr)(add(scanp, (i+j)*sys.PtrSize))), " # ", i, " ", *addb(bv.bytedata, i/8), "\n")
|
||||
}
|
||||
if useCAS {
|
||||
ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
|
||||
if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
|
||||
goto retry
|
||||
}
|
||||
b := *(addb(bv.bytedata, i/8))
|
||||
for b != 0 {
|
||||
j := uintptr(sys.Ctz8(b))
|
||||
b &= b - 1
|
||||
pp := (*uintptr)(add(scanp, (i+j)*sys.PtrSize))
|
||||
retry:
|
||||
p := *pp
|
||||
if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
|
||||
// Looks like a junk value in a pointer slot.
|
||||
// Live analysis wrong?
|
||||
getg().m.traceback = 2
|
||||
print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
|
||||
throw("invalid pointer found on stack")
|
||||
}
|
||||
if minp <= p && p < maxp {
|
||||
if stackDebug >= 3 {
|
||||
print("adjust ptr ", hex(p), " ", funcname(f), "\n")
|
||||
}
|
||||
if useCAS {
|
||||
ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
|
||||
if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
|
||||
goto retry
|
||||
}
|
||||
} else {
|
||||
*pp = p + delta
|
||||
}
|
||||
} else {
|
||||
*pp = p + delta
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user