Mirror of https://github.com/golang/go (synced 2024-11-12 09:50:21 -07:00)
runtime: iterate over set bits in adjustpointers
There are several things combined in this change.

First, eliminate the gobitvector type in favor of adding a ptrbit method to bitvector. In non-performance-critical code, use that method.

In performance-critical code, though, load the bitvector data one byte at a time and iterate only over set bits. To support that, add and use sys.Ctz8.

name                old time/op  new time/op  delta
StackCopyPtr-8      81.8ms ± 5%  78.9ms ± 3%  -3.58%  (p=0.000 n=97+96)
StackCopy-8         65.9ms ± 3%  62.8ms ± 3%  -4.67%  (p=0.000 n=96+92)
StackCopyNoCache-8   105ms ± 3%   102ms ± 3%  -3.38%  (p=0.000 n=96+95)

Change-Id: I00b80f45612708bd440b1a411a57fa6dfa24aa74
Reviewed-on: https://go-review.googlesource.com/109716
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent 13cd006139
commit 5af0b28a73
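The core idea of the change can be sketched outside the runtime. The program below is an illustrative, standalone sketch and not code from this commit: it walks a packed pointer bitmap one byte at a time and visits only the set bits, using math/bits.TrailingZeros8 in place of the runtime-internal sys.Ctz8 (which the compiler hunk below aliases to the same intrinsic). The helper name walkSetBits and the sample bitmap are made up for this sketch.

package main

import (
	"fmt"
	"math/bits"
)

// walkSetBits calls visit(i) for every set bit i in the n-bit bitmap bm,
// loading one byte at a time and skipping straight to each set bit.
func walkSetBits(bm []byte, n int, visit func(i int)) {
	for i := 0; i < n; i += 8 {
		b := bm[i/8]
		for b != 0 {
			j := bits.TrailingZeros8(b) // index of the lowest set bit
			b &= b - 1                  // clear that bit
			if i+j < n {                // guard against stray bits in the last byte
				visit(i + j)
			}
		}
	}
}

func main() {
	// Bitmap 0x25 (binary 00100101) over 8 slots: bits 0, 2, and 5 are set.
	walkSetBits([]byte{0x25}, 8, func(i int) {
		fmt.Println("pointer slot", i)
	})
}

The b &= b - 1 step clears the lowest set bit, so the inner loop runs once per set bit rather than once per slot; that per-set-bit iteration is what the adjustpointers rewrite at the end of this diff does, and what the StackCopy benchmarks above measure.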
@@ -2980,6 +2980,8 @@ func init() {
 	alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas", p4...)
 	alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas64", p8...)
 
+	alias("runtime/internal/sys", "Ctz8", "math/bits", "TrailingZeros8", all...)
+
 	/******** math ********/
 	addF("math", "Sqrt",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
@@ -233,9 +233,8 @@ type childInfo struct {
 
 // dump kinds & offsets of interesting fields in bv
 func dumpbv(cbv *bitvector, offset uintptr) {
-	bv := gobv(*cbv)
-	for i := uintptr(0); i < bv.n; i++ {
-		if bv.bytedata[i/8]>>(i%8)&1 == 1 {
+	for i := uintptr(0); i < uintptr(cbv.n); i++ {
+		if cbv.ptrbit(i) == 1 {
 			dumpint(fieldKindPtr)
 			dumpint(uint64(offset + i*sys.PtrSize))
 		}
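For readers outside the runtime: cbv.ptrbit(i), used above, simply extracts bit i of a packed bitmap; its definition appears in the stack.go hunk near the end of this diff. The snippet below is only an illustration with a hypothetical slice-based signature, rather than the runtime's *uint8 plus addb pointer arithmetic.

package main

import "fmt"

// ptrbit mirrors the semantics of bitvector.ptrbit: bit i of a packed bitmap,
// read one byte at a time.
func ptrbit(bytedata []byte, i uint) uint8 {
	return (bytedata[i/8] >> (i % 8)) & 1
}

func main() {
	bm := []byte{0x25} // binary 00100101: bits 0, 2, and 5 are set
	for i := uint(0); i < 8; i++ {
		fmt.Printf("bit %d = %d\n", i, ptrbit(bm, i))
	}
}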
@@ -50,6 +50,30 @@ func Ctz32(x uint32) int {
 	return i + z
 }
 
+// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
+func Ctz8(x uint8) int {
+	return int(ntz8tab[x])
+}
+
+var ntz8tab = [256]uint8{
+	0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+}
+
 // Bswap64 returns its input with byte order reversed
 // 0x0102030405060708 -> 0x0807060504030201
 func Bswap64(x uint64) uint64 {
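The 256-entry ntz8tab added above is a precomputed trailing-zero-count table; its first entry is 0x08 because a zero byte has no set bits. As an aside (not part of the commit), the table contents can be regenerated with math/bits.TrailingZeros8, the same operation the compiler hunk earlier aliases sys.Ctz8 to:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// Print 16 entries per row in the same 0xNN style as the table above.
	for x := 0; x < 256; x++ {
		fmt.Printf("0x%02x, ", bits.TrailingZeros8(uint8(x)))
		if x%16 == 15 {
			fmt.Println()
		}
	}
}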
@@ -34,6 +34,14 @@ TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8
 	MOVL	AX, ret+4(FP)
 	RET
 
+TEXT runtime∕internal∕sys·Ctz8(SB), NOSPLIT, $0-8
+	MOVBLZX	x+0(FP), AX
+	BSFL	AX, AX
+	JNZ	2(PC)
+	MOVL	$8, AX
+	MOVL	AX, ret+4(FP)
+	RET
+
 TEXT runtime∕internal∕sys·Bswap64(SB), NOSPLIT, $0-16
 	MOVL	x_lo+0(FP), AX
 	MOVL	x_hi+4(FP), BX
@@ -8,5 +8,6 @@ package sys
 
 func Ctz64(x uint64) int
 func Ctz32(x uint32) int
+func Ctz8(x uint8) int
 func Bswap64(x uint64) uint64
 func Bswap32(x uint32) uint32
@@ -2021,9 +2021,8 @@ func getgcmask(ep interface{}) (mask []byte) {
 		n := (*ptrtype)(unsafe.Pointer(t)).elem.size
 		mask = make([]byte, n/sys.PtrSize)
 		for i := uintptr(0); i < n; i += sys.PtrSize {
-			bitmap := bv.bytedata
 			off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
-			mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
+			mask[i/sys.PtrSize] = bv.ptrbit(off)
 		}
 	}
 	return
@@ -544,64 +544,60 @@ type bitvector struct {
 	bytedata *uint8
 }
 
-type gobitvector struct {
-	n        uintptr
-	bytedata []uint8
-}
-
-func gobv(bv bitvector) gobitvector {
-	return gobitvector{
-		uintptr(bv.n),
-		(*[1 << 30]byte)(unsafe.Pointer(bv.bytedata))[:(bv.n+7)/8],
-	}
-}
-
-func ptrbit(bv *gobitvector, i uintptr) uint8 {
-	return (bv.bytedata[i/8] >> (i % 8)) & 1
+// ptrbit returns the i'th bit in bv.
+// ptrbit is less efficient than iterating directly over bitvector bits,
+// and should only be used in non-performance-critical code.
+// See adjustpointers for an example of a high-efficiency walk of a bitvector.
+func (bv *bitvector) ptrbit(i uintptr) uint8 {
+	b := *(addb(bv.bytedata, i/8))
+	return (b >> (i % 8)) & 1
 }
 
 // bv describes the memory starting at address scanp.
 // Adjust any pointers contained therein.
-func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f funcInfo) {
-	bv := gobv(*cbv)
+func adjustpointers(scanp unsafe.Pointer, bv *bitvector, adjinfo *adjustinfo, f funcInfo) {
 	minp := adjinfo.old.lo
 	maxp := adjinfo.old.hi
 	delta := adjinfo.delta
-	num := bv.n
+	num := uintptr(bv.n)
 	// If this frame might contain channel receive slots, use CAS
 	// to adjust pointers. If the slot hasn't been received into
 	// yet, it may contain stack pointers and a concurrent send
 	// could race with adjusting those pointers. (The sent value
 	// itself can never contain stack pointers.)
 	useCAS := uintptr(scanp) < adjinfo.sghi
-	for i := uintptr(0); i < num; i++ {
+	for i := uintptr(0); i < num; i += 8 {
 		if stackDebug >= 4 {
-			print(" ", add(scanp, i*sys.PtrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*sys.PtrSize))), " # ", i, " ", bv.bytedata[i/8], "\n")
-		}
-		if ptrbit(&bv, i) != 1 {
-			continue
-		}
-		pp := (*uintptr)(add(scanp, i*sys.PtrSize))
-	retry:
-		p := *pp
-		if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
-			// Looks like a junk value in a pointer slot.
-			// Live analysis wrong?
-			getg().m.traceback = 2
-			print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
-			throw("invalid pointer found on stack")
-		}
-		if minp <= p && p < maxp {
-			if stackDebug >= 3 {
-				print("adjust ptr ", hex(p), " ", funcname(f), "\n")
+			for j := uintptr(0); j < 8; j++ {
+				print(" ", add(scanp, (i+j)*sys.PtrSize), ":", ptrnames[bv.ptrbit(i+j)], ":", hex(*(*uintptr)(add(scanp, (i+j)*sys.PtrSize))), " # ", i, " ", *addb(bv.bytedata, i/8), "\n")
 			}
-			if useCAS {
-				ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
-				if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
-					goto retry
+		}
+		b := *(addb(bv.bytedata, i/8))
+		for b != 0 {
+			j := uintptr(sys.Ctz8(b))
+			b &= b - 1
+			pp := (*uintptr)(add(scanp, (i+j)*sys.PtrSize))
+		retry:
+			p := *pp
+			if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
+				// Looks like a junk value in a pointer slot.
+				// Live analysis wrong?
+				getg().m.traceback = 2
+				print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
+				throw("invalid pointer found on stack")
+			}
+			if minp <= p && p < maxp {
+				if stackDebug >= 3 {
+					print("adjust ptr ", hex(p), " ", funcname(f), "\n")
+				}
+				if useCAS {
+					ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
+					if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
+						goto retry
+					}
+				} else {
+					*pp = p + delta
 				}
-			} else {
-				*pp = p + delta
 			}
 		}
 	}