mirror of
https://github.com/golang/go
synced 2024-11-12 06:30:21 -07:00
[dev.garbage] runtime: logic that uses count trailing zero (ctz)
Most (all?) processors that Go supports supply a hardware instruction that takes a byte and returns the number of zeros trailing the first 1 encountered, or 8 if no ones are found. This is the index within the byte of the first 1 encountered. CTZ should improve the performance of the nextFreeIndex function. Since nextFreeIndex wants the next unmarked (0) bit, a bit-wise complement is needed before calling ctz. Furthermore, unmarked bits associated with previously allocated objects need to be ignored. Instead of writing a 1 as we allocate, the code masks all bits less than the freeindex after loading the byte. While this CL does not actually execute a CTZ instruction, it supplies a ctz function with the appropriate signature along with the logic to execute it. Change-Id: I5c55ce0ed48ca22c21c4dd9f969b0819b4eadaa7 Reviewed-on: https://go-review.googlesource.com/20169 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Austin Clements <austin@google.com>
This commit is contained in:
parent
e4ac2d4acc
commit
44fe90d0b3
@ -186,12 +186,22 @@ func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
|
|||||||
return markBits{&s.allocBits[whichByte], uint8(1 << whichBit), allocBitIndex}
|
return markBits{&s.allocBits[whichByte], uint8(1 << whichBit), allocBitIndex}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ctz is a temporary software stand-in for the count-trailing-zeros (CTZ) instruction: it returns the index of the lowest 1 bit in markBits, or 8 if markBits has no 1 bits.
|
||||||
|
func ctz(markBits byte) uint8 {
|
||||||
|
tz := uint8(0) // trailing zero count; also the bit index currently being tested.
|
||||||
|
if markBits == 0 {
|
||||||
|
return 8 // no 1 bit in the byte; also keeps the loop below from running past bit 7.
|
||||||
|
}
|
||||||
|
for mask := byte(1); mask&markBits == 0; mask, tz = mask<<1, tz+1 { // walk up from bit 0 until the first 1 bit; the body is intentionally empty.
|
||||||
|
}
|
||||||
|
return tz
|
||||||
|
}
|
||||||
|
|
||||||
// nextFreeIndex returns the index of the next free object in s at or
|
// nextFreeIndex returns the index of the next free object in s at or
|
||||||
// after the index'th object.
|
// after the index'th object.
|
||||||
// There are hardware instructions that can be used to make this
|
// There are hardware instructions that can be used to make this
|
||||||
// faster if profiling warrants it.
|
// faster if profiling warrants it.
|
||||||
func (s *mspan) nextFreeIndex(index uintptr) uintptr {
|
func (s *mspan) nextFreeIndex(index uintptr) uintptr {
|
||||||
var mask uint8
|
|
||||||
if index == s.nelems {
|
if index == s.nelems {
|
||||||
return index
|
return index
|
||||||
}
|
}
|
||||||
@ -200,47 +210,34 @@ func (s *mspan) nextFreeIndex(index uintptr) uintptr {
|
|||||||
}
|
}
|
||||||
whichByte := index / 8
|
whichByte := index / 8
|
||||||
theByte := s.allocBits[whichByte]
|
theByte := s.allocBits[whichByte]
|
||||||
// Optimize for the first byte holding a free object.
|
|
||||||
if theByte != 0xff {
|
theBitMask := uint8(1<<(index%8) - 1)
|
||||||
mask = 1 << (index % 8)
|
// theBitMask holds a 1 for every bit < index which have already been allocated.
|
||||||
for index < s.nelems {
|
// Flip the masked marked bits so 1 means a free bit.
|
||||||
if mask&theByte == 0 {
|
theByte = ^(theByte | theBitMask)
|
||||||
return index
|
tz := ctz(theByte)
|
||||||
}
|
if tz != 8 {
|
||||||
if mask == 1<<7 {
|
result := uintptr(tz) + whichByte*8
|
||||||
break
|
if result >= s.nelems {
|
||||||
}
|
|
||||||
mask = mask << 1
|
|
||||||
index++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
maxByteIndex := (s.nelems - 1) / 8
|
|
||||||
theByte = 0xff // Free bit not found in this byte above so set to 0xff.
|
|
||||||
// If there was a 0 bit before incoming index then the byte would not be 0xff.
|
|
||||||
for theByte == 0xff {
|
|
||||||
whichByte++
|
|
||||||
if whichByte > maxByteIndex {
|
|
||||||
return s.nelems
|
return s.nelems
|
||||||
}
|
}
|
||||||
if uintptr(len(s.allocBits)) <= whichByte {
|
return result
|
||||||
throw("whichByte > len(s.allocBits")
|
|
||||||
}
|
|
||||||
theByte = s.allocBits[whichByte]
|
|
||||||
}
|
}
|
||||||
index = whichByte * 8
|
whichByte++
|
||||||
mask = uint8(1)
|
index = (whichByte) * 8
|
||||||
|
for ; index < s.nelems; index += 8 {
|
||||||
for index < s.nelems {
|
theByte = ^s.allocBits[whichByte]
|
||||||
if mask&theByte == 0 {
|
tz = ctz(theByte)
|
||||||
return index
|
if tz != 8 {
|
||||||
|
result := uintptr(tz) + whichByte*8
|
||||||
|
if result >= s.nelems {
|
||||||
|
return s.nelems
|
||||||
|
}
|
||||||
|
return result
|
||||||
}
|
}
|
||||||
if mask == 1<<7 {
|
whichByte++
|
||||||
break
|
|
||||||
}
|
|
||||||
mask = mask << 1
|
|
||||||
index++
|
|
||||||
}
|
}
|
||||||
return index
|
return s.nelems
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *mspan) isFree(index uintptr) bool {
|
func (s *mspan) isFree(index uintptr) bool {
|
||||||
|
Loading…
Reference in New Issue
Block a user