From 44fe90d0b393c961e3fb1b4c37e93ce268da46bc Mon Sep 17 00:00:00 2001 From: Rick Hudson Date: Wed, 17 Feb 2016 11:27:52 -0500 Subject: [PATCH] [dev.garbage] runtime: logic that uses count trailing zero (ctz) Most (all?) processors that Go supports supply a hardware instruction that takes a byte and returns the number of zeros trailing the first 1 encountered, or 8 if no ones are found. This is the index within the byte of the first 1 encountered. CTZ should improve the performance of the nextFreeIndex function. Since nextFreeIndex wants the next unmarked (0) bit a bit-wise complement is needed before calling ctz. Furthermore unmarked bits associated with previously allocated objects need to be ignored. Instead of writing a 1 as we allocate the code masks all bits less than the freeindex after loading the byte. While this CL does not actual execute a CTZ instruction it supplies a ctz function with the appropiate signature along with the logic to execute it. Change-Id: I5c55ce0ed48ca22c21c4dd9f969b0819b4eadaa7 Reviewed-on: https://go-review.googlesource.com/20169 Reviewed-by: Keith Randall Reviewed-by: Austin Clements --- src/runtime/mbitmap.go | 71 ++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 10446fee42..f02558bed0 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -186,12 +186,22 @@ func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { return markBits{&s.allocBits[whichByte], uint8(1 << whichBit), allocBitIndex} } +// A temporary stand in for the count trailing zero ctz instruction. +func ctz(markBits byte) uint8 { + tz := uint8(0) // trailing zero count. + if markBits == 0 { + return 8 // 8 + } + for mask := byte(1); mask&markBits == 0; mask, tz = mask<<1, tz+1 { + } + return tz +} + // nextFreeIndex returns the index of the next free object in s at or // after the index'th object. // There are hardware instructions that can be used to make this // faster if profiling warrants it. func (s *mspan) nextFreeIndex(index uintptr) uintptr { - var mask uint8 if index == s.nelems { return index } @@ -200,47 +210,34 @@ func (s *mspan) nextFreeIndex(index uintptr) uintptr { } whichByte := index / 8 theByte := s.allocBits[whichByte] - // Optimize for the first byte holding a free object. - if theByte != 0xff { - mask = 1 << (index % 8) - for index < s.nelems { - if mask&theByte == 0 { - return index - } - if mask == 1<<7 { - break - } - mask = mask << 1 - index++ - } - } - maxByteIndex := (s.nelems - 1) / 8 - theByte = 0xff // Free bit not found in this byte above so set to 0xff. - // If there was a 0 bit before incoming index then the byte would not be 0xff. - for theByte == 0xff { - whichByte++ - if whichByte > maxByteIndex { + + theBitMask := uint8(1<<(index%8) - 1) + // theBitMask holds a 1 for every bit < index which have already been allocated. + // Flip the masked marked bits so 1 means a free bit. + theByte = ^(theByte | theBitMask) + tz := ctz(theByte) + if tz != 8 { + result := uintptr(tz) + whichByte*8 + if result >= s.nelems { return s.nelems } - if uintptr(len(s.allocBits)) <= whichByte { - throw("whichByte > len(s.allocBits") - } - theByte = s.allocBits[whichByte] + return result } - index = whichByte * 8 - mask = uint8(1) - - for index < s.nelems { - if mask&theByte == 0 { - return index + whichByte++ + index = (whichByte) * 8 + for ; index < s.nelems; index += 8 { + theByte = ^s.allocBits[whichByte] + tz = ctz(theByte) + if tz != 8 { + result := uintptr(tz) + whichByte*8 + if result >= s.nelems { + return s.nelems + } + return result } - if mask == 1<<7 { - break - } - mask = mask << 1 - index++ + whichByte++ } - return index + return s.nelems } func (s *mspan) isFree(index uintptr) bool {