mirror of
https://github.com/golang/go
synced 2024-11-19 23:34:40 -07:00
2ac8bdc52a
The bitmap allocation data structure prototypes. Before this is released these underlying data structures need to be more performant but the signatures of helper functions utilizing these structures will remain stable. Change-Id: I5ace12f2fb512a7038a52bbde2bfb7e98783bcbe Reviewed-on: https://go-review.googlesource.com/19221 Reviewed-by: Austin Clements <austin@google.com> Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
263 lines
8.2 KiB
Go
263 lines
8.2 KiB
Go
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Malloc small size classes.
|
|
//
|
|
// See malloc.go for overview.
|
|
//
|
|
// The size classes are chosen so that rounding an allocation
|
|
// request up to the next size class wastes at most 12.5% (1.125x).
|
|
//
|
|
// Each size class has its own page count that gets allocated
|
|
// and chopped up when new objects of the size class are needed.
|
|
// That page count is chosen so that chopping up the run of
|
|
// pages into objects of the given size wastes at most 12.5% (1.125x)
|
|
// of the memory. It is not necessary that the cutoff here be
|
|
// the same as above.
|
|
//
|
|
// The two sources of waste multiply, so the worst possible case
|
|
// for the above constraints would be that allocations of some
|
|
// size might have a 26.6% (1.266x) overhead.
|
|
// In practice, only one of the wastes comes into play for a
|
|
// given size (sizes < 512 waste mainly on the round-up,
|
|
// sizes > 512 waste mainly on the page chopping).
|
|
//
|
|
// TODO(rsc): Compute max waste for any given size.
|
|
|
|
package runtime
|
|
|
|
// Size classes. Computed and initialized by initSizes.
|
|
//
|
|
// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
|
|
// 1 <= sizeclass < NumSizeClasses, for n.
|
|
// Size class 0 is reserved to mean "not small".
|
|
//
|
|
// class_to_size[i] = largest size in class i
|
|
// class_to_allocnpages[i] = number of pages to allocate when
|
|
// making new objects in class i
|
|
|
|
// The SizeToClass lookup is implemented using two arrays,
|
|
// one mapping sizes <= 1024 to their class and one mapping
|
|
// sizes >= 1024 and <= MaxSmallSize to their class.
|
|
// All objects are 8-aligned, so the first array is indexed by
|
|
// the size divided by 8 (rounded up). Objects >= 1024 bytes
|
|
// are 128-aligned, so the second array is indexed by the
|
|
// size divided by 128 (rounded up). The arrays are filled in
|
|
// by initSizes.
|
|
|
|
var class_to_size [_NumSizeClasses]int32
|
|
var class_to_allocnpages [_NumSizeClasses]int32
|
|
var class_to_divmagic [_NumSizeClasses]divMagic
|
|
|
|
var size_to_class8 [1024/8 + 1]int8
|
|
var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
|
|
|
|
func sizeToClass(size int32) int32 {
|
|
if size > _MaxSmallSize {
|
|
throw("invalid size")
|
|
}
|
|
if size > 1024-8 {
|
|
return int32(size_to_class128[(size-1024+127)>>7])
|
|
}
|
|
return int32(size_to_class8[(size+7)>>3])
|
|
}
|
|
|
|
func initSizes() {
|
|
// Initialize the runtime·class_to_size table (and choose class sizes in the process).
|
|
class_to_size[0] = 0
|
|
sizeclass := 1 // 0 means no class
|
|
align := 8
|
|
for size := align; size <= _MaxSmallSize; size += align {
|
|
if size&(size-1) == 0 { // bump alignment once in a while
|
|
if size >= 2048 {
|
|
align = 256
|
|
} else if size >= 128 {
|
|
align = size / 8
|
|
} else if size >= 16 {
|
|
align = 16 // required for x86 SSE instructions, if we want to use them
|
|
}
|
|
}
|
|
if align&(align-1) != 0 {
|
|
throw("incorrect alignment")
|
|
}
|
|
|
|
// Make the allocnpages big enough that
|
|
// the leftover is less than 1/8 of the total,
|
|
// so wasted space is at most 12.5%.
|
|
allocsize := _PageSize
|
|
for allocsize%size > allocsize/8 {
|
|
allocsize += _PageSize
|
|
}
|
|
npages := allocsize >> _PageShift
|
|
|
|
// If the previous sizeclass chose the same
|
|
// allocation size and fit the same number of
|
|
// objects into the page, we might as well
|
|
// use just this size instead of having two
|
|
// different sizes.
|
|
if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
|
|
class_to_size[sizeclass-1] = int32(size)
|
|
continue
|
|
}
|
|
|
|
class_to_allocnpages[sizeclass] = int32(npages)
|
|
class_to_size[sizeclass] = int32(size)
|
|
sizeclass++
|
|
}
|
|
if sizeclass != _NumSizeClasses {
|
|
print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
|
|
throw("bad NumSizeClasses")
|
|
}
|
|
// Check maxObjsPerSpan => number of objects invariant.
|
|
for i, size := range class_to_size {
|
|
if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
|
|
throw("span contains too many objects")
|
|
}
|
|
if size == 0 && i != 0 {
|
|
throw("size is 0 but class is not 0")
|
|
}
|
|
}
|
|
// Initialize the size_to_class tables.
|
|
nextsize := 0
|
|
for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
|
|
for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
|
|
size_to_class8[nextsize/8] = int8(sizeclass)
|
|
}
|
|
if nextsize >= 1024 {
|
|
for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
|
|
size_to_class128[(nextsize-1024)/128] = int8(sizeclass)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Double-check SizeToClass.
|
|
if false {
|
|
for n := int32(0); n < _MaxSmallSize; n++ {
|
|
sizeclass := sizeToClass(n)
|
|
if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
|
|
print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
|
|
print("incorrect SizeToClass\n")
|
|
goto dump
|
|
}
|
|
if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
|
|
print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
|
|
print("SizeToClass too big\n")
|
|
goto dump
|
|
}
|
|
}
|
|
}
|
|
|
|
testdefersizes()
|
|
|
|
// Copy out for statistics table.
|
|
for i := 0; i < len(class_to_size); i++ {
|
|
memstats.by_size[i].size = uint32(class_to_size[i])
|
|
}
|
|
|
|
for i := 1; i < len(class_to_size); i++ {
|
|
class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i]))
|
|
}
|
|
|
|
return
|
|
|
|
dump:
|
|
if true {
|
|
print("runtime: NumSizeClasses=", _NumSizeClasses, "\n")
|
|
print("runtime·class_to_size:")
|
|
for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
|
|
print(" ", class_to_size[sizeclass], "")
|
|
}
|
|
print("\n\n")
|
|
print("runtime: size_to_class8:")
|
|
for i := 0; i < len(size_to_class8); i++ {
|
|
print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
|
|
}
|
|
print("\n")
|
|
print("runtime: size_to_class128:")
|
|
for i := 0; i < len(size_to_class128); i++ {
|
|
print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
|
|
}
|
|
print("\n")
|
|
}
|
|
throw("InitSizes failed")
|
|
}
|
|
|
|
// Returns size of the memory block that mallocgc will allocate if you ask for the size.
|
|
func roundupsize(size uintptr) uintptr {
|
|
if size < _MaxSmallSize {
|
|
if size <= 1024-8 {
|
|
return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
|
|
} else {
|
|
return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
|
|
}
|
|
}
|
|
if size+_PageSize < size {
|
|
return size
|
|
}
|
|
return round(size, _PageSize)
|
|
}
|
|
|
|
// divMagic holds magic constants to implement division
// by a particular constant as a shift, multiply, and shift.
// That is, given
//	m = computeDivMagic(d)
// then
//	n/d == ((n>>m.shift) * m.mul) >> m.shift2
//
// The magic computation picks m such that
//	d = d₁*d₂
//	d₂= 2^m.shift
//	m.mul = ⌈2^m.shift2 / d₁⌉
//
// The magic computation here is tailored for malloc block sizes
// and does not handle arbitrary d correctly. Malloc block sizes d are
// always even, so the first shift implements the factors of 2 in d
// and then the mul and second shift implement the odd factor
// that remains. Because the first shift divides n by at least 2 (actually 8)
// before the multiply gets involved, the huge corner cases that
// require additional adjustment are impossible, so the usual
// fixup is not needed.
//
// For more details see Hacker's Delight, Chapter 10, and
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
type divMagic struct {
	shift    uint8   // pre-shift: number of factors of 2 in d
	mul      uint32  // ⌈2^shift2 / d₁⌉, where d₁ is d with its factors of 2 removed
	shift2   uint8   // post-shift applied after the multiply
	baseMask uintptr // ^(d-1) if d is a power of 2, otherwise 0
}

// computeDivMagic returns the divMagic constants for the divisor d.
//
// d == 0 has no meaningful quotient; the zero divMagic is returned for it
// so that the factoring loop below cannot spin forever.
func computeDivMagic(d uint32) divMagic {
	var m divMagic
	if d == 0 {
		return m
	}

	// If the size is a power of two, heapBitsForObject can divide even faster by masking.
	// Compute this mask. For any other d, baseMask keeps its zero value.
	if d&(d-1) == 0 {
		m.baseMask = ^(uintptr(d) - 1)
	}

	// Compute pre-shift by factoring power of 2 out of d.
	// Terminates because d != 0, so some bit of d is set.
	for d&1 == 0 {
		m.shift++
		d >>= 1
	}

	// Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int.
	// This is always a good enough approximation.
	// We could use smaller k for some divisors but there's no point.
	k := uint8(63)
	d64 := uint64(d)
	for ((1<<k)+d64-1)/d64 >= 1<<32 {
		k--
	}
	m.mul = uint32(((1 << k) + d64 - 1) / d64) // ⌈2^k / d⌉
	m.shift2 = k

	return m
}
|