2014-07-30 10:01:52 -06:00
|
|
|
// Copyright 2014 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
2015-02-19 11:38:46 -07:00
|
|
|
// Memory allocator, based on tcmalloc.
|
|
|
|
// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
|
|
|
|
|
|
|
|
// The main allocator works in runs of pages.
|
|
|
|
// Small allocation sizes (up to and including 32 kB) are
|
|
|
|
// rounded to one of about 100 size classes, each of which
|
|
|
|
// has its own free list of objects of exactly that size.
|
|
|
|
// Any free page of memory can be split into a set of objects
|
|
|
|
// of one size class, which are then managed using free list
|
|
|
|
// allocators.
|
|
|
|
//
|
|
|
|
// The allocator's data structures are:
|
|
|
|
//
|
|
|
|
// FixAlloc: a free-list allocator for fixed-size objects,
|
|
|
|
// used to manage storage used by the allocator.
|
|
|
|
// MHeap: the malloc heap, managed at page (8192-byte) granularity.
|
|
|
|
// MSpan: a run of pages managed by the MHeap.
|
|
|
|
// MCentral: a shared free list for a given size class.
|
|
|
|
// MCache: a per-thread (in Go, per-P) cache for small objects.
|
|
|
|
// MStats: allocation statistics.
|
|
|
|
//
|
|
|
|
// Allocating a small object proceeds up a hierarchy of caches:
|
|
|
|
//
|
|
|
|
// 1. Round the size up to one of the small size classes
|
|
|
|
// and look in the corresponding MCache free list.
|
|
|
|
// If the list is not empty, allocate an object from it.
|
|
|
|
// This can all be done without acquiring a lock.
|
|
|
|
//
|
|
|
|
// 2. If the MCache free list is empty, replenish it by
|
|
|
|
// taking a bunch of objects from the MCentral free list.
|
|
|
|
// Moving a bunch amortizes the cost of acquiring the MCentral lock.
|
|
|
|
//
|
|
|
|
// 3. If the MCentral free list is empty, replenish it by
|
|
|
|
// allocating a run of pages from the MHeap and then
|
|
|
|
// chopping that memory into objects of the given size.
|
|
|
|
// Allocating many objects amortizes the cost of locking
|
|
|
|
// the heap.
|
|
|
|
//
|
|
|
|
// 4. If the MHeap is empty or has no page runs large enough,
|
|
|
|
// allocate a new group of pages (at least 1MB) from the
|
|
|
|
// operating system. Allocating a large run of pages
|
|
|
|
// amortizes the cost of talking to the operating system.
|
|
|
|
//
|
|
|
|
// Freeing a small object proceeds up the same hierarchy:
|
|
|
|
//
|
|
|
|
// 1. Look up the size class for the object and add it to
|
|
|
|
// the MCache free list.
|
|
|
|
//
|
|
|
|
// 2. If the MCache free list is too long or the MCache has
|
|
|
|
// too much memory, return some to the MCentral free lists.
|
|
|
|
//
|
|
|
|
// 3. If all the objects in a given span have returned to
|
|
|
|
// the MCentral list, return that span to the page heap.
|
|
|
|
//
|
|
|
|
// 4. If the heap has too much memory, return some to the
|
|
|
|
// operating system.
|
|
|
|
//
|
|
|
|
// TODO(rsc): Step 4 is not implemented.
|
|
|
|
//
|
|
|
|
// Allocating and freeing a large object uses the page heap
|
|
|
|
// directly, bypassing the MCache and MCentral free lists.
|
|
|
|
//
|
|
|
|
// The small objects on the MCache and MCentral free lists
|
|
|
|
// may or may not be zeroed. They are zeroed if and only if
|
|
|
|
// the second word of the object is zero. A span in the
|
|
|
|
// page heap is zeroed unless s->needzero is set. When a span
|
|
|
|
// is allocated to break into small objects, it is zeroed if needed
|
|
|
|
// and s->needzero is set. There are two main benefits to delaying the
|
|
|
|
// zeroing this way:
|
|
|
|
//
|
|
|
|
// 1. stack frames allocated from the small object lists
|
|
|
|
// or the page heap can avoid zeroing altogether.
|
|
|
|
// 2. the cost of zeroing when reusing a small object is
|
|
|
|
// charged to the mutator, not the garbage collector.
|
|
|
|
//
|
|
|
|
// This code was written with an eye toward translating to Go
|
|
|
|
// in the future. Methods have the form Type_Method(Type *t, ...).
|
|
|
|
|
2014-07-30 10:01:52 -06:00
|
|
|
package runtime
|
|
|
|
|
2014-08-15 13:22:33 -06:00
|
|
|
import "unsafe"
|
2014-07-30 10:01:52 -06:00
|
|
|
|
|
|
|
const (
|
2014-08-07 03:34:30 -06:00
|
|
|
debugMalloc = false
|
|
|
|
|
2014-09-16 08:22:15 -06:00
|
|
|
flagNoScan = _FlagNoScan
|
|
|
|
flagNoZero = _FlagNoZero
|
2014-07-30 10:01:52 -06:00
|
|
|
|
2014-09-16 08:22:15 -06:00
|
|
|
maxTinySize = _TinySize
|
|
|
|
tinySizeClass = _TinySizeClass
|
|
|
|
maxSmallSize = _MaxSmallSize
|
2014-07-30 10:01:52 -06:00
|
|
|
|
2014-09-16 08:22:15 -06:00
|
|
|
pageShift = _PageShift
|
|
|
|
pageSize = _PageSize
|
|
|
|
pageMask = _PageMask
|
2014-08-07 03:34:30 -06:00
|
|
|
|
2014-09-16 08:22:15 -06:00
|
|
|
mSpanInUse = _MSpanInUse
|
2014-08-28 14:23:10 -06:00
|
|
|
|
2014-11-11 15:05:02 -07:00
|
|
|
concurrentSweep = _ConcurrentSweep
|
2014-07-30 10:01:52 -06:00
|
|
|
)
|
|
|
|
|
2015-02-19 11:38:46 -07:00
|
|
|
const (
|
|
|
|
_PageShift = 13
|
|
|
|
_PageSize = 1 << _PageShift
|
|
|
|
_PageMask = _PageSize - 1
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
|
|
|
|
_64bit = 1 << (^uintptr(0) >> 63) / 2
|
|
|
|
|
|
|
|
// Computed constant. The definition of MaxSmallSize and the
|
2015-03-11 13:58:47 -06:00
|
|
|
// algorithm in msize.go produces some number of different allocation
|
2015-02-19 11:38:46 -07:00
|
|
|
// size classes. NumSizeClasses is that number. It's needed here
|
|
|
|
// because there are static arrays of this length; when msize runs its
|
|
|
|
// size choosing algorithm it double-checks that NumSizeClasses agrees.
|
|
|
|
_NumSizeClasses = 67
|
|
|
|
|
|
|
|
// Tunable constants.
|
|
|
|
_MaxSmallSize = 32 << 10
|
|
|
|
|
|
|
|
// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
|
|
|
|
_TinySize = 16
|
|
|
|
_TinySizeClass = 2
|
|
|
|
|
|
|
|
_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
|
|
|
|
_MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
|
|
|
|
_HeapAllocChunk = 1 << 20 // Chunk size for heap growth
|
|
|
|
|
|
|
|
// Per-P, per order stack segment cache size.
|
|
|
|
_StackCacheSize = 32 * 1024
|
|
|
|
|
|
|
|
// Number of orders that get caching. Order 0 is FixedStack
|
|
|
|
// and each successive order is twice as large.
|
|
|
|
// We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
|
|
|
|
// will be allocated directly.
|
|
|
|
// Since FixedStack is different on different systems, we
|
|
|
|
// must vary NumStackOrders to keep the same maximum cached size.
|
|
|
|
// OS | FixedStack | NumStackOrders
|
|
|
|
// -----------------+------------+---------------
|
|
|
|
// linux/darwin/bsd | 2KB | 4
|
|
|
|
// windows/32 | 4KB | 3
|
|
|
|
// windows/64 | 8KB | 2
|
|
|
|
// plan9 | 4KB | 3
|
|
|
|
_NumStackOrders = 4 - ptrSize/4*goos_windows - 1*goos_plan9
|
|
|
|
|
|
|
|
// Number of bits in page to span calculations (8k pages).
|
|
|
|
// On Windows 64-bit we limit the arena to 32GB or 35 bits.
|
|
|
|
// Windows counts memory used by page table into committed memory
|
|
|
|
// of the process, so we can't reserve too much memory.
|
2015-07-10 17:17:11 -06:00
|
|
|
// See https://golang.org/issue/5402 and https://golang.org/issue/5236.
|
2015-06-07 22:14:08 -06:00
|
|
|
// On other 64-bit platforms, we limit the arena to 512GB, or 39 bits.
|
2015-02-19 11:38:46 -07:00
|
|
|
// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
|
2015-04-10 20:14:43 -06:00
|
|
|
// On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory,
|
|
|
|
// but as most devices have less than 4GB of physical memory anyway, we
|
|
|
|
// try to be conservative here, and only ask for a 2GB heap.
|
2015-06-07 22:14:08 -06:00
|
|
|
_MHeapMap_TotalBits = (_64bit*goos_windows)*35 + (_64bit*(1-goos_windows)*(1-goos_darwin*goarch_arm64))*39 + goos_darwin*goarch_arm64*31 + (1-_64bit)*32
|
2015-02-19 11:38:46 -07:00
|
|
|
_MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift
|
|
|
|
|
|
|
|
_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
|
|
|
|
|
|
|
|
// Max number of threads to run garbage collection.
|
|
|
|
// 2, 3, and 4 are all plausible maximums depending
|
|
|
|
// on the hardware details of the machine. The garbage
|
|
|
|
// collector scales well to 32 cpus.
|
|
|
|
_MaxGcproc = 32
|
|
|
|
)
|
|
|
|
|
cmd/cc, runtime: preserve C runtime type names in generated Go
uintptr or uint64 in the runtime C were turning into uint in the Go,
bool was turning into uint8, and so on. Fix that.
Also delete Go wrappers for C functions.
The C functions can be called directly now
(but still eventually need to be converted to Go).
LGTM=bradfitz, minux, iant
R=golang-codereviews, bradfitz, iant, minux
CC=golang-codereviews, khr, r
https://golang.org/cl/138740043
2014-08-27 19:59:49 -06:00
|
|
|
// Page number (address>>pageShift)
|
|
|
|
type pageID uintptr
|
|
|
|
|
2015-02-19 11:38:46 -07:00
|
|
|
const _MaxArena32 = 2 << 30
|
|
|
|
|
|
|
|
// OS-defined helpers:
|
|
|
|
//
|
|
|
|
// sysAlloc obtains a large chunk of zeroed memory from the
|
|
|
|
// operating system, typically on the order of a hundred kilobytes
|
|
|
|
// or a megabyte.
|
|
|
|
// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
|
|
|
|
// may use larger alignment, so the caller must be careful to realign the
|
|
|
|
// memory obtained by sysAlloc.
|
|
|
|
//
|
|
|
|
// SysUnused notifies the operating system that the contents
|
|
|
|
// of the memory region are no longer needed and can be reused
|
|
|
|
// for other purposes.
|
|
|
|
// SysUsed notifies the operating system that the contents
|
|
|
|
// of the memory region are needed again.
|
|
|
|
//
|
|
|
|
// SysFree returns it unconditionally; this is only used if
|
|
|
|
// an out-of-memory error has been detected midway through
|
|
|
|
// an allocation. It is okay if SysFree is a no-op.
|
|
|
|
//
|
|
|
|
// SysReserve reserves address space without allocating memory.
|
|
|
|
// If the pointer passed to it is non-nil, the caller wants the
|
|
|
|
// reservation there, but SysReserve can still choose another
|
|
|
|
// location if that one is unavailable. On some systems and in some
|
|
|
|
// cases SysReserve will simply check that the address space is
|
|
|
|
// available and not actually reserve it. If SysReserve returns
|
|
|
|
// non-nil, it sets *reserved to true if the address space is
|
|
|
|
// reserved, false if it has merely been checked.
|
|
|
|
// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
|
|
|
|
// may use larger alignment, so the caller must be careful to realign the
|
|
|
|
// memory obtained by SysReserve.
|
|
|
|
//
|
|
|
|
// SysMap maps previously reserved address space for use.
|
|
|
|
// The reserved argument is true if the address space was really
|
|
|
|
// reserved, not merely checked.
|
|
|
|
//
|
|
|
|
// SysFault marks an (already sysAlloc'd) region to fault
|
|
|
|
// if accessed. Used only for debugging the runtime.
|
|
|
|
|
|
|
|
func mallocinit() {
|
|
|
|
initSizes()
|
|
|
|
|
|
|
|
if class_to_size[_TinySizeClass] != _TinySize {
|
|
|
|
throw("bad TinySizeClass")
|
|
|
|
}
|
|
|
|
|
|
|
|
var p, bitmapSize, spansSize, pSize, limit uintptr
|
|
|
|
var reserved bool
|
|
|
|
|
|
|
|
// limit = runtime.memlimit();
|
|
|
|
// See https://golang.org/issue/5049
|
|
|
|
// TODO(rsc): Fix after 1.1.
|
|
|
|
limit = 0
|
|
|
|
|
|
|
|
// Set up the allocation arena, a contiguous area of memory where
|
|
|
|
// allocated data will be found. The arena begins with a bitmap large
|
|
|
|
// enough to hold 4 bits per allocated word.
|
|
|
|
if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
|
|
|
|
// On a 64-bit machine, allocate from a single contiguous reservation.
|
2015-06-07 22:14:08 -06:00
|
|
|
// 512 GB (MaxMem) should be big enough for now.
|
2015-02-19 11:38:46 -07:00
|
|
|
//
|
|
|
|
// The code will work with the reservation at any address, but ask
|
|
|
|
// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
|
2015-06-07 22:14:08 -06:00
|
|
|
// Allocating a 512 GB region takes away 39 bits, and the amd64
|
|
|
|
// doesn't let us choose the top 17 bits, so that leaves the 9 bits
|
2015-02-19 11:38:46 -07:00
|
|
|
// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
|
|
|
|
// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
|
|
|
|
// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
|
|
|
|
// UTF-8 sequences, and they are otherwise as far away from
|
|
|
|
// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
|
|
|
|
// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
|
|
|
|
// on OS X during thread allocations. 0x00c0 causes conflicts with
|
|
|
|
// AddressSanitizer which reserves all memory up to 0x0100.
|
|
|
|
// These choices are both for debuggability and to reduce the
|
2015-06-07 22:14:08 -06:00
|
|
|
// odds of a conservative garbage collector (as is still used in gccgo)
|
|
|
|
// not collecting memory because some non-pointer block of memory
|
|
|
|
// had a bit pattern that matched a memory address.
|
2015-02-19 11:38:46 -07:00
|
|
|
//
|
2015-06-07 22:14:08 -06:00
|
|
|
// Actually we reserve 544 GB (because the bitmap ends up being 32 GB)
|
2015-02-19 11:38:46 -07:00
|
|
|
// but it hardly matters: e0 00 is not valid UTF-8 either.
|
|
|
|
//
|
|
|
|
// If this fails we fall back to the 32 bit memory mechanism
|
2015-03-08 07:20:20 -06:00
|
|
|
//
|
|
|
|
// However, on arm64, we ignore all this advice above and slam the
|
|
|
|
// allocation at 0x40 << 32 because when using 4k pages with 3-level
|
|
|
|
// translation buffers, the user address space is limited to 39 bits
|
2015-04-10 20:14:43 -06:00
|
|
|
// On darwin/arm64, the address space is even smaller.
|
2015-02-19 11:38:46 -07:00
|
|
|
arenaSize := round(_MaxMem, _PageSize)
|
|
|
|
bitmapSize = arenaSize / (ptrSize * 8 / 4)
|
|
|
|
spansSize = arenaSize / _PageSize * ptrSize
|
|
|
|
spansSize = round(spansSize, _PageSize)
|
|
|
|
for i := 0; i <= 0x7f; i++ {
|
2015-04-10 20:14:43 -06:00
|
|
|
switch {
|
|
|
|
case GOARCH == "arm64" && GOOS == "darwin":
|
|
|
|
p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
|
|
|
|
case GOARCH == "arm64":
|
2015-03-08 07:20:20 -06:00
|
|
|
p = uintptr(i)<<40 | uintptrMask&(0x0040<<32)
|
2015-04-10 20:14:43 -06:00
|
|
|
default:
|
2015-03-08 07:20:20 -06:00
|
|
|
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
|
|
|
|
}
|
2015-02-19 11:38:46 -07:00
|
|
|
pSize = bitmapSize + spansSize + arenaSize + _PageSize
|
|
|
|
p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
|
|
|
|
if p != 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if p == 0 {
|
|
|
|
// On a 32-bit machine, we can't typically get away
|
|
|
|
// with a giant virtual address space reservation.
|
|
|
|
// Instead we map the memory information bitmap
|
|
|
|
// immediately after the data segment, large enough
|
|
|
|
// to handle another 2GB of mappings (256 MB),
|
|
|
|
// along with a reservation for an initial arena.
|
|
|
|
// When that gets used up, we'll start asking the kernel
|
|
|
|
// for any memory anywhere and hope it's in the 2GB
|
|
|
|
// following the bitmap (presumably the executable begins
|
|
|
|
// near the bottom of memory, so we'll have to use up
|
|
|
|
// most of memory before the kernel resorts to giving out
|
|
|
|
// memory before the beginning of the text segment).
|
|
|
|
//
|
|
|
|
// Alternatively we could reserve 512 MB bitmap, enough
|
|
|
|
// for 4GB of mappings, and then accept any memory the
|
|
|
|
// kernel threw at us, but normally that's a waste of 512 MB
|
|
|
|
// of address space, which is probably too much in a 32-bit world.
|
|
|
|
|
|
|
|
// If we fail to allocate, try again with a smaller arena.
|
|
|
|
// This is necessary on Android L where we share a process
|
|
|
|
// with ART, which reserves virtual memory aggressively.
|
|
|
|
arenaSizes := []uintptr{
|
|
|
|
512 << 20,
|
|
|
|
256 << 20,
|
2015-02-24 09:11:56 -07:00
|
|
|
128 << 20,
|
2015-02-19 11:38:46 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, arenaSize := range arenaSizes {
|
|
|
|
bitmapSize = _MaxArena32 / (ptrSize * 8 / 4)
|
|
|
|
spansSize = _MaxArena32 / _PageSize * ptrSize
|
|
|
|
if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
|
|
|
|
bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
|
|
|
|
arenaSize = bitmapSize * 8
|
|
|
|
spansSize = arenaSize / _PageSize * ptrSize
|
|
|
|
}
|
|
|
|
spansSize = round(spansSize, _PageSize)
|
|
|
|
|
|
|
|
// SysReserve treats the address we ask for, end, as a hint,
|
|
|
|
// not as an absolute requirement. If we ask for the end
|
|
|
|
// of the data segment but the operating system requires
|
|
|
|
// a little more space before we can start allocating, it will
|
|
|
|
// give out a slightly higher pointer. Except QEMU, which
|
|
|
|
// is buggy, as usual: it won't adjust the pointer upward.
|
|
|
|
// So adjust it upward a little bit ourselves: 1/4 MB to get
|
|
|
|
// away from the running binary image and then round up
|
|
|
|
// to a MB boundary.
|
2015-04-06 18:55:02 -06:00
|
|
|
p = round(firstmoduledata.end+(1<<18), 1<<20)
|
2015-02-19 11:38:46 -07:00
|
|
|
pSize = bitmapSize + spansSize + arenaSize + _PageSize
|
|
|
|
p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
|
|
|
|
if p != 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if p == 0 {
|
|
|
|
throw("runtime: cannot reserve arena virtual address space")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// PageSize can be larger than OS definition of page size,
|
|
|
|
// so SysReserve can give us a PageSize-unaligned pointer.
|
|
|
|
// To overcome this we ask for PageSize more and round up the pointer.
|
|
|
|
p1 := round(p, _PageSize)
|
|
|
|
|
|
|
|
mheap_.spans = (**mspan)(unsafe.Pointer(p1))
|
|
|
|
mheap_.bitmap = p1 + spansSize
|
|
|
|
mheap_.arena_start = p1 + (spansSize + bitmapSize)
|
|
|
|
mheap_.arena_used = mheap_.arena_start
|
|
|
|
mheap_.arena_end = p + pSize
|
|
|
|
mheap_.arena_reserved = reserved
|
|
|
|
|
|
|
|
if mheap_.arena_start&(_PageSize-1) != 0 {
|
|
|
|
println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
|
|
|
|
throw("misrounded allocation in mallocinit")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initialize the rest of the allocator.
|
|
|
|
mHeap_Init(&mheap_, spansSize)
|
|
|
|
_g_ := getg()
|
|
|
|
_g_.m.mcache = allocmcache()
|
|
|
|
}
|
|
|
|
|
|
|
|
// sysReserveHigh reserves space somewhere high in the address space.
|
|
|
|
// sysReserve doesn't actually reserve the full amount requested on
|
|
|
|
// 64-bit systems, because of problems with ulimit. Instead it checks
|
|
|
|
// that it can get the first 64 kB and assumes it can grab the rest as
|
|
|
|
// needed. This doesn't work well with the "let the kernel pick an address"
|
|
|
|
// mode, so don't do that. Pick a high address instead.
|
|
|
|
func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
|
|
|
|
if ptrSize == 4 {
|
|
|
|
return sysReserve(nil, n, reserved)
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := 0; i <= 0x7f; i++ {
|
|
|
|
p := uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
|
|
|
|
*reserved = false
|
|
|
|
p = uintptr(sysReserve(unsafe.Pointer(p), n, reserved))
|
|
|
|
if p != 0 {
|
|
|
|
return unsafe.Pointer(p)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return sysReserve(nil, n, reserved)
|
|
|
|
}
|
|
|
|
|
|
|
|
func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
|
|
|
|
if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
|
|
|
|
// We are in 32-bit mode, maybe we didn't use all possible address space yet.
|
|
|
|
// Reserve some more space.
|
|
|
|
p_size := round(n+_PageSize, 256<<20)
|
|
|
|
new_end := h.arena_end + p_size
|
|
|
|
if new_end <= h.arena_start+_MaxArena32 {
|
|
|
|
// TODO: It would be bad if part of the arena
|
|
|
|
// is reserved and part is not.
|
|
|
|
var reserved bool
|
|
|
|
p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
|
|
|
|
if p == h.arena_end {
|
|
|
|
h.arena_end = new_end
|
|
|
|
h.arena_reserved = reserved
|
|
|
|
} else if p+p_size <= h.arena_start+_MaxArena32 {
|
|
|
|
// Keep everything page-aligned.
|
|
|
|
// Our pages are bigger than hardware pages.
|
|
|
|
h.arena_end = p + p_size
|
2015-06-22 09:18:23 -06:00
|
|
|
used := p + (-uintptr(p) & (_PageSize - 1))
|
|
|
|
mHeap_MapBits(h, used)
|
|
|
|
mHeap_MapSpans(h, used)
|
|
|
|
h.arena_used = used
|
2015-02-19 11:38:46 -07:00
|
|
|
h.arena_reserved = reserved
|
|
|
|
} else {
|
|
|
|
var stat uint64
|
|
|
|
sysFree((unsafe.Pointer)(p), p_size, &stat)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
|
|
|
|
// Keep taking from our reservation.
|
|
|
|
p := h.arena_used
|
|
|
|
sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
|
2015-06-07 20:59:29 -06:00
|
|
|
mHeap_MapBits(h, p+n)
|
|
|
|
mHeap_MapSpans(h, p+n)
|
2015-06-15 11:35:56 -06:00
|
|
|
h.arena_used = p + n
|
2015-02-19 11:38:46 -07:00
|
|
|
if raceenabled {
|
|
|
|
racemapshadow((unsafe.Pointer)(p), n)
|
|
|
|
}
|
|
|
|
|
|
|
|
if uintptr(p)&(_PageSize-1) != 0 {
|
|
|
|
throw("misrounded allocation in MHeap_SysAlloc")
|
|
|
|
}
|
|
|
|
return (unsafe.Pointer)(p)
|
|
|
|
}
|
|
|
|
|
|
|
|
// If using 64-bit, our reservation is all we have.
|
|
|
|
if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// On 32-bit, once the reservation is gone we can
|
|
|
|
// try to get memory at a location chosen by the OS
|
|
|
|
// and hope that it is in the range we allocated bitmap for.
|
|
|
|
p_size := round(n, _PageSize) + _PageSize
|
|
|
|
p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
|
|
|
|
if p == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
|
|
|
|
print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
|
|
|
|
sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
p_end := p + p_size
|
|
|
|
p += -p & (_PageSize - 1)
|
|
|
|
if uintptr(p)+n > uintptr(h.arena_used) {
|
2015-06-07 20:59:29 -06:00
|
|
|
mHeap_MapBits(h, p+n)
|
|
|
|
mHeap_MapSpans(h, p+n)
|
2015-06-15 11:35:56 -06:00
|
|
|
h.arena_used = p + n
|
2015-02-19 11:38:46 -07:00
|
|
|
if p_end > h.arena_end {
|
|
|
|
h.arena_end = p_end
|
|
|
|
}
|
|
|
|
if raceenabled {
|
|
|
|
racemapshadow((unsafe.Pointer)(p), n)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if uintptr(p)&(_PageSize-1) != 0 {
|
|
|
|
throw("misrounded allocation in MHeap_SysAlloc")
|
|
|
|
}
|
|
|
|
return (unsafe.Pointer)(p)
|
|
|
|
}
|
|
|
|
|
2014-09-16 08:22:15 -06:00
|
|
|
// base address for all 0-byte allocations
|
|
|
|
var zerobase uintptr
|
2014-07-30 10:01:52 -06:00
|
|
|
|
2015-02-19 11:38:46 -07:00
|
|
|
const (
|
|
|
|
// flags to malloc
|
|
|
|
_FlagNoScan = 1 << 0 // GC doesn't have to scan object
|
|
|
|
_FlagNoZero = 1 << 1 // don't zero memory
|
|
|
|
)
|
2015-01-06 12:58:49 -07:00
|
|
|
|
2014-08-05 07:03:06 -06:00
|
|
|
// Allocate an object of size bytes.
|
|
|
|
// Small objects are allocated from the per-P cache's free lists.
|
2014-07-30 10:01:52 -06:00
|
|
|
// Large objects (> 32 kB) are allocated straight from the heap.
|
2014-11-03 11:26:46 -07:00
|
|
|
func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
|
2015-03-05 15:33:08 -07:00
|
|
|
if gcphase == _GCmarktermination {
|
|
|
|
throw("mallocgc called with gcphase == _GCmarktermination")
|
|
|
|
}
|
2015-03-08 18:56:15 -06:00
|
|
|
|
2014-07-30 10:01:52 -06:00
|
|
|
if size == 0 {
|
2014-09-16 08:22:15 -06:00
|
|
|
return unsafe.Pointer(&zerobase)
|
2014-07-30 10:01:52 -06:00
|
|
|
}
|
|
|
|
|
2014-09-08 23:08:34 -06:00
|
|
|
if flags&flagNoScan == 0 && typ == nil {
|
2014-12-27 21:58:00 -07:00
|
|
|
throw("malloc missing type")
|
2014-09-08 23:08:34 -06:00
|
|
|
}
|
|
|
|
|
2015-03-08 18:56:15 -06:00
|
|
|
if debug.sbrk != 0 {
|
|
|
|
align := uintptr(16)
|
|
|
|
if typ != nil {
|
|
|
|
align = uintptr(typ.align)
|
|
|
|
}
|
|
|
|
return persistentalloc(size, align, &memstats.other_sys)
|
|
|
|
}
|
|
|
|
|
2015-01-16 12:43:38 -07:00
|
|
|
// Set mp.mallocing to keep from being preempted by GC.
|
|
|
|
mp := acquirem()
|
|
|
|
if mp.mallocing != 0 {
|
|
|
|
throw("malloc deadlock")
|
2014-08-18 06:33:39 -06:00
|
|
|
}
|
2015-05-12 11:01:37 -06:00
|
|
|
if mp.gsignal == getg() {
|
|
|
|
throw("malloc during signal")
|
|
|
|
}
|
2015-01-16 12:43:38 -07:00
|
|
|
mp.mallocing = 1
|
2014-08-18 06:33:39 -06:00
|
|
|
|
2015-03-08 18:56:15 -06:00
|
|
|
shouldhelpgc := false
|
|
|
|
dataSize := size
|
2014-08-18 06:33:39 -06:00
|
|
|
c := gomcache()
|
2014-07-30 10:01:52 -06:00
|
|
|
var s *mspan
|
|
|
|
var x unsafe.Pointer
|
|
|
|
if size <= maxSmallSize {
|
|
|
|
if flags&flagNoScan != 0 && size < maxTinySize {
|
|
|
|
// Tiny allocator.
|
|
|
|
//
|
|
|
|
// Tiny allocator combines several tiny allocation requests
|
|
|
|
// into a single memory block. The resulting memory block
|
|
|
|
// is freed when all subobjects are unreachable. The subobjects
|
|
|
|
// must be FlagNoScan (don't have pointers), this ensures that
|
|
|
|
// the amount of potentially wasted memory is bounded.
|
|
|
|
//
|
|
|
|
// Size of the memory block used for combining (maxTinySize) is tunable.
|
|
|
|
// Current setting is 16 bytes, which relates to 2x worst case memory
|
|
|
|
// wastage (when all but one subobjects are unreachable).
|
|
|
|
// 8 bytes would result in no wastage at all, but provides less
|
|
|
|
// opportunities for combining.
|
|
|
|
// 32 bytes provides more opportunities for combining,
|
|
|
|
// but can lead to 4x worst case wastage.
|
|
|
|
// The best case winning is 8x regardless of block size.
|
|
|
|
//
|
|
|
|
// Objects obtained from tiny allocator must not be freed explicitly.
|
|
|
|
// So when an object will be freed explicitly, we ensure that
|
|
|
|
// its size >= maxTinySize.
|
|
|
|
//
|
|
|
|
// SetFinalizer has a special case for objects potentially coming
|
|
|
|
// from tiny allocator, in such a case it allows setting finalizers
|
|
|
|
// for an inner byte of a memory block.
|
|
|
|
//
|
|
|
|
// The main targets of tiny allocator are small strings and
|
|
|
|
// standalone escaping variables. On a json benchmark
|
|
|
|
// the allocator reduces number of allocations by ~12% and
|
|
|
|
// reduces heap size by ~20%.
|
2015-01-14 12:13:55 -07:00
|
|
|
off := c.tinyoffset
|
|
|
|
// Align tiny pointer for required (conservative) alignment.
|
|
|
|
if size&7 == 0 {
|
|
|
|
off = round(off, 8)
|
|
|
|
} else if size&3 == 0 {
|
|
|
|
off = round(off, 4)
|
|
|
|
} else if size&1 == 0 {
|
|
|
|
off = round(off, 2)
|
|
|
|
}
|
2015-01-14 13:48:32 -07:00
|
|
|
if off+size <= maxTinySize && c.tiny != nil {
|
2015-01-14 12:13:55 -07:00
|
|
|
// The object fits into existing tiny block.
|
|
|
|
x = add(c.tiny, off)
|
|
|
|
c.tinyoffset = off + size
|
|
|
|
c.local_tinyallocs++
|
2015-01-16 12:43:38 -07:00
|
|
|
mp.mallocing = 0
|
|
|
|
releasem(mp)
|
2015-01-14 12:13:55 -07:00
|
|
|
return x
|
2014-07-30 10:01:52 -06:00
|
|
|
}
|
|
|
|
// Allocate a new maxTinySize block.
|
|
|
|
s = c.alloc[tinySizeClass]
|
|
|
|
v := s.freelist
|
2014-11-20 10:08:13 -07:00
|
|
|
if v.ptr() == nil {
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
Scalararg and ptrarg are not "signal safe".
Go code filling them out can be interrupted by a signal,
and then the signal handler runs, and if it also ends up
in Go code that uses scalararg or ptrarg, now the old
values have been smashed.
For the pieces of code that do need to run in a signal handler,
we introduced onM_signalok, which is really just onM
except that the _signalok is meant to convey that the caller
asserts that scalarg and ptrarg will be restored to their old
values after the call (instead of the usual behavior, zeroing them).
Scalararg and ptrarg are also untyped and therefore error-prone.
Go code can always pass a closure instead of using scalararg
and ptrarg; they were only really necessary for C code.
And there's no more C code.
For all these reasons, delete scalararg and ptrarg, converting
the few remaining references to use closures.
Once those are gone, there is no need for a distinction between
onM and onM_signalok, so replace both with a single function
equivalent to the current onM_signalok (that is, it can be called
on any of the curg, g0, and gsignal stacks).
The name onM and the phrase 'm stack' are misnomers,
because on most system an M has two system stacks:
the main thread stack and the signal handling stack.
Correct the misnomer by naming the replacement function systemstack.
Fix a few references to "M stack" in code.
The main motivation for this change is to eliminate scalararg/ptrarg.
Rick and I have already seen them cause problems because
the calling sequence m.ptrarg[0] = p is a heap pointer assignment,
so it gets a write barrier. The write barrier also uses onM, so it has
all the same problems as if it were being invoked by a signal handler.
We worked around this by saving and restoring the old values
and by calling onM_signalok, but there's no point in keeping this nice
home for bugs around any longer.
This CL also changes funcline to return the file name as a result
instead of filling in a passed-in *string. (The *string signature is
left over from when the code was written in and called from C.)
That's arguably an unrelated change, except that once I had done
the ptrarg/scalararg/onM cleanup I started getting false positives
about the *string argument escaping (not allowed in package runtime).
The compiler is wrong, but the easiest fix is to write the code like
Go code instead of like C code. I am a bit worried that the compiler
is wrong because of some use of uninitialized memory in the escape
analysis. If that's the reason, it will go away when we convert the
compiler to Go. (And if not, we'll debug it the next time.)
LGTM=khr
R=r, khr
CC=austin, golang-codereviews, iant, rlh
https://golang.org/cl/174950043
2014-11-12 12:54:31 -07:00
|
|
|
systemstack(func() {
|
2014-11-11 15:05:02 -07:00
|
|
|
mCache_Refill(c, tinySizeClass)
|
|
|
|
})
|
2015-01-06 12:58:49 -07:00
|
|
|
shouldhelpgc = true
|
2014-07-30 10:01:52 -06:00
|
|
|
s = c.alloc[tinySizeClass]
|
|
|
|
v = s.freelist
|
|
|
|
}
|
2014-11-20 10:08:13 -07:00
|
|
|
s.freelist = v.ptr().next
|
2014-07-30 10:01:52 -06:00
|
|
|
s.ref++
|
2015-02-19 16:11:24 -07:00
|
|
|
// prefetchnta offers best performance, see change list message.
|
|
|
|
prefetchnta(uintptr(v.ptr().next))
|
2014-07-30 10:01:52 -06:00
|
|
|
x = unsafe.Pointer(v)
|
|
|
|
(*[2]uint64)(x)[0] = 0
|
|
|
|
(*[2]uint64)(x)[1] = 0
|
|
|
|
// See if we need to replace the existing tiny block with the new one
|
|
|
|
// based on amount of remaining free space.
|
2015-01-14 12:13:55 -07:00
|
|
|
if size < c.tinyoffset {
|
|
|
|
c.tiny = x
|
|
|
|
c.tinyoffset = size
|
2014-07-30 10:01:52 -06:00
|
|
|
}
|
|
|
|
size = maxTinySize
|
|
|
|
} else {
|
|
|
|
var sizeclass int8
|
|
|
|
if size <= 1024-8 {
|
|
|
|
sizeclass = size_to_class8[(size+7)>>3]
|
|
|
|
} else {
|
|
|
|
sizeclass = size_to_class128[(size-1024+127)>>7]
|
|
|
|
}
|
|
|
|
size = uintptr(class_to_size[sizeclass])
|
|
|
|
s = c.alloc[sizeclass]
|
|
|
|
v := s.freelist
|
2014-11-20 10:08:13 -07:00
|
|
|
if v.ptr() == nil {
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
Scalararg and ptrarg are not "signal safe".
Go code filling them out can be interrupted by a signal,
and then the signal handler runs, and if it also ends up
in Go code that uses scalararg or ptrarg, now the old
values have been smashed.
For the pieces of code that do need to run in a signal handler,
we introduced onM_signalok, which is really just onM
except that the _signalok is meant to convey that the caller
asserts that scalararg and ptrarg will be restored to their old
values after the call (instead of the usual behavior, zeroing them).
Scalararg and ptrarg are also untyped and therefore error-prone.
Go code can always pass a closure instead of using scalararg
and ptrarg; they were only really necessary for C code.
And there's no more C code.
For all these reasons, delete scalararg and ptrarg, converting
the few remaining references to use closures.
Once those are gone, there is no need for a distinction between
onM and onM_signalok, so replace both with a single function
equivalent to the current onM_signalok (that is, it can be called
on any of the curg, g0, and gsignal stacks).
The name onM and the phrase 'm stack' are misnomers,
because on most systems an M has two system stacks:
the main thread stack and the signal handling stack.
Correct the misnomer by naming the replacement function systemstack.
Fix a few references to "M stack" in code.
The main motivation for this change is to eliminate scalararg/ptrarg.
Rick and I have already seen them cause problems because
the calling sequence m.ptrarg[0] = p is a heap pointer assignment,
so it gets a write barrier. The write barrier also uses onM, so it has
all the same problems as if it were being invoked by a signal handler.
We worked around this by saving and restoring the old values
and by calling onM_signalok, but there's no point in keeping this nice
home for bugs around any longer.
This CL also changes funcline to return the file name as a result
instead of filling in a passed-in *string. (The *string signature is
left over from when the code was written in and called from C.)
That's arguably an unrelated change, except that once I had done
the ptrarg/scalararg/onM cleanup I started getting false positives
about the *string argument escaping (not allowed in package runtime).
The compiler is wrong, but the easiest fix is to write the code like
Go code instead of like C code. I am a bit worried that the compiler
is wrong because of some use of uninitialized memory in the escape
analysis. If that's the reason, it will go away when we convert the
compiler to Go. (And if not, we'll debug it the next time.)
LGTM=khr
R=r, khr
CC=austin, golang-codereviews, iant, rlh
https://golang.org/cl/174950043
2014-11-12 12:54:31 -07:00
|
|
|
systemstack(func() {
|
2014-11-11 15:05:02 -07:00
|
|
|
mCache_Refill(c, int32(sizeclass))
|
|
|
|
})
|
2015-01-06 12:58:49 -07:00
|
|
|
shouldhelpgc = true
|
2014-07-30 10:01:52 -06:00
|
|
|
s = c.alloc[sizeclass]
|
|
|
|
v = s.freelist
|
|
|
|
}
|
2014-11-20 10:08:13 -07:00
|
|
|
s.freelist = v.ptr().next
|
2014-07-30 10:01:52 -06:00
|
|
|
s.ref++
|
2015-02-19 16:11:24 -07:00
|
|
|
// prefetchnta offers best performance, see change list message.
|
|
|
|
prefetchnta(uintptr(v.ptr().next))
|
2014-07-30 10:01:52 -06:00
|
|
|
x = unsafe.Pointer(v)
|
|
|
|
if flags&flagNoZero == 0 {
|
2014-11-20 10:08:13 -07:00
|
|
|
v.ptr().next = 0
|
2014-07-30 10:01:52 -06:00
|
|
|
if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
|
|
|
|
memclr(unsafe.Pointer(v), size)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-04-13 15:06:22 -06:00
|
|
|
c.local_cachealloc += size
|
2014-07-30 10:01:52 -06:00
|
|
|
} else {
|
2014-11-11 15:05:02 -07:00
|
|
|
var s *mspan
|
2015-01-06 12:58:49 -07:00
|
|
|
shouldhelpgc = true
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
Scalararg and ptrarg are not "signal safe".
Go code filling them out can be interrupted by a signal,
and then the signal handler runs, and if it also ends up
in Go code that uses scalararg or ptrarg, now the old
values have been smashed.
For the pieces of code that do need to run in a signal handler,
we introduced onM_signalok, which is really just onM
except that the _signalok is meant to convey that the caller
asserts that scalararg and ptrarg will be restored to their old
values after the call (instead of the usual behavior, zeroing them).
Scalararg and ptrarg are also untyped and therefore error-prone.
Go code can always pass a closure instead of using scalararg
and ptrarg; they were only really necessary for C code.
And there's no more C code.
For all these reasons, delete scalararg and ptrarg, converting
the few remaining references to use closures.
Once those are gone, there is no need for a distinction between
onM and onM_signalok, so replace both with a single function
equivalent to the current onM_signalok (that is, it can be called
on any of the curg, g0, and gsignal stacks).
The name onM and the phrase 'm stack' are misnomers,
because on most systems an M has two system stacks:
the main thread stack and the signal handling stack.
Correct the misnomer by naming the replacement function systemstack.
Fix a few references to "M stack" in code.
The main motivation for this change is to eliminate scalararg/ptrarg.
Rick and I have already seen them cause problems because
the calling sequence m.ptrarg[0] = p is a heap pointer assignment,
so it gets a write barrier. The write barrier also uses onM, so it has
all the same problems as if it were being invoked by a signal handler.
We worked around this by saving and restoring the old values
and by calling onM_signalok, but there's no point in keeping this nice
home for bugs around any longer.
This CL also changes funcline to return the file name as a result
instead of filling in a passed-in *string. (The *string signature is
left over from when the code was written in and called from C.)
That's arguably an unrelated change, except that once I had done
the ptrarg/scalararg/onM cleanup I started getting false positives
about the *string argument escaping (not allowed in package runtime).
The compiler is wrong, but the easiest fix is to write the code like
Go code instead of like C code. I am a bit worried that the compiler
is wrong because of some use of uninitialized memory in the escape
analysis. If that's the reason, it will go away when we convert the
compiler to Go. (And if not, we'll debug it the next time.)
LGTM=khr
R=r, khr
CC=austin, golang-codereviews, iant, rlh
https://golang.org/cl/174950043
2014-11-12 12:54:31 -07:00
|
|
|
systemstack(func() {
|
2014-11-11 15:05:02 -07:00
|
|
|
s = largeAlloc(size, uint32(flags))
|
|
|
|
})
|
2014-07-30 10:01:52 -06:00
|
|
|
x = unsafe.Pointer(uintptr(s.start << pageShift))
|
|
|
|
size = uintptr(s.elemsize)
|
|
|
|
}
|
|
|
|
|
2014-08-13 10:42:55 -06:00
|
|
|
if flags&flagNoScan != 0 {
|
2015-01-16 12:43:38 -07:00
|
|
|
// All objects are pre-marked as noscan. Nothing to do.
|
|
|
|
} else {
|
|
|
|
// If allocating a defer+arg block, now that we've picked a malloc size
|
|
|
|
// large enough to hold everything, cut the "asked for" size down to
|
|
|
|
// just the defer header, so that the GC bitmap will record the arg block
|
|
|
|
// as containing nothing at all (as if it were unused space at the end of
|
|
|
|
// a malloc block caused by size rounding).
|
|
|
|
// The defer arg areas are scanned as part of scanstack.
|
|
|
|
if typ == deferType {
|
|
|
|
dataSize = unsafe.Sizeof(_defer{})
|
2014-08-07 03:34:30 -06:00
|
|
|
}
|
2015-01-16 12:43:38 -07:00
|
|
|
heapBitsSetType(uintptr(x), size, dataSize, typ)
|
2015-05-04 14:10:49 -06:00
|
|
|
if dataSize > typ.size {
|
|
|
|
// Array allocation. If there are any
|
|
|
|
// pointers, GC has to scan to the last
|
|
|
|
// element.
|
|
|
|
if typ.ptrdata != 0 {
|
|
|
|
c.local_scan += dataSize - typ.size + typ.ptrdata
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
c.local_scan += typ.ptrdata
|
|
|
|
}
|
2015-06-15 10:30:23 -06:00
|
|
|
|
|
|
|
// Ensure that the stores above that initialize x to
|
|
|
|
// type-safe memory and set the heap bits occur before
|
|
|
|
// the caller can make x observable to the garbage
|
|
|
|
// collector. Otherwise, on weakly ordered machines,
|
|
|
|
// the garbage collector could follow a pointer to x,
|
|
|
|
// but see uninitialized memory or stale heap bits.
|
|
|
|
publicationBarrier()
|
2014-08-07 03:34:30 -06:00
|
|
|
}
|
2014-11-04 11:31:34 -07:00
|
|
|
|
|
|
|
// GCmarkterminate allocates black
|
|
|
|
// All slots hold nil so no scanning is needed.
|
|
|
|
// This may be racing with GC so do it atomically if there can be
|
|
|
|
// a race marking the bit.
|
2015-06-01 16:16:03 -06:00
|
|
|
if gcphase == _GCmarktermination || gcBlackenPromptly {
|
2014-11-15 06:00:38 -07:00
|
|
|
systemstack(func() {
|
2015-03-12 14:53:57 -06:00
|
|
|
gcmarknewobject_m(uintptr(x), size)
|
2014-11-15 06:00:38 -07:00
|
|
|
})
|
2014-11-04 11:31:34 -07:00
|
|
|
}
|
|
|
|
|
2014-07-30 10:01:52 -06:00
|
|
|
if raceenabled {
|
|
|
|
racemalloc(x, size)
|
|
|
|
}
|
2014-08-18 06:33:39 -06:00
|
|
|
|
2015-01-16 12:43:38 -07:00
|
|
|
mp.mallocing = 0
|
|
|
|
releasem(mp)
|
2014-08-18 06:33:39 -06:00
|
|
|
|
2014-07-30 10:01:52 -06:00
|
|
|
if debug.allocfreetrace != 0 {
|
|
|
|
tracealloc(x, size, typ)
|
|
|
|
}
|
2014-08-12 15:03:32 -06:00
|
|
|
|
|
|
|
if rate := MemProfileRate; rate > 0 {
|
|
|
|
if size < uintptr(rate) && int32(size) < c.next_sample {
|
|
|
|
c.next_sample -= int32(size)
|
|
|
|
} else {
|
2014-08-18 06:33:39 -06:00
|
|
|
mp := acquirem()
|
2014-08-12 15:03:32 -06:00
|
|
|
profilealloc(mp, x, size)
|
2014-08-18 06:33:39 -06:00
|
|
|
releasem(mp)
|
2014-07-30 10:01:52 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-28 07:07:51 -06:00
|
|
|
if shouldhelpgc && shouldtriggergc() {
|
2015-02-19 13:48:40 -07:00
|
|
|
startGC(gcBackgroundMode)
|
2015-03-27 15:01:53 -06:00
|
|
|
} else if gcBlackenEnabled != 0 {
|
2015-03-16 12:22:00 -06:00
|
|
|
// Assist garbage collector. We delay this until the
|
|
|
|
// epilogue so that it doesn't interfere with the
|
|
|
|
// inner working of malloc such as mcache refills that
|
|
|
|
// might happen while doing the gcAssistAlloc.
|
|
|
|
gcAssistAlloc(size, shouldhelpgc)
|
2015-07-31 12:29:02 -06:00
|
|
|
} else if shouldhelpgc && bggc.working != 0 {
|
|
|
|
// The GC is starting up or shutting down, so we can't
|
|
|
|
// assist, but we also can't allocate unabated. Slow
|
|
|
|
// down this G's allocation and help the GC stay
|
|
|
|
// scheduled by yielding.
|
|
|
|
//
|
|
|
|
// TODO: This is a workaround. Either help the GC make
|
|
|
|
// the transition or block.
|
|
|
|
gp := getg()
|
|
|
|
if gp != gp.m.g0 && gp.m.locks == 0 && gp.m.preemptoff == "" {
|
|
|
|
Gosched()
|
|
|
|
}
|
2014-07-30 10:01:52 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
return x
|
|
|
|
}
|
|
|
|
|
2015-02-19 11:38:46 -07:00
|
|
|
func largeAlloc(size uintptr, flag uint32) *mspan {
|
|
|
|
// print("largeAlloc size=", size, "\n")
|
|
|
|
|
|
|
|
if size+_PageSize < size {
|
|
|
|
throw("out of memory")
|
|
|
|
}
|
|
|
|
npages := size >> _PageShift
|
|
|
|
if size&_PageMask != 0 {
|
|
|
|
npages++
|
|
|
|
}
|
|
|
|
s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
|
|
|
|
if s == nil {
|
|
|
|
throw("out of memory")
|
|
|
|
}
|
|
|
|
s.limit = uintptr(s.start)<<_PageShift + size
|
|
|
|
heapBitsForSpan(s.base()).initSpan(s.layout())
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2014-07-30 10:01:52 -06:00
|
|
|
// implementation of new builtin
|
|
|
|
func newobject(typ *_type) unsafe.Pointer {
|
2014-11-03 11:26:46 -07:00
|
|
|
flags := uint32(0)
|
2014-07-30 10:01:52 -06:00
|
|
|
if typ.kind&kindNoPointers != 0 {
|
|
|
|
flags |= flagNoScan
|
|
|
|
}
|
2014-09-08 23:08:34 -06:00
|
|
|
return mallocgc(uintptr(typ.size), typ, flags)
|
2014-07-30 10:01:52 -06:00
|
|
|
}
|
|
|
|
|
2014-12-22 11:27:53 -07:00
|
|
|
//go:linkname reflect_unsafe_New reflect.unsafe_New
|
|
|
|
func reflect_unsafe_New(typ *_type) unsafe.Pointer {
|
|
|
|
return newobject(typ)
|
|
|
|
}
|
|
|
|
|
2014-07-30 10:01:52 -06:00
|
|
|
// implementation of make builtin for slices
|
|
|
|
func newarray(typ *_type, n uintptr) unsafe.Pointer {
|
2014-11-03 11:26:46 -07:00
|
|
|
flags := uint32(0)
|
2014-07-30 10:01:52 -06:00
|
|
|
if typ.kind&kindNoPointers != 0 {
|
|
|
|
flags |= flagNoScan
|
|
|
|
}
|
2014-11-11 15:05:02 -07:00
|
|
|
if int(n) < 0 || (typ.size > 0 && n > _MaxMem/uintptr(typ.size)) {
|
2014-07-30 10:01:52 -06:00
|
|
|
panic("runtime: allocation size out of range")
|
|
|
|
}
|
2014-09-08 23:08:34 -06:00
|
|
|
return mallocgc(uintptr(typ.size)*n, typ, flags)
|
2014-07-30 10:01:52 -06:00
|
|
|
}
|
|
|
|
|
2014-12-22 11:27:53 -07:00
|
|
|
//go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
|
|
|
|
func reflect_unsafe_NewArray(typ *_type, n uintptr) unsafe.Pointer {
|
|
|
|
return newarray(typ, n)
|
|
|
|
}
|
|
|
|
|
2014-07-31 13:43:40 -06:00
|
|
|
// rawmem returns a chunk of pointerless memory. It is
|
|
|
|
// not zeroed.
|
|
|
|
func rawmem(size uintptr) unsafe.Pointer {
|
2014-09-08 23:08:34 -06:00
|
|
|
return mallocgc(size, nil, flagNoScan|flagNoZero)
|
2014-07-31 13:43:40 -06:00
|
|
|
}
|
|
|
|
|
2014-07-30 10:01:52 -06:00
|
|
|
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
|
|
|
|
c := mp.mcache
|
|
|
|
rate := MemProfileRate
|
|
|
|
if size < uintptr(rate) {
|
|
|
|
// pick next profile time
|
|
|
|
// If you change this, also change allocmcache.
|
|
|
|
if rate > 0x3fffffff { // make 2*rate not overflow
|
|
|
|
rate = 0x3fffffff
|
|
|
|
}
|
2014-09-02 15:33:33 -06:00
|
|
|
next := int32(fastrand1()) % (2 * int32(rate))
|
2014-07-30 10:01:52 -06:00
|
|
|
// Subtract the "remainder" of the current allocation.
|
|
|
|
// Otherwise objects that are close in size to sampling rate
|
|
|
|
// will be under-sampled, because we consistently discard this remainder.
|
|
|
|
next -= (int32(size) - c.next_sample)
|
|
|
|
if next < 0 {
|
|
|
|
next = 0
|
|
|
|
}
|
|
|
|
c.next_sample = next
|
|
|
|
}
|
2014-09-01 16:51:12 -06:00
|
|
|
|
|
|
|
mProf_Malloc(x, size)
|
2014-07-30 10:01:52 -06:00
|
|
|
}
|
|
|
|
|
2015-03-08 18:56:15 -06:00
|
|
|
// persistentAlloc is the bump-pointer state used by persistentalloc1:
// the chunk currently being carved up and how far into it allocation
// has progressed.
type persistentAlloc struct {
	// base is the start of the current chunk; nil until a chunk has
	// been obtained.
	base unsafe.Pointer

	// off is the byte offset from base at which the next block is placed.
	off uintptr
}
|
|
|
|
|
2015-03-08 18:56:15 -06:00
|
|
|
var globalAlloc struct {
|
|
|
|
mutex
|
|
|
|
persistentAlloc
|
|
|
|
}
|
|
|
|
|
2014-09-03 22:54:06 -06:00
|
|
|
// Wrapper around sysAlloc that can allocate small chunks.
|
|
|
|
// There is no associated free operation.
|
|
|
|
// Intended for things like function/type/debug-related persistent data.
|
|
|
|
// If align is 0, uses default align (currently 8).
|
2015-04-16 15:32:18 -06:00
|
|
|
func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
|
2015-06-07 19:45:39 -06:00
|
|
|
var p unsafe.Pointer
|
|
|
|
systemstack(func() {
|
|
|
|
p = persistentalloc1(size, align, sysStat)
|
|
|
|
})
|
|
|
|
return p
|
|
|
|
}
|
|
|
|
|
|
|
|
// Must run on system stack because stack growth can (re)invoke it.
|
|
|
|
// See issue 9174.
|
|
|
|
//go:systemstack
|
|
|
|
func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
|
2014-09-03 22:54:06 -06:00
|
|
|
const (
|
|
|
|
chunk = 256 << 10
|
|
|
|
maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
|
|
|
|
)
|
|
|
|
|
2015-01-14 12:13:55 -07:00
|
|
|
if size == 0 {
|
|
|
|
throw("persistentalloc: size == 0")
|
|
|
|
}
|
2014-09-03 22:54:06 -06:00
|
|
|
if align != 0 {
|
|
|
|
if align&(align-1) != 0 {
|
2014-12-27 21:58:00 -07:00
|
|
|
throw("persistentalloc: align is not a power of 2")
|
2014-09-03 22:54:06 -06:00
|
|
|
}
|
|
|
|
if align > _PageSize {
|
2014-12-27 21:58:00 -07:00
|
|
|
throw("persistentalloc: align is too large")
|
2014-09-03 22:54:06 -06:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
align = 8
|
|
|
|
}
|
|
|
|
|
|
|
|
if size >= maxBlock {
|
2015-04-16 15:32:18 -06:00
|
|
|
return sysAlloc(size, sysStat)
|
2014-09-03 22:54:06 -06:00
|
|
|
}
|
|
|
|
|
2015-03-08 18:56:15 -06:00
|
|
|
mp := acquirem()
|
|
|
|
var persistent *persistentAlloc
|
2015-04-16 22:21:30 -06:00
|
|
|
if mp != nil && mp.p != 0 {
|
|
|
|
persistent = &mp.p.ptr().palloc
|
2015-03-08 18:56:15 -06:00
|
|
|
} else {
|
|
|
|
lock(&globalAlloc.mutex)
|
|
|
|
persistent = &globalAlloc.persistentAlloc
|
|
|
|
}
|
2015-01-14 12:13:55 -07:00
|
|
|
persistent.off = round(persistent.off, align)
|
2015-01-14 13:48:32 -07:00
|
|
|
if persistent.off+size > chunk || persistent.base == nil {
|
2015-01-14 12:13:55 -07:00
|
|
|
persistent.base = sysAlloc(chunk, &memstats.other_sys)
|
|
|
|
if persistent.base == nil {
|
2015-03-08 18:56:15 -06:00
|
|
|
if persistent == &globalAlloc.persistentAlloc {
|
|
|
|
unlock(&globalAlloc.mutex)
|
|
|
|
}
|
2014-12-27 21:58:00 -07:00
|
|
|
throw("runtime: cannot allocate memory")
|
2014-09-03 22:54:06 -06:00
|
|
|
}
|
2015-01-14 12:13:55 -07:00
|
|
|
persistent.off = 0
|
2014-09-03 22:54:06 -06:00
|
|
|
}
|
2015-01-14 12:13:55 -07:00
|
|
|
p := add(persistent.base, persistent.off)
|
|
|
|
persistent.off += size
|
2015-03-08 18:56:15 -06:00
|
|
|
releasem(mp)
|
|
|
|
if persistent == &globalAlloc.persistentAlloc {
|
|
|
|
unlock(&globalAlloc.mutex)
|
|
|
|
}
|
2014-09-03 22:54:06 -06:00
|
|
|
|
2015-04-16 15:32:18 -06:00
|
|
|
if sysStat != &memstats.other_sys {
|
|
|
|
mSysStatInc(sysStat, size)
|
|
|
|
mSysStatDec(&memstats.other_sys, size)
|
2014-09-03 22:54:06 -06:00
|
|
|
}
|
|
|
|
return p
|
|
|
|
}
|