// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"unsafe"
)

const (
	debugMalloc = false

	flagNoScan = 1 << 0 // GC doesn't have to scan object
	flagNoZero = 1 << 1 // don't zero memory

	kindArray      = 17
	kindFunc       = 19
	kindInterface  = 20
	kindPtr        = 22
	kindStruct     = 25
	kindMask       = 1<<6 - 1
	kindGCProg     = 1 << 6
	kindNoPointers = 1 << 7

	maxTinySize   = 16
	tinySizeClass = 2
	maxSmallSize  = 32 << 10

	pageShift = 13
	pageSize  = 1 << pageShift
	pageMask  = pageSize - 1

	wordsPerBitmapWord = ptrSize * 8 / 4
	gcBits             = 4
	bitsPerPointer     = 2
	bitsMask           = 1<<bitsPerPointer - 1
)

// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
	if size == 0 {
		return unsafe.Pointer(&zeroObject)
	}
	mp := acquirem()
	if mp.mallocing != 0 {
		gothrow("malloc/free - deadlock")
	}
	mp.mallocing = 1
	size0 := size

	c := mp.mcache
	var s *mspan
	var x unsafe.Pointer
	if size <= maxSmallSize {
		if flags&flagNoScan != 0 && size < maxTinySize {
			// Tiny allocator.
			//
			// The tiny allocator combines several tiny allocation requests
			// into a single memory block. The resulting memory block
			// is freed when all subobjects are unreachable. The subobjects
			// must be FlagNoScan (have no pointers); this ensures that
			// the amount of potentially wasted memory is bounded.
			//
			// The size of the memory block used for combining (maxTinySize) is tunable.
			// The current setting is 16 bytes, which gives 2x worst case memory
			// wastage (when all but one subobject are unreachable).
			// 8 bytes would result in no wastage at all, but provides fewer
			// opportunities for combining.
			// 32 bytes provides more opportunities for combining,
			// but can lead to 4x worst case wastage.
			// The best case win is 8x regardless of block size.
			//
			// Objects obtained from the tiny allocator must not be freed explicitly.
			// So when an object will be freed explicitly, we ensure that
			// its size >= maxTinySize.
			//
			// SetFinalizer has a special case for objects potentially coming
			// from the tiny allocator; in such a case it allows setting finalizers
			// for an inner byte of a memory block.
			//
			// The main targets of the tiny allocator are small strings and
			// standalone escaping variables. On a json benchmark
			// the allocator reduces the number of allocations by ~12% and
			// reduces the heap size by ~20%.
			tinysize := uintptr(c.tinysize)
			if size <= tinysize {
				tiny := unsafe.Pointer(c.tiny)
				// Align tiny pointer for required (conservative) alignment.
				if size&7 == 0 {
					tiny = roundup(tiny, 8)
				} else if size&3 == 0 {
					tiny = roundup(tiny, 4)
				} else if size&1 == 0 {
					tiny = roundup(tiny, 2)
				}
				size1 := size + (uintptr(tiny) - uintptr(unsafe.Pointer(c.tiny)))
				if size1 <= tinysize {
					// The object fits into existing tiny block.
					x = tiny
					c.tiny = (*byte)(add(x, size))
					c.tinysize -= uint(size1)
					mp.mallocing = 0
					releasem(mp)
					return x
				}
			}
			// Allocate a new maxTinySize block.
			s = c.alloc[tinySizeClass]
			v := s.freelist
			if v == nil {
				mp.scalararg[0] = tinySizeClass
				onM(&mcacheRefill_m)
				s = c.alloc[tinySizeClass]
				v = s.freelist
			}
			s.freelist = v.next
			s.ref++
			//TODO: prefetch v.next
			x = unsafe.Pointer(v)
			(*[2]uint64)(x)[0] = 0
			(*[2]uint64)(x)[1] = 0
			// See if we need to replace the existing tiny block with the new one
			// based on amount of remaining free space.
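			// Worked example (illustrative numbers, not from the source): with
			// maxTinySize = 16, a 5-byte request that did not fit leaves 11 free
			// bytes in the fresh block; if the old block had, say, only 3 free
			// bytes left (tinysize = 3), the check below prefers the fresh block
			// for future tiny requests and repoints c.tiny at it.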
			if maxTinySize-size > tinysize {
				c.tiny = (*byte)(add(x, size))
				c.tinysize = uint(maxTinySize - size)
			}
			size = maxTinySize
		} else {
			var sizeclass int8
			if size <= 1024-8 {
				sizeclass = size_to_class8[(size+7)>>3]
			} else {
				sizeclass = size_to_class128[(size-1024+127)>>7]
			}
			size = uintptr(class_to_size[sizeclass])
			s = c.alloc[sizeclass]
			v := s.freelist
			if v == nil {
				mp.scalararg[0] = uint(sizeclass)
				onM(&mcacheRefill_m)
				s = c.alloc[sizeclass]
				v = s.freelist
			}
			s.freelist = v.next
			s.ref++
			//TODO: prefetch
			x = unsafe.Pointer(v)
			if flags&flagNoZero == 0 {
				v.next = nil
				if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
					memclr(unsafe.Pointer(v), size)
				}
			}
		}
		c.local_cachealloc += int(size)
	} else {
		mp.scalararg[0] = uint(size)
		mp.scalararg[1] = uint(flags)
		onM(&largeAlloc_m)
		s = (*mspan)(mp.ptrarg[0])
		mp.ptrarg[0] = nil
		x = unsafe.Pointer(uintptr(s.start << pageShift))
		size = uintptr(s.elemsize)
	}

	// From here until the marked label, mark the object as allocated
	// and store type info in the GC bitmap.
	arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
	off := (uintptr(x) - arena_start) / ptrSize
	xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
	shift := (off % wordsPerBitmapWord) * gcBits
	if debugMalloc && (((*xbits)>>shift)&bitMask) != bitBoundary {
		gothrow("bad bits in markallocated")
	}

	var ti, te uintptr
	var ptrmask *uint8
	if flags&flagNoScan != 0 {
		// bitsDead in the first quadruple means don't scan.
		if size == ptrSize {
			*xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated + (bitsDead << 2)) << shift)
		} else {
			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
			*xbitsb = bitAllocated + (bitsDead << 2)
		}
		goto marked
	}
	if size == ptrSize {
		// It's one word and it has pointers, it must be a pointer.
		*xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated | (bitsPointer << 2)) << shift)
		goto marked
	}
	if typ != nil && (uintptr(typ.gc[0])|uintptr(typ.gc[1])) != 0 && uintptr(typ.size) > ptrSize {
		if typ.kind&kindGCProg != 0 {
			nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
			masksize := nptr
			if masksize%2 != 0 {
				masksize *= 2 // repeated
			}
			masksize = masksize * pointersPerByte / 8 // 4 bits per word
			masksize++                                // unroll flag in the beginning
			if masksize > maxGCMask && typ.gc[1] != 0 {
				// If the mask is too large, unroll the program directly
				// into the GC bitmap. It's 7 times slower than copying
				// from the pre-unrolled mask, but saves 1/16 of type size
				// memory for the mask.
				mp.ptrarg[0] = x
				mp.ptrarg[1] = unsafe.Pointer(typ)
				mp.scalararg[0] = uint(size)
				mp.scalararg[1] = uint(size0)
				onM(&unrollgcproginplace_m)
				goto marked
			}
			ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
			// Check whether the program is already unrolled.
			if uintptr(goatomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
				mp.ptrarg[0] = unsafe.Pointer(typ)
				onM(&unrollgcprog_m)
			}
			ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
		} else {
			ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask
		}
		if size == 2*ptrSize {
			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
			*xbitsb = *ptrmask | bitAllocated
			goto marked
		}
		te = uintptr(typ.size) / ptrSize
		// If the type occupies an odd number of words, its mask is repeated.
		if te%2 == 0 {
			te /= 2
		}
	}
	if size == 2*ptrSize {
		xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
		*xbitsb = (bitsPointer << 2) | (bitsPointer << 6) | bitAllocated
		goto marked
	}
	// Copy pointer bitmask into the bitmap.
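	// Illustrative arithmetic for the loop below, assuming a 64-bit system
	// (ptrSize = 8, so wordsPerBitmapWord = 16 and gcBits = 4): each iteration
	// writes one bitmap byte covering two object words. For the word at offset
	// i from x, off is its word index from arena_start, its bitmap word lives
	// at arena_start - off/16*8 - 8 (the bitmap grows down from arena_start),
	// and shift = (off%16)*4 selects the word's nibble; the nibble's low two
	// bits are the allocation/boundary bits and its high two bits are the
	// bitsDead/bitsPointer type bits.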
	for i := uintptr(0); i < size0; i += 2 * ptrSize {
		v := uint8((bitsPointer << 2) | (bitsPointer << 6))
		if ptrmask != nil {
			v = *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
			ti++
			if ti == te {
				ti = 0
			}
		}
		if i == 0 {
			v |= bitAllocated
		}
		if i+ptrSize == size0 {
			v &= ^uint8(bitPtrMask << 4)
		}
		off := (uintptr(x) + i - arena_start) / ptrSize
		xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
		shift := (off % wordsPerBitmapWord) * gcBits
		xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
		*xbitsb = v
	}
	if size0%(2*ptrSize) == 0 && size0 < size {
		// Mark the word after the last object's word as bitsDead.
		off := (uintptr(x) + size0 - arena_start) / ptrSize
		xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
		shift := (off % wordsPerBitmapWord) * gcBits
		xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
		*xbitsb = bitsDead << 2
	}
marked:
	mp.mallocing = 0

	if raceenabled {
		racemalloc(x, size)
	}
	if debug.allocfreetrace != 0 {
		tracealloc(x, size, typ)
	}

	if rate := MemProfileRate; rate > 0 {
		if size < uintptr(rate) && int32(size) < c.next_sample {
			c.next_sample -= int32(size)
		} else {
			profilealloc(mp, x, size)
		}
	}

	releasem(mp)

	if memstats.heap_alloc >= memstats.next_gc {
		gogc(0)
	}

	return x
}

// cmallocgc is a trampoline used to call the Go malloc from C.
func cmallocgc(size uintptr, typ *_type, flags int, ret *unsafe.Pointer) {
	*ret = gomallocgc(size, typ, flags)
}

// implementation of new builtin
func newobject(typ *_type) unsafe.Pointer {
	flags := 0
	if typ.kind&kindNoPointers != 0 {
		flags |= flagNoScan
	}
	return gomallocgc(uintptr(typ.size), typ, flags)
}

// implementation of make builtin for slices
func newarray(typ *_type, n uintptr) unsafe.Pointer {
	flags := 0
	if typ.kind&kindNoPointers != 0 {
		flags |= flagNoScan
	}
	if int(n) < 0 || (typ.size > 0 && n > maxMem/uintptr(typ.size)) {
		panic("runtime: allocation size out of range")
	}
	return gomallocgc(uintptr(typ.size)*n, typ, flags)
}

// rawmem returns a chunk of pointerless memory. It is
// not zeroed.
func rawmem(size uintptr) unsafe.Pointer {
	return gomallocgc(size, nil, flagNoScan|flagNoZero)
}

// round size up to next size class
func goroundupsize(size uintptr) uintptr {
	if size < maxSmallSize {
		if size <= 1024-8 {
			return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
		}
		return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
	}
	if size+pageSize < size {
		return size
	}
	return (size + pageSize - 1) &^ pageMask
}

func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
	c := mp.mcache
	rate := MemProfileRate
	if size < uintptr(rate) {
		// pick next profile time
		// If you change this, also change allocmcache.
		if rate > 0x3fffffff { // make 2*rate not overflow
			rate = 0x3fffffff
		}
		next := int32(fastrand2()) % (2 * int32(rate))
		// Subtract the "remainder" of the current allocation.
		// Otherwise objects that are close in size to the sampling rate
		// will be under-sampled, because we consistently discard this remainder.
		next -= (int32(size) - c.next_sample)
		if next < 0 {
			next = 0
		}
		c.next_sample = next
	}

	mp.scalararg[0] = uint(size)
	mp.ptrarg[0] = x
	onM(&mprofMalloc_m)
}

// force = 1 - do GC regardless of current heap usage
// force = 2 - do GC and eager sweep
func gogc(force int32) {
	if memstats.enablegc == 0 {
		return
	}

	// TODO: should never happen? Only C calls malloc while holding a lock?
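	// The acquirem/locks check below skips this GC cycle if the caller already
	// holds locks (mp.locks is already >= 1 before acquirem increments it),
	// presumably because stopping the world while holding runtime locks could
	// deadlock.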
	mp := acquirem()
	if mp.locks > 1 {
		releasem(mp)
		return
	}
	releasem(mp)

	if panicking != 0 {
		return
	}
	if gcpercent == gcpercentUnknown {
		golock(&mheap_.lock)
		if gcpercent == gcpercentUnknown {
			gcpercent = goreadgogc()
		}
		gounlock(&mheap_.lock)
	}
	if gcpercent < 0 {
		return
	}

	semacquire(&worldsema, false)

	if force == 0 && memstats.heap_alloc < memstats.next_gc {
		// typically threads which lost the race to grab
		// worldsema exit here when gc is done.
		semrelease(&worldsema)
		return
	}

	// Ok, we're doing it!  Stop everybody else
	startTime := gonanotime()
	mp = acquirem()
	mp.gcing = 1
	stoptheworld()

	clearpools()

	// Run gc on the g0 stack.  We do this so that the g stack
	// we're currently running on will no longer change.  Cuts
	// the root set down a bit (g0 stacks are not scanned, and
	// we don't need to scan gc's internal state).  We also
	// need to switch to g0 so we can shrink the stack.
	n := 1
	if debug.gctrace > 1 {
		n = 2
	}
	for i := 0; i < n; i++ {
		if i > 0 {
			startTime = gonanotime()
		}
		// switch to g0, call gc, then switch back
		mp.scalararg[0] = uint(startTime)
		if force >= 2 {
			mp.scalararg[1] = 1 // eagersweep
		} else {
			mp.scalararg[1] = 0
		}
		onM(&gc_m)
	}

	// all done
	mp.gcing = 0
	semrelease(&worldsema)
	starttheworld()
	releasem(mp)

	// now that gc is done, kick off finalizer thread if needed
	if !concurrentSweep {
		// give the queued finalizers, if any, a chance to run
		gosched()
	}
}

// GC runs a garbage collection.
func GC() {
	gogc(2)
}

// SetFinalizer sets the finalizer associated with x to f.
// When the garbage collector finds an unreachable block
// with an associated finalizer, it clears the association and runs
// f(x) in a separate goroutine. This makes x reachable again, but
// now without an associated finalizer. Assuming that SetFinalizer
// is not called again, the next time the garbage collector sees
// that x is unreachable, it will free x.
//
// SetFinalizer(x, nil) clears any finalizer associated with x.
//
// The argument x must be a pointer to an object allocated by
// calling new or by taking the address of a composite literal.
// The argument f must be a function that takes a single argument
// to which x's type can be assigned, and can have arbitrary ignored return
// values. If either of these is not true, SetFinalizer aborts the
// program.
//
// Finalizers are run in dependency order: if A points at B, both have
// finalizers, and they are otherwise unreachable, only the finalizer
// for A runs; once A is freed, the finalizer for B can run.
// If a cyclic structure includes a block with a finalizer, that
// cycle is not guaranteed to be garbage collected and the finalizer
// is not guaranteed to run, because there is no ordering that
// respects the dependencies.
//
// The finalizer for x is scheduled to run at some arbitrary time after
// x becomes unreachable.
// There is no guarantee that finalizers will run before a program exits,
// so typically they are useful only for releasing non-memory resources
// associated with an object during a long-running program.
// For example, an os.File object could use a finalizer to close the
// associated operating system file descriptor when a program discards
// an os.File without calling Close, but it would be a mistake
// to depend on a finalizer to flush an in-memory I/O buffer such as a
// bufio.Writer, because the buffer would not be flushed at program exit.
//
// It is not guaranteed that a finalizer will run if the size of *x is
// zero bytes.
//
// A single goroutine runs all finalizers for a program, sequentially.
// If a finalizer must run for a long time, it should do so by starting
// a new goroutine.
func SetFinalizer(obj interface{}, finalizer interface{}) {
	// We do just enough work here to make the mcall type safe.
	// The rest is done on the M stack.
	e := (*eface)(unsafe.Pointer(&obj))
	typ := e._type
	if typ == nil {
		gothrow("runtime.SetFinalizer: first argument is nil")
	}
	if typ.kind&kindMask != kindPtr {
		gothrow("runtime.SetFinalizer: first argument is " + *typ._string + ", not pointer")
	}

	f := (*eface)(unsafe.Pointer(&finalizer))
	ftyp := f._type
	if ftyp != nil && ftyp.kind&kindMask != kindFunc {
		gothrow("runtime.SetFinalizer: second argument is " + *ftyp._string + ", not a function")
	}

	mp := acquirem()
	mp.ptrarg[0] = unsafe.Pointer(typ)
	mp.ptrarg[1] = e.data
	mp.ptrarg[2] = unsafe.Pointer(ftyp)
	mp.ptrarg[3] = f.data
	onM(&setFinalizer_m)
	releasem(mp)
}
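// Example (illustrative sketch, not part of the runtime): the usage pattern
// the SetFinalizer documentation above describes for os.File, written out for
// a hypothetical wrapper type. The tempFile type, newTemp function, and file
// name are assumptions for the sketch.
//
//	package main
//
//	import (
//		"os"
//		"runtime"
//	)
//
//	type tempFile struct {
//		f *os.File
//	}
//
//	func newTemp(name string) (*tempFile, error) {
//		f, err := os.Create(name)
//		if err != nil {
//			return nil, err
//		}
//		t := &tempFile{f: f}
//		// Safety net only: the finalizer closes the descriptor eventually if
//		// the caller forgets Close; it is not a substitute for calling Close.
//		runtime.SetFinalizer(t, func(t *tempFile) { t.f.Close() })
//		return t, nil
//	}
//
//	func (t *tempFile) Close() error {
//		runtime.SetFinalizer(t, nil) // finalizer no longer needed
//		return t.f.Close()
//	}
//
//	func main() {
//		t, err := newTemp("scratch.tmp")
//		if err != nil {
//			return
//		}
//		defer t.Close()
//		// use t.f ...
//	}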