1
0
mirror of https://github.com/golang/go synced 2024-11-22 19:54:39 -07:00

runtime: make alloc headers footers instead

The previous CL in this series (CL 437955) adds the allocation headers
experiment. However, this experiment puts the headers at the beginning
of each allocation, which decreases the default allocator alignment that
users can rely upon. Historically, Go's memory allocator has implicitly
provided 16-byte alignment (except for sizes where it doesn't make
sense, like 8 or 24 bytes), so it's not unlikely that users are
depending on it. It also complicates other changes that want higher
alignment. For example, the sync/atomic.Uint64Pair proposal would
(hypothetically; it's not yet accepted) introduce a type with 16-byte
alignment. The malloc fast path will require extra code to consider
alignment and will waste memory for any value containing such a type.

This change moves the allocation header to the end of the span's
allocation slot instead of the beginning. This means worse locality for
the GC when scanning, but it's still an overall win. It also means that
objects will still have the 16-byte alignment we've provided thus far.

This is broken out in a separate change just becauase it ended up that
way during development. But I've chosen to leave it this way in case we
want to try and move allocation headers to the front of objects again.

Below are the benchmark results of this CL series, comparing the
performance of this CL with GOEXPERIMENT=allocheaders vs. without this
CL series.

name                  old time/op            new time/op            delta
BiogoIgor                        12.5s ± 0%             12.4s ± 2%    ~     (p=0.079 n=9+10)
BiogoKrishna                     12.8s ±10%             12.4s ±10%    ~     (p=0.182 n=9+10)
BleveIndexBatch100               4.54s ± 3%             4.60s ± 3%    ~     (p=0.050 n=9+9)
EtcdPut                         21.1ms ± 2%            21.3ms ± 4%    ~     (p=0.669 n=7+10)
EtcdSTM                          107ms ± 3%             108ms ± 2%    ~     (p=0.497 n=9+10)
GoBuildKubelet                   34.1s ± 3%             33.1s ± 2%  -3.08%  (p=0.000 n=10+10)
GoBuildKubeletLink               7.94s ± 2%             7.95s ± 2%    ~     (p=0.631 n=10+10)
GoBuildIstioctl                  33.2s ± 1%             31.7s ± 0%  -4.37%  (p=0.000 n=9+9)
GoBuildIstioctlLink              8.07s ± 1%             8.05s ± 1%    ~     (p=0.356 n=9+10)
GoBuildFrontend                  12.1s ± 0%             11.5s ± 1%  -4.43%  (p=0.000 n=8+10)
GoBuildFrontendLink              1.20s ± 2%             1.20s ± 2%    ~     (p=0.905 n=9+10)
GopherLuaKNucleotide             19.9s ± 0%             19.5s ± 1%  -1.95%  (p=0.000 n=9+10)
MarkdownRenderXHTML              194ms ± 5%             194ms ± 2%    ~     (p=0.931 n=9+9)
Tile38QueryLoad                  518µs ± 1%             508µs ± 1%  -1.93%  (p=0.000 n=9+8)

name                  old average-RSS-bytes  new average-RSS-bytes  delta
BiogoIgor                       66.2MB ± 3%            65.6MB ± 1%    ~     (p=0.156 n=10+9)
BiogoKrishna                    4.34GB ± 2%            4.34GB ± 1%    ~     (p=0.315 n=10+9)
BleveIndexBatch100               189MB ± 3%             186MB ± 3%    ~     (p=0.052 n=10+10)
EtcdPut                          105MB ± 5%             107MB ± 6%    ~     (p=0.579 n=10+10)
EtcdSTM                         92.1MB ± 5%            93.2MB ± 4%    ~     (p=0.353 n=10+10)
GoBuildKubelet                  2.07GB ± 1%            2.07GB ± 1%    ~     (p=0.436 n=10+10)
GoBuildIstioctl                 1.44GB ± 1%            1.46GB ± 1%  +0.96%  (p=0.001 n=10+10)
GoBuildFrontend                  522MB ± 1%             512MB ± 2%  -1.98%  (p=0.000 n=10+10)
GopherLuaKNucleotide            37.4MB ± 5%            36.4MB ± 4%  -2.53%  (p=0.035 n=10+10)
MarkdownRenderXHTML             21.2MB ± 1%            20.9MB ± 3%  -1.53%  (p=0.003 n=8+10)
Tile38QueryLoad                 6.39GB ± 2%            6.24GB ± 2%  -2.40%  (p=0.000 n=10+10)

name                  old peak-RSS-bytes     new peak-RSS-bytes     delta
BiogoIgor                       88.5MB ± 4%            88.4MB ± 3%    ~     (p=0.971 n=10+10)
BiogoKrishna                    4.48GB ± 0%            4.42GB ± 0%  -1.49%  (p=0.000 n=10+10)
BleveIndexBatch100               268MB ± 3%             265MB ± 4%    ~     (p=0.315 n=9+10)
EtcdPut                          147MB ± 9%             146MB ± 5%    ~     (p=0.853 n=10+10)
EtcdSTM                          119MB ± 6%             120MB ± 5%    ~     (p=0.796 n=10+10)
GopherLuaKNucleotide            43.1MB ±17%            40.7MB ±12%    ~     (p=0.075 n=10+10)
MarkdownRenderXHTML             21.2MB ± 1%            21.1MB ± 3%    ~     (p=0.511 n=9+10)
Tile38QueryLoad                 6.65GB ± 4%            6.52GB ± 2%  -1.93%  (p=0.009 n=10+10)

name                  old peak-VM-bytes      new peak-VM-bytes      delta
BiogoIgor                       1.33GB ± 0%            1.33GB ± 0%  -0.16%  (p=0.000 n=10+10)
BiogoKrishna                    5.77GB ± 0%            5.69GB ± 0%  -1.23%  (p=0.000 n=10+10)
BleveIndexBatch100              2.62GB ± 0%            2.61GB ± 0%  -0.13%  (p=0.000 n=7+10)
EtcdPut                         12.1GB ± 0%            12.1GB ± 0%    ~     (p=0.160 n=8+10)
EtcdSTM                         12.1GB ± 0%            12.1GB ± 0%  -0.02%  (p=0.000 n=10+10)
GopherLuaKNucleotide            1.26GB ± 0%            1.26GB ± 0%  -0.09%  (p=0.000 n=10+10)
MarkdownRenderXHTML             1.26GB ± 0%            1.26GB ± 0%  -0.08%  (p=0.000 n=10+10)
Tile38QueryLoad                 7.89GB ± 4%            7.76GB ± 1%  -1.70%  (p=0.008 n=10+8)

name                  old p50-latency-ns     new p50-latency-ns     delta
EtcdPut                          20.1M ± 5%             20.2M ± 4%    ~     (p=0.529 n=10+10)
EtcdSTM                          79.8M ± 4%             79.9M ± 4%    ~     (p=0.971 n=10+10)
Tile38QueryLoad                   215k ± 1%              210k ± 3%  -2.04%  (p=0.021 n=8+10)

name                  old p90-latency-ns     new p90-latency-ns     delta
EtcdPut                          31.9M ± 6%             32.0M ± 7%    ~     (p=0.780 n=9+10)
EtcdSTM                           220M ± 6%              220M ± 2%    ~     (p=1.000 n=10+10)
Tile38QueryLoad                   622k ± 2%              646k ± 2%  +3.83%  (p=0.000 n=10+10)

name                  old p99-latency-ns     new p99-latency-ns     delta
EtcdPut                          47.6M ±32%             51.4M ±28%    ~     (p=0.529 n=10+10)
EtcdSTM                           452M ± 2%              457M ± 2%    ~     (p=0.182 n=9+10)
Tile38QueryLoad                  5.04M ± 2%             4.91M ± 3%  -2.56%  (p=0.001 n=9+9)

name                  old ops/s              new ops/s              delta
EtcdPut                          46.1k ± 2%             45.7k ± 4%    ~     (p=0.475 n=7+10)
EtcdSTM                          9.18k ± 5%             9.20k ± 3%    ~     (p=0.971 n=10+10)
Tile38QueryLoad                  17.4k ± 1%             17.7k ± 1%  +1.97%  (p=0.000 n=9+8)

Change-Id: I637f48fb9e8c181912db785ae9186d7f16769870
Reviewed-on: https://go-review.googlesource.com/c/go/+/537886
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Michael Anthony Knyszek 2023-10-31 04:50:26 +00:00 committed by Michael Knyszek
parent 38ac7c41aa
commit 1e250a2199
3 changed files with 7 additions and 15 deletions

View File

@ -1149,8 +1149,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
memclrNoHeapPointers(x, size)
}
if goexperiment.AllocHeaders && hasHeader {
header = (**_type)(x)
x = add(x, mallocHeaderSize)
header = (**_type)(unsafe.Pointer(uintptr(v) + size - mallocHeaderSize))
size -= mallocHeaderSize
}
}

View File

@ -48,9 +48,9 @@
// is zeroed, so the GC just observes nil pointers.
// Note that this "tiled" bitmap isn't stored anywhere; it is generated on-the-fly.
//
// For objects without their own span, the type metadata is stored in the first
// word before the object at the beginning of the allocation slot. For objects
// with their own span, the type metadata is stored in the mspan.
// For objects without their own span, the type metadata is stored in the last
// word of the allocation slot. For objects with their own span, the type metadata
// is stored in the mspan.
//
// The bitmap for small unallocated objects in scannable spans is not maintained
// (can be junk).
@ -191,9 +191,8 @@ func (span *mspan) typePointersOfUnchecked(addr uintptr) typePointers {
// All of these objects have a header.
var typ *_type
if spc.sizeclass() != 0 {
// Pull the allocation header from the first word of the object.
typ = *(**_type)(unsafe.Pointer(addr))
addr += mallocHeaderSize
// Pull the allocation header from the last word of the object.
typ = *(**_type)(unsafe.Pointer(addr + span.elemsize - mallocHeaderSize))
} else {
typ = span.largeType
}

View File

@ -9,7 +9,6 @@ package runtime
import (
"internal/abi"
"internal/goarch"
"internal/goexperiment"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@ -411,7 +410,7 @@ func SetFinalizer(obj any, finalizer any) {
}
// find the containing object
base, span, _ := findObject(uintptr(e.data), 0, 0)
base, _, _ := findObject(uintptr(e.data), 0, 0)
if base == 0 {
if isGoPointerWithoutSpan(e.data) {
@ -420,11 +419,6 @@ func SetFinalizer(obj any, finalizer any) {
throw("runtime.SetFinalizer: pointer not in allocated block")
}
// Move base forward if we've got an allocation header.
if goexperiment.AllocHeaders && !span.spanclass.noscan() && !heapBitsInSpan(span.elemsize) && span.spanclass.sizeclass() != 0 {
base += mallocHeaderSize
}
if uintptr(e.data) != base {
// As an implementation detail we allow to set finalizers for an inner byte
// of an object if it could come from tiny alloc (see mallocgc for details).