1
0
mirror of https://github.com/golang/go synced 2024-11-26 09:08:07 -07:00

debug/buildinfo: read data in chunks

Rather than reading the entire data segment into memory, read it in
smaller chunks to keep memory usage low.

For typically Go binaries, this doesn't matter much. For those, we read
the .go.buildinfo section, which should be quite small. But for non-Go
binaries (or Go binaries with section headers stripped), we search the
entire loadable data segment, which could be quite large.

This reduces the time for `go version` on a 2.5GB non-Go binary from
~1.2s and 1GB RSS (!!) to ~1s and ~25MB RSS.

Fixes #68592.

Change-Id: I9218854c5b6f2aa1331f561ab0850a9fd62ef23b
Reviewed-on: https://go-review.googlesource.com/c/go/+/601459
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Michael Pratt 2024-07-26 13:41:12 -04:00
parent 8246171bae
commit b6e81a5129
2 changed files with 258 additions and 26 deletions

View File

@ -57,6 +57,11 @@ var errNotGoExe = errors.New("not a Go executable")
// fields.
var buildInfoMagic = []byte("\xff Go buildinf:")
const (
buildInfoAlign = 16
buildInfoHeaderSize = 32
)
// ReadFile returns build information embedded in a Go binary
// file at the given path. Most information is only available for binaries built
// with module support.
@ -165,14 +170,19 @@ func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
if dataSize == 0 {
return "", "", errNotGoExe
}
data, err := x.ReadData(dataAddr, dataSize)
addr, err := searchMagic(x, dataAddr, dataSize)
if err != nil {
return "", "", err
}
const (
buildInfoAlign = 16
buildInfoHeaderSize = 32
// Read in the full header first.
header, err := x.ReadData(addr, buildInfoHeaderSize)
if err != nil {
return "", "", err
}
const (
ptrSizeOffset = 14
flagsOffset = 15
versPtrOffset = 16
@ -185,17 +195,6 @@ func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
flagsVersionPtr = 0x0
flagsVersionInl = 0x2
)
for {
i := bytes.Index(data, buildInfoMagic)
if i < 0 || len(data)-i < buildInfoHeaderSize {
return "", "", errNotGoExe
}
if i%buildInfoAlign == 0 && len(data)-i >= buildInfoHeaderSize {
data = data[i:]
break
}
data = data[(i+buildInfoAlign-1)&^(buildInfoAlign-1):]
}
// Decode the blob. The blob is a 32-byte header, optionally followed
// by 2 varint-prefixed string contents.
@ -220,13 +219,19 @@ func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
// the header is followed by the string contents inline as
// length-prefixed (as varint) string contents. First is the version
// string, followed immediately by the modinfo string.
flags := data[flagsOffset]
flags := header[flagsOffset]
if flags&flagsVersionMask == flagsVersionInl {
vers, data = decodeString(data[buildInfoHeaderSize:])
mod, data = decodeString(data)
vers, addr, err = decodeString(x, addr+buildInfoHeaderSize)
if err != nil {
return "", "", err
}
mod, _, err = decodeString(x, addr)
if err != nil {
return "", "", err
}
} else {
// flagsVersionPtr (<1.18)
ptrSize := int(data[ptrSizeOffset])
ptrSize := int(header[ptrSizeOffset])
bigEndian := flags&flagsEndianMask == flagsEndianBig
var bo binary.ByteOrder
if bigEndian {
@ -242,8 +247,8 @@ func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
} else {
return "", "", errNotGoExe
}
vers = readString(x, ptrSize, readPtr, readPtr(data[versPtrOffset:]))
mod = readString(x, ptrSize, readPtr, readPtr(data[versPtrOffset+ptrSize:]))
vers = readString(x, ptrSize, readPtr, readPtr(header[versPtrOffset:]))
mod = readString(x, ptrSize, readPtr, readPtr(header[versPtrOffset+ptrSize:]))
}
if vers == "" {
return "", "", errNotGoExe
@ -270,12 +275,33 @@ func hasPlan9Magic(magic []byte) bool {
return false
}
func decodeString(data []byte) (s string, rest []byte) {
u, n := binary.Uvarint(data)
if n <= 0 || u > uint64(len(data)-n) {
return "", nil
func decodeString(x exe, addr uint64) (string, uint64, error) {
// varint length followed by length bytes of data.
// N.B. ReadData reads _up to_ size bytes from the section containing
// addr. So we don't need to check that size doesn't overflow the
// section.
b, err := x.ReadData(addr, binary.MaxVarintLen64)
if err != nil {
return "", 0, err
}
return string(data[n : uint64(n)+u]), data[uint64(n)+u:]
length, n := binary.Uvarint(b)
if n <= 0 {
return "", 0, errNotGoExe
}
addr += uint64(n)
b, err = x.ReadData(addr, length)
if err != nil {
return "", 0, err
}
if uint64(len(b)) < length {
// Section ended before we could read the full string.
return "", 0, errNotGoExe
}
return string(b), addr + length, nil
}
// readString returns the string at address addr in the executable x.
@ -293,6 +319,64 @@ func readString(x exe, ptrSize int, readPtr func([]byte) uint64, addr uint64) st
return string(data)
}
const searchChunkSize = 1 << 20 // 1 MB
// searchMagic returns the aligned first instance of buildInfoMagic in the data
// range [addr, addr+size). Returns false if not found.
func searchMagic(x exe, start, size uint64) (uint64, error) {
end := start + size
if end < start {
// Overflow.
return 0, errUnrecognizedFormat
}
// Round up start; magic can't occur in the initial unaligned portion.
start = (start + buildInfoAlign - 1) &^ (buildInfoAlign - 1)
if start >= end {
return 0, errNotGoExe
}
for start < end {
// Read in chunks to avoid consuming too much memory if data is large.
//
// Normally it would be somewhat painful to handle the magic crossing a
// chunk boundary, but since it must be 16-byte aligned we know it will
// fall within a single chunk.
remaining := end - start
chunkSize := uint64(searchChunkSize)
if chunkSize > remaining {
chunkSize = remaining
}
data, err := x.ReadData(start, chunkSize)
if err != nil {
return 0, err
}
for len(data) > 0 {
i := bytes.Index(data, buildInfoMagic)
if i < 0 {
break
}
if remaining-uint64(i) < buildInfoHeaderSize {
// Found magic, but not enough space left for the full header.
return 0, errNotGoExe
}
if i%buildInfoAlign != 0 {
// Found magic, but misaligned. Keep searching.
data = data[(i+buildInfoAlign-1)&^(buildInfoAlign-1):]
continue
}
// Good match!
return start + uint64(i), nil
}
start += chunkSize
}
return 0, errNotGoExe
}
// elfExe is the ELF implementation of the exe interface.
type elfExe struct {
f *elf.File

View File

@ -0,0 +1,148 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package buildinfo
import (
"fmt"
"testing"
)
type byteExe struct {
b []byte
}
func (x *byteExe) ReadData(addr, size uint64) ([]byte, error) {
end := addr + size
if end < addr {
return nil, fmt.Errorf("ReadData(%d, %d) overflow", addr, size)
}
if addr >= uint64(len(x.b)) || end-1 >= uint64(len(x.b)) {
return nil, fmt.Errorf("ReadData(%d, %d) out of bounds of %d-byte slice", addr, size, len(x.b))
}
return x.b[addr:end], nil
}
func (x *byteExe) DataStart() (uint64, uint64) {
return 0, uint64(len(x.b))
}
func TestSearchMagic(t *testing.T) {
tests := []struct {
name string
data []byte
want uint64
wantErr error
}{
{
name: "beginning",
data: func() []byte {
b := make([]byte, buildInfoHeaderSize)
copy(b, buildInfoMagic)
return b
}(),
want: 0,
},
{
name: "offset",
data: func() []byte {
b := make([]byte, 512)
copy(b[4*buildInfoAlign:], buildInfoMagic)
return b
}(),
want: 4 * buildInfoAlign,
},
{
name: "second_chunk",
data: func() []byte {
b := make([]byte, 4*searchChunkSize)
copy(b[searchChunkSize+4*buildInfoAlign:], buildInfoMagic)
return b
}(),
want: searchChunkSize + 4*buildInfoAlign,
},
{
name: "second_chunk_short",
data: func() []byte {
// Magic is 64-bytes into the second chunk,
// which is short; only exactly long enough to
// hold the header.
b := make([]byte, searchChunkSize+4*buildInfoAlign+buildInfoHeaderSize)
copy(b[searchChunkSize+4*buildInfoAlign:], buildInfoMagic)
return b
}(),
want: searchChunkSize + 4*buildInfoAlign,
},
{
name: "missing",
data: func() []byte {
b := make([]byte, buildInfoHeaderSize)
return b
}(),
wantErr: errNotGoExe,
},
{
name: "too_short",
data: func() []byte {
// There needs to be space for the entire
// header, not just the magic.
b := make([]byte, len(buildInfoMagic))
copy(b, buildInfoMagic)
return b
}(),
wantErr: errNotGoExe,
},
{
name: "misaligned",
data: func() []byte {
b := make([]byte, 512)
copy(b[7:], buildInfoMagic)
return b
}(),
wantErr: errNotGoExe,
},
{
name: "misaligned_across_chunk",
data: func() []byte {
// Magic crosses chunk boundary. By definition,
// it has to be misaligned.
b := make([]byte, 2*searchChunkSize)
copy(b[searchChunkSize-8:], buildInfoMagic)
return b
}(),
wantErr: errNotGoExe,
},
{
name: "header_across_chunk",
data: func() []byte {
// The magic is aligned within the first chunk,
// but the rest of the 32-byte header crosses
// the chunk boundary.
b := make([]byte, 2*searchChunkSize)
copy(b[searchChunkSize-buildInfoAlign:], buildInfoMagic)
return b
}(),
want: searchChunkSize - buildInfoAlign,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
x := &byteExe{tc.data}
dataAddr, dataSize := x.DataStart()
addr, err := searchMagic(x, dataAddr, dataSize)
if tc.wantErr == nil {
if err != nil {
t.Errorf("searchMagic got err %v want nil", err)
}
if addr != tc.want {
t.Errorf("searchMagic got addr %d want %d", addr, tc.want)
}
} else {
if err != tc.wantErr {
t.Errorf("searchMagic got err %v want %v", err, tc.wantErr)
}
}
})
}
}