mirror of
https://github.com/golang/go
synced 2024-11-22 18:34:51 -07:00
debug/buildinfo: read data in chunks
Rather than reading the entire data segment into memory, read it in smaller chunks to keep memory usage low. For typically Go binaries, this doesn't matter much. For those, we read the .go.buildinfo section, which should be quite small. But for non-Go binaries (or Go binaries with section headers stripped), we search the entire loadable data segment, which could be quite large. This reduces the time for `go version` on a 2.5GB non-Go binary from ~1.2s and 1GB RSS (!!) to ~1s and ~25MB RSS. Fixes #68592. Change-Id: I9218854c5b6f2aa1331f561ab0850a9fd62ef23b Reviewed-on: https://go-review.googlesource.com/c/go/+/601459 Reviewed-by: Cherry Mui <cherryyz@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
8246171bae
commit
b6e81a5129
@ -57,6 +57,11 @@ var errNotGoExe = errors.New("not a Go executable")
|
||||
// fields.
|
||||
var buildInfoMagic = []byte("\xff Go buildinf:")
|
||||
|
||||
const (
|
||||
buildInfoAlign = 16
|
||||
buildInfoHeaderSize = 32
|
||||
)
|
||||
|
||||
// ReadFile returns build information embedded in a Go binary
|
||||
// file at the given path. Most information is only available for binaries built
|
||||
// with module support.
|
||||
@ -165,14 +170,19 @@ func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
|
||||
if dataSize == 0 {
|
||||
return "", "", errNotGoExe
|
||||
}
|
||||
data, err := x.ReadData(dataAddr, dataSize)
|
||||
|
||||
addr, err := searchMagic(x, dataAddr, dataSize)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
const (
|
||||
buildInfoAlign = 16
|
||||
buildInfoHeaderSize = 32
|
||||
|
||||
// Read in the full header first.
|
||||
header, err := x.ReadData(addr, buildInfoHeaderSize)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
const (
|
||||
ptrSizeOffset = 14
|
||||
flagsOffset = 15
|
||||
versPtrOffset = 16
|
||||
@ -185,17 +195,6 @@ func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
|
||||
flagsVersionPtr = 0x0
|
||||
flagsVersionInl = 0x2
|
||||
)
|
||||
for {
|
||||
i := bytes.Index(data, buildInfoMagic)
|
||||
if i < 0 || len(data)-i < buildInfoHeaderSize {
|
||||
return "", "", errNotGoExe
|
||||
}
|
||||
if i%buildInfoAlign == 0 && len(data)-i >= buildInfoHeaderSize {
|
||||
data = data[i:]
|
||||
break
|
||||
}
|
||||
data = data[(i+buildInfoAlign-1)&^(buildInfoAlign-1):]
|
||||
}
|
||||
|
||||
// Decode the blob. The blob is a 32-byte header, optionally followed
|
||||
// by 2 varint-prefixed string contents.
|
||||
@ -220,13 +219,19 @@ func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
|
||||
// the header is followed by the string contents inline as
|
||||
// length-prefixed (as varint) string contents. First is the version
|
||||
// string, followed immediately by the modinfo string.
|
||||
flags := data[flagsOffset]
|
||||
flags := header[flagsOffset]
|
||||
if flags&flagsVersionMask == flagsVersionInl {
|
||||
vers, data = decodeString(data[buildInfoHeaderSize:])
|
||||
mod, data = decodeString(data)
|
||||
vers, addr, err = decodeString(x, addr+buildInfoHeaderSize)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
mod, _, err = decodeString(x, addr)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
} else {
|
||||
// flagsVersionPtr (<1.18)
|
||||
ptrSize := int(data[ptrSizeOffset])
|
||||
ptrSize := int(header[ptrSizeOffset])
|
||||
bigEndian := flags&flagsEndianMask == flagsEndianBig
|
||||
var bo binary.ByteOrder
|
||||
if bigEndian {
|
||||
@ -242,8 +247,8 @@ func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
|
||||
} else {
|
||||
return "", "", errNotGoExe
|
||||
}
|
||||
vers = readString(x, ptrSize, readPtr, readPtr(data[versPtrOffset:]))
|
||||
mod = readString(x, ptrSize, readPtr, readPtr(data[versPtrOffset+ptrSize:]))
|
||||
vers = readString(x, ptrSize, readPtr, readPtr(header[versPtrOffset:]))
|
||||
mod = readString(x, ptrSize, readPtr, readPtr(header[versPtrOffset+ptrSize:]))
|
||||
}
|
||||
if vers == "" {
|
||||
return "", "", errNotGoExe
|
||||
@ -270,12 +275,33 @@ func hasPlan9Magic(magic []byte) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func decodeString(data []byte) (s string, rest []byte) {
|
||||
u, n := binary.Uvarint(data)
|
||||
if n <= 0 || u > uint64(len(data)-n) {
|
||||
return "", nil
|
||||
func decodeString(x exe, addr uint64) (string, uint64, error) {
|
||||
// varint length followed by length bytes of data.
|
||||
|
||||
// N.B. ReadData reads _up to_ size bytes from the section containing
|
||||
// addr. So we don't need to check that size doesn't overflow the
|
||||
// section.
|
||||
b, err := x.ReadData(addr, binary.MaxVarintLen64)
|
||||
if err != nil {
|
||||
return "", 0, err
|
||||
}
|
||||
return string(data[n : uint64(n)+u]), data[uint64(n)+u:]
|
||||
|
||||
length, n := binary.Uvarint(b)
|
||||
if n <= 0 {
|
||||
return "", 0, errNotGoExe
|
||||
}
|
||||
addr += uint64(n)
|
||||
|
||||
b, err = x.ReadData(addr, length)
|
||||
if err != nil {
|
||||
return "", 0, err
|
||||
}
|
||||
if uint64(len(b)) < length {
|
||||
// Section ended before we could read the full string.
|
||||
return "", 0, errNotGoExe
|
||||
}
|
||||
|
||||
return string(b), addr + length, nil
|
||||
}
|
||||
|
||||
// readString returns the string at address addr in the executable x.
|
||||
@ -293,6 +319,64 @@ func readString(x exe, ptrSize int, readPtr func([]byte) uint64, addr uint64) st
|
||||
return string(data)
|
||||
}
|
||||
|
||||
const searchChunkSize = 1 << 20 // 1 MB
|
||||
|
||||
// searchMagic returns the aligned first instance of buildInfoMagic in the data
|
||||
// range [addr, addr+size). Returns false if not found.
|
||||
func searchMagic(x exe, start, size uint64) (uint64, error) {
|
||||
end := start + size
|
||||
if end < start {
|
||||
// Overflow.
|
||||
return 0, errUnrecognizedFormat
|
||||
}
|
||||
|
||||
// Round up start; magic can't occur in the initial unaligned portion.
|
||||
start = (start + buildInfoAlign - 1) &^ (buildInfoAlign - 1)
|
||||
if start >= end {
|
||||
return 0, errNotGoExe
|
||||
}
|
||||
|
||||
for start < end {
|
||||
// Read in chunks to avoid consuming too much memory if data is large.
|
||||
//
|
||||
// Normally it would be somewhat painful to handle the magic crossing a
|
||||
// chunk boundary, but since it must be 16-byte aligned we know it will
|
||||
// fall within a single chunk.
|
||||
remaining := end - start
|
||||
chunkSize := uint64(searchChunkSize)
|
||||
if chunkSize > remaining {
|
||||
chunkSize = remaining
|
||||
}
|
||||
|
||||
data, err := x.ReadData(start, chunkSize)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
for len(data) > 0 {
|
||||
i := bytes.Index(data, buildInfoMagic)
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
if remaining-uint64(i) < buildInfoHeaderSize {
|
||||
// Found magic, but not enough space left for the full header.
|
||||
return 0, errNotGoExe
|
||||
}
|
||||
if i%buildInfoAlign != 0 {
|
||||
// Found magic, but misaligned. Keep searching.
|
||||
data = data[(i+buildInfoAlign-1)&^(buildInfoAlign-1):]
|
||||
continue
|
||||
}
|
||||
// Good match!
|
||||
return start + uint64(i), nil
|
||||
}
|
||||
|
||||
start += chunkSize
|
||||
}
|
||||
|
||||
return 0, errNotGoExe
|
||||
}
|
||||
|
||||
// elfExe is the ELF implementation of the exe interface.
|
||||
type elfExe struct {
|
||||
f *elf.File
|
||||
|
148
src/debug/buildinfo/search_test.go
Normal file
148
src/debug/buildinfo/search_test.go
Normal file
@ -0,0 +1,148 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package buildinfo
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type byteExe struct {
|
||||
b []byte
|
||||
}
|
||||
|
||||
func (x *byteExe) ReadData(addr, size uint64) ([]byte, error) {
|
||||
end := addr + size
|
||||
if end < addr {
|
||||
return nil, fmt.Errorf("ReadData(%d, %d) overflow", addr, size)
|
||||
}
|
||||
if addr >= uint64(len(x.b)) || end-1 >= uint64(len(x.b)) {
|
||||
return nil, fmt.Errorf("ReadData(%d, %d) out of bounds of %d-byte slice", addr, size, len(x.b))
|
||||
}
|
||||
return x.b[addr:end], nil
|
||||
}
|
||||
|
||||
func (x *byteExe) DataStart() (uint64, uint64) {
|
||||
return 0, uint64(len(x.b))
|
||||
}
|
||||
|
||||
func TestSearchMagic(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
data []byte
|
||||
want uint64
|
||||
wantErr error
|
||||
}{
|
||||
{
|
||||
name: "beginning",
|
||||
data: func() []byte {
|
||||
b := make([]byte, buildInfoHeaderSize)
|
||||
copy(b, buildInfoMagic)
|
||||
return b
|
||||
}(),
|
||||
want: 0,
|
||||
},
|
||||
{
|
||||
name: "offset",
|
||||
data: func() []byte {
|
||||
b := make([]byte, 512)
|
||||
copy(b[4*buildInfoAlign:], buildInfoMagic)
|
||||
return b
|
||||
}(),
|
||||
want: 4 * buildInfoAlign,
|
||||
},
|
||||
{
|
||||
name: "second_chunk",
|
||||
data: func() []byte {
|
||||
b := make([]byte, 4*searchChunkSize)
|
||||
copy(b[searchChunkSize+4*buildInfoAlign:], buildInfoMagic)
|
||||
return b
|
||||
}(),
|
||||
want: searchChunkSize + 4*buildInfoAlign,
|
||||
},
|
||||
{
|
||||
name: "second_chunk_short",
|
||||
data: func() []byte {
|
||||
// Magic is 64-bytes into the second chunk,
|
||||
// which is short; only exactly long enough to
|
||||
// hold the header.
|
||||
b := make([]byte, searchChunkSize+4*buildInfoAlign+buildInfoHeaderSize)
|
||||
copy(b[searchChunkSize+4*buildInfoAlign:], buildInfoMagic)
|
||||
return b
|
||||
}(),
|
||||
want: searchChunkSize + 4*buildInfoAlign,
|
||||
},
|
||||
{
|
||||
name: "missing",
|
||||
data: func() []byte {
|
||||
b := make([]byte, buildInfoHeaderSize)
|
||||
return b
|
||||
}(),
|
||||
wantErr: errNotGoExe,
|
||||
},
|
||||
{
|
||||
name: "too_short",
|
||||
data: func() []byte {
|
||||
// There needs to be space for the entire
|
||||
// header, not just the magic.
|
||||
b := make([]byte, len(buildInfoMagic))
|
||||
copy(b, buildInfoMagic)
|
||||
return b
|
||||
}(),
|
||||
wantErr: errNotGoExe,
|
||||
},
|
||||
{
|
||||
name: "misaligned",
|
||||
data: func() []byte {
|
||||
b := make([]byte, 512)
|
||||
copy(b[7:], buildInfoMagic)
|
||||
return b
|
||||
}(),
|
||||
wantErr: errNotGoExe,
|
||||
},
|
||||
{
|
||||
name: "misaligned_across_chunk",
|
||||
data: func() []byte {
|
||||
// Magic crosses chunk boundary. By definition,
|
||||
// it has to be misaligned.
|
||||
b := make([]byte, 2*searchChunkSize)
|
||||
copy(b[searchChunkSize-8:], buildInfoMagic)
|
||||
return b
|
||||
}(),
|
||||
wantErr: errNotGoExe,
|
||||
},
|
||||
{
|
||||
name: "header_across_chunk",
|
||||
data: func() []byte {
|
||||
// The magic is aligned within the first chunk,
|
||||
// but the rest of the 32-byte header crosses
|
||||
// the chunk boundary.
|
||||
b := make([]byte, 2*searchChunkSize)
|
||||
copy(b[searchChunkSize-buildInfoAlign:], buildInfoMagic)
|
||||
return b
|
||||
}(),
|
||||
want: searchChunkSize - buildInfoAlign,
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
x := &byteExe{tc.data}
|
||||
dataAddr, dataSize := x.DataStart()
|
||||
addr, err := searchMagic(x, dataAddr, dataSize)
|
||||
if tc.wantErr == nil {
|
||||
if err != nil {
|
||||
t.Errorf("searchMagic got err %v want nil", err)
|
||||
}
|
||||
if addr != tc.want {
|
||||
t.Errorf("searchMagic got addr %d want %d", addr, tc.want)
|
||||
}
|
||||
} else {
|
||||
if err != tc.wantErr {
|
||||
t.Errorf("searchMagic got err %v want %v", err, tc.wantErr)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user