mirror of
https://github.com/golang/go
synced 2024-11-17 14:04:48 -07:00
[dev.fuzz] internal/fuzz: don't store corpus in memory
Instead of holding all corpus data/values in memory, only store seed inputs added via F.Add in memory, and only load corpus entries which are written to disk when we need them. This should significantly reduce the memory required by the coordinator process. Additionally only load the corpus in the coordinator process, since the worker has no need for it. Fixes #46669. Change-Id: Ic3b0c5e929fdb3e2877b963e6b0fa14e140c1e1d Reviewed-on: https://go-review.googlesource.com/c/go/+/345096 Trust: Roland Shoemaker <roland@golang.org> Run-TryBot: Roland Shoemaker <roland@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Jay Conrod <jayconrod@google.com>
This commit is contained in:
parent
5bc273aca5
commit
e5247f7886
@ -254,7 +254,7 @@ func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err err
|
||||
c.opts.Log,
|
||||
"DEBUG new crasher, elapsed: %s, id: %s, parent: %s, gen: %d, size: %d, exec time: %s\n",
|
||||
time.Since(c.startTime),
|
||||
result.entry.Name,
|
||||
fileName,
|
||||
result.entry.Parent,
|
||||
result.entry.Generation,
|
||||
len(result.entry.Data),
|
||||
@ -303,35 +303,39 @@ func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err err
|
||||
// number of new edges that this result expanded.
|
||||
// TODO(jayconrod, katiehockman): Don't write a value that's already
|
||||
// in the corpus.
|
||||
if printDebugInfo() {
|
||||
fmt.Fprintf(
|
||||
c.opts.Log,
|
||||
"DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n",
|
||||
time.Since(c.startTime),
|
||||
result.entry.Name,
|
||||
result.entry.Parent,
|
||||
result.entry.Generation,
|
||||
countBits(keepCoverage),
|
||||
countBits(c.coverageMask),
|
||||
len(result.entry.Data),
|
||||
result.entryDuration,
|
||||
)
|
||||
}
|
||||
if !result.minimizeAttempted && crashMinimizing == nil && c.canMinimize() {
|
||||
// Send back to workers to find a smaller value that preserves
|
||||
// at least one new coverage bit.
|
||||
c.queueForMinimization(result, keepCoverage)
|
||||
} else {
|
||||
// Update the coordinator's coverage mask and save the value.
|
||||
inputSize := len(result.entry.Data)
|
||||
if opts.CacheDir != "" {
|
||||
if _, err := writeToCorpus(result.entry.Data, opts.CacheDir); err != nil {
|
||||
filename, err := writeToCorpus(result.entry.Data, opts.CacheDir)
|
||||
if err != nil {
|
||||
stop(err)
|
||||
}
|
||||
result.entry.Data = nil
|
||||
result.entry.Name = filename
|
||||
}
|
||||
c.updateCoverage(keepCoverage)
|
||||
c.corpus.entries = append(c.corpus.entries, result.entry)
|
||||
c.inputQueue.enqueue(result.entry)
|
||||
c.interestingCount++
|
||||
if printDebugInfo() {
|
||||
fmt.Fprintf(
|
||||
c.opts.Log,
|
||||
"DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n",
|
||||
time.Since(c.startTime),
|
||||
result.entry.Name,
|
||||
result.entry.Parent,
|
||||
result.entry.Generation,
|
||||
countBits(keepCoverage),
|
||||
countBits(c.coverageMask),
|
||||
inputSize,
|
||||
result.entryDuration,
|
||||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if printDebugInfo() {
|
||||
@ -393,18 +397,17 @@ type corpus struct {
|
||||
// packages, but testing can't import this package directly, and we don't want
|
||||
// to export this type from testing. Instead, we use the same struct type and
|
||||
// use a type alias (not a defined type) for convenience.
|
||||
//
|
||||
// TODO: split marshalled and unmarshalled types. In most places, we only need
|
||||
// one or the other.
|
||||
type CorpusEntry = struct {
|
||||
Parent string
|
||||
|
||||
// Name is the name of the corpus file, if the entry was loaded from the
|
||||
// seed corpus. It can be used with -run. For entries added with f.Add and
|
||||
// entries generated by the mutator, Name is empty.
|
||||
// entries generated by the mutator, Name is empty and Data is populated.
|
||||
Name string
|
||||
|
||||
// Data is the raw data loaded from a corpus file.
|
||||
// Data is the raw input data. Data should only be populated for initial
|
||||
// seed values added with f.Add. For on-disk corpus files, Data will
|
||||
// be nil.
|
||||
Data []byte
|
||||
|
||||
// Values is the unmarshaled values from a corpus file.
|
||||
@ -413,6 +416,16 @@ type CorpusEntry = struct {
|
||||
Generation int
|
||||
}
|
||||
|
||||
// CorpusEntryData returns the raw input bytes of ce: ce.Data when it is non-nil,
|
||||
// otherwise the contents of the file named by ce.Name, read from disk.
|
||||
func CorpusEntryData(ce CorpusEntry) ([]byte, error) {
|
||||
if ce.Data != nil {
|
||||
return ce.Data, nil
|
||||
}
|
||||
|
||||
return os.ReadFile(ce.Name)
|
||||
}
|
||||
|
||||
type fuzzInput struct {
|
||||
// entry is the value to test initially. The worker will randomly mutate
|
||||
// values from this starting point.
|
||||
@ -580,7 +593,7 @@ func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
|
||||
data := marshalCorpusFile(vals...)
|
||||
h := sha256.Sum256(data)
|
||||
name := fmt.Sprintf("%x", h[:4])
|
||||
corpus.entries = append(corpus.entries, CorpusEntry{Name: name, Data: data, Values: vals})
|
||||
corpus.entries = append(corpus.entries, CorpusEntry{Name: name, Data: data})
|
||||
}
|
||||
c := &coordinator{
|
||||
opts: opts,
|
||||
@ -875,7 +888,7 @@ func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) {
|
||||
errs = append(errs, fmt.Errorf("%q: %v", filename, err))
|
||||
continue
|
||||
}
|
||||
corpus = append(corpus, CorpusEntry{Name: filename, Data: data, Values: vals})
|
||||
corpus = append(corpus, CorpusEntry{Name: filename, Values: vals})
|
||||
}
|
||||
if len(errs) > 0 {
|
||||
return corpus, &MalformedCorpusError{errs: errs}
|
||||
|
@ -1002,7 +1002,11 @@ func (wc *workerClient) minimize(ctx context.Context, entryIn CorpusEntry, args
|
||||
return CorpusEntry{}, minimizeResponse{}, errSharedMemClosed
|
||||
}
|
||||
mem.header().count = 0
|
||||
mem.setValue(entryIn.Data)
|
||||
inp, err := CorpusEntryData(entryIn)
|
||||
if err != nil {
|
||||
return CorpusEntry{}, minimizeResponse{}, err
|
||||
}
|
||||
mem.setValue(inp)
|
||||
wc.memMu <- mem
|
||||
defer func() { wc.memMu <- mem }()
|
||||
|
||||
@ -1013,10 +1017,6 @@ func (wc *workerClient) minimize(ctx context.Context, entryIn CorpusEntry, args
|
||||
return CorpusEntry{}, minimizeResponse{}, errSharedMemClosed
|
||||
}
|
||||
entryOut.Data = mem.valueCopy()
|
||||
entryOut.Values, err = unmarshalCorpusFile(entryOut.Data)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("workerClient.minimize unmarshaling minimized value: %v", err))
|
||||
}
|
||||
resp.Count = mem.header().count
|
||||
|
||||
return entryOut, resp, callErr
|
||||
@ -1032,7 +1032,11 @@ func (wc *workerClient) fuzz(ctx context.Context, entryIn CorpusEntry, args fuzz
|
||||
return CorpusEntry{}, fuzzResponse{}, errSharedMemClosed
|
||||
}
|
||||
mem.header().count = 0
|
||||
mem.setValue(entryIn.Data)
|
||||
inp, err := CorpusEntryData(entryIn)
|
||||
if err != nil {
|
||||
return CorpusEntry{}, fuzzResponse{}, err
|
||||
}
|
||||
mem.setValue(inp)
|
||||
wc.memMu <- mem
|
||||
|
||||
c := call{Fuzz: &args}
|
||||
@ -1044,10 +1048,10 @@ func (wc *workerClient) fuzz(ctx context.Context, entryIn CorpusEntry, args fuzz
|
||||
defer func() { wc.memMu <- mem }()
|
||||
resp.Count = mem.header().count
|
||||
|
||||
if !bytes.Equal(entryIn.Data, mem.valueRef()) {
|
||||
if !bytes.Equal(inp, mem.valueRef()) {
|
||||
panic("workerServer.fuzz modified input")
|
||||
}
|
||||
valuesOut, err := unmarshalCorpusFile(entryIn.Data)
|
||||
valuesOut, err := unmarshalCorpusFile(inp)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("unmarshaling fuzz input value after call: %v", err))
|
||||
}
|
||||
@ -1063,7 +1067,6 @@ func (wc *workerClient) fuzz(ctx context.Context, entryIn CorpusEntry, args fuzz
|
||||
Name: name,
|
||||
Parent: entryIn.Name,
|
||||
Data: dataOut,
|
||||
Values: valuesOut,
|
||||
Generation: entryIn.Generation + 1,
|
||||
}
|
||||
|
||||
|
@ -305,21 +305,33 @@ func (f *F) Fuzz(ff interface{}) {
|
||||
types = append(types, t)
|
||||
}
|
||||
|
||||
// Check the corpus provided by f.Add
|
||||
for _, c := range f.corpus {
|
||||
if err := f.fuzzContext.checkCorpus(c.Values, types); err != nil {
|
||||
// TODO: Is there a way to save which line number is associated
|
||||
// with the f.Add call that failed?
|
||||
// Only load the corpus if we need it
|
||||
if f.fuzzContext.runFuzzWorker == nil {
|
||||
// Check the corpus provided by f.Add
|
||||
for _, c := range f.corpus {
|
||||
if err := f.fuzzContext.checkCorpus(c.Values, types); err != nil {
|
||||
// TODO: Is there a way to save which line number is associated
|
||||
// with the f.Add call that failed?
|
||||
f.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Load seed corpus
|
||||
c, err := f.fuzzContext.readCorpus(filepath.Join(corpusDir, f.name), types)
|
||||
if err != nil {
|
||||
f.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Load seed corpus
|
||||
c, err := f.fuzzContext.readCorpus(filepath.Join(corpusDir, f.name), types)
|
||||
if err != nil {
|
||||
f.Fatal(err)
|
||||
// If this is the coordinator process, zero the values, since we don't need to hold
|
||||
// onto them.
|
||||
if f.fuzzContext.coordinateFuzzing != nil {
|
||||
for i := range c {
|
||||
c[i].Values = nil
|
||||
}
|
||||
}
|
||||
|
||||
f.corpus = append(f.corpus, c...)
|
||||
}
|
||||
f.corpus = append(f.corpus, c...)
|
||||
|
||||
// run calls fn on a given input, as a subtest with its own T.
|
||||
// run is analogous to T.Run. The test filtering and cleanup works similarly.
|
||||
|
Loading…
Reference in New Issue
Block a user