1
0
mirror of https://github.com/golang/go synced 2024-11-18 21:44:45 -07:00

go/packages: deduplicate file parsing

When loading packages from source, many files are being parsed
repeatedly, for example due to test variants. While the median number of
times a file gets parsed is 2, it is significantly higher (up to 28
times) when parsing the standard library, because of test variant
shenanigans.

By caching file contents and their parsed representations we can cut
down on processing time and garbage produced. When loading individual
packages or 3rd party projects, the effect is rather small. However when
loading the entire standard library, the effect is substantial.

name       old time/op    new time/op    delta
Jaeger-8      2.95s ± 7%     2.84s ± 8%     ~     (p=0.089 n=10+10)
Std-8         4.96s ± 7%     4.23s ± 3%  -14.62%  (p=0.000 n=9+9)
Strconv-8     892ms ±34%     877ms ±21%     ~     (p=0.853 n=10+10)

name       old alloc/op   new alloc/op   delta
Jaeger-8     1.22GB ± 0%    1.21GB ± 0%   -0.84%  (p=0.000 n=10+10)
Std-8        2.57GB ± 0%    2.20GB ± 0%  -14.61%  (p=0.000 n=10+8)
Strconv-8     201MB ± 1%     200MB ± 1%     ~     (p=0.105 n=10+10)

name       old allocs/op  new allocs/op  delta
Jaeger-8      12.7M ± 0%     12.4M ± 0%   -1.82%  (p=0.000 n=9+10)
Std-8         26.4M ± 0%     17.3M ± 0%  -34.62%  (p=0.000 n=9+9)
Strconv-8     1.94M ± 0%     1.91M ± 0%   -1.50%  (p=0.000 n=10+10)

When loading std, peak RSS decreases from 1.96 GB to 1.57 GB.

While we're here, we simplify our ParseFile implementation. The contract
of ParseFile specifies that implementations must use src for parsing,
and use filename only for display purposes. As such, we mustn't ever
call it with a nil src, making the check for a nil src in our own
implementation superfluous.

Change-Id: I33daac20fc52ccdb3187a336633f712d01b71d86
Reviewed-on: https://go-review.googlesource.com/c/tools/+/171377
Run-TryBot: Ian Cottrell <iancottrell@google.com>
Run-TryBot: Michael Matloob <matloob@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
This commit is contained in:
Dominik Honnef 2019-04-10 15:47:02 +02:00
parent f62e16ca41
commit cb2dda6eab

View File

@ -419,6 +419,8 @@ type loader struct {
pkgs map[string]*loaderPackage
Config
sizes types.Sizes
parseCache map[string]*parseValue
parseCacheMu sync.Mutex
exportMu sync.Mutex // enforces mutual exclusion of exportdata operations
// TODO(matloob): Add an implied mode here and use that instead of mode.
@ -428,8 +430,16 @@ type loader struct {
// where we need certain modes right.
}
type parseValue struct {
f *ast.File
err error
ready chan struct{}
}
func newLoader(cfg *Config) *loader {
ld := &loader{}
ld := &loader{
parseCache: map[string]*parseValue{},
}
if cfg != nil {
ld.Config = *cfg
}
@ -457,12 +467,8 @@ func newLoader(cfg *Config) *loader {
// because we load source if export data is missing.
if ld.ParseFile == nil {
ld.ParseFile = func(fset *token.FileSet, filename string, src []byte) (*ast.File, error) {
var isrc interface{}
if src != nil {
isrc = src
}
const mode = parser.AllErrors | parser.ParseComments
return parser.ParseFile(fset, filename, isrc, mode)
return parser.ParseFile(fset, filename, src, mode)
}
}
}
@ -864,6 +870,42 @@ func (f importerFunc) Import(path string) (*types.Package, error) { return f(pat
// the number of parallel I/O calls per process.
var ioLimit = make(chan bool, 20)
func (ld *loader) parseFile(filename string) (*ast.File, error) {
ld.parseCacheMu.Lock()
v, ok := ld.parseCache[filename]
if ok {
// cache hit
ld.parseCacheMu.Unlock()
<-v.ready
} else {
// cache miss
v = &parseValue{ready: make(chan struct{})}
ld.parseCache[filename] = v
ld.parseCacheMu.Unlock()
var src []byte
for f, contents := range ld.Config.Overlay {
if sameFile(f, filename) {
src = contents
}
}
var err error
if src == nil {
ioLimit <- true // wait
src, err = ioutil.ReadFile(filename)
<-ioLimit // signal
}
if err != nil {
v.err = err
} else {
v.f, v.err = ld.ParseFile(ld.Fset, filename, src)
}
close(v.ready)
}
return v.f, v.err
}
// parseFiles reads and parses the Go source files and returns the ASTs
// of the ones that could be at least partially parsed, along with a
// list of I/O and parse errors encountered.
@ -884,24 +926,7 @@ func (ld *loader) parseFiles(filenames []string) ([]*ast.File, []error) {
}
wg.Add(1)
go func(i int, filename string) {
ioLimit <- true // wait
// ParseFile may return both an AST and an error.
var src []byte
for f, contents := range ld.Config.Overlay {
if sameFile(f, filename) {
src = contents
}
}
var err error
if src == nil {
src, err = ioutil.ReadFile(filename)
}
if err != nil {
parsed[i], errors[i] = nil, err
} else {
parsed[i], errors[i] = ld.ParseFile(ld.Fset, filename, src)
}
<-ioLimit // signal
parsed[i], errors[i] = ld.parseFile(filename)
wg.Done()
}(i, file)
}