1
0
mirror of https://github.com/golang/go synced 2024-11-18 16:44:43 -07:00

internal/imports: don't block completions on walks

Filesystem walks of large GOPATHs/module caches can take seconds,
especially on systems with slow filesystems like MacOS and WSL. We don't
want to block completion requests on walks finishing. At the same time,
cancelling a walk midway through results in an unusable cache, where we
don't know which parts have been scanned so far.

The best option is to run the walks in a separate goroutine. Then we can
detach and let them finish. On the other side, we need to be able to
reattach for the next completion request.

Introduce a new method on caches, ScanAndListen, which first processes
all the items in the cache, then notifies of any new items. This allows
us to reattach to an existing scan without missing anything.

The background scan introduces concurrency to the resolvers where there
wasn't any before. We can't use mutexes, because there's no way to stop
Lock() when a context expires. Use a 1-element semaphore channel to
accomplish the same effect.

Along the way: Only rescan GOPATH if the resolver has been cleared. None
of this makes sense for GOPATH without that. Fix a bug where we were
scanning the main module twice in module mode. Stop loading exports in
module tests, it slows them down a ton.

Change-Id: I978efae733ccba0c0cdc8e8fe6892bf5f15feac8
Reviewed-on: https://go-review.googlesource.com/c/tools/+/213217
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
This commit is contained in:
Heschi Kreinick 2020-01-02 16:10:45 -05:00
parent a9a43c4726
commit 5d34a75004
4 changed files with 175 additions and 68 deletions

View File

@ -992,20 +992,33 @@ func ImportPathToAssumedName(importPath string) string {
// gopathResolver implements resolver for GOPATH workspaces.
type gopathResolver struct {
env *ProcessEnv
cache *dirInfoCache
env *ProcessEnv
walked bool
cache *dirInfoCache
scanSema chan struct{} // scanSema prevents concurrent scans.
}
func (r *gopathResolver) init() {
if r.cache == nil {
r.cache = &dirInfoCache{
dirs: map[string]*directoryPackageInfo{},
dirs: map[string]*directoryPackageInfo{},
listeners: map[*int]cacheListener{},
}
}
if r.scanSema == nil {
r.scanSema = make(chan struct{}, 1)
r.scanSema <- struct{}{}
}
}
func (r *gopathResolver) ClearForNewScan() {
r.cache = nil
<-r.scanSema
*r = gopathResolver{
env: r.env,
scanSema: r.scanSema,
}
r.init()
r.scanSema <- struct{}{}
}
func (r *gopathResolver) loadPackageNames(importPaths []string, srcDir string) (map[string]string, error) {
@ -1154,21 +1167,15 @@ func (r *gopathResolver) scan(ctx context.Context, callback *scanCallback) error
}
r.cache.Store(dir, info)
}
roots := filterRoots(gopathwalk.SrcDirsRoots(r.env.buildContext()), callback.rootFound)
gopathwalk.Walk(roots, add, gopathwalk.Options{Debug: r.env.Debug, ModulesEnabled: false})
for _, dir := range r.cache.Keys() {
if ctx.Err() != nil {
return nil
}
info, ok := r.cache.Load(dir)
if !ok {
continue
processDir := func(info directoryPackageInfo) {
// Skip this directory if we were not able to get the package information successfully.
if scanned, err := info.reachedStatus(directoryScanned); !scanned || err != nil {
return
}
p := &pkg{
importPathShort: info.nonCanonicalImportPath,
dir: dir,
dir: info.dir,
relevance: MaxRelevance - 1,
}
if info.rootType == gopathwalk.RootGOROOT {
@ -1176,21 +1183,42 @@ func (r *gopathResolver) scan(ctx context.Context, callback *scanCallback) error
}
if !callback.dirFound(p) {
continue
return
}
var err error
p.packageName, err = r.cache.CachePackageName(info)
if err != nil {
continue
return
}
if !callback.packageNameLoaded(p) {
continue
return
}
if _, exports, err := r.loadExports(ctx, p); err == nil {
callback.exportsLoaded(p, exports)
}
}
stop := r.cache.ScanAndListen(ctx, processDir)
defer stop()
// The callback is not necessarily safe to use in the goroutine below. Process roots eagerly.
roots := filterRoots(gopathwalk.SrcDirsRoots(r.env.buildContext()), callback.rootFound)
// We can't cancel walks, because we need them to finish to have a usable
// cache. Instead, run them in a separate goroutine and detach.
scanDone := make(chan struct{})
go func() {
select {
case <-ctx.Done():
return
case <-r.scanSema:
}
defer func() { r.scanSema <- struct{}{} }()
gopathwalk.Walk(roots, add, gopathwalk.Options{Debug: r.env.Debug, ModulesEnabled: false})
close(scanDone)
}()
select {
case <-ctx.Done():
case <-scanDone:
}
return nil
}

View File

@ -26,6 +26,7 @@ type ModuleResolver struct {
moduleCacheDir string
dummyVendorMod *ModuleJSON // If vendoring is enabled, the pseudo-module that represents the /vendor directory.
roots []gopathwalk.Root
scanSema chan struct{} // scanSema prevents concurrent scans and guards scannedRoots.
scannedRoots map[gopathwalk.Root]bool
Initialized bool
@ -106,12 +107,12 @@ func (r *ModuleResolver) init() error {
}
// Walk dependent modules before scanning the full mod cache, direct deps first.
for _, mod := range r.ModsByModPath {
if !mod.Indirect {
if !mod.Indirect && !mod.Main {
addDep(mod)
}
}
for _, mod := range r.ModsByModPath {
if mod.Indirect {
if mod.Indirect && !mod.Main {
addDep(mod)
}
}
@ -119,14 +120,20 @@ func (r *ModuleResolver) init() error {
}
r.scannedRoots = map[gopathwalk.Root]bool{}
if r.scanSema == nil {
r.scanSema = make(chan struct{}, 1)
r.scanSema <- struct{}{}
}
if r.moduleCacheCache == nil {
r.moduleCacheCache = &dirInfoCache{
dirs: map[string]*directoryPackageInfo{},
dirs: map[string]*directoryPackageInfo{},
listeners: map[*int]cacheListener{},
}
}
if r.otherCache == nil {
r.otherCache = &dirInfoCache{
dirs: map[string]*directoryPackageInfo{},
dirs: map[string]*directoryPackageInfo{},
listeners: map[*int]cacheListener{},
}
}
r.Initialized = true
@ -160,18 +167,24 @@ func (r *ModuleResolver) initAllMods() error {
}
func (r *ModuleResolver) ClearForNewScan() {
<-r.scanSema
r.scannedRoots = map[gopathwalk.Root]bool{}
r.otherCache = &dirInfoCache{
dirs: map[string]*directoryPackageInfo{},
}
r.scanSema <- struct{}{}
}
func (r *ModuleResolver) ClearForNewMod() {
env := r.env
<-r.scanSema
*r = ModuleResolver{
env: env,
env: r.env,
moduleCacheCache: r.moduleCacheCache,
otherCache: r.otherCache,
scanSema: r.scanSema,
}
r.init()
r.scanSema <- struct{}{}
}
// findPackage returns the module and directory that contains the package at
@ -401,18 +414,12 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error
callback.exportsLoaded(pkg, exports)
}
// Everything we already had is in the cache. Process it now, in hopes we
// we don't need anything new.
for _, dir := range r.cacheKeys() {
if ctx.Err() != nil {
return nil
}
info, ok := r.cacheLoad(dir)
if !ok {
continue
}
processDir(info)
}
// Start processing everything in the cache, and listen for the new stuff
// we discover in the walk below.
stop1 := r.moduleCacheCache.ScanAndListen(ctx, processDir)
defer stop1()
stop2 := r.otherCache.ScanAndListen(ctx, processDir)
defer stop2()
// We assume cached directories are fully cached, including all their
// children, and have not changed. We can skip them.
@ -428,32 +435,41 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error
return packageScanned
}
// Add anything new to the cache, and process it if we're still looking.
// Add anything new to the cache, and process it if we're still listening.
add := func(root gopathwalk.Root, dir string) {
info := r.scanDirForPackage(root, dir)
r.cacheStore(info)
if ctx.Err() == nil {
processDir(info)
}
r.cacheStore(r.scanDirForPackage(root, dir))
}
// r.roots and the callback are not necessarily safe to use in the
// goroutine below. Process them eagerly.
roots := filterRoots(r.roots, callback.rootFound)
// We can't cancel walks, because we need them to finish to have a usable
// cache. We can do them one by one and stop in between.
// TODO(heschi): Run asynchronously and detach on cancellation? Would risk
// racy callbacks.
for _, root := range r.roots {
if ctx.Err() != nil {
return nil
// cache. Instead, run them in a separate goroutine and detach.
scanDone := make(chan struct{})
go func() {
select {
case <-ctx.Done():
return
case <-r.scanSema:
}
defer func() { r.scanSema <- struct{}{} }()
// We have the lock on r.scannedRoots, and no other scans can run.
for _, root := range roots {
if ctx.Err() != nil {
return
}
if r.scannedRoots[root] {
continue
if r.scannedRoots[root] {
continue
}
gopathwalk.WalkSkip([]gopathwalk.Root{root}, add, skip, gopathwalk.Options{Debug: r.env.Debug, ModulesEnabled: true})
r.scannedRoots[root] = true
}
if !callback.rootFound(root) {
continue
}
gopathwalk.WalkSkip([]gopathwalk.Root{root}, add, skip, gopathwalk.Options{Debug: r.env.Debug, ModulesEnabled: true})
r.scannedRoots[root] = true
close(scanDone)
}()
select {
case <-ctx.Done():
case <-scanDone:
}
return nil
}

View File

@ -93,15 +93,85 @@ func (info *directoryPackageInfo) reachedStatus(target directoryPackageStatus) (
type dirInfoCache struct {
mu sync.Mutex
// dirs stores information about packages in directories, keyed by absolute path.
dirs map[string]*directoryPackageInfo
dirs map[string]*directoryPackageInfo
listeners map[*int]cacheListener
}
type cacheListener func(directoryPackageInfo)
// ScanAndListen calls listener on all the items in the cache, and on anything
// newly added. The returned stop function waits for all in-flight callbacks to
// finish and blocks new ones.
func (d *dirInfoCache) ScanAndListen(ctx context.Context, listener cacheListener) func() {
ctx, cancel := context.WithCancel(ctx)
// Flushing out all the callbacks is tricky without knowing how many there
// are going to be. Setting an arbitrary limit makes it much easier.
const maxInFlight = 10
sema := make(chan struct{}, maxInFlight)
for i := 0; i < maxInFlight; i++ {
sema <- struct{}{}
}
cookie := new(int) // A unique ID we can use for the listener.
// We can't hold mu while calling the listener.
d.mu.Lock()
var keys []string
for key := range d.dirs {
keys = append(keys, key)
}
d.listeners[cookie] = func(info directoryPackageInfo) {
select {
case <-ctx.Done():
return
case <-sema:
}
listener(info)
sema <- struct{}{}
}
d.mu.Unlock()
// Process the pre-existing keys.
for _, k := range keys {
select {
case <-ctx.Done():
cancel()
return func() {}
default:
}
if v, ok := d.Load(k); ok {
listener(v)
}
}
return func() {
cancel()
d.mu.Lock()
delete(d.listeners, cookie)
d.mu.Unlock()
for i := 0; i < maxInFlight; i++ {
<-sema
}
}
}
// Store stores the package info for dir.
func (d *dirInfoCache) Store(dir string, info directoryPackageInfo) {
d.mu.Lock()
defer d.mu.Unlock()
stored := info // defensive copy
d.dirs[dir] = &stored
_, old := d.dirs[dir]
d.dirs[dir] = &info
var listeners []cacheListener
for _, l := range d.listeners {
listeners = append(listeners, l)
}
d.mu.Unlock()
if !old {
for _, l := range listeners {
l(info)
}
}
}
// Load returns a copy of the directoryPackageInfo for absolute directory dir.

View File

@ -151,7 +151,6 @@ import _ "example.com"
mt.assertScanFinds("example.com", "x")
mt.assertScanFinds("example.com", "x")
}
// Tests that scanning the module cache > 1 time is able to find the same module
@ -211,12 +210,8 @@ import _ "rsc.io/quote"
}
// Uninitialize the go.mod dependent cached information and make sure it still finds the package.
mt.resolver.Initialized = false
mt.resolver.Main = nil
mt.resolver.ModsByModPath = nil
mt.resolver.ModsByDir = nil
mt.resolver.ClearForNewMod()
mt.assertScanFinds("rsc.io/quote", "quote")
}
// Tests that -mod=vendor works. Adapted from mod_vendor_build.txt.
@ -606,9 +601,7 @@ func scanToSlice(resolver Resolver, exclude []gopathwalk.RootType) ([]*pkg, erro
mu.Lock()
defer mu.Unlock()
result = append(result, pkg)
return true
},
exportsLoaded: func(pkg *pkg, exports []string) {
return false
},
}
err := resolver.scan(context.Background(), filter)