1
0
mirror of https://github.com/golang/go synced 2024-11-05 15:06:09 -07:00
go/internal/imports/mod_cache.go

233 lines
6.8 KiB
Go
Raw Normal View History

package imports
import (
"context"
"fmt"
"sync"
"golang.org/x/tools/internal/gopathwalk"
)
// To find packages to import, the resolver needs to know about all of
// the packages that could be imported. This includes packages that are
// already in modules that are in (1) the current module, (2) replace targets,
// and (3) packages in the module cache. Packages in (1) and (2) may change over
// time, as the client may edit the current module and locally replaced modules.
// The module cache (which includes all of the packages in (3)) can only
// ever be added to.
//
// The resolver can thus save state about packages in the module cache
// and guarantee that this will not change over time. To obtain information
// about new modules added to the module cache, the module cache should be
// rescanned.
//
// It is OK to serve information about modules that have been deleted,
// as they do still exist.
// TODO(suzmue): can we share information with the caller about
// what module needs to be downloaded to import this package?
// directoryPackageStatus records how far the loading of a directory's package
// information has progressed. The values are ordered, so they can be compared
// with >= to ask whether a given stage has been reached.
type directoryPackageStatus int

// The zero value is intentionally left without a name; the first named
// status starts at 1.
const (
	directoryScanned directoryPackageStatus = iota + 1
	nameLoaded
	exportsLoaded
)
// directoryPackageInfo describes the package found in one directory. Its
// fields are filled in stages; status records which stage has been reached
// and therefore which groups of fields are meaningful.
type directoryPackageInfo struct {
// status indicates the extent to which this struct has been filled in.
status directoryPackageStatus
// err is non-nil when there was an error trying to reach status.
err error
// Set when status >= directoryScanned.
// dir is the absolute directory of this package.
dir string
// rootType is the gopathwalk.RootType recorded for dir.
// NOTE(review): presumably the kind of root dir was discovered under — confirm against the scanner.
rootType gopathwalk.RootType
// nonCanonicalImportPath is the package's expected import path. It may
// not actually be importable at that path.
nonCanonicalImportPath string
// Module-related information.
moduleDir string // The directory that is the module root of this dir.
moduleName string // The module name that contains this dir.
// Set when status >= nameLoaded.
packageName string // the package name, as declared in the source.
// Set when status >= exportsLoaded.
// exports is filled in by CacheExports from the result of loadExportsFromFiles.
// NOTE(review): presumably the package's exported identifier names — confirm.
exports []string
}
// reachedStatus reports whether info has progressed to at least target, and
// returns the error recorded against target, if any.
//
// When no error is recorded the answer is simply info.status >= target.
// When an error is recorded, the method always reports true; the error itself
// is only returned when it was recorded at exactly target, and is nil for any
// other target.
func (info *directoryPackageInfo) reachedStatus(target directoryPackageStatus) (bool, error) {
	switch {
	case info.err != nil && info.status == target:
		// The failure happened while trying to reach exactly this status.
		return true, info.err
	case info.err != nil:
		// The recorded failure belongs to a different status.
		return true, nil
	default:
		return info.status >= target, nil
	}
}
// dirInfoCache is a concurrency safe map for storing information about
// directories that may contain packages.
//
// The information in this cache is built incrementally. Entries are initialized in scan.
// No new keys should be added in any other functions, as all directories containing
// packages are identified in scan.
//
// Other functions, including loadExports and findPackage, may update entries in this cache
// as they discover new things about the directory.
//
// The information in the cache is not expected to change for the cache's
// lifetime, so there is no protection against competing writes. Users should
// take care not to hold the cache across changes to the underlying files.
//
// TODO(suzmue): consider other concurrency strategies and data structures (RWLocks, sync.Map, etc)
type dirInfoCache struct {
mu sync.Mutex
// dirs stores information about packages in directories, keyed by absolute path.
internal/imports: don't block completions on walks Filesystem walks of large GOPATHs/module caches can take seconds, especially on systems with slow filesystems like MacOS and WSL. We don't want to block completion requests on walks finishing. At the same time, cancelling a walk midway through results in an unusable cache, where we don't know which parts have been scanned so far. The best option is to run the walks in a separate goroutine. Then we can detach and let them finish. On the other side, we need to be able to reattach for the next completion request. Introduce a new method on caches, ScanAndListen, which first processes all the items in the cache, then notifies of any new items. This allows us to reattach to an existing scan without missing anything. The background scan introduces concurrency to the resolvers where there wasn't any before. We can't use mutexes, because there's no way to stop Lock() when a context expires. Use a 1-element semaphore channel to accomplish the same effect. Along the way: Only rescan GOPATH if the resolver has been cleared. None of this makes sense for GOPATH without that. Fix a bug where we were scanning the main module twice in module mode. Stop loading exports in module tests, it slows them down a ton. Change-Id: I978efae733ccba0c0cdc8e8fe6892bf5f15feac8 Reviewed-on: https://go-review.googlesource.com/c/tools/+/213217 Run-TryBot: Heschi Kreinick <heschi@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-01-02 14:10:45 -07:00
dirs map[string]*directoryPackageInfo
listeners map[*int]cacheListener
}
// cacheListener is a callback that receives a copy of a directory's package info.
type cacheListener func(directoryPackageInfo)
// ScanAndListen calls listener on all the items in the cache, and on anything
// newly added. The returned stop function waits for all in-flight callbacks to
// finish and blocks new ones.
func (d *dirInfoCache) ScanAndListen(ctx context.Context, listener cacheListener) func() {
ctx, cancel := context.WithCancel(ctx)
// Flushing out all the callbacks is tricky without knowing how many there
// are going to be. Setting an arbitrary limit makes it much easier.
const maxInFlight = 10
sema := make(chan struct{}, maxInFlight)
for i := 0; i < maxInFlight; i++ {
sema <- struct{}{}
}
cookie := new(int) // A unique ID we can use for the listener.
// We can't hold mu while calling the listener.
d.mu.Lock()
var keys []string
for key := range d.dirs {
keys = append(keys, key)
}
d.listeners[cookie] = func(info directoryPackageInfo) {
select {
case <-ctx.Done():
return
case <-sema:
}
listener(info)
sema <- struct{}{}
}
d.mu.Unlock()
stop := func() {
cancel()
d.mu.Lock()
delete(d.listeners, cookie)
d.mu.Unlock()
for i := 0; i < maxInFlight; i++ {
<-sema
}
}
internal/imports: don't block completions on walks Filesystem walks of large GOPATHs/module caches can take seconds, especially on systems with slow filesystems like MacOS and WSL. We don't want to block completion requests on walks finishing. At the same time, cancelling a walk midway through results in an unusable cache, where we don't know which parts have been scanned so far. The best option is to run the walks in a separate goroutine. Then we can detach and let them finish. On the other side, we need to be able to reattach for the next completion request. Introduce a new method on caches, ScanAndListen, which first processes all the items in the cache, then notifies of any new items. This allows us to reattach to an existing scan without missing anything. The background scan introduces concurrency to the resolvers where there wasn't any before. We can't use mutexes, because there's no way to stop Lock() when a context expires. Use a 1-element semaphore channel to accomplish the same effect. Along the way: Only rescan GOPATH if the resolver has been cleared. None of this makes sense for GOPATH without that. Fix a bug where we were scanning the main module twice in module mode. Stop loading exports in module tests, it slows them down a ton. Change-Id: I978efae733ccba0c0cdc8e8fe6892bf5f15feac8 Reviewed-on: https://go-review.googlesource.com/c/tools/+/213217 Run-TryBot: Heschi Kreinick <heschi@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-01-02 14:10:45 -07:00
// Process the pre-existing keys.
for _, k := range keys {
select {
case <-ctx.Done():
return stop
internal/imports: don't block completions on walks Filesystem walks of large GOPATHs/module caches can take seconds, especially on systems with slow filesystems like MacOS and WSL. We don't want to block completion requests on walks finishing. At the same time, cancelling a walk midway through results in an unusable cache, where we don't know which parts have been scanned so far. The best option is to run the walks in a separate goroutine. Then we can detach and let them finish. On the other side, we need to be able to reattach for the next completion request. Introduce a new method on caches, ScanAndListen, which first processes all the items in the cache, then notifies of any new items. This allows us to reattach to an existing scan without missing anything. The background scan introduces concurrency to the resolvers where there wasn't any before. We can't use mutexes, because there's no way to stop Lock() when a context expires. Use a 1-element semaphore channel to accomplish the same effect. Along the way: Only rescan GOPATH if the resolver has been cleared. None of this makes sense for GOPATH without that. Fix a bug where we were scanning the main module twice in module mode. Stop loading exports in module tests, it slows them down a ton. Change-Id: I978efae733ccba0c0cdc8e8fe6892bf5f15feac8 Reviewed-on: https://go-review.googlesource.com/c/tools/+/213217 Run-TryBot: Heschi Kreinick <heschi@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-01-02 14:10:45 -07:00
default:
}
if v, ok := d.Load(k); ok {
listener(v)
}
}
return stop
}
// Store stores the package info for dir.
func (d *dirInfoCache) Store(dir string, info directoryPackageInfo) {
d.mu.Lock()
internal/imports: don't block completions on walks Filesystem walks of large GOPATHs/module caches can take seconds, especially on systems with slow filesystems like MacOS and WSL. We don't want to block completion requests on walks finishing. At the same time, cancelling a walk midway through results in an unusable cache, where we don't know which parts have been scanned so far. The best option is to run the walks in a separate goroutine. Then we can detach and let them finish. On the other side, we need to be able to reattach for the next completion request. Introduce a new method on caches, ScanAndListen, which first processes all the items in the cache, then notifies of any new items. This allows us to reattach to an existing scan without missing anything. The background scan introduces concurrency to the resolvers where there wasn't any before. We can't use mutexes, because there's no way to stop Lock() when a context expires. Use a 1-element semaphore channel to accomplish the same effect. Along the way: Only rescan GOPATH if the resolver has been cleared. None of this makes sense for GOPATH without that. Fix a bug where we were scanning the main module twice in module mode. Stop loading exports in module tests, it slows them down a ton. Change-Id: I978efae733ccba0c0cdc8e8fe6892bf5f15feac8 Reviewed-on: https://go-review.googlesource.com/c/tools/+/213217 Run-TryBot: Heschi Kreinick <heschi@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-01-02 14:10:45 -07:00
_, old := d.dirs[dir]
d.dirs[dir] = &info
var listeners []cacheListener
for _, l := range d.listeners {
listeners = append(listeners, l)
}
d.mu.Unlock()
if !old {
for _, l := range listeners {
l(info)
}
}
}
// Load looks up the absolute directory dir and, when it is present, returns a
// copy of its directoryPackageInfo together with true. When dir is unknown it
// returns the zero directoryPackageInfo and false.
func (d *dirInfoCache) Load(dir string) (directoryPackageInfo, bool) {
	d.mu.Lock()
	defer d.mu.Unlock()

	if entry, found := d.dirs[dir]; found {
		// Dereference so the caller gets its own copy, not the cached pointer.
		return *entry, true
	}
	return directoryPackageInfo{}, false
}
// Keys returns a snapshot of the directories currently stored in d, in no
// particular order. The result is nil when the cache holds no entries.
func (d *dirInfoCache) Keys() (keys []string) {
	d.mu.Lock()
	for dir := range d.dirs {
		keys = append(keys, dir)
	}
	d.mu.Unlock()
	return keys
}
// CachePackageName returns the package name for the directory described by
// info, loading it with packageDirToName and recording the outcome in the
// cache when it has not been loaded yet.
//
// If info already reports nameLoaded, the stored name is returned together
// with whatever error reachedStatus reports for that status. If the directory
// has not reached directoryScanned, or that step reports an error, an error
// is returned and the cache is left untouched. Otherwise the result of
// packageDirToName (name and error alike) is stored under info.dir with
// status nameLoaded and returned.
func (d *dirInfoCache) CachePackageName(info directoryPackageInfo) (string, error) {
	nameKnown, nameErr := info.reachedStatus(nameLoaded)
	if nameKnown {
		return info.packageName, nameErr
	}

	scanned, scanErr := info.reachedStatus(directoryScanned)
	if scanErr != nil || !scanned {
		return "", fmt.Errorf("cannot read package name, scan error: %v", scanErr)
	}

	name, err := packageDirToName(info.dir)
	info.packageName, info.err, info.status = name, err, nameLoaded
	d.Store(info.dir, info)
	return name, err
}
// CacheExports returns the package name and exports for the directory
// described by info, loading them with loadExportsFromFiles and recording the
// outcome in the cache when they have not been loaded yet.
//
// If info already reports exportsLoaded, its stored name, exports and error
// are returned unchanged. If reaching nameLoaded produced an error, that error
// is returned and nothing is loaded. A load that ends with context.Canceled or
// context.DeadlineExceeded is returned to the caller but not stored. Any other
// outcome is stored under info.dir before being returned.
func (d *dirInfoCache) CacheExports(ctx context.Context, env *ProcessEnv, info directoryPackageInfo) (string, []string, error) {
	if done, _ := info.reachedStatus(exportsLoaded); done {
		return info.packageName, info.exports, info.err
	}
	if named, nameErr := info.reachedStatus(nameLoaded); nameErr != nil && named {
		return "", nil, nameErr
	}

	name, exports, err := loadExportsFromFiles(ctx, env, info.dir, false)
	info.packageName, info.exports, info.err = name, exports, err
	switch err {
	case context.Canceled, context.DeadlineExceeded:
		// The load was interrupted; skip the cache update.
		return name, exports, err
	}

	// The cache structure wants things to proceed linearly. We can skip a
	// step here, but only if we succeed: a failure coming from a status
	// other than nameLoaded is recorded against nameLoaded instead.
	if err != nil && info.status != nameLoaded {
		info.status = nameLoaded
	} else {
		info.status = exportsLoaded
	}
	d.Store(info.dir, info)
	return name, exports, err
}