1
0
mirror of https://github.com/golang/go synced 2024-11-05 18:56:10 -07:00
go/internal/lsp/cache/cache.go
Heschi Kreinick c1903db4db internal/memoize: switch from GC-driven to explicit deletion
The GC-based cache has given us a number of problems. First, memory
leaks driven by reference cycles: the Go runtime cannot collect cycles
involving finalizers, which prevents us from writing natural code in
Bind callbacks. If we screw it up, we get a mysterious leak that takes a
long time to track down. Second, the behavior is generally mysterious;
it's hard to predict how long a value lasts, and harder to tell if a
value being live is a bug. Third, we think that it may be interacting
poorly with the GC, resulting in unnecessary memory usage.

The structure of the values we put in the cache is not actually that
complicated -- there are only 5 significant types: parse, typecheck,
analyze, parse mod, and analyze mod. Managing them manually should not
be conceptually difficult, and in fact we already do most of the work
in (*snapshot).clone.

In this CL the cache adds the concept of "generations", which function
as reference counts on cache entries. Entries are still global and
shared across generations, but will be explicitly deleted once no
generations refer to them. The idea is that each snapshot is a new
generation, and can inherit entries from the previous snapshot or leave
them behind to be deleted.

One obvious risk of this scheme is that we'll leave dangling references
to values without actually inheriting them across generations. To
prevent that, getting a value requires passing in the generation at
which it's being read, and an error will be returned if that generation
is dead.

Change-Id: I4b30891efd7be4e10f2b84f4c067b0dee43dcf9c
Reviewed-on: https://go-review.googlesource.com/c/tools/+/242838
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
Reviewed-by: Robert Findley <rfindley@google.com>
2020-08-10 19:02:17 +00:00

265 lines
6.0 KiB
Go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cache
import (
"context"
"crypto/sha256"
"fmt"
"go/ast"
"go/token"
"go/types"
"html/template"
"io/ioutil"
"os"
"reflect"
"sort"
"strconv"
"sync"
"sync/atomic"
"time"
"golang.org/x/tools/internal/event"
"golang.org/x/tools/internal/gocommand"
"golang.org/x/tools/internal/lsp/debug/tag"
"golang.org/x/tools/internal/lsp/source"
"golang.org/x/tools/internal/memoize"
"golang.org/x/tools/internal/span"
)
func New(ctx context.Context, options func(*source.Options)) *Cache {
index := atomic.AddInt64(&cacheIndex, 1)
c := &Cache{
id: strconv.FormatInt(index, 10),
fset: token.NewFileSet(),
options: options,
fileContent: map[span.URI]*fileHandle{},
}
return c
}
type Cache struct {
id string
fset *token.FileSet
options func(*source.Options)
store memoize.Store
fileMu sync.Mutex
fileContent map[span.URI]*fileHandle
}
type fileHandle struct {
modTime time.Time
uri span.URI
bytes []byte
hash string
err error
}
func (c *Cache) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) {
return c.getFile(ctx, uri)
}
func (c *Cache) getFile(ctx context.Context, uri span.URI) (*fileHandle, error) {
fi, statErr := os.Stat(uri.Filename())
if statErr != nil {
return &fileHandle{err: statErr}, nil
}
c.fileMu.Lock()
fh, ok := c.fileContent[uri]
c.fileMu.Unlock()
if ok && fh.modTime.Equal(fi.ModTime()) {
return fh, nil
}
select {
case ioLimit <- struct{}{}:
case <-ctx.Done():
return nil, ctx.Err()
}
defer func() { <-ioLimit }()
fh = readFile(ctx, uri, fi.ModTime())
c.fileMu.Lock()
c.fileContent[uri] = fh
c.fileMu.Unlock()
return fh, nil
}
// ioLimit limits the number of parallel file reads per process.
var ioLimit = make(chan struct{}, 128)
func readFile(ctx context.Context, uri span.URI, modTime time.Time) *fileHandle {
ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Filename()))
_ = ctx
defer done()
data, err := ioutil.ReadFile(uri.Filename())
if err != nil {
return &fileHandle{
modTime: modTime,
err: err,
}
}
return &fileHandle{
modTime: modTime,
uri: uri,
bytes: data,
hash: hashContents(data),
}
}
func (c *Cache) NewSession(ctx context.Context) *Session {
index := atomic.AddInt64(&sessionIndex, 1)
s := &Session{
cache: c,
id: strconv.FormatInt(index, 10),
options: source.DefaultOptions(),
overlays: make(map[span.URI]*overlay),
gocmdRunner: &gocommand.Runner{},
}
event.Log(ctx, "New session", KeyCreateSession.Of(s))
return s
}
func (c *Cache) FileSet() *token.FileSet {
return c.fset
}
func (h *fileHandle) URI() span.URI {
return h.uri
}
func (h *fileHandle) Kind() source.FileKind {
return source.DetectLanguage("", h.uri.Filename())
}
func (h *fileHandle) Hash() string {
return h.hash
}
func (h *fileHandle) FileIdentity() source.FileIdentity {
return source.FileIdentity{
URI: h.uri,
Hash: h.hash,
Kind: h.Kind(),
}
}
func (h *fileHandle) Read() ([]byte, error) {
return h.bytes, h.err
}
func hashContents(contents []byte) string {
return fmt.Sprintf("%x", sha256.Sum256(contents))
}
var cacheIndex, sessionIndex, viewIndex int64
func (c *Cache) ID() string { return c.id }
func (c *Cache) MemStats() map[reflect.Type]int { return c.store.Stats() }
type packageStat struct {
id packageID
mode source.ParseMode
file int64
ast int64
types int64
typesInfo int64
total int64
}
func (c *Cache) PackageStats(withNames bool) template.HTML {
var packageStats []packageStat
c.store.DebugOnlyIterate(func(k, v interface{}) {
switch k.(type) {
case packageHandleKey:
v := v.(*packageData)
if v.pkg == nil {
break
}
var typsCost, typInfoCost int64
if v.pkg.types != nil {
typsCost = typesCost(v.pkg.types.Scope())
}
if v.pkg.typesInfo != nil {
typInfoCost = typesInfoCost(v.pkg.typesInfo)
}
stat := packageStat{
id: v.pkg.m.id,
mode: v.pkg.mode,
types: typsCost,
typesInfo: typInfoCost,
}
for _, f := range v.pkg.compiledGoFiles {
stat.file += int64(len(f.Src))
stat.ast += astCost(f.File)
}
stat.total = stat.file + stat.ast + stat.types + stat.typesInfo
packageStats = append(packageStats, stat)
}
})
var totalCost int64
for _, stat := range packageStats {
totalCost += stat.total
}
sort.Slice(packageStats, func(i, j int) bool {
return packageStats[i].total > packageStats[j].total
})
html := "<table><thead><td>Name</td><td>total = file + ast + types + types info</td></thead>\n"
human := func(n int64) string {
return fmt.Sprintf("%.2f", float64(n)/(1024*1024))
}
var printedCost int64
for _, stat := range packageStats {
name := stat.id
if !withNames {
name = "-"
}
html += fmt.Sprintf("<tr><td>%v (%v)</td><td>%v = %v + %v + %v + %v</td></tr>\n", name, stat.mode,
human(stat.total), human(stat.file), human(stat.ast), human(stat.types), human(stat.typesInfo))
printedCost += stat.total
if float64(printedCost) > float64(totalCost)*.9 {
break
}
}
html += "</table>\n"
return template.HTML(html)
}
func astCost(f *ast.File) int64 {
if f == nil {
return 0
}
var count int64
ast.Inspect(f, func(n ast.Node) bool {
count += 32 // nodes are pretty small.
return true
})
return count
}
func typesCost(scope *types.Scope) int64 {
cost := 64 + int64(scope.Len())*128 // types.object looks pretty big
for i := 0; i < scope.NumChildren(); i++ {
cost += typesCost(scope.Child(i))
}
return cost
}
func typesInfoCost(info *types.Info) int64 {
// Most of these refer to existing objects, with the exception of InitOrder, Selections, and Types.
cost := 24*len(info.Defs) +
32*len(info.Implicits) +
256*len(info.InitOrder) + // these are big, but there aren't many of them.
32*len(info.Scopes) +
128*len(info.Selections) + // wild guess
128*len(info.Types) + // wild guess
32*len(info.Uses)
return int64(cost)
}