2019-05-15 10:24:49 -06:00
|
|
|
// Copyright 2019 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package cache
|
|
|
|
|
|
|
|
import (
|
2019-05-30 07:03:31 -06:00
|
|
|
"context"
|
2020-07-28 16:00:54 -06:00
|
|
|
"crypto/sha256"
|
2019-05-17 10:15:22 -06:00
|
|
|
"fmt"
|
2020-06-02 15:42:13 -06:00
|
|
|
"go/ast"
|
2019-05-17 08:51:19 -06:00
|
|
|
"go/token"
|
2020-06-02 15:42:13 -06:00
|
|
|
"go/types"
|
|
|
|
"html/template"
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
2020-02-03 15:07:45 -07:00
|
|
|
"reflect"
|
2020-06-02 15:42:13 -06:00
|
|
|
"sort"
|
2019-05-29 12:55:52 -06:00
|
|
|
"strconv"
|
2020-07-16 16:01:42 -06:00
|
|
|
"sync"
|
2019-05-29 12:55:52 -06:00
|
|
|
"sync/atomic"
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
"time"
|
2019-05-17 08:51:19 -06:00
|
|
|
|
2020-06-03 15:06:45 -06:00
|
|
|
"golang.org/x/tools/internal/event"
|
2020-06-25 23:34:55 -06:00
|
|
|
"golang.org/x/tools/internal/gocommand"
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
"golang.org/x/tools/internal/lsp/debug/tag"
|
2019-05-15 10:24:49 -06:00
|
|
|
"golang.org/x/tools/internal/lsp/source"
|
2019-05-30 07:03:31 -06:00
|
|
|
"golang.org/x/tools/internal/memoize"
|
2019-05-17 08:51:19 -06:00
|
|
|
"golang.org/x/tools/internal/span"
|
2019-05-15 10:24:49 -06:00
|
|
|
)
|
|
|
|
|
2020-02-28 08:30:03 -07:00
|
|
|
func New(ctx context.Context, options func(*source.Options)) *Cache {
|
2019-05-29 12:55:52 -06:00
|
|
|
index := atomic.AddInt64(&cacheIndex, 1)
|
2020-02-18 18:59:37 -07:00
|
|
|
c := &Cache{
|
2020-07-16 16:01:42 -06:00
|
|
|
id: strconv.FormatInt(index, 10),
|
|
|
|
fset: token.NewFileSet(),
|
|
|
|
options: options,
|
|
|
|
fileContent: map[span.URI]*fileHandle{},
|
2019-05-17 08:51:19 -06:00
|
|
|
}
|
2019-05-29 12:55:52 -06:00
|
|
|
return c
|
2019-05-15 10:24:49 -06:00
|
|
|
}
|
|
|
|
|
2020-02-18 18:59:37 -07:00
|
|
|
type Cache struct {
|
2019-10-10 18:48:16 -06:00
|
|
|
id string
|
|
|
|
fset *token.FileSet
|
|
|
|
options func(*source.Options)
|
2019-05-30 07:03:31 -06:00
|
|
|
|
|
|
|
store memoize.Store
|
|
|
|
|
2020-07-16 16:01:42 -06:00
|
|
|
fileMu sync.Mutex
|
|
|
|
fileContent map[span.URI]*fileHandle
|
2019-05-30 07:03:31 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
type fileHandle struct {
|
2020-07-16 16:01:42 -06:00
|
|
|
modTime time.Time
|
|
|
|
uri span.URI
|
internal/memoize: switch from GC-driven to explicit deletion
The GC-based cache has given us a number of problems. First, memory
leaks driven by reference cycles: the Go runtime cannot collect cycles
involving finalizers, which prevents us from writing natural code in
Bind callbacks. If we screw it up, we get a mysterious leak that takes a
long time to track down. Second, the behavior is generally mysterious;
it's hard to predict how long a value lasts, and harder to tell if a
value being live is a bug. Third, we think that it may be interacting
poorly with the GC, resulting in unnecessary memory usage.
The structure of the values we put in the cache is not actually that
complicated -- there are only 5 significant types: parse, typecheck,
analyze, parse mod, and analyze mod. Managing them manually should not
be conceptually difficult, and in fact we already do most of the work
in (*snapshot).clone.
In this CL the cache adds the concept of "generations", which function
as reference counts on cache entries. Entries are still global and
shared across generations, but will be explicitly deleted once no
generations refer to them. The idea is that each snapshot is a new
generation, and can inherit entries from the previous snapshot or leave
them behind to be deleted.
One obvious risk of this scheme is that we'll leave dangling references
to values without actually inheriting them across generations. To
prevent that, getting a value requires passing in the generation at
which it's being read, and an error will be returned if that generation
is dead.
Change-Id: I4b30891efd7be4e10f2b84f4c067b0dee43dcf9c
Reviewed-on: https://go-review.googlesource.com/c/tools/+/242838
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
Reviewed-by: Robert Findley <rfindley@google.com>
2020-07-24 15:17:13 -06:00
|
|
|
bytes []byte
|
|
|
|
hash string
|
|
|
|
err error
|
2019-05-15 10:24:49 -06:00
|
|
|
}
|
|
|
|
|
2020-07-15 23:15:38 -06:00
|
|
|
func (c *Cache) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) {
|
|
|
|
return c.getFile(ctx, uri)
|
|
|
|
}
|
|
|
|
|
2020-06-19 17:07:57 -06:00
|
|
|
func (c *Cache) getFile(ctx context.Context, uri span.URI) (*fileHandle, error) {
|
2020-07-16 16:01:42 -06:00
|
|
|
fi, statErr := os.Stat(uri.Filename())
|
|
|
|
if statErr != nil {
|
|
|
|
return &fileHandle{err: statErr}, nil
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
}
|
|
|
|
|
2020-07-16 16:01:42 -06:00
|
|
|
c.fileMu.Lock()
|
|
|
|
fh, ok := c.fileContent[uri]
|
|
|
|
c.fileMu.Unlock()
|
|
|
|
if ok && fh.modTime.Equal(fi.ModTime()) {
|
|
|
|
return fh, nil
|
2019-05-30 07:03:31 -06:00
|
|
|
}
|
2020-07-16 16:01:42 -06:00
|
|
|
|
|
|
|
select {
|
|
|
|
case ioLimit <- struct{}{}:
|
|
|
|
case <-ctx.Done():
|
|
|
|
return nil, ctx.Err()
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
}
|
2020-07-16 16:01:42 -06:00
|
|
|
defer func() { <-ioLimit }()
|
|
|
|
|
|
|
|
fh = readFile(ctx, uri, fi.ModTime())
|
|
|
|
c.fileMu.Lock()
|
|
|
|
c.fileContent[uri] = fh
|
|
|
|
c.fileMu.Unlock()
|
|
|
|
return fh, nil
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// ioLimit limits the number of parallel file reads per process.
// It is used as a counting semaphore: getFile sends on it before reading a
// file and receives from it once the read is done, so at most 128 reads are
// in flight at any time.
var ioLimit = make(chan struct{}, 128)
|
|
|
|
|
2020-07-16 16:01:42 -06:00
|
|
|
func readFile(ctx context.Context, uri span.URI, modTime time.Time) *fileHandle {
|
|
|
|
ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Filename()))
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
_ = ctx
|
|
|
|
defer done()
|
|
|
|
|
|
|
|
data, err := ioutil.ReadFile(uri.Filename())
|
|
|
|
if err != nil {
|
2020-07-16 16:01:42 -06:00
|
|
|
return &fileHandle{
|
|
|
|
modTime: modTime,
|
|
|
|
err: err,
|
|
|
|
}
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
}
|
2019-05-30 07:03:31 -06:00
|
|
|
return &fileHandle{
|
2020-07-16 16:01:42 -06:00
|
|
|
modTime: modTime,
|
|
|
|
uri: uri,
|
|
|
|
bytes: data,
|
|
|
|
hash: hashContents(data),
|
2019-05-30 07:03:31 -06:00
|
|
|
}
|
2019-05-31 17:41:39 -06:00
|
|
|
}
|
|
|
|
|
2020-02-28 08:30:03 -07:00
|
|
|
func (c *Cache) NewSession(ctx context.Context) *Session {
|
2019-05-29 12:55:52 -06:00
|
|
|
index := atomic.AddInt64(&sessionIndex, 1)
|
2020-02-18 18:59:37 -07:00
|
|
|
s := &Session{
|
2020-06-25 23:34:55 -06:00
|
|
|
cache: c,
|
|
|
|
id: strconv.FormatInt(index, 10),
|
|
|
|
options: source.DefaultOptions(),
|
|
|
|
overlays: make(map[span.URI]*overlay),
|
|
|
|
gocmdRunner: &gocommand.Runner{},
|
2019-05-15 10:24:49 -06:00
|
|
|
}
|
2020-06-03 15:06:45 -06:00
|
|
|
event.Log(ctx, "New session", KeyCreateSession.Of(s))
|
2019-05-29 12:55:52 -06:00
|
|
|
return s
|
2019-05-15 10:24:49 -06:00
|
|
|
}
|
2019-05-17 08:51:19 -06:00
|
|
|
|
2020-02-18 18:59:37 -07:00
|
|
|
func (c *Cache) FileSet() *token.FileSet {
|
2019-05-17 08:51:19 -06:00
|
|
|
return c.fset
|
|
|
|
}
|
2019-05-17 10:15:22 -06:00
|
|
|
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
func (h *fileHandle) URI() span.URI {
|
|
|
|
return h.uri
|
2019-05-30 07:03:31 -06:00
|
|
|
}
|
|
|
|
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
func (h *fileHandle) Kind() source.FileKind {
|
|
|
|
return source.DetectLanguage("", h.uri.Filename())
|
2019-05-30 07:03:31 -06:00
|
|
|
}
|
|
|
|
|
2020-07-26 16:01:39 -06:00
|
|
|
func (h *fileHandle) Hash() string {
|
|
|
|
return h.hash
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
}
|
|
|
|
|
2020-07-26 16:01:39 -06:00
|
|
|
func (h *fileHandle) FileIdentity() source.FileIdentity {
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
return source.FileIdentity{
|
2020-07-26 16:01:39 -06:00
|
|
|
URI: h.uri,
|
|
|
|
Hash: h.hash,
|
|
|
|
Kind: h.Kind(),
|
2019-05-30 07:03:31 -06:00
|
|
|
}
|
internal/lsp: read files eagerly
We use file identities pervasively throughout gopls. Prior to this
change, the identity is the modification date of an unopened file, or
the hash of an opened file. That means that opening a file changes its
identity, which causes unnecessary churn in the cache.
Unfortunately, there isn't an easy way to fix this. Changing the
cache key to something else, such as the modification time, means that
we won't unify cache entries if a change is made and then undone. The
approach here is to read files eagerly in GetFile, so that we know their
hashes immediately. That resolves the churn, but means that we do a ton
of file IO at startup.
Incidental changes:
Remove the FileSystem interface; there was only one implementation and
it added a fair amount of cruft. We have many other places that assume
os.Stat and such work.
Add direct accessors to FileHandle for URI, Kind, and Version. Most uses
of (FileHandle).Identity were for stuff that we derive solely from the
URI, and this helped me disentangle them. It is a *ton* of churn,
though. I can revert it if you want.
Change-Id: Ia2133bc527f71daf81c9d674951726a232ca5bc9
Reviewed-on: https://go-review.googlesource.com/c/tools/+/237037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2020-06-08 13:21:24 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
func (h *fileHandle) Read() ([]byte, error) {
|
|
|
|
return h.bytes, h.err
|
2019-05-30 07:03:31 -06:00
|
|
|
}
|
|
|
|
|
2019-05-17 10:15:22 -06:00
|
|
|
// hashContents returns the SHA-256 digest of contents as a lowercase
// hexadecimal string.
func hashContents(contents []byte) string {
	digest := sha256.Sum256(contents)
	return fmt.Sprintf("%x", digest)
}
|
2019-05-29 12:55:52 -06:00
|
|
|
|
|
|
|
// Process-wide counters. cacheIndex and sessionIndex are advanced with
// atomic.AddInt64 to derive the IDs of new caches and sessions.
// NOTE(review): viewIndex is presumably used the same way for views; its use
// is not in this part of the file.
var (
	cacheIndex   int64
	sessionIndex int64
	viewIndex    int64
)
|
|
|
|
|
2020-06-02 08:57:20 -06:00
|
|
|
// ID returns the identifier assigned to the cache when it was created.
func (c *Cache) ID() string {
	return c.id
}
|
|
|
|
// MemStats returns the per-type statistics reported by the cache's memoize
// store.
// NOTE(review): the int is presumably an entry count per value type; confirm
// against memoize.Store.Stats.
func (c *Cache) MemStats() map[reflect.Type]int {
	return c.store.Stats()
}
|
2020-06-02 15:42:13 -06:00
|
|
|
|
|
|
|
type packageStat struct {
|
|
|
|
id packageID
|
|
|
|
mode source.ParseMode
|
|
|
|
file int64
|
|
|
|
ast int64
|
|
|
|
types int64
|
|
|
|
typesInfo int64
|
|
|
|
total int64
|
|
|
|
}
|
|
|
|
|
2020-06-18 16:42:34 -06:00
|
|
|
func (c *Cache) PackageStats(withNames bool) template.HTML {
|
2020-06-02 15:42:13 -06:00
|
|
|
var packageStats []packageStat
|
|
|
|
c.store.DebugOnlyIterate(func(k, v interface{}) {
|
|
|
|
switch k.(type) {
|
|
|
|
case packageHandleKey:
|
|
|
|
v := v.(*packageData)
|
2020-06-26 15:18:10 -06:00
|
|
|
if v.pkg == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
var typsCost, typInfoCost int64
|
|
|
|
if v.pkg.types != nil {
|
|
|
|
typsCost = typesCost(v.pkg.types.Scope())
|
|
|
|
}
|
|
|
|
if v.pkg.typesInfo != nil {
|
|
|
|
typInfoCost = typesInfoCost(v.pkg.typesInfo)
|
|
|
|
}
|
2020-06-02 15:42:13 -06:00
|
|
|
stat := packageStat{
|
2020-07-15 15:15:09 -06:00
|
|
|
id: v.pkg.m.id,
|
2020-06-02 15:42:13 -06:00
|
|
|
mode: v.pkg.mode,
|
2020-06-26 15:18:10 -06:00
|
|
|
types: typsCost,
|
|
|
|
typesInfo: typInfoCost,
|
2020-06-02 15:42:13 -06:00
|
|
|
}
|
|
|
|
for _, f := range v.pkg.compiledGoFiles {
|
2020-07-21 13:15:06 -06:00
|
|
|
stat.file += int64(len(f.Src))
|
|
|
|
stat.ast += astCost(f.File)
|
2020-06-02 15:42:13 -06:00
|
|
|
}
|
|
|
|
stat.total = stat.file + stat.ast + stat.types + stat.typesInfo
|
|
|
|
packageStats = append(packageStats, stat)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
var totalCost int64
|
|
|
|
for _, stat := range packageStats {
|
|
|
|
totalCost += stat.total
|
|
|
|
}
|
|
|
|
sort.Slice(packageStats, func(i, j int) bool {
|
|
|
|
return packageStats[i].total > packageStats[j].total
|
|
|
|
})
|
|
|
|
html := "<table><thead><td>Name</td><td>total = file + ast + types + types info</td></thead>\n"
|
|
|
|
human := func(n int64) string {
|
|
|
|
return fmt.Sprintf("%.2f", float64(n)/(1024*1024))
|
|
|
|
}
|
|
|
|
var printedCost int64
|
|
|
|
for _, stat := range packageStats {
|
2020-06-18 16:42:34 -06:00
|
|
|
name := stat.id
|
|
|
|
if !withNames {
|
|
|
|
name = "-"
|
|
|
|
}
|
|
|
|
html += fmt.Sprintf("<tr><td>%v (%v)</td><td>%v = %v + %v + %v + %v</td></tr>\n", name, stat.mode,
|
2020-06-02 15:42:13 -06:00
|
|
|
human(stat.total), human(stat.file), human(stat.ast), human(stat.types), human(stat.typesInfo))
|
|
|
|
printedCost += stat.total
|
|
|
|
if float64(printedCost) > float64(totalCost)*.9 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
html += "</table>\n"
|
|
|
|
return template.HTML(html)
|
|
|
|
}
|
|
|
|
|
|
|
|
// astCost estimates the memory used by the syntax tree of f by charging a
// flat 32 bytes for every node reachable through ast.Inspect. It returns 0
// for a nil file.
func astCost(f *ast.File) int64 {
	if f == nil {
		return 0
	}
	var count int64
	ast.Inspect(f, func(n ast.Node) bool {
		// ast.Inspect also invokes the callback with a nil node after the
		// children of each node have been visited; those calls are not nodes
		// and must not be charged.
		if n != nil {
			count += 32 // nodes are pretty small.
		}
		return true
	})
	return count
}
|
|
|
|
|
|
|
|
// typesCost estimates the memory used by scope and all scopes nested in it:
// 64 bytes per scope plus 128 bytes per object declared directly in it.
func typesCost(scope *types.Scope) int64 {
	var nested int64
	for i, n := 0, scope.NumChildren(); i < n; i++ {
		nested += typesCost(scope.Child(i))
	}
	// types.object looks pretty big
	own := 64 + int64(scope.Len())*128
	return own + nested
}
|
|
|
|
|
|
|
|
// typesInfoCost estimates the memory used by info by multiplying the number
// of entries in each of its tables by a fixed per-entry guess.
func typesInfoCost(info *types.Info) int64 {
	// Most of these refer to existing objects, with the exception of InitOrder, Selections, and Types.
	var total int
	total += 24 * len(info.Defs)
	total += 32 * len(info.Implicits)
	total += 256 * len(info.InitOrder) // these are big, but there aren't many of them.
	total += 32 * len(info.Scopes)
	total += 128 * len(info.Selections) // wild guess
	total += 128 * len(info.Types)      // wild guess
	total += 32 * len(info.Uses)
	return int64(total)
}
|