diff --git a/src/cmd/compile/internal/types2/api_test.go b/src/cmd/compile/internal/types2/api_test.go index e6de955a6ee..f5526bb25a3 100644 --- a/src/cmd/compile/internal/types2/api_test.go +++ b/src/cmd/compile/internal/types2/api_test.go @@ -38,7 +38,7 @@ func pkgFor(path, source string, info *Info) (*Package, error) { return conf.Check(f.PkgName.Value, []*syntax.File{f}, info) } -func mustTypecheck(t *testing.T, path, source string, info *Info) string { +func mustTypecheck(t testing.TB, path, source string, info *Info) string { pkg, err := pkgFor(path, source, info) if err != nil { name := path diff --git a/src/cmd/compile/internal/types2/decl.go b/src/cmd/compile/internal/types2/decl.go index b6f81aa8a5f..008d3698b7e 100644 --- a/src/cmd/compile/internal/types2/decl.go +++ b/src/cmd/compile/internal/types2/decl.go @@ -522,8 +522,8 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *Named assert(rhs != nil) named.fromRHS = rhs - // If the underlying was not set while type-checking the right-hand side, it - // is invalid and an error should have been reported elsewhere. + // If the underlying type was not set while type-checking the right-hand + // side, it is invalid and an error should have been reported elsewhere. if named.underlying == nil { named.underlying = Typ[Invalid] } diff --git a/src/cmd/compile/internal/types2/instantiate.go b/src/cmd/compile/internal/types2/instantiate.go index bb90ab37368..44ed0319c82 100644 --- a/src/cmd/compile/internal/types2/instantiate.go +++ b/src/cmd/compile/internal/types2/instantiate.go @@ -79,9 +79,6 @@ func (check *Checker) instance(pos syntax.Pos, orig Type, targs []Type, ctxt *Co tname := NewTypeName(pos, orig.obj.pkg, orig.obj.name, nil) named := check.newNamed(tname, orig, nil, nil) // underlying, tparams, and methods are set when named is resolved named.targs = newTypeList(targs) - named.resolver = func(ctxt *Context, n *Named) (*TypeParamList, Type, *methodList) { - return expandNamed(ctxt, n, pos) - } res = named case *Signature: diff --git a/src/cmd/compile/internal/types2/named.go b/src/cmd/compile/internal/types2/named.go index 849398a6f43..0a2b2aa6b10 100644 --- a/src/cmd/compile/internal/types2/named.go +++ b/src/cmd/compile/internal/types2/named.go @@ -5,32 +5,116 @@ package types2 import ( - "cmd/compile/internal/syntax" "sync" + "sync/atomic" ) +// Type-checking Named types is subtle, because they may be recursively +// defined, and because their full details may be spread across multiple +// declarations (via methods). For this reason they are type-checked lazily, +// to avoid information being accessed before it is complete. +// +// Conceptually, it is helpful to think of named types as having two distinct +// sets of information: +// - "LHS" information, defining their identity: Obj() and TypeArgs() +// - "RHS" information, defining their details: TypeParams(), Underlying(), +// and methods. +// +// In this taxonomy, LHS information is available immediately, but RHS +// information is lazy. Specifically, a named type N may be constructed in any +// of the following ways: +// 1. type-checked from the source +// 2. loaded eagerly from export data +// 3. loaded lazily from export data (when using unified IR) +// 4. instantiated from a generic type +// +// In cases 1, 3, and 4, it is possible that the underlying type or methods of +// N may not be immediately available. +// - During type-checking, we allocate N before type-checking its underlying +// type or methods, so that we may resolve recursive references. +// - When loading from export data, we may load its methods and underlying +// type lazily using a provided load function. +// - After instantiating, we lazily expand the underlying type and methods +// (note that instances may be created while still in the process of +// type-checking the original type declaration). +// +// In cases 3 and 4 this lazy construction may also occur concurrently, due to +// concurrent use of the type checker API (after type checking or importing has +// finished). It is critical that we keep track of state, so that Named types +// are constructed exactly once and so that we do not access their details too +// soon. +// +// We achieve this by tracking state with an atomic state variable, and +// guarding potentially concurrent calculations with a mutex. At any point in +// time this state variable determines which data on N may be accessed. As +// state monotonically progresses, any data available at state M may be +// accessed without acquiring the mutex at state N, provided N >= M. +// +// GLOSSARY: Here are a few terms used in this file to describe Named types: +// - We say that a Named type is "instantiated" if it has been constructed by +// instantiating a generic named type with type arguments. +// - We say that a Named type is "declared" if it corresponds to a type +// declaration in the source. Instantiated named types correspond to a type +// instantiation in the source, not a declaration. But their Origin type is +// a declared type. +// - We say that a Named type is "resolved" if its RHS information has been +// loaded or fully type-checked. For Named types constructed from export +// data, this may involve invoking a loader function to extract information +// from export data. For instantiated named types this involves reading +// information from their origin. +// - We say that a Named type is "expanded" if it is an instantiated type and +// type parameters in its underlying type and methods have been substituted +// with the type arguments from the instantiation. A type may be partially +// expanded if some but not all of these details have been substituted. +// Similarly, we refer to these individual details (underlying type or +// method) as being "expanded". +// - When all information is known for a named type, we say it is "complete". +// +// Some invariants to keep in mind: each declared Named type has a single +// corresponding object, and that object's type is the (possibly generic) Named +// type. Declared Named types are identical if and only if their pointers are +// identical. On the other hand, multiple instantiated Named types may be +// identical even though their pointers are not identical. One has to use +// Identical to compare them. For instantiated named types, their obj is a +// synthetic placeholder that records their position of the corresponding +// instantiation in the source (if they were constructed during type checking). + // A Named represents a named (defined) type. type Named struct { - check *Checker - obj *TypeName // corresponding declared object for declared types; placeholder for instantiated types - orig *Named // original, uninstantiated type - fromRHS Type // type (on RHS of declaration) this *Named type is derived from (for cycle reporting) + check *Checker // non-nil during type-checking; nil otherwise + obj *TypeName // corresponding declared object for declared types; see above for instantiated types + orig *Named // origin type for instantiated types, this type for declared types + targs *TypeList // type arguments (after instantiation), or nil + + // fromRHS holds the type (on RHS of declaration) this *Named type is derived + // from (for cycle reporting). Only used by validType, and therefore does not + // require synchronization. + fromRHS Type + + mu sync.Mutex // guards all fields below + state_ uint32 // the current state of this type; must only be accessed atomically underlying Type // possibly a *Named during setup; never a *Named once set up completely tparams *TypeParamList // type parameters, or nil - targs *TypeList // type arguments (after instantiation), or nil // methods declared for this type (not the method set of this type). // Signatures are type-checked lazily. // For non-instantiated types, this is a fully populated list of methods. For - // instantiated types, this is a 'lazy' list, and methods are instantiated - // when they are first accessed. + // instantiated types, this is a 'lazy' list, and methods are individually + // expanded when they are first accessed. methods *methodList - // resolver may be provided to lazily resolve type parameters, underlying, and methods. - resolver func(*Context, *Named) (tparams *TypeParamList, underlying Type, methods *methodList) - once sync.Once // ensures that tparams, underlying, and methods are resolved before accessing + // loader may be provided to lazily load type parameters, underlying, and methods. + loader func(*Named) (tparams []*TypeParam, underlying Type, methods []*Func) } +// namedState represents the possible states that a named type may assume. +type namedState uint32 + +const ( + unresolved namedState = iota // tparams, underlying type and methods might be unavailable + resolved +) + // NewNamed returns a new named type for the given type name, underlying type, and associated methods. // If the given type name obj doesn't have a type yet, its type is set to the returned named type. // The underlying type must not be a *Named. @@ -41,24 +125,74 @@ func NewNamed(obj *TypeName, underlying Type, methods []*Func) *Named { return (*Checker)(nil).newNamed(obj, nil, underlying, newMethodList(methods)) } -func (t *Named) resolve(ctxt *Context) *Named { - if t.resolver == nil { - return t +// resolve resolves the type parameters, methods, and underlying type of n. +// This information may be loaded from a provided loader function, or computed +// from an origin type (in the case of instances). +// +// After resolution, the type parameters, methods, and underlying type of n are +// accessible; but if n is an instantiated type, its methods may still be +// unexpanded. +func (n *Named) resolve(ctxt *Context) *Named { + if n.state() >= resolved { // avoid locking below + return n } - t.once.Do(func() { - // TODO(mdempsky): Since we're passing t to the resolver anyway - // (necessary because types2 expects the receiver type for methods - // on defined interface types to be the Named rather than the - // underlying Interface), maybe it should just handle calling - // SetTypeParams, SetUnderlying, and AddMethod instead? Those - // methods would need to support reentrant calls though. It would - // also make the API more future-proof towards further extensions - // (like SetTypeParams). - t.tparams, t.underlying, t.methods = t.resolver(ctxt, t) - t.fromRHS = t.underlying // for cycle detection - }) - return t + // TODO(rfindley): if n.check is non-nil we can avoid locking here, since + // type-checking is not concurrent. Evaluate if this is worth doing. + n.mu.Lock() + defer n.mu.Unlock() + + if n.state() >= resolved { + return n + } + + if n.TypeArgs().Len() > 0 { + assert(n.underlying == nil) // n is an unresolved instance + assert(n.loader == nil) // instances are created by instantiation, in which case n.loader is nil + n.orig.resolve(ctxt) + + underlying := n.expandUnderlying(ctxt) + + n.tparams = n.orig.tparams + n.underlying = underlying + n.fromRHS = n.orig.fromRHS // for cycle detection + n.methods = newLazyMethodList(n.orig.methods.Len()) + n.setState(resolved) + return n + } + + // TODO(mdempsky): Since we're passing n to the loader anyway + // (necessary because types2 expects the receiver type for methods + // on defined interface types to be the Named rather than the + // underlying Interface), maybe it should just handle calling + // SetTypeParams, SetUnderlying, and AddMethod instead? Those + // methods would need to support reentrant calls though. It would + // also make the API more future-proof towards further extensions. + if n.loader != nil { + assert(n.underlying == nil) + + tparams, underlying, methods := n.loader(n) + + n.tparams = bindTParams(tparams) + n.underlying = underlying + n.fromRHS = underlying // for cycle detection + n.methods = newMethodList(methods) + n.loader = nil + } + + n.setState(resolved) + return n +} + +// state atomically accesses the current state of the receiver. +func (n *Named) state() namedState { + return namedState(atomic.LoadUint32(&n.state_)) +} + +// setState atomically stores the given state for n. +// Must only be called while holding n.mu. +func (n *Named) setState(state namedState) { + atomic.StoreUint32(&n.state_, uint32(state)) } // newNamed is like NewNamed but with a *Checker receiver and additional orig argument. @@ -80,16 +214,16 @@ func (check *Checker) newNamed(obj *TypeName, orig *Named, underlying Type, meth func (t *Named) cleanup() { assert(t.orig.orig == t.orig) // Ensure that every defined type created in the course of type-checking has - // either non-*Named underlying, or is unresolved. + // either non-*Named underlying type, or is unexpanded. // - // This guarantees that we don't leak any types whose underlying is *Named, - // because any unresolved instances will lazily compute their underlying by - // substituting in the underlying of their origin. The origin must have - // either been imported or type-checked and expanded here, and in either case - // its underlying will be fully expanded. + // This guarantees that we don't leak any types whose underlying type is + // *Named, because any unexpanded instances will lazily compute their + // underlying type by substituting in the underlying type of their origin. + // The origin must have either been imported or type-checked and expanded + // here, and in either case its underlying type will be fully expanded. switch t.underlying.(type) { case nil: - if t.resolver == nil { + if t.TypeArgs().Len() == 0 { panic("nil underlying") } case *Named: @@ -134,12 +268,13 @@ func (t *Named) NumMethods() int { return t.resolve(nil).methods.Len() } func (t *Named) Method(i int) *Func { t.resolve(nil) return t.methods.At(i, func() *Func { - return t.instantiateMethod(i) + return t.expandMethod(i) }) } -// instantiateMethod instantiates the i'th method for an instantiated receiver. -func (t *Named) instantiateMethod(i int) *Func { +// expandMethod substitutes type arguments in the i'th method for an +// instantiated receiver. +func (t *Named) expandMethod(i int) *Func { assert(t.TypeArgs().Len() > 0) // t must be an instance // t.orig.methods is not lazy. origm is the method instantiated with its @@ -275,7 +410,7 @@ func (n0 *Named) under() Type { check := n0.check n := n0 - seen := make(map[*Named]int) // types that need their underlying resolved + seen := make(map[*Named]int) // types that need their underlying type resolved var path []Object // objects encountered, for cycle reporting loop: @@ -352,66 +487,64 @@ func (check *Checker) bestContext(ctxt *Context) *Context { return NewContext() } -// expandNamed ensures that the underlying type of n is instantiated. -// The underlying type will be Typ[Invalid] if there was an error. -func expandNamed(ctxt *Context, n *Named, instPos syntax.Pos) (tparams *TypeParamList, underlying Type, methods *methodList) { +// expandUnderlying substitutes type arguments in the underlying type n.orig, +// returning the result. Returns Typ[Invalid] if there was an error. +func (n *Named) expandUnderlying(ctxt *Context) Type { check := n.check if check != nil && check.conf.Trace { - check.trace(instPos, "-- expandNamed %s", n) + check.trace(n.obj.pos, "-- Named.expandUnderlying %s", n) check.indent++ defer func() { check.indent-- - check.trace(instPos, "=> %s (tparams = %s, under = %s)", n, tparams.list(), underlying) + check.trace(n.obj.pos, "=> %s (tparams = %s, under = %s)", n, n.tparams.list(), n.underlying) }() } - n.orig.resolve(ctxt) assert(n.orig.underlying != nil) if _, unexpanded := n.orig.underlying.(*Named); unexpanded { - // We should only get an unexpanded underlying here during type checking + // We should only get a Named underlying type here during type checking // (for example, in recursive type declarations). assert(check != nil) } - // Mismatching arg and tparam length may be checked elsewhere. - if n.orig.tparams.Len() == n.targs.Len() { - // We must always have a context, to avoid infinite recursion. - ctxt = check.bestContext(ctxt) - h := ctxt.instanceHash(n.orig, n.targs.list()) - // ensure that an instance is recorded for h to avoid infinite recursion. - ctxt.update(h, n.orig, n.TypeArgs().list(), n) - - smap := makeSubstMap(n.orig.tparams.list(), n.targs.list()) - underlying = n.check.subst(instPos, n.orig.underlying, smap, ctxt) - // If the underlying of n is an interface, we need to set the receiver of - // its methods accurately -- we set the receiver of interface methods on - // the RHS of a type declaration to the defined type. - if iface, _ := underlying.(*Interface); iface != nil { - if methods, copied := replaceRecvType(iface.methods, n.orig, n); copied { - // If the underlying doesn't actually use type parameters, it's possible - // that it wasn't substituted. In this case we need to create a new - // *Interface before modifying receivers. - if iface == n.orig.underlying { - old := iface - iface = check.newInterface() - iface.embeddeds = old.embeddeds - iface.complete = old.complete - iface.implicit = old.implicit // should be false but be conservative - underlying = iface - } - iface.methods = methods - } - } - } else { - underlying = Typ[Invalid] + if n.orig.tparams.Len() != n.targs.Len() { + // Mismatching arg and tparam length may be checked elsewhere. + return Typ[Invalid] } - return n.orig.tparams, underlying, newLazyMethodList(n.orig.methods.Len()) + // We must always have a context, to avoid infinite recursion. + ctxt = check.bestContext(ctxt) + h := ctxt.instanceHash(n.orig, n.targs.list()) + // ensure that an instance is recorded for h to avoid infinite recursion. + ctxt.update(h, n.orig, n.TypeArgs().list(), n) + + smap := makeSubstMap(n.orig.tparams.list(), n.targs.list()) + underlying := n.check.subst(n.obj.pos, n.orig.underlying, smap, ctxt) + // If the underlying type of n is an interface, we need to set the receiver + // of its methods accurately -- we set the receiver of interface methods on + // the RHS of a type declaration to the defined type. + if iface, _ := underlying.(*Interface); iface != nil { + if methods, copied := replaceRecvType(iface.methods, n.orig, n); copied { + // If the underlying type doesn't actually use type parameters, it's + // possible that it wasn't substituted. In this case we need to create + // a new *Interface before modifying receivers. + if iface == n.orig.underlying { + old := iface + iface = check.newInterface() + iface.embeddeds = old.embeddeds + iface.complete = old.complete + iface.implicit = old.implicit // should be false but be conservative + underlying = iface + } + iface.methods = methods + } + } + return underlying } -// safeUnderlying returns the underlying of typ without expanding instances, to -// avoid infinite recursion. +// safeUnderlying returns the underlying type of typ without expanding +// instances, to avoid infinite recursion. // // TODO(rfindley): eliminate this function or give it a better name. func safeUnderlying(typ Type) Type { diff --git a/src/cmd/compile/internal/types2/named_test.go b/src/cmd/compile/internal/types2/named_test.go new file mode 100644 index 00000000000..14a982048a2 --- /dev/null +++ b/src/cmd/compile/internal/types2/named_test.go @@ -0,0 +1,75 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package types2_test + +import ( + "testing" + + . "cmd/compile/internal/types2" +) + +func BenchmarkNamed(b *testing.B) { + const src = ` +package p + +type T struct { + P int +} + +func (T) M(int) {} +func (T) N() (i int) { return } + +type G[P any] struct { + F P +} + +func (G[P]) M(P) {} +func (G[P]) N() (p P) { return } + +type Inst = G[int] + ` + pkg, err := pkgFor("p", src, nil) + if err != nil { + b.Fatal(err) + } + + var ( + T = pkg.Scope().Lookup("T").Type() + G = pkg.Scope().Lookup("G").Type() + SrcInst = pkg.Scope().Lookup("Inst").Type() + UserInst = mustInstantiate(b, G, Typ[Int]) + ) + + tests := []struct { + name string + typ Type + }{ + {"nongeneric", T}, + {"generic", G}, + {"src instance", SrcInst}, + {"user instance", UserInst}, + } + + b.Run("Underlying", func(b *testing.B) { + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + // Access underlying once, to trigger any lazy calculation. + _ = test.typ.Underlying() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = test.typ.Underlying() + } + }) + } + }) +} + +func mustInstantiate(tb testing.TB, orig Type, targs ...Type) Type { + inst, err := Instantiate(nil, orig, targs, true) + if err != nil { + tb.Fatal(err) + } + return inst +} diff --git a/src/cmd/compile/internal/types2/object.go b/src/cmd/compile/internal/types2/object.go index 75f7ea5b12a..df080f071c3 100644 --- a/src/cmd/compile/internal/types2/object.go +++ b/src/cmd/compile/internal/types2/object.go @@ -279,19 +279,7 @@ func NewTypeName(pos syntax.Pos, pkg *Package, name string, typ Type) *TypeName // lazily calls resolve to finish constructing the Named object. func NewTypeNameLazy(pos syntax.Pos, pkg *Package, name string, load func(named *Named) (tparams []*TypeParam, underlying Type, methods []*Func)) *TypeName { obj := NewTypeName(pos, pkg, name, nil) - - resolve := func(_ *Context, t *Named) (*TypeParamList, Type, *methodList) { - tparams, underlying, methods := load(t) - - switch underlying.(type) { - case nil, *Named: - panic(fmt.Sprintf("invalid underlying type %T", t.underlying)) - } - - return bindTParams(tparams), underlying, newMethodList(methods) - } - - NewNamed(obj, nil, nil).resolver = resolve + NewNamed(obj, nil, nil).loader = load return obj } diff --git a/src/cmd/compile/internal/types2/typexpr.go b/src/cmd/compile/internal/types2/typexpr.go index 1f8b40dba69..020653332d1 100644 --- a/src/cmd/compile/internal/types2/typexpr.go +++ b/src/cmd/compile/internal/types2/typexpr.go @@ -432,57 +432,20 @@ func (check *Checker) instantiatedType(x syntax.Expr, xlist []syntax.Expr, def * return Typ[Invalid] } - // enableTypeTypeInference controls whether to infer missing type arguments - // using constraint type inference. See issue #51527. - const enableTypeTypeInference = false - // create the instance ctxt := check.bestContext(nil) - h := ctxt.instanceHash(orig, targs) - // targs may be incomplete, and require inference. In any case we should de-duplicate. - inst, _ := ctxt.lookup(h, orig, targs).(*Named) - // If inst is non-nil, we can't just return here. Inst may have been - // constructed via recursive substitution, in which case we wouldn't do the - // validation below. Ensure that the validation (and resulting errors) runs - // for each instantiated type in the source. - if inst == nil { - // x may be a selector for an imported type; use its start pos rather than x.Pos(). - tname := NewTypeName(syntax.StartPos(x), orig.obj.pkg, orig.obj.name, nil) - inst = check.newNamed(tname, orig, nil, nil) // underlying, methods and tparams are set when named is resolved - inst.targs = newTypeList(targs) - inst = ctxt.update(h, orig, targs, inst).(*Named) - } + inst := check.instance(x.Pos(), orig, targs, ctxt).(*Named) def.setUnderlying(inst) - inst.resolver = func(ctxt *Context, n *Named) (*TypeParamList, Type, *methodList) { - tparams := n.orig.TypeParams().list() - - targs := n.targs.list() - if enableTypeTypeInference && len(targs) < len(tparams) { - // If inference fails, len(inferred) will be 0, and inst.underlying will - // be set to Typ[Invalid] in expandNamed. - inferred := check.infer(x.Pos(), tparams, targs, nil, nil) - if len(inferred) > len(targs) { - n.targs = newTypeList(inferred) - } - } - - return expandNamed(ctxt, n, x.Pos()) - } - // orig.tparams may not be set up, so we need to do expansion later. check.later(func() { // This is an instance from the source, not from recursive substitution, // and so it must be resolved during type-checking so that we can report // errors. - inst.resolve(ctxt) - // Since check is non-nil, we can still mutate inst. Unpinning the resolver - // frees some memory. - inst.resolver = nil check.recordInstance(x, inst.TypeArgs().list(), inst) - if check.validateTArgLen(x.Pos(), inst.tparams.Len(), inst.targs.Len()) { - if i, err := check.verify(x.Pos(), inst.tparams.list(), inst.targs.list()); err != nil { + if check.validateTArgLen(x.Pos(), inst.TypeParams().Len(), inst.targs.Len()) { + if i, err := check.verify(x.Pos(), inst.TypeParams().list(), inst.targs.list()); err != nil { // best position for error reporting pos := x.Pos() if i < len(xlist) { @@ -490,10 +453,13 @@ func (check *Checker) instantiatedType(x syntax.Expr, xlist []syntax.Expr, def * } check.softErrorf(pos, "%s", err) } else { - check.mono.recordInstance(check.pkg, x.Pos(), inst.tparams.list(), inst.targs.list(), xlist) + check.mono.recordInstance(check.pkg, x.Pos(), inst.TypeParams().list(), inst.targs.list(), xlist) } } + // TODO(rfindley): remove this call: we don't need to call validType here, + // as cycles can only occur for types used inside a Named type declaration, + // and so it suffices to call validType from declared types. check.validType(inst) }).describef(x, "resolve instance %s", inst) diff --git a/src/go/types/api_test.go b/src/go/types/api_test.go index eb17f9280dd..db2ace5feb1 100644 --- a/src/go/types/api_test.go +++ b/src/go/types/api_test.go @@ -43,7 +43,7 @@ func pkgForMode(path, source string, info *Info, mode parser.Mode) (*Package, er return conf.Check(f.Name.Name, fset, []*ast.File{f}, info) } -func mustTypecheck(t *testing.T, path, source string, info *Info) string { +func mustTypecheck(t testing.TB, path, source string, info *Info) string { pkg, err := pkgFor(path, source, info) if err != nil { name := path diff --git a/src/go/types/decl.go b/src/go/types/decl.go index 123d296791b..61c9696948f 100644 --- a/src/go/types/decl.go +++ b/src/go/types/decl.go @@ -579,8 +579,8 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *ast.TypeSpec, def *Named) { assert(rhs != nil) named.fromRHS = rhs - // If the underlying was not set while type-checking the right-hand side, it - // is invalid and an error should have been reported elsewhere. + // If the underlying type was not set while type-checking the right-hand + // side, it is invalid and an error should have been reported elsewhere. if named.underlying == nil { named.underlying = Typ[Invalid] } diff --git a/src/go/types/instantiate.go b/src/go/types/instantiate.go index 964a4f907ca..8be0eab4076 100644 --- a/src/go/types/instantiate.go +++ b/src/go/types/instantiate.go @@ -79,9 +79,6 @@ func (check *Checker) instance(pos token.Pos, orig Type, targs []Type, ctxt *Con tname := NewTypeName(pos, orig.obj.pkg, orig.obj.name, nil) named := check.newNamed(tname, orig, nil, nil) // underlying, tparams, and methods are set when named is resolved named.targs = newTypeList(targs) - named.resolver = func(ctxt *Context, n *Named) (*TypeParamList, Type, *methodList) { - return expandNamed(ctxt, n, pos) - } res = named case *Signature: diff --git a/src/go/types/named.go b/src/go/types/named.go index a82679eb100..bfb4a11da72 100644 --- a/src/go/types/named.go +++ b/src/go/types/named.go @@ -5,32 +5,116 @@ package types import ( - "go/token" "sync" + "sync/atomic" ) +// Type-checking Named types is subtle, because they may be recursively +// defined, and because their full details may be spread across multiple +// declarations (via methods). For this reason they are type-checked lazily, +// to avoid information being accessed before it is complete. +// +// Conceptually, it is helpful to think of named types as having two distinct +// sets of information: +// - "LHS" information, defining their identity: Obj() and TypeArgs() +// - "RHS" information, defining their details: TypeParams(), Underlying(), +// and methods. +// +// In this taxonomy, LHS information is available immediately, but RHS +// information is lazy. Specifically, a named type N may be constructed in any +// of the following ways: +// 1. type-checked from the source +// 2. loaded eagerly from export data +// 3. loaded lazily from export data (when using unified IR) +// 4. instantiated from a generic type +// +// In cases 1, 3, and 4, it is possible that the underlying type or methods of +// N may not be immediately available. +// - During type-checking, we allocate N before type-checking its underlying +// type or methods, so that we may resolve recursive references. +// - When loading from export data, we may load its methods and underlying +// type lazily using a provided load function. +// - After instantiating, we lazily expand the underlying type and methods +// (note that instances may be created while still in the process of +// type-checking the original type declaration). +// +// In cases 3 and 4 this lazy construction may also occur concurrently, due to +// concurrent use of the type checker API (after type checking or importing has +// finished). It is critical that we keep track of state, so that Named types +// are constructed exactly once and so that we do not access their details too +// soon. +// +// We achieve this by tracking state with an atomic state variable, and +// guarding potentially concurrent calculations with a mutex. At any point in +// time this state variable determines which data on N may be accessed. As +// state monotonically progresses, any data available at state M may be +// accessed without acquiring the mutex at state N, provided N >= M. +// +// GLOSSARY: Here are a few terms used in this file to describe Named types: +// - We say that a Named type is "instantiated" if it has been constructed by +// instantiating a generic named type with type arguments. +// - We say that a Named type is "declared" if it corresponds to a type +// declaration in the source. Instantiated named types correspond to a type +// instantiation in the source, not a declaration. But their Origin type is +// a declared type. +// - We say that a Named type is "resolved" if its RHS information has been +// loaded or fully type-checked. For Named types constructed from export +// data, this may involve invoking a loader function to extract information +// from export data. For instantiated named types this involves reading +// information from their origin. +// - We say that a Named type is "expanded" if it is an instantiated type and +// type parameters in its underlying type and methods have been substituted +// with the type arguments from the instantiation. A type may be partially +// expanded if some but not all of these details have been substituted. +// Similarly, we refer to these individual details (underlying type or +// method) as being "expanded". +// - When all information is known for a named type, we say it is "complete". +// +// Some invariants to keep in mind: each declared Named type has a single +// corresponding object, and that object's type is the (possibly generic) Named +// type. Declared Named types are identical if and only if their pointers are +// identical. On the other hand, multiple instantiated Named types may be +// identical even though their pointers are not identical. One has to use +// Identical to compare them. For instantiated named types, their obj is a +// synthetic placeholder that records their position of the corresponding +// instantiation in the source (if they were constructed during type checking). + // A Named represents a named (defined) type. type Named struct { - check *Checker - obj *TypeName // corresponding declared object for declared types; placeholder for instantiated types - orig *Named // original, uninstantiated type - fromRHS Type // type (on RHS of declaration) this *Named type is derived of (for cycle reporting) + check *Checker // non-nil during type-checking; nil otherwise + obj *TypeName // corresponding declared object for declared types; see above for instantiated types + orig *Named // origin type for instantiated types, this type for declared types + targs *TypeList // type arguments (after instantiation), or nil + + // fromRHS holds the type (on RHS of declaration) this *Named type is derived + // from (for cycle reporting). Only used by validType, and therefore does not + // require synchronization. + fromRHS Type + + mu sync.Mutex // guards all fields below + state_ uint32 // the current state of this type; must only be accessed atomically underlying Type // possibly a *Named during setup; never a *Named once set up completely tparams *TypeParamList // type parameters, or nil - targs *TypeList // type arguments (after instantiation), or nil // methods declared for this type (not the method set of this type). // Signatures are type-checked lazily. // For non-instantiated types, this is a fully populated list of methods. For - // instantiated types, this is a 'lazy' list, and methods are instantiated - // when they are first accessed. + // instantiated types, this is a 'lazy' list, and methods are individually + // expanded when they are first accessed. methods *methodList - // resolver may be provided to lazily resolve type parameters, underlying, and methods. - resolver func(*Context, *Named) (tparams *TypeParamList, underlying Type, methods *methodList) - once sync.Once // ensures that tparams, underlying, and methods are resolved before accessing + // loader may be provided to lazily load type parameters, underlying, and methods. + loader func(*Named) (tparams []*TypeParam, underlying Type, methods []*Func) } +// namedState represents the possible states that a named type may assume. +type namedState uint32 + +const ( + unresolved namedState = iota // tparams, underlying type and methods might be unavailable + resolved +) + // NewNamed returns a new named type for the given type name, underlying type, and associated methods. // If the given type name obj doesn't have a type yet, its type is set to the returned named type. // The underlying type must not be a *Named. @@ -41,24 +125,74 @@ func NewNamed(obj *TypeName, underlying Type, methods []*Func) *Named { return (*Checker)(nil).newNamed(obj, nil, underlying, newMethodList(methods)) } -func (t *Named) resolve(ctxt *Context) *Named { - if t.resolver == nil { - return t +// resolve resolves the type parameters, methods, and underlying type of n. +// This information may be loaded from a provided loader function, or computed +// from an origin type (in the case of instances). +// +// After resolution, the type parameters, methods, and underlying type of n are +// accessible; but if n is an instantiated type, its methods may still be +// unexpanded. +func (n *Named) resolve(ctxt *Context) *Named { + if n.state() >= resolved { // avoid locking below + return n } - t.once.Do(func() { - // TODO(mdempsky): Since we're passing t to the resolver anyway - // (necessary because types2 expects the receiver type for methods - // on defined interface types to be the Named rather than the - // underlying Interface), maybe it should just handle calling - // SetTypeParams, SetUnderlying, and AddMethod instead? Those - // methods would need to support reentrant calls though. It would - // also make the API more future-proof towards further extensions - // (like SetTypeParams). - t.tparams, t.underlying, t.methods = t.resolver(ctxt, t) - t.fromRHS = t.underlying // for cycle detection - }) - return t + // TODO(rfindley): if n.check is non-nil we can avoid locking here, since + // type-checking is not concurrent. Evaluate if this is worth doing. + n.mu.Lock() + defer n.mu.Unlock() + + if n.state() >= resolved { + return n + } + + if n.TypeArgs().Len() > 0 { + assert(n.underlying == nil) // n is an unresolved instance + assert(n.loader == nil) // instances are created by instantiation, in which case n.loader is nil + n.orig.resolve(ctxt) + + underlying := n.expandUnderlying(ctxt) + + n.tparams = n.orig.tparams + n.underlying = underlying + n.fromRHS = n.orig.fromRHS // for cycle detection + n.methods = newLazyMethodList(n.orig.methods.Len()) + n.setState(resolved) + return n + } + + // TODO(mdempsky): Since we're passing n to the loader anyway + // (necessary because types2 expects the receiver type for methods + // on defined interface types to be the Named rather than the + // underlying Interface), maybe it should just handle calling + // SetTypeParams, SetUnderlying, and AddMethod instead? Those + // methods would need to support reentrant calls though. It would + // also make the API more future-proof towards further extensions. + if n.loader != nil { + assert(n.underlying == nil) + + tparams, underlying, methods := n.loader(n) + + n.tparams = bindTParams(tparams) + n.underlying = underlying + n.fromRHS = underlying // for cycle detection + n.methods = newMethodList(methods) + n.loader = nil + } + + n.setState(resolved) + return n +} + +// state atomically accesses the current state of the receiver. +func (n *Named) state() namedState { + return namedState(atomic.LoadUint32(&n.state_)) +} + +// setState atomically stores the given state for n. +// Must only be called while holding n.mu. +func (n *Named) setState(state namedState) { + atomic.StoreUint32(&n.state_, uint32(state)) } // newNamed is like NewNamed but with a *Checker receiver and additional orig argument. @@ -80,16 +214,16 @@ func (check *Checker) newNamed(obj *TypeName, orig *Named, underlying Type, meth func (t *Named) cleanup() { assert(t.orig.orig == t.orig) // Ensure that every defined type created in the course of type-checking has - // either non-*Named underlying, or is unresolved. + // either non-*Named underlying type, or is unexpanded. // - // This guarantees that we don't leak any types whose underlying is *Named, - // because any unresolved instances will lazily compute their underlying by - // substituting in the underlying of their origin. The origin must have - // either been imported or type-checked and expanded here, and in either case - // its underlying will be fully expanded. + // This guarantees that we don't leak any types whose underlying type is + // *Named, because any unexpanded instances will lazily compute their + // underlying type by substituting in the underlying type of their origin. + // The origin must have either been imported or type-checked and expanded + // here, and in either case its underlying type will be fully expanded. switch t.underlying.(type) { case nil: - if t.resolver == nil { + if t.TypeArgs().Len() == 0 { panic("nil underlying") } case *Named: @@ -136,12 +270,13 @@ func (t *Named) NumMethods() int { return t.resolve(nil).methods.Len() } func (t *Named) Method(i int) *Func { t.resolve(nil) return t.methods.At(i, func() *Func { - return t.instantiateMethod(i) + return t.expandMethod(i) }) } -// instantiateMethod instantiates the i'th method for an instantiated receiver. -func (t *Named) instantiateMethod(i int) *Func { +// expandMethod substitutes type arguments in the i'th method for an +// instantiated receiver. +func (t *Named) expandMethod(i int) *Func { assert(t.TypeArgs().Len() > 0) // t must be an instance // t.orig.methods is not lazy. origm is the method instantiated with its @@ -277,7 +412,7 @@ func (n0 *Named) under() Type { check := n0.check n := n0 - seen := make(map[*Named]int) // types that need their underlying resolved + seen := make(map[*Named]int) // types that need their underlying type resolved var path []Object // objects encountered, for cycle reporting loop: @@ -354,66 +489,64 @@ func (check *Checker) bestContext(ctxt *Context) *Context { return NewContext() } -// expandNamed ensures that the underlying type of n is instantiated. -// The underlying type will be Typ[Invalid] if there was an error. -func expandNamed(ctxt *Context, n *Named, instPos token.Pos) (tparams *TypeParamList, underlying Type, methods *methodList) { +// expandUnderlying substitutes type arguments in the underlying type n.orig, +// returning the result. Returns Typ[Invalid] if there was an error. +func (n *Named) expandUnderlying(ctxt *Context) Type { check := n.check if check != nil && trace { - check.trace(instPos, "-- expandNamed %s", n) + check.trace(n.obj.pos, "-- Named.expandUnderlying %s", n) check.indent++ defer func() { check.indent-- - check.trace(instPos, "=> %s (tparams = %s, under = %s)", n, tparams.list(), underlying) + check.trace(n.obj.pos, "=> %s (tparams = %s, under = %s)", n, n.tparams.list(), n.underlying) }() } - n.orig.resolve(ctxt) assert(n.orig.underlying != nil) if _, unexpanded := n.orig.underlying.(*Named); unexpanded { - // We should only get an unexpanded underlying here during type checking + // We should only get a Named underlying type here during type checking // (for example, in recursive type declarations). assert(check != nil) } - // Mismatching arg and tparam length may be checked elsewhere. - if n.orig.tparams.Len() == n.targs.Len() { - // We must always have a context, to avoid infinite recursion. - ctxt = check.bestContext(ctxt) - h := ctxt.instanceHash(n.orig, n.targs.list()) - // ensure that an instance is recorded for h to avoid infinite recursion. - ctxt.update(h, n.orig, n.TypeArgs().list(), n) - - smap := makeSubstMap(n.orig.tparams.list(), n.targs.list()) - underlying = n.check.subst(instPos, n.orig.underlying, smap, ctxt) - // If the underlying of n is an interface, we need to set the receiver of - // its methods accurately -- we set the receiver of interface methods on - // the RHS of a type declaration to the defined type. - if iface, _ := underlying.(*Interface); iface != nil { - if methods, copied := replaceRecvType(iface.methods, n.orig, n); copied { - // If the underlying doesn't actually use type parameters, it's possible - // that it wasn't substituted. In this case we need to create a new - // *Interface before modifying receivers. - if iface == n.orig.underlying { - old := iface - iface = check.newInterface() - iface.embeddeds = old.embeddeds - iface.complete = old.complete - iface.implicit = old.implicit // should be false but be conservative - underlying = iface - } - iface.methods = methods - } - } - } else { - underlying = Typ[Invalid] + if n.orig.tparams.Len() != n.targs.Len() { + // Mismatching arg and tparam length may be checked elsewhere. + return Typ[Invalid] } - return n.orig.tparams, underlying, newLazyMethodList(n.orig.methods.Len()) + // We must always have a context, to avoid infinite recursion. + ctxt = check.bestContext(ctxt) + h := ctxt.instanceHash(n.orig, n.targs.list()) + // ensure that an instance is recorded for h to avoid infinite recursion. + ctxt.update(h, n.orig, n.TypeArgs().list(), n) + + smap := makeSubstMap(n.orig.tparams.list(), n.targs.list()) + underlying := n.check.subst(n.obj.pos, n.orig.underlying, smap, ctxt) + // If the underlying type of n is an interface, we need to set the receiver + // of its methods accurately -- we set the receiver of interface methods on + // the RHS of a type declaration to the defined type. + if iface, _ := underlying.(*Interface); iface != nil { + if methods, copied := replaceRecvType(iface.methods, n.orig, n); copied { + // If the underlying type doesn't actually use type parameters, it's + // possible that it wasn't substituted. In this case we need to create + // a new *Interface before modifying receivers. + if iface == n.orig.underlying { + old := iface + iface = check.newInterface() + iface.embeddeds = old.embeddeds + iface.complete = old.complete + iface.implicit = old.implicit // should be false but be conservative + underlying = iface + } + iface.methods = methods + } + } + return underlying } -// safeUnderlying returns the underlying of typ without expanding instances, to -// avoid infinite recursion. +// safeUnderlying returns the underlying type of typ without expanding +// instances, to avoid infinite recursion. // // TODO(rfindley): eliminate this function or give it a better name. func safeUnderlying(typ Type) Type { diff --git a/src/go/types/named_test.go b/src/go/types/named_test.go new file mode 100644 index 00000000000..74cdb488896 --- /dev/null +++ b/src/go/types/named_test.go @@ -0,0 +1,88 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package types_test + +import ( + "testing" + + . "go/types" +) + +func BenchmarkNamed(b *testing.B) { + const src = ` +package p + +type T struct { + P int +} + +func (T) M(int) {} +func (T) N() (i int) { return } + +type G[P any] struct { + F P +} + +func (G[P]) M(P) {} +func (G[P]) N() (p P) { return } + +type Inst = G[int] + ` + pkg, err := pkgForMode("p", src, nil, 0) + if err != nil { + b.Fatal(err) + } + + var ( + T = pkg.Scope().Lookup("T").Type() + G = pkg.Scope().Lookup("G").Type() + SrcInst = pkg.Scope().Lookup("Inst").Type() + UserInst = mustInstantiate(b, G, Typ[Int]) + ) + + tests := []struct { + name string + typ Type + }{ + {"nongeneric", T}, + {"generic", G}, + {"src instance", SrcInst}, + {"user instance", UserInst}, + } + + b.Run("Underlying", func(b *testing.B) { + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + // Access underlying once, to trigger any lazy calculation. + _ = test.typ.Underlying() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = test.typ.Underlying() + } + }) + } + }) + + b.Run("NewMethodSet", func(b *testing.B) { + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + // Access underlying once, to trigger any lazy calculation. + _ = NewMethodSet(test.typ) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = NewMethodSet(test.typ) + } + }) + } + }) +} + +func mustInstantiate(tb testing.TB, orig Type, targs ...Type) Type { + inst, err := Instantiate(nil, orig, targs, true) + if err != nil { + tb.Fatal(err) + } + return inst +} diff --git a/src/go/types/object.go b/src/go/types/object.go index ae138a58795..f203b0113d2 100644 --- a/src/go/types/object.go +++ b/src/go/types/object.go @@ -233,19 +233,7 @@ func NewTypeName(pos token.Pos, pkg *Package, name string, typ Type) *TypeName { // lazily calls resolve to finish constructing the Named object. func _NewTypeNameLazy(pos token.Pos, pkg *Package, name string, load func(named *Named) (tparams []*TypeParam, underlying Type, methods []*Func)) *TypeName { obj := NewTypeName(pos, pkg, name, nil) - - resolve := func(_ *Context, t *Named) (*TypeParamList, Type, *methodList) { - tparams, underlying, methods := load(t) - - switch underlying.(type) { - case nil, *Named: - panic(fmt.Sprintf("invalid underlying type %T", t.underlying)) - } - - return bindTParams(tparams), underlying, newMethodList(methods) - } - - NewNamed(obj, nil, nil).resolver = resolve + NewNamed(obj, nil, nil).loader = load return obj } diff --git a/src/go/types/typexpr.go b/src/go/types/typexpr.go index 7afc66a9251..c7161e00a55 100644 --- a/src/go/types/typexpr.go +++ b/src/go/types/typexpr.go @@ -385,14 +385,13 @@ func (check *Checker) typInternal(e0 ast.Expr, def *Named) (T Type) { } func (check *Checker) instantiatedType(ix *typeparams.IndexExpr, def *Named) (res Type) { - pos := ix.X.Pos() if trace { - check.trace(pos, "-- instantiating type %s with %s", ix.X, ix.Indices) + check.trace(ix.Pos(), "-- instantiating type %s with %s", ix.X, ix.Indices) check.indent++ defer func() { check.indent-- // Don't format the underlying here. It will always be nil. - check.trace(pos, "=> %s", res) + check.trace(ix.Pos(), "=> %s", res) }() } @@ -417,57 +416,20 @@ func (check *Checker) instantiatedType(ix *typeparams.IndexExpr, def *Named) (re return Typ[Invalid] } - // enableTypeTypeInference controls whether to infer missing type arguments - // using constraint type inference. See issue #51527. - const enableTypeTypeInference = false - // create the instance ctxt := check.bestContext(nil) - h := ctxt.instanceHash(orig, targs) - // targs may be incomplete, and require inference. In any case we should de-duplicate. - inst, _ := ctxt.lookup(h, orig, targs).(*Named) - // If inst is non-nil, we can't just return here. Inst may have been - // constructed via recursive substitution, in which case we wouldn't do the - // validation below. Ensure that the validation (and resulting errors) runs - // for each instantiated type in the source. - if inst == nil { - // x may be a selector for an imported type; use its start pos rather than x.Pos(). - tname := NewTypeName(ix.Pos(), orig.obj.pkg, orig.obj.name, nil) - inst = check.newNamed(tname, orig, nil, nil) // underlying, methods and tparams are set when named is resolved - inst.targs = newTypeList(targs) - inst = ctxt.update(h, orig, targs, inst).(*Named) - } + inst := check.instance(ix.Pos(), orig, targs, ctxt).(*Named) def.setUnderlying(inst) - inst.resolver = func(ctxt *Context, n *Named) (*TypeParamList, Type, *methodList) { - tparams := n.orig.TypeParams().list() - - targs := n.targs.list() - if enableTypeTypeInference && len(targs) < len(tparams) { - // If inference fails, len(inferred) will be 0, and inst.underlying will - // be set to Typ[Invalid] in expandNamed. - inferred := check.infer(ix.Orig, tparams, targs, nil, nil) - if len(inferred) > len(targs) { - n.targs = newTypeList(inferred) - } - } - - return expandNamed(ctxt, n, pos) - } - // orig.tparams may not be set up, so we need to do expansion later. check.later(func() { // This is an instance from the source, not from recursive substitution, // and so it must be resolved during type-checking so that we can report // errors. - inst.resolve(ctxt) - // Since check is non-nil, we can still mutate inst. Unpinning the resolver - // frees some memory. - inst.resolver = nil check.recordInstance(ix.Orig, inst.TypeArgs().list(), inst) - if check.validateTArgLen(pos, inst.tparams.Len(), inst.targs.Len()) { - if i, err := check.verify(pos, inst.tparams.list(), inst.targs.list()); err != nil { + if check.validateTArgLen(ix.Pos(), inst.TypeParams().Len(), inst.targs.Len()) { + if i, err := check.verify(ix.Pos(), inst.TypeParams().list(), inst.targs.list()); err != nil { // best position for error reporting pos := ix.Pos() if i < len(ix.Indices) { @@ -475,10 +437,13 @@ func (check *Checker) instantiatedType(ix *typeparams.IndexExpr, def *Named) (re } check.softErrorf(atPos(pos), _InvalidTypeArg, err.Error()) } else { - check.mono.recordInstance(check.pkg, pos, inst.tparams.list(), inst.targs.list(), ix.Indices) + check.mono.recordInstance(check.pkg, ix.Pos(), inst.TypeParams().list(), inst.targs.list(), ix.Indices) } } + // TODO(rfindley): remove this call: we don't need to call validType here, + // as cycles can only occur for types used inside a Named type declaration, + // and so it suffices to call validType from declared types. check.validType(inst) }).describef(ix, "resolve instance %s", inst)