1
0
mirror of https://github.com/golang/go synced 2024-11-16 20:04:52 -07:00

go/types, types2: use type nest to detect type cycles (fix validType)

validType was using a global type info map to detect invalid recursive
types, which was incorrect. Instead, change the algorithm as follows:

- Rather than using a "seen" (or typeInfo) map which is cumbersome to
  update correctly, use the stack of embedding types (the type nest)
  to check whether a type is embedded within itself, directly or
  indirectly.

- Use Identical for type comparisons which correctly considers identity
  of instantiated generic types.

- As before, maintain the full path of types leading to a cycle. But
  unlike before, track the named types rather than their objects, for
  a smaller slice ([]*Named rather than []Object), and convert to an
  object list only when needed for error reporting.

- As an optimization, keep track of valid *Named types (Checker.valids).
  This prevents pathological cases from consuming excessive computation
  time.

- Add clarifying comments and document invariants.

Based on earlier insights by David Chase (see also CL 408818).

Fixes #52698.

Change-Id: I5e4598c58afcf4ab987a426c5c4b7b28bdfcf5ea
Reviewed-on: https://go-review.googlesource.com/c/go/+/409694
Reviewed-by: Robert Findley <rfindley@google.com>
Run-TryBot: Robert Griesemer <gri@google.com>
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Robert Griesemer 2022-05-31 20:59:55 -07:00
parent 770146d5a8
commit 07eca49055
6 changed files with 450 additions and 99 deletions

View File

@ -98,7 +98,7 @@ type Checker struct {
nextID uint64 // unique Id for type parameters (first valid Id is 1)
objMap map[Object]*declInfo // maps package-level objects and (non-interface) methods to declaration info
impMap map[importKey]*Package // maps (import path, source directory) to (complete or fake) package
infoMap map[*Named]typeInfo // maps named types to their associated type info (for cycle detection)
valids instanceLookup // valid *Named (incl. instantiated) types per the validType check
// pkgPathMap maps package names to the set of distinct import paths we've
// seen for that name, anywhere in the import graph. It is used for
@ -241,7 +241,6 @@ func NewChecker(conf *Config, pkg *Package, info *Info) *Checker {
version: version,
objMap: make(map[Object]*declInfo),
impMap: make(map[importKey]*Package),
infoMap: make(map[*Named]typeInfo),
}
}

View File

@ -0,0 +1,62 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package p
// correctness check: ensure that cycles through generic instantiations are detected
type T[P any] struct {
_ P
}
type S /* ERROR illegal cycle */ struct {
_ T[S]
}
// simplified test 1
var _ A1[A1[string]]
type A1[P any] struct {
_ B1[P]
}
type B1[P any] struct {
_ P
}
// simplified test 2
var _ B2[A2]
type A2 struct {
_ B2[string]
}
type B2[P any] struct {
_ C2[P]
}
type C2[P any] struct {
_ P
}
// test case from issue
type T23 interface {
~struct {
Field0 T13[T15]
}
}
type T1[P1 interface {
}] struct {
Field2 P1
}
type T13[P2 interface {
}] struct {
Field2 T1[P2]
}
type T15 struct {
Field0 T13[string]
}

View File

@ -5,29 +5,24 @@
package types2
// validType verifies that the given type does not "expand" indefinitely
// producing a cycle in the type graph. Cycles are detected by marking
// defined types.
// producing a cycle in the type graph.
// (Cycles involving alias types, as in "type A = [10]A" are detected
// earlier, via the objDecl cycle detection mechanism.)
func (check *Checker) validType(typ *Named) {
check.validType0(typ, nil, nil)
check.validType0(typ, nil, nil, nil)
}
type typeInfo uint
// validType0 checks if the given type is valid. If typ is a type parameter
// its value is looked up in the provided environment. The environment is
// nil if typ is not part of (the RHS of) an instantiated type, in that case
// any type parameter encountered must be from an enclosing function and can
// be ignored. The path is the list of type names that lead to the current typ.
func (check *Checker) validType0(typ Type, env *tparamEnv, path []Object) typeInfo {
const (
unknown typeInfo = iota
marked
valid
invalid
)
// be ignored. The nest list describes the stack (the "nest in memory") of
// types which contain (or embed in the case of interfaces) other types. For
// instance, a struct named S which contains a field of named type F contains
// (the memory of) F in S, leading to the nest S->F. If a type appears in its
// own nest (say S->F->S) we have an invalid recursive type. The path list is
// the full path of named types in a cycle, it is only needed for error reporting.
func (check *Checker) validType0(typ Type, env *tparamEnv, nest, path []*Named) bool {
switch t := typ.(type) {
case nil:
// We should never see a nil type but be conservative and panic
@ -37,60 +32,79 @@ func (check *Checker) validType0(typ Type, env *tparamEnv, path []Object) typeIn
}
case *Array:
return check.validType0(t.elem, env, path)
return check.validType0(t.elem, env, nest, path)
case *Struct:
for _, f := range t.fields {
if check.validType0(f.typ, env, path) == invalid {
return invalid
if !check.validType0(f.typ, env, nest, path) {
return false
}
}
case *Union:
for _, t := range t.terms {
if check.validType0(t.typ, env, path) == invalid {
return invalid
if !check.validType0(t.typ, env, nest, path) {
return false
}
}
case *Interface:
for _, etyp := range t.embeddeds {
if check.validType0(etyp, env, path) == invalid {
return invalid
if !check.validType0(etyp, env, nest, path) {
return false
}
}
case *Named:
// Exit early if we already know t is valid.
// This is purely an optimization but it prevents excessive computation
// times in pathological cases such as testdata/fixedbugs/issue6977.go.
// (Note: The valids map could also be allocated locally, once for each
// validType call.)
if check.valids.lookup(t) != nil {
break
}
// Don't report a 2nd error if we already know the type is invalid
// (e.g., if a cycle was detected earlier, via under).
// Note: ensure that t.orig is fully resolved by calling Underlying().
if t.Underlying() == Typ[Invalid] {
check.infoMap[t] = invalid
return invalid
return false
}
switch check.infoMap[t] {
case unknown:
check.infoMap[t] = marked
check.infoMap[t] = check.validType0(t.Origin().fromRHS, env.push(t), append(path, t.obj))
case marked:
// We have seen type t before and thus must have a cycle.
check.infoMap[t] = invalid
// t cannot be in an imported package otherwise that package
// would have reported a type cycle and couldn't have been
// imported in the first place.
assert(t.obj.pkg == check.pkg)
t.underlying = Typ[Invalid] // t is in the current package (no race possibility)
// Find the starting point of the cycle and report it.
for i, tn := range path {
if tn == t.obj {
check.cycleError(path[i:])
return invalid
// If the current type t is also found in nest, (the memory of) t is
// embedded in itself, indicating an invalid recursive type.
for _, e := range nest {
if Identical(e, t) {
// t cannot be in an imported package otherwise that package
// would have reported a type cycle and couldn't have been
// imported in the first place.
assert(t.obj.pkg == check.pkg)
t.underlying = Typ[Invalid] // t is in the current package (no race possibility)
// Find the starting point of the cycle and report it.
// Because each type in nest must also appear in path (see invariant below),
// type t must be in path since it was found in nest. But not every type in path
// is in nest. Specifically t may appear in path with an earlier index than the
// index of t in nest. Search again.
for start, p := range path {
if Identical(p, t) {
check.cycleError(makeObjList(path[start:]))
return false
}
}
panic("cycle start not found")
}
panic("cycle start not found")
}
return check.infoMap[t]
// No cycle was found. Check the RHS of t.
// Every type added to nest is also added to path; thus every type that is in nest
// must also be in path (invariant). But not every type in path is in nest, since
// nest may be pruned (see below, *TypeParam case).
if !check.validType0(t.Origin().fromRHS, env.push(t), append(nest, t), append(path, t)) {
return false
}
check.valids.add(t) // t is valid
case *TypeParam:
// A type parameter stands for the type (argument) it was instantiated with.
@ -98,13 +112,29 @@ func (check *Checker) validType0(typ Type, env *tparamEnv, path []Object) typeIn
if env != nil {
if targ := env.tmap[t]; targ != nil {
// Type arguments found in targ must be looked
// up in the enclosing environment env.link.
return check.validType0(targ, env.link, path)
// up in the enclosing environment env.link. The
// type argument must be valid in the enclosing
// type (where the current type was instantiated),
// hence we must check targ's validity in the type
// nest excluding the current (instantiated) type
// (see the example at the end of this file).
// For error reporting we keep the full path.
return check.validType0(targ, env.link, nest[:len(nest)-1], path)
}
}
}
return valid
return true
}
// makeObjList returns the list of type name objects for the given
// list of named types.
func makeObjList(tlist []*Named) []Object {
olist := make([]Object, len(tlist))
for i, t := range tlist {
olist[i] = t.obj
}
return olist
}
// A tparamEnv provides the environment for looking up the type arguments
@ -146,3 +176,93 @@ func (env *tparamEnv) push(typ *Named) *tparamEnv {
// same information should be available via the path:
// We should be able to just walk the path backwards
// and find the type arguments in the instance objects.
// Here is an example illustrating why we need to exclude the
// instantiated type from nest when evaluating the validity of
// a type parameter. Given the declarations
//
// var _ A[A[string]]
//
// type A[P any] struct { _ B[P] }
// type B[P any] struct { _ P }
//
// we want to determine if the type A[A[string]] is valid.
// We start evaluating A[A[string]] outside any type nest:
//
// A[A[string]]
// nest =
// path =
//
// The RHS of A is now evaluated in the A[A[string]] nest:
//
// struct{_ B[P₁]}
// nest = A[A[string]]
// path = A[A[string]]
//
// The struct has a single field of type B[P₁] with which
// we continue:
//
// B[P₁]
// nest = A[A[string]]
// path = A[A[string]]
//
// struct{_ P₂}
// nest = A[A[string]]->B[P]
// path = A[A[string]]->B[P]
//
// Eventutally we reach the type parameter P of type B (P₂):
//
// P₂
// nest = A[A[string]]->B[P]
// path = A[A[string]]->B[P]
//
// The type argument for P of B is the type parameter P of A (P₁).
// It must be evaluated in the type nest that existed when B was
// instantiated:
//
// P₁
// nest = A[A[string]] <== type nest at B's instantiation time
// path = A[A[string]]->B[P]
//
// If we'd use the current nest it would correspond to the path
// which will be wrong as we will see shortly. P's type argument
// is A[string], which again must be evaluated in the type nest
// that existed when A was instantiated with A[string]. That type
// nest is empty:
//
// A[string]
// nest = <== type nest at A's instantiation time
// path = A[A[string]]->B[P]
//
// Evaluation then proceeds as before for A[string]:
//
// struct{_ B[P₁]}
// nest = A[string]
// path = A[A[string]]->B[P]->A[string]
//
// Now we reach B[P] again. If we had not adjusted nest, it would
// correspond to path, and we would find B[P] in nest, indicating
// a cycle, which would clearly be wrong since there's no cycle in
// A[string]:
//
// B[P₁]
// nest = A[string]
// path = A[A[string]]->B[P]->A[string] <== path contains B[P]!
//
// But because we use the correct type nest, evaluation proceeds without
// errors and we get the evaluation sequence:
//
// struct{_ P₂}
// nest = A[string]->B[P]
// path = A[A[string]]->B[P]->A[string]->B[P]
// P₂
// nest = A[string]->B[P]
// path = A[A[string]]->B[P]->A[string]->B[P]
// P₁
// nest = A[string]
// path = A[A[string]]->B[P]->A[string]->B[P]
// string
// nest =
// path = A[A[string]]->B[P]->A[string]->B[P]
//
// At this point we're done and A[A[string]] and is valid.

View File

@ -105,7 +105,7 @@ type Checker struct {
nextID uint64 // unique Id for type parameters (first valid Id is 1)
objMap map[Object]*declInfo // maps package-level objects and (non-interface) methods to declaration info
impMap map[importKey]*Package // maps (import path, source directory) to (complete or fake) package
infoMap map[*Named]typeInfo // maps named types to their associated type info (for cycle detection)
valids instanceLookup // valid *Named (incl. instantiated) types per the validType check
// pkgPathMap maps package names to the set of distinct import paths we've
// seen for that name, anywhere in the import graph. It is used for
@ -249,7 +249,6 @@ func NewChecker(conf *Config, fset *token.FileSet, pkg *Package, info *Info) *Ch
version: version,
objMap: make(map[Object]*declInfo),
impMap: make(map[importKey]*Package),
infoMap: make(map[*Named]typeInfo),
}
}

View File

@ -0,0 +1,50 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package p
// sanity check
type T[P any] struct {
_ P
}
type S /* ERROR illegal cycle */ struct {
_ T[S]
}
// simplified test
var _ B[A]
type A struct {
_ B[string]
}
type B[P any] struct {
_ C[P]
}
type C[P any] struct {
_ P
}
// test case from issue
type T23 interface {
~struct {
Field0 T13[T15]
}
}
type T1[P1 interface {
}] struct {
Field2 P1
}
type T13[P2 interface {
}] struct {
Field2 T1[P2]
}
type T15 struct {
Field0 T13[string]
}

View File

@ -5,29 +5,24 @@
package types
// validType verifies that the given type does not "expand" indefinitely
// producing a cycle in the type graph. Cycles are detected by marking
// defined types.
// producing a cycle in the type graph.
// (Cycles involving alias types, as in "type A = [10]A" are detected
// earlier, via the objDecl cycle detection mechanism.)
func (check *Checker) validType(typ *Named) {
check.validType0(typ, nil, nil)
check.validType0(typ, nil, nil, nil)
}
type typeInfo uint
// validType0 checks if the given type is valid. If typ is a type parameter
// its value is looked up in the provided environment. The environment is
// nil if typ is not part of (the RHS of) an instantiated type, in that case
// any type parameter encountered must be from an enclosing function and can
// be ignored. The path is the list of type names that lead to the current typ.
func (check *Checker) validType0(typ Type, env *tparamEnv, path []Object) typeInfo {
const (
unknown typeInfo = iota
marked
valid
invalid
)
// be ignored. The nest list describes the stack (the "nest in memory") of
// types which contain (or embed in the case of interfaces) other types. For
// instance, a struct named S which contains a field of named type F contains
// (the memory of) F in S, leading to the nest S->F. If a type appears in its
// own nest (say S->F->S) we have an invalid recursive type. The path list is
// the full path of named types in a cycle, it is only needed for error reporting.
func (check *Checker) validType0(typ Type, env *tparamEnv, nest, path []*Named) bool {
switch t := typ.(type) {
case nil:
// We should never see a nil type but be conservative and panic
@ -37,59 +32,79 @@ func (check *Checker) validType0(typ Type, env *tparamEnv, path []Object) typeIn
}
case *Array:
return check.validType0(t.elem, env, path)
return check.validType0(t.elem, env, nest, path)
case *Struct:
for _, f := range t.fields {
if check.validType0(f.typ, env, path) == invalid {
return invalid
if !check.validType0(f.typ, env, nest, path) {
return false
}
}
case *Union:
for _, t := range t.terms {
if check.validType0(t.typ, env, path) == invalid {
return invalid
if !check.validType0(t.typ, env, nest, path) {
return false
}
}
case *Interface:
for _, etyp := range t.embeddeds {
if check.validType0(etyp, env, path) == invalid {
return invalid
if !check.validType0(etyp, env, nest, path) {
return false
}
}
case *Named:
// Don't report a 2nd error if we already know the type is invalid
// Note: ensure that t.orig is fully resolved by calling Underlying().
if t.Underlying() == Typ[Invalid] {
check.infoMap[t] = invalid
return invalid
// Exit early if we already know t is valid.
// This is purely an optimization but it prevents excessive computation
// times in pathological cases such as testdata/fixedbugs/issue6977.go.
// (Note: The valids map could also be allocated locally, once for each
// validType call.)
if check.valids.lookup(t) != nil {
break
}
switch check.infoMap[t] {
case unknown:
check.infoMap[t] = marked
check.infoMap[t] = check.validType0(t.Origin().fromRHS, env.push(t), append(path, t.obj))
case marked:
// We have seen type t before and thus must have a cycle.
check.infoMap[t] = invalid
// t cannot be in an imported package otherwise that package
// would have reported a type cycle and couldn't have been
// imported in the first place.
assert(t.obj.pkg == check.pkg)
t.underlying = Typ[Invalid] // t is in the current package (no race possibility)
// Find the starting point of the cycle and report it.
for i, tn := range path {
if tn == t.obj {
check.cycleError(path[i:])
return invalid
}
}
panic("cycle start not found")
// Don't report a 2nd error if we already know the type is invalid
// (e.g., if a cycle was detected earlier, via under).
// Note: ensure that t.orig is fully resolved by calling Underlying().
if t.Underlying() == Typ[Invalid] {
return false
}
return check.infoMap[t]
// If the current type t is also found in nest, (the memory of) t is
// embedded in itself, indicating an invalid recursive type.
for _, e := range nest {
if Identical(e, t) {
// t cannot be in an imported package otherwise that package
// would have reported a type cycle and couldn't have been
// imported in the first place.
assert(t.obj.pkg == check.pkg)
t.underlying = Typ[Invalid] // t is in the current package (no race possibility)
// Find the starting point of the cycle and report it.
// Because each type in nest must also appear in path (see invariant below),
// type t must be in path since it was found in nest. But not every type in path
// is in nest. Specifically t may appear in path with an earlier index than the
// index of t in nest. Search again.
for start, p := range path {
if Identical(p, t) {
check.cycleError(makeObjList(path[start:]))
return false
}
}
panic("cycle start not found")
}
}
// No cycle was found. Check the RHS of t.
// Every type added to nest is also added to path; thus every type that is in nest
// must also be in path (invariant). But not every type in path is in nest, since
// nest may be pruned (see below, *TypeParam case).
if !check.validType0(t.Origin().fromRHS, env.push(t), append(nest, t), append(path, t)) {
return false
}
check.valids.add(t) // t is valid
case *TypeParam:
// A type parameter stands for the type (argument) it was instantiated with.
@ -97,13 +112,29 @@ func (check *Checker) validType0(typ Type, env *tparamEnv, path []Object) typeIn
if env != nil {
if targ := env.tmap[t]; targ != nil {
// Type arguments found in targ must be looked
// up in the enclosing environment env.link.
return check.validType0(targ, env.link, path)
// up in the enclosing environment env.link. The
// type argument must be valid in the enclosing
// type (where the current type was instantiated),
// hence we must check targ's validity in the type
// nest excluding the current (instantiated) type
// (see the example at the end of this file).
// For error reporting we keep the full path.
return check.validType0(targ, env.link, nest[:len(nest)-1], path)
}
}
}
return valid
return true
}
// makeObjList returns the list of type name objects for the given
// list of named types.
func makeObjList(tlist []*Named) []Object {
olist := make([]Object, len(tlist))
for i, t := range tlist {
olist[i] = t.obj
}
return olist
}
// A tparamEnv provides the environment for looking up the type arguments
@ -145,3 +176,93 @@ func (env *tparamEnv) push(typ *Named) *tparamEnv {
// same information should be available via the path:
// We should be able to just walk the path backwards
// and find the type arguments in the instance objects.
// Here is an example illustrating why we need to exclude the
// instantiated type from nest when evaluating the validity of
// a type parameter. Given the declarations
//
// var _ A[A[string]]
//
// type A[P any] struct { _ B[P] }
// type B[P any] struct { _ P }
//
// we want to determine if the type A[A[string]] is valid.
// We start evaluating A[A[string]] outside any type nest:
//
// A[A[string]]
// nest =
// path =
//
// The RHS of A is now evaluated in the A[A[string]] nest:
//
// struct{_ B[P₁]}
// nest = A[A[string]]
// path = A[A[string]]
//
// The struct has a single field of type B[P₁] with which
// we continue:
//
// B[P₁]
// nest = A[A[string]]
// path = A[A[string]]
//
// struct{_ P₂}
// nest = A[A[string]]->B[P]
// path = A[A[string]]->B[P]
//
// Eventutally we reach the type parameter P of type B (P₂):
//
// P₂
// nest = A[A[string]]->B[P]
// path = A[A[string]]->B[P]
//
// The type argument for P of B is the type parameter P of A (P₁).
// It must be evaluated in the type nest that existed when B was
// instantiated:
//
// P₁
// nest = A[A[string]] <== type nest at B's instantiation time
// path = A[A[string]]->B[P]
//
// If we'd use the current nest it would correspond to the path
// which will be wrong as we will see shortly. P's type argument
// is A[string], which again must be evaluated in the type nest
// that existed when A was instantiated with A[string]. That type
// nest is empty:
//
// A[string]
// nest = <== type nest at A's instantiation time
// path = A[A[string]]->B[P]
//
// Evaluation then proceeds as before for A[string]:
//
// struct{_ B[P₁]}
// nest = A[string]
// path = A[A[string]]->B[P]->A[string]
//
// Now we reach B[P] again. If we had not adjusted nest, it would
// correspond to path, and we would find B[P] in nest, indicating
// a cycle, which would clearly be wrong since there's no cycle in
// A[string]:
//
// B[P₁]
// nest = A[string]
// path = A[A[string]]->B[P]->A[string] <== path contains B[P]!
//
// But because we use the correct type nest, evaluation proceeds without
// errors and we get the evaluation sequence:
//
// struct{_ P₂}
// nest = A[string]->B[P]
// path = A[A[string]]->B[P]->A[string]->B[P]
// P₂
// nest = A[string]->B[P]
// path = A[A[string]]->B[P]->A[string]->B[P]
// P₁
// nest = A[string]
// path = A[A[string]]->B[P]->A[string]->B[P]
// string
// nest =
// path = A[A[string]]->B[P]->A[string]->B[P]
//
// At this point we're done and A[A[string]] and is valid.