diff --git a/src/cmd/compile/internal/ssa/nilcheck.go b/src/cmd/compile/internal/ssa/nilcheck.go index ac30b705e4..aa6424fe41 100644 --- a/src/cmd/compile/internal/ssa/nilcheck.go +++ b/src/cmd/compile/internal/ssa/nilcheck.go @@ -61,6 +61,11 @@ func nilcheckelim(f *Func) { } } + // allocate auxiliary date structures for computing store order + sset := f.newSparseSet(f.NumValues()) + defer f.retSparseSet(sset) + storeNumber := make([]int32, f.NumValues()) + // perform a depth first walk of the dominee tree for len(work) > 0 { node := work[len(work)-1] @@ -82,7 +87,10 @@ func nilcheckelim(f *Func) { } } - // Next, eliminate any redundant nil checks in this block. + // Next, order values in the current block w.r.t. stores. + b.Values = storeOrder(b.Values, sset, storeNumber) + + // Next, process values in the block. i := 0 for _, v := range b.Values { b.Values[i] = v @@ -109,6 +117,10 @@ func nilcheckelim(f *Func) { i-- continue } + // Record the fact that we know ptr is non nil, and remember to + // undo that information when this dominator subtree is done. + nonNilValues[ptr.ID] = true + work = append(work, bp{op: ClearPtr, ptr: ptr}) } } for j := i; j < len(b.Values); j++ { @@ -116,21 +128,6 @@ func nilcheckelim(f *Func) { } b.Values = b.Values[:i] - // Finally, find redundant nil checks for subsequent blocks. - // Note that we can't add these until the loop above is done, as the - // values in the block are not ordered in any way when this pass runs. - // This was the cause of issue #18725. - for _, v := range b.Values { - if v.Op != OpNilCheck { - continue - } - ptr := v.Args[0] - // Record the fact that we know ptr is non nil, and remember to - // undo that information when this dominator subtree is done. - nonNilValues[ptr.ID] = true - work = append(work, bp{op: ClearPtr, ptr: ptr}) - } - // Add all dominated blocks to the work list. for w := sdom[node.block.ID].child; w != nil; w = sdom[w.ID].sibling { work = append(work, bp{op: Work, block: w}) @@ -230,3 +227,163 @@ func nilcheckelim2(f *Func) { // more unnecessary nil checks. Would fix test/nilptr3_ssa.go:157. } } + +// storeOrder orders values with respect to stores. That is, +// if v transitively depends on store s, v is ordered after s, +// otherwise v is ordered before s. +// Specifically, values are ordered like +// store1 +// NilCheck that depends on store1 +// other values that depends on store1 +// store2 +// NilCheck that depends on store2 +// other values that depends on store2 +// ... +// The order of non-store and non-NilCheck values are undefined +// (not necessarily dependency order). This should be cheaper +// than a full scheduling as done in schedule.go. +// Note that simple dependency order won't work: there is no +// dependency between NilChecks and values like IsNonNil. +// Auxiliary data structures are passed in as arguments, so +// that they can be allocated in the caller and be reused. +// This function takes care of reset them. +func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value { + // find all stores + var stores []*Value // members of values that are store values + hasNilCheck := false + sset.clear() // sset is the set of stores that are used in other values + for _, v := range values { + if v.Type.IsMemory() { + stores = append(stores, v) + if v.Op == OpInitMem || v.Op == OpPhi { + continue + } + a := v.Args[len(v.Args)-1] + if v.Op == OpSelect1 { + a = a.Args[len(a.Args)-1] + } + sset.add(a.ID) // record that a is used + } + if v.Op == OpNilCheck { + hasNilCheck = true + } + } + if len(stores) == 0 || !hasNilCheck { + // there is no store or nilcheck, the order does not matter + return values + } + + f := stores[0].Block.Func + + // find last store, which is the one that is not used by other stores + var last *Value + for _, v := range stores { + if !sset.contains(v.ID) { + if last != nil { + f.Fatalf("two stores live simutaneously: %v and %v", v, last) + } + last = v + } + } + + // We assign a store number to each value. Store number is the + // index of the latest store that this value transitively depends. + // The i-th store in the current block gets store number 3*i. A nil + // check that depends on the i-th store gets store number 3*i+1. + // Other values that depends on the i-th store gets store number 3*i+2. + // Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends + // is in the previous block (or no store at all, e.g. value is Const). + // First we assign the number to all stores by walking back the store chain, + // then assign the number to other values in DFS order. + count := make([]int32, 3*(len(stores)+1)) + sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once + for n, w := len(stores), last; n > 0; n-- { + storeNumber[w.ID] = int32(3 * n) + count[3*n]++ + sset.add(w.ID) + if w.Op == OpInitMem || w.Op == OpPhi { + if n != 1 { + f.Fatalf("store order is wrong: there are stores before %v", w) + } + break + } + if w.Op == OpSelect1 { + w = w.Args[0] + } + w = w.Args[len(w.Args)-1] + } + var stack []*Value + for _, v := range values { + if sset.contains(v.ID) { + // in sset means v is a store, or already pushed to stack, or already assigned a store number + continue + } + stack = append(stack, v) + sset.add(v.ID) + + for len(stack) > 0 { + w := stack[len(stack)-1] + if storeNumber[w.ID] != 0 { + stack = stack[:len(stack)-1] + continue + } + if w.Op == OpPhi { + // Phi value doesn't depend on store in the current block. + // Do this early to avoid dependency cycle. + storeNumber[w.ID] = 2 + count[2]++ + stack = stack[:len(stack)-1] + continue + } + + max := int32(0) // latest store dependency + argsdone := true + for _, a := range w.Args { + if a.Block != w.Block { + continue + } + if !sset.contains(a.ID) { + stack = append(stack, a) + sset.add(a.ID) + argsdone = false + continue + } + if storeNumber[a.ID]/3 > max { + max = storeNumber[a.ID] / 3 + } + } + if !argsdone { + continue + } + + n := 3*max + 2 + if w.Op == OpNilCheck { + n = 3*max + 1 + } + storeNumber[w.ID] = n + count[n]++ + stack = stack[:len(stack)-1] + } + } + + // convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[i] + for i := range count { + if i == 0 { + continue + } + count[i] += count[i-1] + } + if count[len(count)-1] != int32(len(values)) { + f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values) + } + + // place values in count-indexed bins, which are in the desired store order + order := make([]*Value, len(values)) + for _, v := range values { + s := storeNumber[v.ID] + order[count[s-1]] = v + count[s-1]++ + } + + return order +} diff --git a/test/nilptr3.go b/test/nilptr3.go index 7af226b5f4..195c8ca043 100644 --- a/test/nilptr3.go +++ b/test/nilptr3.go @@ -40,23 +40,23 @@ var ( ) func f1() { - _ = *intp // ERROR "removed nil check" + _ = *intp // ERROR "generated nil check" // This one should be removed but the block copy needs // to be turned into its own pseudo-op in order to see // the indirect. - _ = *arrayp // ERROR "removed nil check" + _ = *arrayp // ERROR "generated nil check" // 0-byte indirect doesn't suffice. // we don't registerize globals, so there are no removed.* nil checks. - _ = *array0p // ERROR "removed nil check" _ = *array0p // ERROR "generated nil check" + _ = *array0p // ERROR "removed nil check" - _ = *intp // ERROR "generated nil check" + _ = *intp // ERROR "removed nil check" _ = *arrayp // ERROR "removed nil check" _ = *structp // ERROR "generated nil check" _ = *emptyp // ERROR "generated nil check" - _ = *arrayp // ERROR "generated nil check" + _ = *arrayp // ERROR "removed nil check" } func f2() { @@ -71,15 +71,15 @@ func f2() { empty1p *Empty1 ) - _ = *intp // ERROR "removed.* nil check" - _ = *arrayp // ERROR "removed.* nil check" - _ = *array0p // ERROR "removed.* nil check" - _ = *array0p // ERROR "generated nil check" _ = *intp // ERROR "generated nil check" + _ = *arrayp // ERROR "generated nil check" + _ = *array0p // ERROR "generated nil check" + _ = *array0p // ERROR "removed.* nil check" + _ = *intp // ERROR "removed.* nil check" _ = *arrayp // ERROR "removed.* nil check" _ = *structp // ERROR "generated nil check" _ = *emptyp // ERROR "generated nil check" - _ = *arrayp // ERROR "generated nil check" + _ = *arrayp // ERROR "removed.* nil check" _ = *bigarrayp // ERROR "generated nil check" ARM removed nil check before indirect!! _ = *bigstructp // ERROR "generated nil check" _ = *empty1p // ERROR "generated nil check" @@ -122,16 +122,16 @@ func f3(x *[10000]int) { // x wasn't going to change across the function call. // But it's a little complex to do and in practice doesn't // matter enough. - _ = x[9999] // ERROR "generated nil check" // TODO: fix + _ = x[9999] // ERROR "removed nil check" } func f3a() { x := fx10k() y := fx10k() z := fx10k() - _ = &x[9] // ERROR "removed.* nil check" - y = z _ = &x[9] // ERROR "generated nil check" + y = z + _ = &x[9] // ERROR "removed.* nil check" x = y _ = &x[9] // ERROR "generated nil check" } @@ -139,11 +139,11 @@ func f3a() { func f3b() { x := fx10k() y := fx10k() - _ = &x[9] // ERROR "removed.* nil check" + _ = &x[9] // ERROR "generated nil check" y = x _ = &x[9] // ERROR "removed.* nil check" x = y - _ = &x[9] // ERROR "generated nil check" + _ = &x[9] // ERROR "removed.* nil check" } func fx10() *[10]int @@ -179,15 +179,15 @@ func f4(x *[10]int) { _ = x[9] // ERROR "generated nil check" // bug would like to remove before indirect fx10() - _ = x[9] // ERROR "generated nil check" // TODO: fix + _ = x[9] // ERROR "removed nil check" x = fx10() y := fx10() - _ = &x[9] // ERROR "removed[a-z ]* nil check" + _ = &x[9] // ERROR "generated nil check" y = x _ = &x[9] // ERROR "removed[a-z ]* nil check" x = y - _ = &x[9] // ERROR "generated nil check" + _ = &x[9] // ERROR "removed[a-z ]* nil check" } func f5(p *float32, q *float64, r *float32, s *float64) float64 {