From 1a955bcdb701d788b048a39d7273621729e257bc Mon Sep 17 00:00:00 2001 From: vpachkov Date: Thu, 2 Dec 2021 16:18:29 +0300 Subject: [PATCH] reflectdata: unroll a loop in array equal function generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As josharian mentioned, a compare function could benefit from unrolling a loop for arrays. This commit introduces such functionality. name old time/op new time/op delta EqArrayOfStrings5-12 12.5ns ± 1% 8.4ns ± 1% -33.05% (p=0.008 n=5+5) EqArrayOfStrings64-12 71.7ns ± 1% 64.1ns ± 1% -10.57% (p=0.008 n=5+5) EqArrayOfStrings1024-12 1.12µs ± 1% 1.01µs ± 0% -9.77% (p=0.008 n=5+5) [Geo mean] 100ns 81ns -18.56% name old time/op new time/op delta EqArrayOfFloats5-12 4.50ns ± 2% 3.32ns ± 1% -26.09% (p=0.008 n=5+5) EqArrayOfFloats64-12 41.3ns ± 1% 35.7ns ± 0% -13.63% (p=0.016 n=5+4) EqArrayOfFloats1024-12 619ns ± 1% 557ns ± 1% -9.95% (p=0.008 n=5+5) [Geo mean] 48.6ns 40.4ns -16.85% Change-Id: If1b69c5cf3fb246bb0275a292118b0b93ad9c9a8 Reviewed-on: https://go-review.googlesource.com/c/go/+/368614 Reviewed-by: Keith Randall Trust: Emmanuel Odeke Run-TryBot: Emmanuel Odeke TryBot-Result: Gopher Robot --- src/cmd/compile/internal/reflectdata/alg.go | 94 ++++++++++++------- .../compile/internal/reflectdata/alg_test.go | 76 +++++++++++++++ 2 files changed, 136 insertions(+), 34 deletions(-) create mode 100644 src/cmd/compile/internal/reflectdata/alg_test.go diff --git a/src/cmd/compile/internal/reflectdata/alg.go b/src/cmd/compile/internal/reflectdata/alg.go index d000618bd64..526315d5570 100644 --- a/src/cmd/compile/internal/reflectdata/alg.go +++ b/src/cmd/compile/internal/reflectdata/alg.go @@ -412,22 +412,25 @@ func geneq(t *types.Type) *obj.LSym { // // if eq(p[0], q[0]) && eq(p[1], q[1]) && ... { // } else { - // return + // goto neq // } // // And so on. // // Otherwise it generates: // - // for i := 0; i < nelem; i++ { - // if eq(p[i], q[i]) { + // iterateTo := nelem/unroll*unroll + // for i := 0; i < iterateTo; i += unroll { + // if eq(p[i+0], q[i+0]) && eq(p[i+1], q[i+1]) && ... && eq(p[i+unroll-1], q[i+unroll-1]) { // } else { // goto neq // } // } + // if eq(p[iterateTo+0], q[iterateTo+0]) && eq(p[iterateTo+1], q[iterateTo+1]) && ... { + // } else { + // goto neq + // } // - // TODO(josharian): consider doing some loop unrolling - // for larger nelem as well, processing a few elements at a time in a loop. checkAll := func(unroll int64, last bool, eq func(pi, qi ir.Node) ir.Node) { // checkIdx generates a node to check for equality at index i. checkIdx := func(i ir.Node) ir.Node { @@ -442,38 +445,62 @@ func geneq(t *types.Type) *obj.LSym { return eq(pi, qi) } - if nelem <= unroll { - if last { - // Do last comparison in a different manner. - nelem-- - } - // Generate a series of checks. - for i := int64(0); i < nelem; i++ { - // if check {} else { goto neq } - nif := ir.NewIfStmt(base.Pos, checkIdx(ir.NewInt(i)), nil, nil) - nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq)) - fn.Body.Append(nif) - } - if last { - fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, checkIdx(ir.NewInt(nelem)))) - } - } else { - // Generate a for loop. - // for i := 0; i < nelem; i++ + iterations := nelem / unroll + iterateTo := iterations * unroll + // If a loop is iterated only once, there shouldn't be any loop at all. + if iterations == 1 { + iterateTo = 0 + } + + if iterateTo > 0 { + // Generate an unrolled for loop. + // for i := 0; i < nelem/unroll*unroll; i += unroll i := typecheck.Temp(types.Types[types.TINT]) init := ir.NewAssignStmt(base.Pos, i, ir.NewInt(0)) - cond := ir.NewBinaryExpr(base.Pos, ir.OLT, i, ir.NewInt(nelem)) - post := ir.NewAssignStmt(base.Pos, i, ir.NewBinaryExpr(base.Pos, ir.OADD, i, ir.NewInt(1))) - loop := ir.NewForStmt(base.Pos, nil, cond, post, nil) + cond := ir.NewBinaryExpr(base.Pos, ir.OLT, i, ir.NewInt(iterateTo)) + loop := ir.NewForStmt(base.Pos, nil, cond, nil, nil) loop.PtrInit().Append(init) - // if eq(pi, qi) {} else { goto neq } - nif := ir.NewIfStmt(base.Pos, checkIdx(i), nil, nil) - nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq)) - loop.Body.Append(nif) - fn.Body.Append(loop) - if last { - fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, ir.NewBool(true))) + + // if eq(p[i+0], q[i+0]) && eq(p[i+1], q[i+1]) && ... && eq(p[i+unroll-1], q[i+unroll-1]) { + // } else { + // goto neq + // } + for j := int64(0); j < unroll; j++ { + // if check {} else { goto neq } + nif := ir.NewIfStmt(base.Pos, checkIdx(i), nil, nil) + nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq)) + loop.Body.Append(nif) + post := ir.NewAssignStmt(base.Pos, i, ir.NewBinaryExpr(base.Pos, ir.OADD, i, ir.NewInt(1))) + loop.Body.Append(post) } + + fn.Body.Append(loop) + + if nelem == iterateTo { + if last { + fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, ir.NewBool(true))) + } + return + } + } + + // Generate remaining checks, if nelem is not a multiple of unroll. + if last { + // Do last comparison in a different manner. + nelem-- + } + // if eq(p[iterateTo+0], q[iterateTo+0]) && eq(p[iterateTo+1], q[iterateTo+1]) && ... { + // } else { + // goto neq + // } + for j := iterateTo; j < nelem; j++ { + // if check {} else { goto neq } + nif := ir.NewIfStmt(base.Pos, checkIdx(ir.NewInt(j)), nil, nil) + nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq)) + fn.Body.Append(nif) + } + if last { + fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, checkIdx(ir.NewInt(nelem)))) } } @@ -481,7 +508,6 @@ func geneq(t *types.Type) *obj.LSym { case types.TSTRING: // Do two loops. First, check that all the lengths match (cheap). // Second, check that all the contents match (expensive). - // TODO: when the array size is small, unroll the length match checks. checkAll(3, false, func(pi, qi ir.Node) ir.Node { // Compare lengths. eqlen, _ := EqString(pi, qi) diff --git a/src/cmd/compile/internal/reflectdata/alg_test.go b/src/cmd/compile/internal/reflectdata/alg_test.go new file mode 100644 index 00000000000..1e57b913fda --- /dev/null +++ b/src/cmd/compile/internal/reflectdata/alg_test.go @@ -0,0 +1,76 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package reflectdata_test + +import "testing" + +func BenchmarkEqArrayOfStrings5(b *testing.B) { + var a [5]string + var c [5]string + + for i := 0; i < 5; i++ { + a[i] = "aaaa" + c[i] = "cccc" + } + + for j := 0; j < b.N; j++ { + _ = a == c + } +} + +func BenchmarkEqArrayOfStrings64(b *testing.B) { + var a [64]string + var c [64]string + + for i := 0; i < 64; i++ { + a[i] = "aaaa" + c[i] = "cccc" + } + + for j := 0; j < b.N; j++ { + _ = a == c + } +} + +func BenchmarkEqArrayOfStrings1024(b *testing.B) { + var a [1024]string + var c [1024]string + + for i := 0; i < 1024; i++ { + a[i] = "aaaa" + c[i] = "cccc" + } + + for j := 0; j < b.N; j++ { + _ = a == c + } +} + +func BenchmarkEqArrayOfFloats5(b *testing.B) { + var a [5]float32 + var c [5]float32 + + for i := 0; i < b.N; i++ { + _ = a == c + } +} + +func BenchmarkEqArrayOfFloats64(b *testing.B) { + var a [64]float32 + var c [64]float32 + + for i := 0; i < b.N; i++ { + _ = a == c + } +} + +func BenchmarkEqArrayOfFloats1024(b *testing.B) { + var a [1024]float32 + var c [1024]float32 + + for i := 0; i < b.N; i++ { + _ = a == c + } +}