From f03c9202c43e0abb130669852082117ca50aa9b1 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Wed, 7 Jan 2015 17:44:49 -0800 Subject: [PATCH] cmd/gc: optimize memclr of slices and arrays Recognize loops of the form for i := range a { a[i] = zero } in which the evaluation of a is free from side effects. Replace these loops with calls to memclr. This occurs in the stdlib in 18 places. The motivating example is clearing a byte slice: benchmark old ns/op new ns/op delta BenchmarkGoMemclr5 3.31 3.26 -1.51% BenchmarkGoMemclr16 13.7 3.28 -76.06% BenchmarkGoMemclr64 50.8 4.14 -91.85% BenchmarkGoMemclr256 157 6.02 -96.17% Update #5373. Change-Id: I99d3e6f5f268e8c6499b7e661df46403e5eb83e4 Reviewed-on: https://go-review.googlesource.com/2520 Reviewed-by: Keith Randall --- src/cmd/gc/builtin.c | 1 + src/cmd/gc/go.h | 1 + src/cmd/gc/range.c | 66 ++++++++++++++++++++++++++++++++++ src/cmd/gc/runtime.go | 1 + src/cmd/gc/typecheck.c | 2 +- src/runtime/memmove_test.go | 14 ++++++++ test/fixedbugs/issue5373.go | 71 +++++++++++++++++++++++++++++++++++++ 7 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 test/fixedbugs/issue5373.go diff --git a/src/cmd/gc/builtin.c b/src/cmd/gc/builtin.c index b11aa1e6f9d..e2e14f03d2b 100644 --- a/src/cmd/gc/builtin.c +++ b/src/cmd/gc/builtin.c @@ -126,6 +126,7 @@ char *runtimeimport = "func @\"\".makeslice (@\"\".typ·2 *byte, @\"\".nel·3 int64, @\"\".cap·4 int64) (@\"\".ary·1 []any)\n" "func @\"\".growslice (@\"\".typ·2 *byte, @\"\".old·3 []any, @\"\".n·4 int64) (@\"\".ary·1 []any)\n" "func @\"\".memmove (@\"\".to·1 *any, @\"\".frm·2 *any, @\"\".length·3 uintptr)\n" + "func @\"\".memclr (@\"\".ptr·1 *byte, @\"\".length·2 uintptr)\n" "func @\"\".memequal (@\"\".x·2 *any, @\"\".y·3 *any, @\"\".size·4 uintptr) (? bool)\n" "func @\"\".memequal8 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n" "func @\"\".memequal16 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n" diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index 5150026aec7..21cf8b894d1 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -1451,6 +1451,7 @@ void walkswitch(Node *sw); * typecheck.c */ int islvalue(Node *n); +int samesafeexpr(Node *l, Node *r); Node* typecheck(Node **np, int top); void typechecklist(NodeList *l, int top); Node* typecheckdef(Node *n); diff --git a/src/cmd/gc/range.c b/src/cmd/gc/range.c index 4ed45283238..947b4580ed7 100644 --- a/src/cmd/gc/range.c +++ b/src/cmd/gc/range.c @@ -144,6 +144,72 @@ walkrange(Node *n) fatal("walkrange"); case TARRAY: + // Lower n into runtime·memclr if possible, for + // fast zeroing of slices and arrays (issue 5373). + // Look for instances of + // + // for i := range a { + // a[i] = zero + // } + // + // in which the evaluation of a is side-effect-free. + if(!debug['N']) + if(!flag_race) + if(v1 != N) + if(v2 == N) + if(n->nbody != nil) + if(n->nbody->n != N) // at least one statement in body + if(n->nbody->next == nil) { // at most one statement in body + tmp = n->nbody->n; // first statement of body + if(tmp->op == OAS) + if(tmp->left->op == OINDEX) + if(samesafeexpr(tmp->left->left, a)) + if(samesafeexpr(tmp->left->right, v1)) + if(t->type->width > 0) + if(iszero(tmp->right)) { + // Convert to + // if len(a) != 0 { + // hp = &a[0] + // hn = len(a)*sizeof(elem(a)) + // memclr(hp, hn) + // i = len(a) - 1 + // } + n->op = OIF; + n->nbody = nil; + n->ntest = nod(ONE, nod(OLEN, a, N), nodintconst(0)); + n->nincr = nil; + + // hp = &a[0] + hp = temp(ptrto(types[TUINT8])); + tmp = nod(OINDEX, a, nodintconst(0)); + tmp->bounded = 1; + tmp = nod(OADDR, tmp, N); + tmp = nod(OCONVNOP, tmp, N); + n->nbody = list(n->nbody, nod(OAS, hp, tmp)); + + // hn = len(a) * sizeof(elem(a)) + hn = temp(types[TUINTPTR]); + tmp = nod(OLEN, a, N); + tmp = nod(OMUL, tmp, nodintconst(t->type->width)); + tmp = conv(tmp, types[TUINTPTR]); + n->nbody = list(n->nbody, nod(OAS, hn, tmp)); + + // memclr(hp, hn) + fn = mkcall("memclr", T, nil, hp, hn); + n->nbody = list(n->nbody, fn); + + // i = len(a) - 1 + v1 = nod(OAS, v1, nod(OSUB, nod(OLEN, a, N), nodintconst(1))); + n->nbody = list(n->nbody, v1); + + typecheck(&n->ntest, Erv); + typechecklist(n->nbody, Etop); + walkstmt(&n); + lineno = lno; + return; + } + } + // orderstmt arranged for a copy of the array/slice variable if needed. ha = a; hv1 = temp(types[TINT]); diff --git a/src/cmd/gc/runtime.go b/src/cmd/gc/runtime.go index a294456007a..463bb3a76da 100644 --- a/src/cmd/gc/runtime.go +++ b/src/cmd/gc/runtime.go @@ -160,6 +160,7 @@ func block() func makeslice(typ *byte, nel int64, cap int64) (ary []any) func growslice(typ *byte, old []any, n int64) (ary []any) func memmove(to *any, frm *any, length uintptr) +func memclr(ptr *byte, length uintptr) func memequal(x, y *any, size uintptr) bool func memequal8(x, y *any) bool diff --git a/src/cmd/gc/typecheck.c b/src/cmd/gc/typecheck.c index aa693aff7f9..8a3b486bd66 100644 --- a/src/cmd/gc/typecheck.c +++ b/src/cmd/gc/typecheck.c @@ -2831,7 +2831,7 @@ checkassignlist(NodeList *l) // Check whether l and r are the same side effect-free expression, // so that it is safe to reuse one instead of computing both. -static int +int samesafeexpr(Node *l, Node *r) { if(l->op != r->op || !eqtype(l->type, r->type)) diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go index ffda4fe6c53..29c62cc37d0 100644 --- a/src/runtime/memmove_test.go +++ b/src/runtime/memmove_test.go @@ -162,6 +162,20 @@ func BenchmarkMemclr256(b *testing.B) { bmMemclr(b, 256) } func BenchmarkMemclr4096(b *testing.B) { bmMemclr(b, 4096) } func BenchmarkMemclr65536(b *testing.B) { bmMemclr(b, 65536) } +func bmGoMemclr(b *testing.B, n int) { + x := make([]byte, n) + b.SetBytes(int64(n)) + for i := 0; i < b.N; i++ { + for j := range x { + x[j] = 0 + } + } +} +func BenchmarkGoMemclr5(b *testing.B) { bmGoMemclr(b, 5) } +func BenchmarkGoMemclr16(b *testing.B) { bmGoMemclr(b, 16) } +func BenchmarkGoMemclr64(b *testing.B) { bmGoMemclr(b, 64) } +func BenchmarkGoMemclr256(b *testing.B) { bmGoMemclr(b, 256) } + func BenchmarkClearFat8(b *testing.B) { for i := 0; i < b.N; i++ { var x [8 / 4]uint32 diff --git a/test/fixedbugs/issue5373.go b/test/fixedbugs/issue5373.go new file mode 100644 index 00000000000..17ce189f959 --- /dev/null +++ b/test/fixedbugs/issue5373.go @@ -0,0 +1,71 @@ +// run + +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Ensure that zeroing range loops have the requisite side-effects. + +package main + +import ( + "fmt" + "os" +) + +func check(n int) { + // When n == 0, i is untouched by the range loop. + // Picking an initial value of -1 for i makes the + // "want" calculation below correct in all cases. + i := -1 + s := make([]byte, n) + for i = range s { + s[i] = 0 + } + if want := n - 1; i != want { + fmt.Printf("index after range with side-effect = %d want %d\n", i, want) + os.Exit(1) + } + + i = n + 1 + // i is shadowed here, so its value should be unchanged. + for i := range s { + s[i] = 0 + } + if want := n + 1; i != want { + fmt.Printf("index after range without side-effect = %d want %d\n", i, want) + os.Exit(1) + } + + // Index variable whose evaluation has side-effects + var x int + f := func() int { + x++ + return 0 + } + var a [1]int + for a[f()] = range s { + s[a[f()]] = 0 + } + if want := n * 2; x != want { + fmt.Printf("index function calls = %d want %d\n", x, want) + os.Exit(1) + } + + // Range expression whose evaluation has side-effects + x = 0 + b := [1][]byte{s} + for i := range b[f()] { + b[f()][i] = 0 + } + if want := n + 1; x != n+1 { + fmt.Printf("range expr function calls = %d want %d\n", x, want) + os.Exit(1) + } +} + +func main() { + check(0) + check(1) + check(15) +}