mirror of
https://github.com/golang/go
synced 2024-11-26 23:01:23 -07:00
cmd/gc: optimize memclr of slices and arrays
Recognize loops of the form for i := range a { a[i] = zero } in which the evaluation of a is free from side effects. Replace these loops with calls to memclr. This occurs in the stdlib in 18 places. The motivating example is clearing a byte slice: benchmark old ns/op new ns/op delta BenchmarkGoMemclr5 3.31 3.26 -1.51% BenchmarkGoMemclr16 13.7 3.28 -76.06% BenchmarkGoMemclr64 50.8 4.14 -91.85% BenchmarkGoMemclr256 157 6.02 -96.17% Update #5373. Change-Id: I99d3e6f5f268e8c6499b7e661df46403e5eb83e4 Reviewed-on: https://go-review.googlesource.com/2520 Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
1d0c7792f1
commit
f03c9202c4
@ -126,6 +126,7 @@ char *runtimeimport =
|
|||||||
"func @\"\".makeslice (@\"\".typ·2 *byte, @\"\".nel·3 int64, @\"\".cap·4 int64) (@\"\".ary·1 []any)\n"
|
"func @\"\".makeslice (@\"\".typ·2 *byte, @\"\".nel·3 int64, @\"\".cap·4 int64) (@\"\".ary·1 []any)\n"
|
||||||
"func @\"\".growslice (@\"\".typ·2 *byte, @\"\".old·3 []any, @\"\".n·4 int64) (@\"\".ary·1 []any)\n"
|
"func @\"\".growslice (@\"\".typ·2 *byte, @\"\".old·3 []any, @\"\".n·4 int64) (@\"\".ary·1 []any)\n"
|
||||||
"func @\"\".memmove (@\"\".to·1 *any, @\"\".frm·2 *any, @\"\".length·3 uintptr)\n"
|
"func @\"\".memmove (@\"\".to·1 *any, @\"\".frm·2 *any, @\"\".length·3 uintptr)\n"
|
||||||
|
"func @\"\".memclr (@\"\".ptr·1 *byte, @\"\".length·2 uintptr)\n"
|
||||||
"func @\"\".memequal (@\"\".x·2 *any, @\"\".y·3 *any, @\"\".size·4 uintptr) (? bool)\n"
|
"func @\"\".memequal (@\"\".x·2 *any, @\"\".y·3 *any, @\"\".size·4 uintptr) (? bool)\n"
|
||||||
"func @\"\".memequal8 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n"
|
"func @\"\".memequal8 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n"
|
||||||
"func @\"\".memequal16 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n"
|
"func @\"\".memequal16 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n"
|
||||||
|
@ -1451,6 +1451,7 @@ void walkswitch(Node *sw);
|
|||||||
* typecheck.c
|
* typecheck.c
|
||||||
*/
|
*/
|
||||||
int islvalue(Node *n);
|
int islvalue(Node *n);
|
||||||
|
int samesafeexpr(Node *l, Node *r);
|
||||||
Node* typecheck(Node **np, int top);
|
Node* typecheck(Node **np, int top);
|
||||||
void typechecklist(NodeList *l, int top);
|
void typechecklist(NodeList *l, int top);
|
||||||
Node* typecheckdef(Node *n);
|
Node* typecheckdef(Node *n);
|
||||||
|
@ -144,6 +144,72 @@ walkrange(Node *n)
|
|||||||
fatal("walkrange");
|
fatal("walkrange");
|
||||||
|
|
||||||
case TARRAY:
|
case TARRAY:
|
||||||
|
// Lower n into runtime·memclr if possible, for
|
||||||
|
// fast zeroing of slices and arrays (issue 5373).
|
||||||
|
// Look for instances of
|
||||||
|
//
|
||||||
|
// for i := range a {
|
||||||
|
// a[i] = zero
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// in which the evaluation of a is side-effect-free.
|
||||||
|
if(!debug['N'])
|
||||||
|
if(!flag_race)
|
||||||
|
if(v1 != N)
|
||||||
|
if(v2 == N)
|
||||||
|
if(n->nbody != nil)
|
||||||
|
if(n->nbody->n != N) // at least one statement in body
|
||||||
|
if(n->nbody->next == nil) { // at most one statement in body
|
||||||
|
tmp = n->nbody->n; // first statement of body
|
||||||
|
if(tmp->op == OAS)
|
||||||
|
if(tmp->left->op == OINDEX)
|
||||||
|
if(samesafeexpr(tmp->left->left, a))
|
||||||
|
if(samesafeexpr(tmp->left->right, v1))
|
||||||
|
if(t->type->width > 0)
|
||||||
|
if(iszero(tmp->right)) {
|
||||||
|
// Convert to
|
||||||
|
// if len(a) != 0 {
|
||||||
|
// hp = &a[0]
|
||||||
|
// hn = len(a)*sizeof(elem(a))
|
||||||
|
// memclr(hp, hn)
|
||||||
|
// i = len(a) - 1
|
||||||
|
// }
|
||||||
|
n->op = OIF;
|
||||||
|
n->nbody = nil;
|
||||||
|
n->ntest = nod(ONE, nod(OLEN, a, N), nodintconst(0));
|
||||||
|
n->nincr = nil;
|
||||||
|
|
||||||
|
// hp = &a[0]
|
||||||
|
hp = temp(ptrto(types[TUINT8]));
|
||||||
|
tmp = nod(OINDEX, a, nodintconst(0));
|
||||||
|
tmp->bounded = 1;
|
||||||
|
tmp = nod(OADDR, tmp, N);
|
||||||
|
tmp = nod(OCONVNOP, tmp, N);
|
||||||
|
n->nbody = list(n->nbody, nod(OAS, hp, tmp));
|
||||||
|
|
||||||
|
// hn = len(a) * sizeof(elem(a))
|
||||||
|
hn = temp(types[TUINTPTR]);
|
||||||
|
tmp = nod(OLEN, a, N);
|
||||||
|
tmp = nod(OMUL, tmp, nodintconst(t->type->width));
|
||||||
|
tmp = conv(tmp, types[TUINTPTR]);
|
||||||
|
n->nbody = list(n->nbody, nod(OAS, hn, tmp));
|
||||||
|
|
||||||
|
// memclr(hp, hn)
|
||||||
|
fn = mkcall("memclr", T, nil, hp, hn);
|
||||||
|
n->nbody = list(n->nbody, fn);
|
||||||
|
|
||||||
|
// i = len(a) - 1
|
||||||
|
v1 = nod(OAS, v1, nod(OSUB, nod(OLEN, a, N), nodintconst(1)));
|
||||||
|
n->nbody = list(n->nbody, v1);
|
||||||
|
|
||||||
|
typecheck(&n->ntest, Erv);
|
||||||
|
typechecklist(n->nbody, Etop);
|
||||||
|
walkstmt(&n);
|
||||||
|
lineno = lno;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// orderstmt arranged for a copy of the array/slice variable if needed.
|
// orderstmt arranged for a copy of the array/slice variable if needed.
|
||||||
ha = a;
|
ha = a;
|
||||||
hv1 = temp(types[TINT]);
|
hv1 = temp(types[TINT]);
|
||||||
|
@ -160,6 +160,7 @@ func block()
|
|||||||
func makeslice(typ *byte, nel int64, cap int64) (ary []any)
|
func makeslice(typ *byte, nel int64, cap int64) (ary []any)
|
||||||
func growslice(typ *byte, old []any, n int64) (ary []any)
|
func growslice(typ *byte, old []any, n int64) (ary []any)
|
||||||
func memmove(to *any, frm *any, length uintptr)
|
func memmove(to *any, frm *any, length uintptr)
|
||||||
|
func memclr(ptr *byte, length uintptr)
|
||||||
|
|
||||||
func memequal(x, y *any, size uintptr) bool
|
func memequal(x, y *any, size uintptr) bool
|
||||||
func memequal8(x, y *any) bool
|
func memequal8(x, y *any) bool
|
||||||
|
@ -2831,7 +2831,7 @@ checkassignlist(NodeList *l)
|
|||||||
|
|
||||||
// Check whether l and r are the same side effect-free expression,
|
// Check whether l and r are the same side effect-free expression,
|
||||||
// so that it is safe to reuse one instead of computing both.
|
// so that it is safe to reuse one instead of computing both.
|
||||||
static int
|
int
|
||||||
samesafeexpr(Node *l, Node *r)
|
samesafeexpr(Node *l, Node *r)
|
||||||
{
|
{
|
||||||
if(l->op != r->op || !eqtype(l->type, r->type))
|
if(l->op != r->op || !eqtype(l->type, r->type))
|
||||||
|
@ -162,6 +162,20 @@ func BenchmarkMemclr256(b *testing.B) { bmMemclr(b, 256) }
|
|||||||
func BenchmarkMemclr4096(b *testing.B) { bmMemclr(b, 4096) }
|
func BenchmarkMemclr4096(b *testing.B) { bmMemclr(b, 4096) }
|
||||||
func BenchmarkMemclr65536(b *testing.B) { bmMemclr(b, 65536) }
|
func BenchmarkMemclr65536(b *testing.B) { bmMemclr(b, 65536) }
|
||||||
|
|
||||||
|
func bmGoMemclr(b *testing.B, n int) {
|
||||||
|
x := make([]byte, n)
|
||||||
|
b.SetBytes(int64(n))
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
for j := range x {
|
||||||
|
x[j] = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func BenchmarkGoMemclr5(b *testing.B) { bmGoMemclr(b, 5) }
|
||||||
|
func BenchmarkGoMemclr16(b *testing.B) { bmGoMemclr(b, 16) }
|
||||||
|
func BenchmarkGoMemclr64(b *testing.B) { bmGoMemclr(b, 64) }
|
||||||
|
func BenchmarkGoMemclr256(b *testing.B) { bmGoMemclr(b, 256) }
|
||||||
|
|
||||||
func BenchmarkClearFat8(b *testing.B) {
|
func BenchmarkClearFat8(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
var x [8 / 4]uint32
|
var x [8 / 4]uint32
|
||||||
|
71
test/fixedbugs/issue5373.go
Normal file
71
test/fixedbugs/issue5373.go
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
// run
|
||||||
|
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Ensure that zeroing range loops have the requisite side-effects.
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
func check(n int) {
|
||||||
|
// When n == 0, i is untouched by the range loop.
|
||||||
|
// Picking an initial value of -1 for i makes the
|
||||||
|
// "want" calculation below correct in all cases.
|
||||||
|
i := -1
|
||||||
|
s := make([]byte, n)
|
||||||
|
for i = range s {
|
||||||
|
s[i] = 0
|
||||||
|
}
|
||||||
|
if want := n - 1; i != want {
|
||||||
|
fmt.Printf("index after range with side-effect = %d want %d\n", i, want)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
i = n + 1
|
||||||
|
// i is shadowed here, so its value should be unchanged.
|
||||||
|
for i := range s {
|
||||||
|
s[i] = 0
|
||||||
|
}
|
||||||
|
if want := n + 1; i != want {
|
||||||
|
fmt.Printf("index after range without side-effect = %d want %d\n", i, want)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Index variable whose evaluation has side-effects
|
||||||
|
var x int
|
||||||
|
f := func() int {
|
||||||
|
x++
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
var a [1]int
|
||||||
|
for a[f()] = range s {
|
||||||
|
s[a[f()]] = 0
|
||||||
|
}
|
||||||
|
if want := n * 2; x != want {
|
||||||
|
fmt.Printf("index function calls = %d want %d\n", x, want)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Range expression whose evaluation has side-effects
|
||||||
|
x = 0
|
||||||
|
b := [1][]byte{s}
|
||||||
|
for i := range b[f()] {
|
||||||
|
b[f()][i] = 0
|
||||||
|
}
|
||||||
|
if want := n + 1; x != n+1 {
|
||||||
|
fmt.Printf("range expr function calls = %d want %d\n", x, want)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
check(0)
|
||||||
|
check(1)
|
||||||
|
check(15)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user