2016-03-01 15:57:46 -07:00
|
|
|
// Copyright 2013 The Go Authors. All rights reserved.
|
2013-05-17 13:53:49 -06:00
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package runtime_test
|
|
|
|
|
|
|
|
import (
|
2016-09-21 19:05:39 -06:00
|
|
|
"crypto/rand"
|
2016-05-25 07:44:39 -06:00
|
|
|
"fmt"
|
2016-09-21 19:05:39 -06:00
|
|
|
"internal/race"
|
2014-02-06 18:43:22 -07:00
|
|
|
. "runtime"
|
2013-05-17 13:53:49 -06:00
|
|
|
"testing"
|
|
|
|
)
|
|
|
|
|
|
|
|
// TestMemmove copies every length n, from every source offset x to every
// destination offset y, between two distinct buffers and verifies that
// exactly dst[y:y+n] was overwritten with src[x:x+n]. The buffers are 256
// bytes long, or 144 bytes in -short mode.
func TestMemmove(t *testing.T) {
	size := 256
	if testing.Short() {
		size = 128 + 16
	}

	// Source bytes always have the top bit set and pristine destination
	// bytes never do, so a copied byte cannot be mistaken for an untouched one.
	srcByte := func(i int) byte { return byte(128 + (i & 127)) }
	dstByte := func(i int) byte { return byte(i & 127) }

	src := make([]byte, size)
	dst := make([]byte, size)
	for i := range src {
		src[i] = srcByte(i)
	}
	for i := range dst {
		dst[i] = dstByte(i)
	}

	for n := 0; n <= size; n++ {
		for x := 0; x+n <= size; x++ { // offset in src
			for y := 0; y+n <= size; y++ { // offset in dst
				copy(dst[y:y+n], src[x:x+n])

				// Everything before the destination window must be untouched.
				for i, got := range dst[:y] {
					if got != dstByte(i) {
						t.Fatalf("prefix dst[%d] = %d", i, got)
					}
				}
				// The window must hold the source bytes; restore it as we go
				// so the next iteration starts from a pristine dst.
				for i := y; i < y+n; i++ {
					if dst[i] != srcByte(i-y+x) {
						t.Fatalf("copied dst[%d] = %d", i, dst[i])
					}
					dst[i] = dstByte(i) // reset dst
				}
				// Everything after the destination window must be untouched.
				for i := y + n; i < size; i++ {
					if dst[i] != dstByte(i) {
						t.Fatalf("suffix dst[%d] = %d", i, dst[i])
					}
				}
			}
		}
	}
}
|
|
|
|
|
|
|
|
// TestMemmoveAlias is the overlapping counterpart of TestMemmove: source and
// destination are windows of the same buffer. For every length n and every
// pair of offsets (x, y) it copies buf[x:x+n] onto buf[y:y+n] and checks that
// the destination window holds the old source bytes while everything outside
// the window is unchanged. The buffer is 256 bytes, or 144 in -short mode.
func TestMemmoveAlias(t *testing.T) {
	size := 256
	if testing.Short() {
		size = 128 + 16
	}

	// buf[i] == byte(i) is the pristine state restored after every copy.
	buf := make([]byte, size)
	for i := range buf {
		buf[i] = byte(i)
	}

	for n := 0; n <= size; n++ {
		for x := 0; x+n <= size; x++ { // src offset
			for y := 0; y+n <= size; y++ { // dst offset
				copy(buf[y:y+n], buf[x:x+n])

				for i, got := range buf[:y] {
					if got != byte(i) {
						t.Fatalf("prefix buf[%d] = %d", i, got)
					}
				}
				// from tracks the source index that buf[i] was copied from.
				for i, from := y, x; i < y+n; i, from = i+1, from+1 {
					if buf[i] != byte(from) {
						t.Fatalf("copied buf[%d] = %d", i, buf[i])
					}
					buf[i] = byte(i) // reset buf
				}
				for i := y + n; i < size; i++ {
					if buf[i] != byte(i) {
						t.Fatalf("suffix buf[%d] = %d", i, buf[i])
					}
				}
			}
		}
	}
}
|
|
|
|
|
2016-09-21 19:05:39 -06:00
|
|
|
func TestMemmoveLarge0x180000(t *testing.T) {
|
|
|
|
if race.Enabled {
|
|
|
|
t.Skip("skipping large memmove test under race detector")
|
|
|
|
}
|
|
|
|
testSize(t, 0x180000)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestMemmoveOverlapLarge0x120000(t *testing.T) {
|
|
|
|
if race.Enabled {
|
|
|
|
t.Skip("skipping large memmove test under race detector")
|
|
|
|
}
|
|
|
|
testOverlap(t, 0x120000)
|
|
|
|
}
|
|
|
|
|
|
|
|
func testSize(t *testing.T, size int) {
|
|
|
|
src := make([]byte, size)
|
|
|
|
dst := make([]byte, size)
|
|
|
|
_, _ = rand.Read(src)
|
|
|
|
_, _ = rand.Read(dst)
|
|
|
|
|
|
|
|
ref := make([]byte, size)
|
|
|
|
copyref(ref, dst)
|
|
|
|
|
|
|
|
for n := size - 50; n > 1; n >>= 1 {
|
|
|
|
for x := 0; x <= size-n; x = x*7 + 1 { // offset in src
|
|
|
|
for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst
|
|
|
|
copy(dst[y:y+n], src[x:x+n])
|
|
|
|
copyref(ref[y:y+n], src[x:x+n])
|
|
|
|
p := cmpb(dst, ref)
|
|
|
|
if p >= 0 {
|
|
|
|
t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, dst[p], ref[p])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func testOverlap(t *testing.T, size int) {
|
|
|
|
src := make([]byte, size)
|
|
|
|
test := make([]byte, size)
|
|
|
|
ref := make([]byte, size)
|
|
|
|
_, _ = rand.Read(src)
|
|
|
|
|
|
|
|
for n := size - 50; n > 1; n >>= 1 {
|
|
|
|
for x := 0; x <= size-n; x = x*7 + 1 { // offset in src
|
|
|
|
for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst
|
|
|
|
// Reset input
|
|
|
|
copyref(test, src)
|
|
|
|
copyref(ref, src)
|
|
|
|
copy(test[y:y+n], test[x:x+n])
|
|
|
|
if y <= x {
|
|
|
|
copyref(ref[y:y+n], ref[x:x+n])
|
|
|
|
} else {
|
|
|
|
copybw(ref[y:y+n], ref[x:x+n])
|
|
|
|
}
|
|
|
|
p := cmpb(test, ref)
|
|
|
|
if p >= 0 {
|
|
|
|
t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, test[p], ref[p])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// copyref is a reference forward copy: it assigns src[i] to dst[i] one byte
// at a time, from index 0 upward. It is deliberately not written with the
// built-in copy, since it serves as the reference that copy is checked
// against. dst must be at least as long as src.
func copyref(dst, src []byte) {
	for i := 0; i < len(src); i++ {
		dst[i] = src[i]
	}
}
|
|
|
|
|
|
|
|
// copybw is a reference backward copy: it assigns src[i] to dst[i] one byte
// at a time, from the last index down to 0. An empty src is a no-op.
// dst must be at least as long as src.
func copybw(dst, src []byte) {
	for next := len(src); next > 0; next-- {
		dst[next-1] = src[next-1]
	}
}
|
|
|
|
|
|
|
|
// matchLen returns the index of the first byte at which a and b differ,
// looking only at their first max bytes. It returns max when those prefixes
// are identical. Both slices are resliced to max, so each must be able to
// hold at least max bytes.
func matchLen(a, b []byte, max int) int {
	a, b = a[:max], b[:max]
	for i := 0; i < max; i++ {
		if a[i] != b[i] {
			return i
		}
	}
	return max
}
|
|
|
|
|
|
|
|
func cmpb(a, b []byte) int {
|
|
|
|
l := matchLen(a, b, len(a))
|
|
|
|
if l == len(a) {
|
|
|
|
return -1
|
|
|
|
}
|
|
|
|
return l
|
|
|
|
}
|
|
|
|
|
2016-05-25 07:44:39 -06:00
|
|
|
// benchmarkSizes runs fn once per entry of sizes, each as a sub-benchmark of
// b named after the size. The size is registered with SetBytes before fn is
// called with the sub-benchmark and the size.
func benchmarkSizes(b *testing.B, sizes []int, fn func(b *testing.B, n int)) {
	for i := range sizes {
		size := sizes[i]
		b.Run(fmt.Sprint(size), func(sub *testing.B) {
			sub.SetBytes(int64(size))
			fn(sub, size)
		})
	}
}
|
|
|
|
|
2016-05-25 07:44:39 -06:00
|
|
|
// bufSizes lists the buffer lengths, in bytes, passed to benchmarkSizes by
// the memmove benchmarks: every length from 0 through 16, then powers of two
// up to 4096.
var bufSizes = []int{
	0, 1, 2, 3, 4, 5, 6, 7,
	8, 9, 10, 11, 12, 13, 14, 15,
	16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
}
|
|
|
|
|
2016-05-25 07:44:39 -06:00
|
|
|
func BenchmarkMemmove(b *testing.B) {
|
|
|
|
benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
|
|
|
|
x := make([]byte, n)
|
|
|
|
y := make([]byte, n)
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
copy(x, y)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
2016-03-06 17:58:30 -07:00
|
|
|
|
2016-05-25 07:44:39 -06:00
|
|
|
func BenchmarkMemmoveUnalignedDst(b *testing.B) {
|
|
|
|
benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
|
|
|
|
x := make([]byte, n+1)
|
|
|
|
y := make([]byte, n)
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
copy(x[1:], y)
|
|
|
|
}
|
|
|
|
})
|
2016-03-06 17:58:30 -07:00
|
|
|
}
|
|
|
|
|
2016-05-25 07:44:39 -06:00
|
|
|
func BenchmarkMemmoveUnalignedSrc(b *testing.B) {
|
|
|
|
benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
|
|
|
|
x := make([]byte, n)
|
|
|
|
y := make([]byte, n+1)
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
copy(x, y[1:])
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
2015-09-21 20:34:39 -06:00
|
|
|
|
2014-02-06 18:43:22 -07:00
|
|
|
func TestMemclr(t *testing.T) {
|
|
|
|
size := 512
|
|
|
|
if testing.Short() {
|
|
|
|
size = 128 + 16
|
|
|
|
}
|
|
|
|
mem := make([]byte, size)
|
|
|
|
for i := 0; i < size; i++ {
|
|
|
|
mem[i] = 0xee
|
|
|
|
}
|
|
|
|
for n := 0; n < size; n++ {
|
|
|
|
for x := 0; x <= size-n; x++ { // offset in mem
|
|
|
|
MemclrBytes(mem[x : x+n])
|
|
|
|
for i := 0; i < x; i++ {
|
|
|
|
if mem[i] != 0xee {
|
|
|
|
t.Fatalf("overwrite prefix mem[%d] = %d", i, mem[i])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for i := x; i < x+n; i++ {
|
|
|
|
if mem[i] != 0 {
|
|
|
|
t.Fatalf("failed clear mem[%d] = %d", i, mem[i])
|
|
|
|
}
|
|
|
|
mem[i] = 0xee
|
|
|
|
}
|
|
|
|
for i := x + n; i < size; i++ {
|
|
|
|
if mem[i] != 0xee {
|
|
|
|
t.Fatalf("overwrite suffix mem[%d] = %d", i, mem[i])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-25 07:44:39 -06:00
|
|
|
func BenchmarkMemclr(b *testing.B) {
|
|
|
|
for _, n := range []int{5, 16, 64, 256, 4096, 65536} {
|
|
|
|
x := make([]byte, n)
|
|
|
|
b.Run(fmt.Sprint(n), func(b *testing.B) {
|
|
|
|
b.SetBytes(int64(n))
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
MemclrBytes(x)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
for _, m := range []int{1, 4, 8, 16, 64} {
|
|
|
|
x := make([]byte, m<<20)
|
|
|
|
b.Run(fmt.Sprint(m, "M"), func(b *testing.B) {
|
|
|
|
b.SetBytes(int64(m << 20))
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
MemclrBytes(x)
|
|
|
|
}
|
|
|
|
})
|
2014-02-06 18:43:22 -07:00
|
|
|
}
|
|
|
|
}
|
2014-04-01 13:51:02 -06:00
|
|
|
|
2016-05-25 07:44:39 -06:00
|
|
|
func BenchmarkGoMemclr(b *testing.B) {
|
|
|
|
benchmarkSizes(b, []int{5, 16, 64, 256}, func(b *testing.B, n int) {
|
|
|
|
x := make([]byte, n)
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
for j := range x {
|
|
|
|
x[j] = 0
|
|
|
|
}
|
2015-01-07 18:44:49 -07:00
|
|
|
}
|
2016-05-25 07:44:39 -06:00
|
|
|
})
|
2015-01-07 18:44:49 -07:00
|
|
|
}
|
|
|
|
|
2014-07-21 12:23:29 -06:00
|
|
|
// BenchmarkClearFat8 declares a fresh zero-valued 8-byte array
// ([8/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat8(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [8 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
// BenchmarkClearFat12 declares a fresh zero-valued 12-byte array
// ([12/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat12(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [12 / 4]uint32
		_ = zeroed
	}
}
|
2014-07-18 13:18:36 -06:00
|
|
|
// BenchmarkClearFat16 declares a fresh zero-valued 16-byte array
// ([16/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat16(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [16 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
// BenchmarkClearFat24 declares a fresh zero-valued 24-byte array
// ([24/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat24(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [24 / 4]uint32
		_ = zeroed
	}
}
|
2014-04-01 13:51:02 -06:00
|
|
|
// BenchmarkClearFat32 declares a fresh zero-valued 32-byte array
// ([32/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat32(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [32 / 4]uint32
		_ = zeroed
	}
}
|
cmd/6g, runtime: improve duffzero throughput
It is faster to execute
MOVQ AX,(DI)
MOVQ AX,8(DI)
MOVQ AX,16(DI)
MOVQ AX,24(DI)
ADDQ $32,DI
than
STOSQ
STOSQ
STOSQ
STOSQ
However, in order to be able to jump into
the middle of a block of MOVQs, the call
site needs to pre-adjust DI.
If we're clearing a small area, the cost
of that DI pre-adjustment isn't repaid.
This CL switches the DUFFZERO implementation
to use a hybrid strategy, in which small
clears use STOSQ as before, but large clears
use mostly MOVQ/ADDQ blocks.
benchmark old ns/op new ns/op delta
BenchmarkClearFat8 0.55 0.55 +0.00%
BenchmarkClearFat12 0.82 0.83 +1.22%
BenchmarkClearFat16 0.55 0.55 +0.00%
BenchmarkClearFat24 0.82 0.82 +0.00%
BenchmarkClearFat32 2.20 1.94 -11.82%
BenchmarkClearFat40 1.92 1.66 -13.54%
BenchmarkClearFat48 2.21 1.93 -12.67%
BenchmarkClearFat56 3.03 2.20 -27.39%
BenchmarkClearFat64 3.26 2.48 -23.93%
BenchmarkClearFat72 3.57 2.76 -22.69%
BenchmarkClearFat80 3.83 3.05 -20.37%
BenchmarkClearFat88 4.14 3.30 -20.29%
BenchmarkClearFat128 5.54 4.69 -15.34%
BenchmarkClearFat256 9.95 9.09 -8.64%
BenchmarkClearFat512 18.7 17.9 -4.28%
BenchmarkClearFat1024 36.2 35.4 -2.21%
Change-Id: Ic786406d9b3cab68d5a231688f9e66fcd1bd7103
Reviewed-on: https://go-review.googlesource.com/2585
Reviewed-by: Keith Randall <khr@golang.org>
2015-04-15 12:05:01 -06:00
|
|
|
// BenchmarkClearFat40 declares a fresh zero-valued 40-byte array
// ([40/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat40(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [40 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
// BenchmarkClearFat48 declares a fresh zero-valued 48-byte array
// ([48/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat48(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [48 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
// BenchmarkClearFat56 declares a fresh zero-valued 56-byte array
// ([56/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat56(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [56 / 4]uint32
		_ = zeroed
	}
}
|
2014-04-01 13:51:02 -06:00
|
|
|
// BenchmarkClearFat64 declares a fresh zero-valued 64-byte array
// ([64/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat64(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [64 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
// BenchmarkClearFat128 declares a fresh zero-valued 128-byte array
// ([128/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat128(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [128 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
// BenchmarkClearFat256 declares a fresh zero-valued 256-byte array
// ([256/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat256(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [256 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
// BenchmarkClearFat512 declares a fresh zero-valued 512-byte array
// ([512/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat512(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [512 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
// BenchmarkClearFat1024 declares a fresh zero-valued 1024-byte array
// ([1024/4]uint32) on each of the b.N iterations and discards it.
func BenchmarkClearFat1024(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		var zeroed [1024 / 4]uint32
		_ = zeroed
	}
}
|
|
|
|
|
2014-07-21 12:23:29 -06:00
|
|
|
// BenchmarkCopyFat8 assigns an 8-byte array ([8/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat8(b *testing.B) {
	var src [8 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
|
|
|
// BenchmarkCopyFat12 assigns a 12-byte array ([12/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat12(b *testing.B) {
	var src [12 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
2014-07-18 13:18:36 -06:00
|
|
|
// BenchmarkCopyFat16 assigns a 16-byte array ([16/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat16(b *testing.B) {
	var src [16 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
|
|
|
// BenchmarkCopyFat24 assigns a 24-byte array ([24/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat24(b *testing.B) {
	var src [24 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
2014-04-01 13:51:02 -06:00
|
|
|
// BenchmarkCopyFat32 assigns a 32-byte array ([32/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat32(b *testing.B) {
	var src [32 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
|
|
|
// BenchmarkCopyFat64 assigns a 64-byte array ([64/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat64(b *testing.B) {
	var src [64 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
|
|
|
// BenchmarkCopyFat128 assigns a 128-byte array ([128/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat128(b *testing.B) {
	var src [128 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
|
|
|
// BenchmarkCopyFat256 assigns a 256-byte array ([256/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat256(b *testing.B) {
	var src [256 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
|
|
|
// BenchmarkCopyFat512 assigns a 512-byte array ([512/4]uint32) to a new local
// variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat512(b *testing.B) {
	var src [512 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|
|
|
|
// BenchmarkCopyFat1024 assigns a 1024-byte array ([1024/4]uint32) to a new
// local variable on each of the b.N iterations and discards the copy.
func BenchmarkCopyFat1024(b *testing.B) {
	var src [1024 / 4]uint32
	for iter := 0; iter < b.N; iter++ {
		dst := src
		_ = dst
	}
}
|