mirror of
https://github.com/golang/go
synced 2024-11-23 09:40:04 -07:00
f31a18ded4
Propagate values through some wide Zero/Move operations. Among other things this allows us to optimize some kinds of array initialization. For example, the following code no longer requires a temporary be allocated on the stack. Instead it writes the values directly into the return value. func f(i uint32) [4]uint32 { return [4]uint32{i, i+1, i+2, i+3} } The return value is unnecessarily cleared but removing that is probably a task for dead store analysis (I think it needs to be able to match multiple Store ops to wide Zero ops). In order to reliably remove stack variables that are rendered unnecessary by these new rules I've added a new generic version of the unread autos elimination pass. These rules are triggered more than 5000 times when building and testing the standard library. Updates #15925 (fixes for arrays of up to 4 elements). Updates #24386 (fixes for up to 4 kept elements). Updates #24416. compilebench results: name old time/op new time/op delta Template 353ms ± 5% 359ms ± 3% ~ (p=0.143 n=10+10) Unicode 219ms ± 1% 217ms ± 4% ~ (p=0.740 n=7+10) GoTypes 1.26s ± 1% 1.26s ± 2% ~ (p=0.549 n=9+10) Compiler 6.00s ± 1% 6.08s ± 1% +1.42% (p=0.000 n=9+8) SSA 15.3s ± 2% 15.6s ± 1% +2.43% (p=0.000 n=10+10) Flate 237ms ± 2% 240ms ± 2% +1.31% (p=0.015 n=10+10) GoParser 285ms ± 1% 285ms ± 1% ~ (p=0.878 n=8+8) Reflect 797ms ± 3% 807ms ± 2% ~ (p=0.065 n=9+10) Tar 334ms ± 0% 335ms ± 4% ~ (p=0.460 n=8+10) XML 419ms ± 0% 423ms ± 1% +0.91% (p=0.001 n=7+9) StdCmd 46.0s ± 0% 46.4s ± 0% +0.85% (p=0.000 n=9+9) name old user-time/op new user-time/op delta Template 337ms ± 3% 346ms ± 5% ~ (p=0.053 n=9+10) Unicode 205ms ±10% 205ms ± 8% ~ (p=1.000 n=10+10) GoTypes 1.22s ± 2% 1.21s ± 3% ~ (p=0.436 n=10+10) Compiler 5.85s ± 1% 5.93s ± 0% +1.46% (p=0.000 n=10+8) SSA 14.9s ± 1% 15.3s ± 1% +2.62% (p=0.000 n=10+10) Flate 229ms ± 4% 228ms ± 6% ~ (p=0.796 n=10+10) GoParser 271ms ± 3% 275ms ± 4% ~ (p=0.165 n=10+10) Reflect 779ms ± 5% 775ms ± 2% ~ (p=0.971 n=10+10) Tar 317ms ± 4% 319ms ± 5% 
~ (p=0.853 n=10+10) XML 404ms ± 4% 409ms ± 5% ~ (p=0.436 n=10+10) name old alloc/op new alloc/op delta Template 34.9MB ± 0% 35.0MB ± 0% +0.26% (p=0.000 n=10+10) Unicode 29.3MB ± 0% 29.3MB ± 0% +0.02% (p=0.000 n=10+10) GoTypes 115MB ± 0% 115MB ± 0% +0.30% (p=0.000 n=10+10) Compiler 519MB ± 0% 521MB ± 0% +0.30% (p=0.000 n=10+10) SSA 1.55GB ± 0% 1.57GB ± 0% +1.34% (p=0.000 n=10+9) Flate 24.1MB ± 0% 24.2MB ± 0% +0.10% (p=0.000 n=10+10) GoParser 28.1MB ± 0% 28.1MB ± 0% +0.07% (p=0.000 n=10+10) Reflect 78.7MB ± 0% 78.7MB ± 0% +0.03% (p=0.000 n=8+10) Tar 34.4MB ± 0% 34.5MB ± 0% +0.12% (p=0.000 n=10+10) XML 43.2MB ± 0% 43.2MB ± 0% +0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 330k ± 0% 330k ± 0% -0.01% (p=0.017 n=10+10) Unicode 337k ± 0% 337k ± 0% +0.01% (p=0.000 n=9+10) GoTypes 1.15M ± 0% 1.15M ± 0% +0.03% (p=0.000 n=10+10) Compiler 4.77M ± 0% 4.77M ± 0% +0.03% (p=0.000 n=9+10) SSA 12.5M ± 0% 12.6M ± 0% +1.16% (p=0.000 n=10+10) Flate 221k ± 0% 221k ± 0% +0.05% (p=0.000 n=9+10) GoParser 275k ± 0% 275k ± 0% +0.01% (p=0.014 n=10+9) Reflect 944k ± 0% 944k ± 0% -0.02% (p=0.000 n=10+10) Tar 324k ± 0% 323k ± 0% -0.12% (p=0.000 n=10+10) XML 384k ± 0% 384k ± 0% -0.01% (p=0.001 n=10+10) name old object-bytes new object-bytes delta Template 476kB ± 0% 476kB ± 0% -0.04% (p=0.000 n=10+10) Unicode 218kB ± 0% 218kB ± 0% ~ (all equal) GoTypes 1.58MB ± 0% 1.58MB ± 0% -0.04% (p=0.000 n=10+10) Compiler 6.25MB ± 0% 6.24MB ± 0% -0.09% (p=0.000 n=10+10) SSA 15.9MB ± 0% 16.1MB ± 0% +1.22% (p=0.000 n=10+10) Flate 304kB ± 0% 304kB ± 0% -0.13% (p=0.000 n=10+10) GoParser 370kB ± 0% 370kB ± 0% -0.00% (p=0.000 n=10+10) Reflect 1.27MB ± 0% 1.27MB ± 0% -0.12% (p=0.000 n=10+10) Tar 421kB ± 0% 419kB ± 0% -0.64% (p=0.000 n=10+10) XML 518kB ± 0% 517kB ± 0% -0.12% (p=0.000 n=10+10) name old export-bytes new export-bytes delta Template 16.7kB ± 0% 16.7kB ± 0% ~ (all equal) Unicode 6.52kB ± 0% 6.52kB ± 0% ~ (all equal) GoTypes 29.2kB ± 0% 29.2kB ± 0% ~ (all equal) Compiler 88.0kB ± 
0% 88.0kB ± 0% ~ (all equal) SSA 109kB ± 0% 109kB ± 0% ~ (all equal) Flate 4.49kB ± 0% 4.49kB ± 0% ~ (all equal) GoParser 8.10kB ± 0% 8.10kB ± 0% ~ (all equal) Reflect 7.71kB ± 0% 7.71kB ± 0% ~ (all equal) Tar 9.15kB ± 0% 9.15kB ± 0% ~ (all equal) XML 12.3kB ± 0% 12.3kB ± 0% ~ (all equal) name old text-bytes new text-bytes delta HelloSize 676kB ± 0% 672kB ± 0% -0.59% (p=0.000 n=10+10) CmdGoSize 7.26MB ± 0% 7.24MB ± 0% -0.18% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 10.2kB ± 0% 10.2kB ± 0% ~ (all equal) CmdGoSize 248kB ± 0% 248kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.46MB ± 0% 1.45MB ± 0% -0.31% (p=0.000 n=10+10) CmdGoSize 14.7MB ± 0% 14.7MB ± 0% -0.17% (p=0.000 n=10+10) Change-Id: Ic72b0c189dd542f391e1c9ab88a76e9148dc4285 Reviewed-on: https://go-review.googlesource.com/106495 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
96 lines
2.3 KiB
Go
96 lines
2.3 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "runtime"
|
|
|
|
// This file contains code generation tests related to the use of the
|
|
// stack.
|
|
|
|
// Check that stack stores are optimized away.
|
|
|
|
// 386:"TEXT\t.*, [$]0-"
|
|
// amd64:"TEXT\t.*, [$]0-"
|
|
// arm:"TEXT\t.*, [$]-4-"
|
|
// arm64:"TEXT\t.*, [$]-8-"
|
|
// mips:"TEXT\t.*, [$]-4-"
|
|
// ppc64le:"TEXT\t.*, [$]0-"
|
|
// s390x:"TEXT\t.*, [$]0-"
|
|
// StackStore returns the value of a local int that is declared but never
// assigned, i.e. the zero value. The local is read back through its own
// address instead of by name.
func StackStore() int {
|
|
var x int
|
|
// Take the address of x and immediately dereference it.
return *(&x)
|
|
}
|
|
|
|
// T is a struct of seven int fields, four exported (A, B, C, D) and three
// unexported (x, y, z). It is the pointee type taken by ZeroLargeStruct
// and KeepWanted in this file.
type T struct {
|
|
A, B, C, D int // keep exported fields
|
|
x, y, z int // reset unexported fields
|
|
}
|
|
|
|
// Check that large structs are cleared directly (issue #24416).
|
|
|
|
// 386:"TEXT\t.*, [$]0-"
|
|
// amd64:"TEXT\t.*, [$]0-"
|
|
// arm:"TEXT\t.*, [$]0-" (spills return address)
|
|
// arm64:"TEXT\t.*, [$]-8-"
|
|
// mips:"TEXT\t.*, [$]-4-"
|
|
// ppc64le:"TEXT\t.*, [$]0-"
|
|
// s390x:"TEXT\t.*, [$]0-"
|
|
// ZeroLargeStruct overwrites *x with a zero-valued T. The zero value is
// first bound to the local t and then copied into *x, rather than being
// assigned to *x directly.
func ZeroLargeStruct(x *T) {
|
|
t := T{}
|
|
*x = t
|
|
}
|
|
|
|
// Check that structs are partially initialised directly (issue #24386).
|
|
|
|
// Notes:
|
|
// - 386 fails due to spilling a register
|
|
// amd64:"TEXT\t.*, [$]0-"
|
|
// arm:"TEXT\t.*, [$]0-" (spills return address)
|
|
// arm64:"TEXT\t.*, [$]-8-"
|
|
// ppc64le:"TEXT\t.*, [$]0-"
|
|
// s390x:"TEXT\t.*, [$]0-"
|
|
// Note that 386 currently has to spill a register.
|
|
// KeepWanted replaces *t with a new T whose exported fields A, B, C and D
// are copied from the current *t. The unexported fields x, y and z are not
// listed in the composite literal and therefore take their zero value.
func KeepWanted(t *T) {
|
|
*t = T{A: t.A, B: t.B, C: t.C, D: t.D}
|
|
}
|
|
|
|
// Check that small array operations avoid using the stack (issue #15925).
|
|
|
|
// Notes:
|
|
// - 386 fails due to spilling a register
|
|
// - arm & mips fail due to softfloat calls
|
|
// amd64:"TEXT\t.*, [$]0-"
|
|
// arm64:"TEXT\t.*, [$]-8-"
|
|
// ppc64le:"TEXT\t.*, [$]0-"
|
|
// s390x:"TEXT\t.*, [$]0-"
|
|
// ArrayAdd64 returns the element-wise sum of the four-element float64
// arrays a and b. The result is built as a single array literal in the
// return statement.
func ArrayAdd64(a, b [4]float64) [4]float64 {
|
|
return [4]float64{a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]}
|
|
}
|
|
|
|
// Check that small array initialization avoids using the stack.
|
|
|
|
// 386:"TEXT\t.*, [$]0-"
|
|
// amd64:"TEXT\t.*, [$]0-"
|
|
// arm:"TEXT\t.*, [$]0-" (spills return address)
|
|
// arm64:"TEXT\t.*, [$]-8-"
|
|
// mips:"TEXT\t.*, [$]-4-"
|
|
// ppc64le:"TEXT\t.*, [$]0-"
|
|
// s390x:"TEXT\t.*, [$]0-"
|
|
// ArrayInit returns a [4]int holding i at index 0 and j at index 2, with
// constant zeros at indices 1 and 3. The result is built as a single array
// literal in the return statement.
func ArrayInit(i, j int) [4]int {
|
|
return [4]int{i, 0, j, 0}
|
|
}
|
|
|
|
// Check that assembly output has matching offset and base register
|
|
// (issue #21064).
|
|
|
|
// amd64:`.*b\+24\(SP\)`
|
|
// arm:`.*b\+4\(FP\)`
|
|
// check_asmout calls runtime.GC and then returns its second argument b
// unchanged. The first argument a is not used by the body.
func check_asmout(a, b int) int {
|
|
runtime.GC() // use some frame
|
|
return b
|
|
}
|