1
0
mirror of https://github.com/golang/go synced 2024-11-23 14:00:03 -07:00
go/test/codegen/memops.go
David Chase e4e192484b cmd/compile: split up the addressing mode on OpAMD64CMP*loadidx* always
Benchmarking suggests that the combo instruction is notably slower,
at least in the places where we measure.

Updates #37955

Change-Id: I829f1975dd6edf38163128ba51d84604055512f4
Reviewed-on: https://go-review.googlesource.com/c/go/+/228157
Run-TryBot: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2020-04-15 18:09:14 +00:00

306 lines
8.5 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// Package-level two-element arrays, one per operand width. compMem1 compares
// element 1 of each of them with a constant, and idxCompare indexes the
// unsigned integer ones with a variable index.
var (
	x   [2]bool
	x8  [2]uint8
	x16 [2]uint16
	x32 [2]uint32
	x64 [2]uint64
)
// compMem1 checks that comparing an element of a package-level array with a
// constant is done directly against memory. Each amd64 directive below expects
// a CMPB, CMPW, CMPL or CMPQ (matching the element width) whose first operand
// is the global symbol plus the byte offset of element 1, addressed relative
// to SB, and whose second operand is an immediate. The bool test expects a
// compare against immediate 0, the integer tests against immediate 7.
//
// It returns 1 as soon as one of the conditions holds and 0 otherwise.
func compMem1() int {
// amd64:`CMPB\t"".x\+1\(SB\), [$]0`
if x[1] {
return 1
}
// amd64:`CMPB\t"".x8\+1\(SB\), [$]7`
if x8[1] == 7 {
return 1
}
// amd64:`CMPW\t"".x16\+2\(SB\), [$]7`
if x16[1] == 7 {
return 1
}
// amd64:`CMPL\t"".x32\+4\(SB\), [$]7`
if x32[1] == 7 {
return 1
}
// amd64:`CMPQ\t"".x64\+8\(SB\), [$]7`
if x64[1] == 7 {
return 1
}
return 0
}
// f ignores its argument and always returns false. It is marked noinline and
// is called by compMem2, presumably so that the compared bool comes from a
// real call rather than from a constant folded into the caller.
//go:noinline
func f(x int) bool {
return false
}
// f8 ignores its argument and always returns an int8 zero. It is marked
// noinline and is called by compMem2 as the source of an 8-bit call result.
//go:noinline
func f8(x int) int8 {
return 0
}
// f16 ignores its argument and always returns an int16 zero. It is marked
// noinline and is called by compMem2 as the source of a 16-bit call result.
//go:noinline
func f16(x int) int16 {
return 0
}
// f32 ignores its argument and always returns an int32 zero. It is marked
// noinline and is called by compMem2 as the source of a 32-bit call result.
//go:noinline
func f32(x int) int32 {
return 0
}
// f64 ignores its argument and always returns an int64 zero. It is marked
// noinline and is called by compMem2 as the source of a 64-bit call result.
//go:noinline
func f64(x int) int64 {
return 0
}
// compMem2 checks that the result of a non-inlined call can be compared with
// a constant without first being loaded into a register. Each amd64 directive
// expects a CMPB, CMPW, CMPL or CMPQ (matching the result width) whose first
// operand is the memory location 8(SP) and whose second operand is an
// immediate.
//
// NOTE(review): 8(SP) is presumably the stack slot that holds the callee's
// result under the calling convention this test was written for; confirm
// before relying on that offset.
//
// It returns 1 as soon as one of the conditions holds and 0 otherwise.
func compMem2() int {
// amd64:`CMPB\t8\(SP\), [$]0`
if f(3) {
return 1
}
// amd64:`CMPB\t8\(SP\), [$]7`
if f8(3) == 7 {
return 1
}
// amd64:`CMPW\t8\(SP\), [$]7`
if f16(3) == 7 {
return 1
}
// amd64:`CMPL\t8\(SP\), [$]7`
if f32(3) == 7 {
return 1
}
// amd64:`CMPQ\t8\(SP\), [$]7`
if f64(3) == 7 {
return 1
}
return 0
}
// compMem3 loads *x into r and returns r together with the result of r < *y.
// Because r is also returned, the value loaded from x stays live after the
// comparison. The amd64 and 386 directives expect a CMPQ or CMPL whose first
// operand starts with an opening parenthesis, that is, a memory operand
// addressed through a register.
func compMem3(x, y *int) (int, bool) {
// We can do comparisons of a register with memory even if
// the register is used subsequently.
r := *x
// amd64:`CMPQ\t\(`
// 386:`CMPL\t\(`
return r, r < *y
}
// The following functions test that indexed load/store operations get generated.

// idxInt8 exercises indexed addressing for int8 slices at index i+1. On both
// amd64 and 386 the directives expect, in order:
//   - a sign- or zero-extending byte load (MOVBLSX or MOVBLZX) from
//     displacement 1 plus base register plus index register scaled by 1,
//   - a MOVB store of a register to the same addressing form,
//   - a MOVB store of the immediate 77 to the same addressing form.
func idxInt8(x, y []int8, i int) {
var t int8
// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
// 386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
// 386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
y[i+1] = t
// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
// 386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
x[i+1] = 77
}
// idxInt16 exercises indexed addressing for int16 slices. On both amd64 and
// 386 the directives expect extending word loads (MOVWLSX or MOVWLZX) and
// MOVW stores, from a register or from the immediate 77, that use
// displacement 2 plus base register plus scaled index register.
//
// For index i+1 the scale must be exactly 2. For index 16*i+1 the directives
// accept a scale of either 1 or 2, leaving the compiler free to fold part or
// all of the multiplication into the index register.
func idxInt16(x, y []int16, i int) {
var t int16
// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
// 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
// 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
y[i+1] = t
// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
// 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
t = x[16*i+1]
// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
// 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
y[16*i+1] = t
// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
// 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
x[i+1] = 77
// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
// 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
x[16*i+1] = 77
}
// idxInt32 exercises indexed addressing for int32 slices. The directives
// expect MOVL loads and stores, from a register or from the immediate 77,
// that use displacement 4 plus base register plus scaled index register.
//
// Expected scale per index expression:
//   - i+1 requires scale 4 (amd64 and 386),
//   - 2*i+1 requires scale 8, folding the doubling into the addressing mode
//     (checked on amd64 only),
//   - 16*i+1 accepts scale 1 or 4 (amd64 and 386).
func idxInt32(x, y []int32, i int) {
var t int32
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
// 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
y[i+1] = t
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
t = x[2*i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
y[2*i+1] = t
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
// 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
t = x[16*i+1]
// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
// 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
y[16*i+1] = t
// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
x[i+1] = 77
// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
// 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
x[16*i+1] = 77
}
// idxInt64 exercises indexed addressing for int64 slices. Only amd64 is
// checked. The directives expect MOVQ loads and stores, from a register or
// from the immediate 77, that use displacement 8 plus base register plus
// scaled index register. Index i+1 requires scale 8; index 16*i+1 accepts
// scale 1 or 8.
func idxInt64(x, y []int64, i int) {
var t int64
// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
t = x[i+1]
// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
y[i+1] = t
// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
t = x[16*i+1]
// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
y[16*i+1] = t
// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
x[i+1] = 77
// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
x[16*i+1] = 77
}
// idxFloat32 exercises indexed addressing for float32 slices. The directives
// apply to amd64 and to 386 built with the sse2 variant. They expect MOVSS
// loads into and stores from an X register that use displacement 4 plus base
// register plus scaled index register. Index i+1 requires scale 4; index
// 16*i+1 accepts scale 1 or 4.
func idxFloat32(x, y []float32, i int) {
var t float32
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
t = x[i+1]
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
y[i+1] = t
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
t = x[16*i+1]
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
y[16*i+1] = t
}
// idxFloat64 exercises indexed addressing for float64 slices. The directives
// apply to amd64 and to 386 built with the sse2 variant. They expect MOVSD
// loads into and stores from an X register that use displacement 8 plus base
// register plus scaled index register. Index i+1 requires scale 8; index
// 16*i+1 accepts scale 1 or 8.
func idxFloat64(x, y []float64, i int) {
var t float64
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
t = x[i+1]
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
y[i+1] = t
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
t = x[16*i+1]
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
y[16*i+1] = t
}
// idxLoadPlusOp checks that an arithmetic or logical operation can take its
// source operand straight from an indexed memory location. Starting from
// x[0], it combines s with x[i+1] through x[i+6] using +, -, *, &, | and ^ in
// that order and returns the result.
//
// Only 386 is checked. Each directive expects the matching instruction
// (ADDL, SUBL, IMULL, ANDL, ORL, XORL) with a source operand made of a
// displacement of 4 times the constant index offset, a base register and an
// index register scaled by 4, and with a register destination.
func idxLoadPlusOp(x []int32, i int) int32 {
s := x[0]
// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s += x[i+1]
// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s -= x[i+2]
// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s *= x[i+3]
// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s &= x[i+4]
// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s |= x[i+5]
// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s ^= x[i+6]
return s
}
// idxStorePlusOp checks that a read-modify-write of a slice element can be
// done with a single instruction whose destination is an indexed memory
// location. Elements x[i+1] through x[i+5] are updated with v using +=, -=,
// &=, |= and ^=; elements x[i+6] through x[i+9] are updated with the constant
// 77 using +=, &=, |= and ^=.
//
// Only 386 is checked. Each directive expects the matching instruction (ADDL,
// SUBL, ANDL, ORL, XORL) with either a register or the immediate 77 as source
// and a destination made of a displacement of 4 times the constant index
// offset, a base register and an index register scaled by 4.
func idxStorePlusOp(x []int32, i int, v int32) {
// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
x[i+1] += v
// 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
x[i+2] -= v
// 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
x[i+3] &= v
// 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
x[i+4] |= v
// 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
x[i+5] ^= v
// 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
x[i+6] += 77
// 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
x[i+7] &= 77
// 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
x[i+8] |= 77
// 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
x[i+9] ^= 77
}
// idxCompare checks the code generated for comparisons whose left operand is
// an element of one of the package-level arrays x8, x16, x32 or x64 selected
// by a variable index. The first seven tests compare the element at i+1 or
// 16*i+1 with element 0 of the same array; the last seven compare the element
// at i+2 or 16*i+2 with the constant 77.
//
// Only amd64 is checked. Every directive asserts a separate indexed load
// (MOVBLZX, MOVWLZX, MOVL or MOVQ) into a register, using a displacement equal
// to the constant index offset times the element size, a base register and a
// scaled index register. None of them asserts a compare instruction that
// takes the indexed memory operand itself. For the 16*i forms the scale may be
// either 1 or the element size.
//
// It returns 0 as soon as one of the comparisons is true and 1 otherwise.
func idxCompare(i int) int {
// amd64: `MOVBLZX\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
if x8[i+1] < x8[0] {
return 0
}
// amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
if x16[i+1] < x16[0] {
return 0
}
// amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
if x16[16*i+1] < x16[0] {
return 0
}
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
if x32[i+1] < x32[0] {
return 0
}
// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
if x32[16*i+1] < x32[0] {
return 0
}
// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
if x64[i+1] < x64[0] {
return 0
}
// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
if x64[16*i+1] < x64[0] {
return 0
}
// amd64: `MOVBLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
if x8[i+2] < 77 {
return 0
}
// amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
if x16[i+2] < 77 {
return 0
}
// amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
if x16[16*i+2] < 77 {
return 0
}
// amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
if x32[i+2] < 77 {
return 0
}
// amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
if x32[16*i+2] < 77 {
return 0
}
// amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
if x64[i+2] < 77 {
return 0
}
// amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
if x64[16*i+2] < 77 {
return 0
}
return 1
}