mirror of
https://github.com/golang/go
synced 2024-11-14 08:10:22 -07:00
2b69ad0b3c
The CMP/CMN/TST/TEQ perform similarly to SUB/ADD/AND/XOR except the result is abandoned, and only NZCV flags are affected. This CL implements further optimization with them. 1. A micro benchmark test gets more than 9% improvement. TSTTEQ-4 6.99ms ± 0% 6.35ms ± 0% -9.15% (p=0.000 n=33+36) (https://github.com/benshi001/ugo1/blob/master/tstteq2_test.go) 2. The go1 benchmark shows no regression, excluding noise. name old time/op new time/op delta BinaryTree17-4 25.7s ± 1% 25.7s ± 1% ~ (p=0.830 n=40+40) Fannkuch11-4 13.3s ± 0% 13.2s ± 0% -0.65% (p=0.000 n=40+34) FmtFprintfEmpty-4 394ns ± 0% 394ns ± 0% ~ (p=0.819 n=40+40) FmtFprintfString-4 677ns ± 0% 677ns ± 0% +0.06% (p=0.039 n=39+40) FmtFprintfInt-4 707ns ± 0% 706ns ± 0% -0.14% (p=0.000 n=40+39) FmtFprintfIntInt-4 1.04µs ± 0% 1.04µs ± 0% +0.10% (p=0.000 n=29+31) FmtFprintfPrefixedInt-4 1.10µs ± 0% 1.11µs ± 0% +0.65% (p=0.000 n=39+37) FmtFprintfFloat-4 2.27µs ± 0% 2.26µs ± 0% -0.53% (p=0.000 n=39+40) FmtManyArgs-4 3.96µs ± 0% 3.96µs ± 0% +0.10% (p=0.000 n=39+40) GobDecode-4 53.4ms ± 1% 52.8ms ± 2% -1.10% (p=0.000 n=39+39) GobEncode-4 50.3ms ± 3% 50.4ms ± 2% ~ (p=0.089 n=40+39) Gzip-4 2.62s ± 0% 2.64s ± 0% +0.60% (p=0.000 n=40+39) Gunzip-4 312ms ± 0% 312ms ± 0% +0.02% (p=0.030 n=40+39) HTTPClientServer-4 1.01ms ± 7% 0.98ms ± 7% -2.37% (p=0.000 n=40+39) JSONEncode-4 126ms ± 1% 126ms ± 1% -0.38% (p=0.004 n=39+39) JSONDecode-4 423ms ± 0% 426ms ± 2% +0.72% (p=0.001 n=39+40) Mandelbrot200-4 18.4ms ± 0% 18.4ms ± 0% +0.04% (p=0.000 n=38+40) GoParse-4 22.8ms ± 0% 22.6ms ± 0% -0.68% (p=0.000 n=35+40) RegexpMatchEasy0_32-4 699ns ± 0% 704ns ± 0% +0.73% (p=0.000 n=27+40) RegexpMatchEasy0_1K-4 4.27µs ± 0% 4.26µs ± 0% -0.09% (p=0.000 n=35+38) RegexpMatchEasy1_32-4 741ns ± 0% 735ns ± 0% -0.85% (p=0.000 n=40+35) RegexpMatchEasy1_1K-4 5.53µs ± 0% 5.49µs ± 0% -0.69% (p=0.000 n=39+40) RegexpMatchMedium_32-4 1.07µs ± 0% 1.04µs ± 2% -2.34% (p=0.000 n=40+40) RegexpMatchMedium_1K-4 261µs ± 0% 261µs ± 0% -0.16% (p=0.000 n=40+39) 
RegexpMatchHard_32-4 14.9µs ± 0% 14.9µs ± 0% -0.18% (p=0.000 n=39+40) RegexpMatchHard_1K-4 445µs ± 0% 446µs ± 0% +0.09% (p=0.000 n=36+34) Revcomp-4 41.8ms ± 1% 41.8ms ± 1% ~ (p=0.595 n=39+38) Template-4 530ms ± 1% 528ms ± 1% -0.49% (p=0.000 n=40+40) TimeParse-4 3.39µs ± 0% 3.42µs ± 0% +0.98% (p=0.000 n=36+38) TimeFormat-4 6.12µs ± 0% 6.07µs ± 0% -0.81% (p=0.000 n=34+38) [Geo mean] 384µs 383µs -0.24% name old speed new speed delta GobDecode-4 14.4MB/s ± 1% 14.5MB/s ± 2% +1.11% (p=0.000 n=39+39) GobEncode-4 15.3MB/s ± 3% 15.2MB/s ± 2% ~ (p=0.104 n=40+39) Gzip-4 7.40MB/s ± 1% 7.36MB/s ± 0% -0.60% (p=0.000 n=40+39) Gunzip-4 62.2MB/s ± 0% 62.1MB/s ± 0% -0.02% (p=0.047 n=40+39) JSONEncode-4 15.4MB/s ± 1% 15.4MB/s ± 2% +0.39% (p=0.002 n=39+39) JSONDecode-4 4.59MB/s ± 0% 4.56MB/s ± 2% -0.71% (p=0.000 n=39+40) GoParse-4 2.54MB/s ± 0% 2.56MB/s ± 0% +0.72% (p=0.000 n=26+40) RegexpMatchEasy0_32-4 45.8MB/s ± 0% 45.4MB/s ± 0% -0.75% (p=0.000 n=38+40) RegexpMatchEasy0_1K-4 240MB/s ± 0% 240MB/s ± 0% +0.09% (p=0.000 n=35+38) RegexpMatchEasy1_32-4 43.1MB/s ± 0% 43.5MB/s ± 0% +0.84% (p=0.000 n=40+39) RegexpMatchEasy1_1K-4 185MB/s ± 0% 186MB/s ± 0% +0.69% (p=0.000 n=39+40) RegexpMatchMedium_32-4 936kB/s ± 1% 959kB/s ± 2% +2.38% (p=0.000 n=40+40) RegexpMatchMedium_1K-4 3.92MB/s ± 0% 3.93MB/s ± 0% +0.18% (p=0.000 n=39+40) RegexpMatchHard_32-4 2.15MB/s ± 0% 2.15MB/s ± 0% +0.19% (p=0.000 n=40+40) RegexpMatchHard_1K-4 2.30MB/s ± 0% 2.30MB/s ± 0% ~ (all equal) Revcomp-4 60.8MB/s ± 1% 60.8MB/s ± 1% ~ (p=0.600 n=39+38) Template-4 3.66MB/s ± 1% 3.68MB/s ± 1% +0.46% (p=0.000 n=40+40) [Geo mean] 12.8MB/s 12.8MB/s +0.27% Change-Id: I849161169ecf0876a04b7c1d3990fa8d1435215e Reviewed-on: https://go-review.googlesource.com/122855 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
172 lines
3.5 KiB
Go
172 lines
3.5 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "unsafe"
|
|
|
|
// This file contains code generation tests related to the comparison
|
|
// operators.
|
|
|
|
// -------------- //
|
|
// Equality //
|
|
// -------------- //
|
|
|
|
// Check that comparisons against constant strings use 2/4/8 byte compares
|
|
|
|
// CompareString1 reports whether s equals the two-byte constant string.
//
// The directives expect the comparison to be done in one step rather than
// byte by byte. amd64 must emit a CMPW of a memory operand against an
// immediate. arm64, ppc64le and s390x must emit a halfword load (MOVHU,
// MOVHZ, MOVHBR respectively) followed by a CMPW.
func CompareString1(s string) bool {
	// amd64:`CMPW\t\(.*\), [$]`
	// arm64:`MOVHU\t\(.*\), [R]`,`CMPW\t[$]`
	// ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
	// s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
	return s == "xx"
}
|
|
|
|
// CompareString2 reports whether s equals the four-byte constant string.
//
// The directives expect a single word-sized comparison. amd64 must emit a
// CMPL of a memory operand against an immediate. arm64, ppc64le and s390x
// must emit a word load (MOVWU, MOVWZ, MOVWBR respectively) followed by a
// CMPW.
func CompareString2(s string) bool {
	// amd64:`CMPL\t\(.*\), [$]`
	// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
	// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
	// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
	return s == "xxxx"
}
|
|
|
|
// CompareString3 reports whether s equals the eight-byte constant string.
//
// amd64 must emit a CMPQ with a memory operand. The arm64, ppc64le and
// s390x directives are negative checks, so no CMPW may appear in the code
// generated for the comparison.
func CompareString3(s string) bool {
	// amd64:`CMPQ\t\(.*\), [A-Z]`
	// arm64:-`CMPW\t`
	// ppc64le:-`CMPW\t`
	// s390x:-`CMPW\t`
	return s == "xxxxxxxx"
}
|
|
|
|
// Check that array comparisons use 2/4/8 byte compares
|
|
|
|
// CompareArray1 reports whether the two 2-byte arrays are equal.
//
// amd64 must emit a CMPW against an SP-relative operand. The arm64, ppc64le
// and s390x directives are negative checks that forbid single-byte loads
// (MOVBU, MOVBZ).
func CompareArray1(a, b [2]byte) bool {
	// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
	// arm64:-`MOVBU\t`
	// ppc64le:-`MOVBZ\t`
	// s390x:-`MOVBZ\t`
	return a == b
}
|
|
|
|
// CompareArray2 reports whether the two 3-element uint16 arrays are equal.
//
// The amd64 directives expect both a CMPL and a CMPW against SP-relative
// operands for the 6 bytes being compared.
func CompareArray2(a, b [3]uint16) bool {
	// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
	// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
	return a == b
}
|
|
|
|
// CompareArray3 reports whether the two 3-element int16 arrays are equal.
//
// It is the signed counterpart of the [3]uint16 case. The amd64 directives
// expect both a CMPL and a CMPW against SP-relative operands.
func CompareArray3(a, b [3]int16) bool {
	// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
	// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
	return a == b
}
|
|
|
|
// CompareArray4 reports whether the two 12-element int8 arrays are equal.
//
// The amd64 directives expect both a CMPQ and a CMPL against SP-relative
// operands for the 12 bytes being compared.
func CompareArray4(a, b [12]int8) bool {
	// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
	// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
	return a == b
}
|
|
|
|
// CompareArray5 reports whether the two 15-byte arrays are equal.
//
// The amd64 directive expects a CMPQ against an SP-relative operand.
func CompareArray5(a, b [15]byte) bool {
	// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
	return a == b
}
|
|
|
|
// This was a TODO in mapaccess1_faststr
//
// CompareArray6 reports whether the four bytes at a DIFFER from the four
// bytes at b; note that it returns the inequality, not the equality. Both
// pointers must reference at least four readable bytes.
//
// amd64 must emit a CMPL with a memory operand. arm64, ppc64le and s390x
// must emit a word load (MOVWU, MOVWZ, MOVWBR respectively) followed by a
// register CMPW.
func CompareArray6(a, b unsafe.Pointer) bool {
	// amd64:`CMPL\t\(.*\), [A-Z]`
	// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
	// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
	// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [R]`
	return *((*[4]byte)(a)) != *((*[4]byte)(b))
}
|
|
|
|
// -------------- //
|
|
// Ordering //
|
|
// -------------- //
|
|
|
|
// Test that LEAQ/ADDQconst are folded into SETx ops
|
|
|
|
// CmpFold reports whether x is greater than 4 (unsigned comparison).
//
// The amd64 directive expects the SETHI result to be written to an
// SP-relative operand.
func CmpFold(x uint32) bool {
	// amd64:`SETHI\t.*\(SP\)`
	return x > 4
}
|
|
|
|
// Test that direct comparisons with memory are generated when
|
|
// possible
|
|
|
|
// CmpMem1 reports whether p is less than the int q points to. q must not
// be nil.
//
// The amd64 directive expects a CMPQ whose first operand is a memory
// reference.
func CmpMem1(p int, q *int) bool {
	// amd64:`CMPQ\t\(.*\), [A-Z]`
	return p < *q
}
|
|
|
|
// CmpMem2 reports whether the int p points to is less than q. p must not
// be nil.
//
// The amd64 directive expects a CMPQ whose first operand is a memory
// reference.
func CmpMem2(p *int, q int) bool {
	// amd64:`CMPQ\t\(.*\), [A-Z]`
	return *p < q
}
|
|
|
|
// CmpMem3 reports whether the int p points to is less than 7. p must not
// be nil.
//
// The amd64 directive expects a CMPQ of a memory operand against the
// immediate 7.
func CmpMem3(p *int) bool {
	// amd64:`CMPQ\t\(.*\), [$]7`
	return *p < 7
}
|
|
|
|
// CmpMem4 reports whether 7 is less than the int p points to. p must not
// be nil.
//
// The constant is on the left-hand side here, yet the amd64 directive
// still expects a CMPQ of a memory operand against the immediate 7.
func CmpMem4(p *int) bool {
	// amd64:`CMPQ\t\(.*\), [$]7`
	return 7 < *p
}
|
|
|
|
// CmpMem5 stores nil through p. p must not be nil.
//
// The amd64 directive expects the code for the pointer store to contain a
// CMPL of runtime.writeBarrier(SB) against the immediate 0, that is, a
// compare performed directly on the memory operand.
func CmpMem5(p **int) {
	// amd64:`CMPL\truntime.writeBarrier\(SB\), [$]0`
	*p = nil
}
|
|
|
|
// CmpMem6 returns 1 if a[1] > a[2] and 2 otherwise. It panics if a has
// fewer than three elements.
//
// The 386 and amd64 directives expect a compare (CMPL / CMPQ) whose first
// operand is a register-based memory reference at offset 8 and 16
// respectively.
//
// The if/else form is kept as written because the directives are attached
// to the condition line.
func CmpMem6(a []int) int {
	// 386:`CMPL\s8\([A-Z]+\),`
	// amd64:`CMPQ\s16\([A-Z]+\),`
	if a[1] > a[2] {
		return 1
	} else {
		return 2
	}
}
|
|
|
|
// Check tbz/tbnz are generated when comparing against zero on arm64
|
|
|
|
// CmpZero1 stores 0 through ptr when the 32-bit value a is negative and
// leaves *ptr untouched otherwise.
//
// The arm64 directive on the condition line expects a TBZ instruction.
func CmpZero1(a int32, ptr *int) {
	if a < 0 { // arm64:"TBZ"
		*ptr = 0
	}
}
|
|
|
|
// CmpZero2 stores 0 through ptr when the 64-bit value a is negative and
// leaves *ptr untouched otherwise.
//
// The arm64 directive on the condition line expects a TBZ instruction.
func CmpZero2(a int64, ptr *int) {
	if a < 0 { // arm64:"TBZ"
		*ptr = 0
	}
}
|
|
|
|
// CmpZero3 stores 0 through ptr when the 32-bit value a is zero or
// positive and leaves *ptr untouched when a is negative.
//
// The arm64 directive on the condition line expects a TBNZ instruction.
func CmpZero3(a int32, ptr *int) {
	if a >= 0 { // arm64:"TBNZ"
		*ptr = 0
	}
}
|
|
|
|
// CmpZero4 stores 0 through ptr when the 64-bit value a is zero or
// positive and leaves *ptr untouched when a is negative.
//
// The arm64 directive on the condition line expects a TBNZ instruction.
func CmpZero4(a int64, ptr *int) {
	if a >= 0 { // arm64:"TBNZ"
		*ptr = 0
	}
}
|
|
|
|
// CmpToZero classifies a and b by the sign of three derived values, tested
// in order:
//
//	1 if a&b is negative,
//	2 if a+b is negative (the 32-bit sum wraps on overflow),
//	3 if a^b is negative,
//	0 otherwise.
//
// On arm each condition is expected to compile to the flag-setting
// instruction named in its directive (TST, CMN, TEQ) with no separate AND,
// ADD or XOR. The if/else-if chain is kept as written because every
// directive is attached to its own condition line.
func CmpToZero(a, b int32) int32 {
	if a&b < 0 { // arm:`TST`,-`AND`
		return 1
	} else if a+b < 0 { // arm:`CMN`,-`ADD`
		return 2
	} else if a^b < 0 { // arm:`TEQ`,-`XOR`
		return 3
	} else {
		return 0
	}
}
|