mirror of
https://github.com/golang/go
synced 2024-11-17 10:24:48 -07:00
5aeecc4530
This CL optimizes arm64's NEG/MVN/TST/CMN with a shifted operand. 1. The total size of pkg/android_arm64 decreases about 0.2KB, excluding cmd/compile/ . 2. The go1 benchmark shows no regression, excluding noise. name old time/op new time/op delta BinaryTree17-4 16.4s ± 1% 16.4s ± 1% ~ (p=0.914 n=29+29) Fannkuch11-4 8.72s ± 0% 8.72s ± 0% ~ (p=0.274 n=30+29) FmtFprintfEmpty-4 174ns ± 0% 174ns ± 0% ~ (all equal) FmtFprintfString-4 370ns ± 0% 370ns ± 0% ~ (all equal) FmtFprintfInt-4 419ns ± 0% 419ns ± 0% ~ (all equal) FmtFprintfIntInt-4 672ns ± 1% 675ns ± 2% ~ (p=0.217 n=28+30) FmtFprintfPrefixedInt-4 806ns ± 0% 806ns ± 0% ~ (p=0.402 n=30+28) FmtFprintfFloat-4 1.09µs ± 0% 1.09µs ± 0% +0.02% (p=0.011 n=22+27) FmtManyArgs-4 2.67µs ± 0% 2.68µs ± 0% ~ (p=0.279 n=29+30) GobDecode-4 33.1ms ± 1% 33.1ms ± 0% ~ (p=0.052 n=28+29) GobEncode-4 29.6ms ± 0% 29.6ms ± 0% +0.08% (p=0.013 n=28+29) Gzip-4 1.38s ± 2% 1.39s ± 2% ~ (p=0.071 n=29+29) Gunzip-4 139ms ± 0% 139ms ± 0% ~ (p=0.265 n=29+29) HTTPClientServer-4 789µs ± 4% 785µs ± 4% ~ (p=0.206 n=29+28) JSONEncode-4 49.7ms ± 0% 49.6ms ± 0% -0.24% (p=0.000 n=30+30) JSONDecode-4 266ms ± 1% 267ms ± 1% +0.34% (p=0.000 n=30+30) Mandelbrot200-4 16.6ms ± 0% 16.6ms ± 0% ~ (p=0.835 n=28+30) GoParse-4 15.9ms ± 0% 15.8ms ± 0% -0.29% (p=0.000 n=27+30) RegexpMatchEasy0_32-4 380ns ± 0% 381ns ± 0% +0.18% (p=0.000 n=30+30) RegexpMatchEasy0_1K-4 1.18µs ± 0% 1.19µs ± 0% +0.23% (p=0.000 n=30+30) RegexpMatchEasy1_32-4 357ns ± 0% 358ns ± 0% +0.28% (p=0.000 n=29+29) RegexpMatchEasy1_1K-4 2.04µs ± 0% 2.04µs ± 0% +0.06% (p=0.006 n=30+30) RegexpMatchMedium_32-4 589ns ± 0% 590ns ± 0% +0.24% (p=0.000 n=28+30) RegexpMatchMedium_1K-4 162µs ± 0% 162µs ± 0% -0.01% (p=0.027 n=26+29) RegexpMatchHard_32-4 9.58µs ± 0% 9.58µs ± 0% ~ (p=0.935 n=30+30) RegexpMatchHard_1K-4 287µs ± 0% 287µs ± 0% ~ (p=0.387 n=29+30) Revcomp-4 2.50s ± 0% 2.50s ± 0% -0.10% (p=0.020 n=28+28) Template-4 310ms ± 0% 310ms ± 1% ~ (p=0.406 n=30+30) TimeParse-4 1.68µs ± 0% 1.68µs ± 0% +0.03% (p=0.014 n=30+17) TimeFormat-4 1.65µs ± 0% 1.66µs ± 0% +0.32% (p=0.000 n=27+29) [Geo mean] 247µs 247µs +0.05% name old speed new speed delta GobDecode-4 23.2MB/s ± 0% 23.2MB/s ± 0% -0.08% (p=0.032 n=27+29) GobEncode-4 26.0MB/s ± 0% 25.9MB/s ± 0% -0.10% (p=0.011 n=29+29) Gzip-4 14.1MB/s ± 2% 14.0MB/s ± 2% ~ (p=0.081 n=29+29) Gunzip-4 139MB/s ± 0% 139MB/s ± 0% ~ (p=0.290 n=29+29) JSONEncode-4 39.0MB/s ± 0% 39.1MB/s ± 0% +0.25% (p=0.000 n=29+30) JSONDecode-4 7.30MB/s ± 1% 7.28MB/s ± 1% -0.33% (p=0.000 n=30+30) GoParse-4 3.65MB/s ± 0% 3.66MB/s ± 0% +0.29% (p=0.000 n=27+30) RegexpMatchEasy0_32-4 84.1MB/s ± 0% 84.0MB/s ± 0% -0.17% (p=0.000 n=30+28) RegexpMatchEasy0_1K-4 864MB/s ± 0% 862MB/s ± 0% -0.24% (p=0.000 n=30+30) RegexpMatchEasy1_32-4 89.5MB/s ± 0% 89.3MB/s ± 0% -0.18% (p=0.000 n=28+24) RegexpMatchEasy1_1K-4 502MB/s ± 0% 502MB/s ± 0% -0.05% (p=0.008 n=30+29) RegexpMatchMedium_32-4 1.70MB/s ± 0% 1.69MB/s ± 0% -0.59% (p=0.000 n=29+30) RegexpMatchMedium_1K-4 6.31MB/s ± 0% 6.31MB/s ± 0% +0.05% (p=0.005 n=30+26) RegexpMatchHard_32-4 3.34MB/s ± 0% 3.34MB/s ± 0% ~ (all equal) RegexpMatchHard_1K-4 3.57MB/s ± 0% 3.57MB/s ± 0% ~ (all equal) Revcomp-4 102MB/s ± 0% 102MB/s ± 0% +0.10% (p=0.022 n=28+28) Template-4 6.26MB/s ± 0% 6.26MB/s ± 1% ~ (p=0.768 n=30+30) [Geo mean] 24.2MB/s 24.1MB/s -0.08% Change-Id: I494f9db7f8a568a00e9c74ae25086a58b2221683 Reviewed-on: https://go-review.googlesource.com/137976 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
212 lines
4.3 KiB
Go
212 lines
4.3 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "unsafe"
|
|
|
|
// This file contains code generation tests related to the comparison
|
|
// operators.
|
|
|
|
// -------------- //
|
|
// Equality //
|
|
// -------------- //
|
|
|
|
// Check that compare to constant string use 2/4/8 byte compares
|
|
|
|
func CompareString1(s string) bool {
|
|
// amd64:`CMPW\t\(.*\), [$]`
|
|
// arm64:`MOVHU\t\(.*\), [R]`,`CMPW\t[$]`
|
|
// ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
|
|
// s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
|
|
return s == "xx"
|
|
}
|
|
|
|
func CompareString2(s string) bool {
|
|
// amd64:`CMPL\t\(.*\), [$]`
|
|
// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
|
|
return s == "xxxx"
|
|
}
|
|
|
|
func CompareString3(s string) bool {
|
|
// amd64:`CMPQ\t\(.*\), [A-Z]`
|
|
// arm64:-`CMPW\t`
|
|
// ppc64le:-`CMPW\t`
|
|
// s390x:-`CMPW\t`
|
|
return s == "xxxxxxxx"
|
|
}
|
|
|
|
// Check that arrays compare use 2/4/8 byte compares
|
|
|
|
func CompareArray1(a, b [2]byte) bool {
|
|
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
// arm64:-`MOVBU\t`
|
|
// ppc64le:-`MOVBZ\t`
|
|
// s390x:-`MOVBZ\t`
|
|
return a == b
|
|
}
|
|
|
|
func CompareArray2(a, b [3]uint16) bool {
|
|
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
return a == b
|
|
}
|
|
|
|
func CompareArray3(a, b [3]int16) bool {
|
|
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
return a == b
|
|
}
|
|
|
|
func CompareArray4(a, b [12]int8) bool {
|
|
// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
return a == b
|
|
}
|
|
|
|
func CompareArray5(a, b [15]byte) bool {
|
|
// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
return a == b
|
|
}
|
|
|
|
// This was a TODO in mapaccess1_faststr
|
|
func CompareArray6(a, b unsafe.Pointer) bool {
|
|
// amd64:`CMPL\t\(.*\), [A-Z]`
|
|
// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
return *((*[4]byte)(a)) != *((*[4]byte)(b))
|
|
}
|
|
|
|
// -------------- //
|
|
// Ordering //
|
|
// -------------- //
|
|
|
|
// Test that LEAQ/ADDQconst are folded into SETx ops
|
|
|
|
func CmpFold(x uint32) bool {
|
|
// amd64:`SETHI\t.*\(SP\)`
|
|
return x > 4
|
|
}
|
|
|
|
// Test that direct comparisons with memory are generated when
|
|
// possible
|
|
|
|
func CmpMem1(p int, q *int) bool {
|
|
// amd64:`CMPQ\t\(.*\), [A-Z]`
|
|
return p < *q
|
|
}
|
|
|
|
func CmpMem2(p *int, q int) bool {
|
|
// amd64:`CMPQ\t\(.*\), [A-Z]`
|
|
return *p < q
|
|
}
|
|
|
|
func CmpMem3(p *int) bool {
|
|
// amd64:`CMPQ\t\(.*\), [$]7`
|
|
return *p < 7
|
|
}
|
|
|
|
func CmpMem4(p *int) bool {
|
|
// amd64:`CMPQ\t\(.*\), [$]7`
|
|
return 7 < *p
|
|
}
|
|
|
|
func CmpMem5(p **int) {
|
|
// amd64:`CMPL\truntime.writeBarrier\(SB\), [$]0`
|
|
*p = nil
|
|
}
|
|
|
|
func CmpMem6(a []int) int {
|
|
// 386:`CMPL\s8\([A-Z]+\),`
|
|
// amd64:`CMPQ\s16\([A-Z]+\),`
|
|
if a[1] > a[2] {
|
|
return 1
|
|
} else {
|
|
return 2
|
|
}
|
|
}
|
|
|
|
// Check tbz/tbnz are generated when comparing against zero on arm64
|
|
|
|
func CmpZero1(a int32, ptr *int) {
|
|
if a < 0 { // arm64:"TBZ"
|
|
*ptr = 0
|
|
}
|
|
}
|
|
|
|
func CmpZero2(a int64, ptr *int) {
|
|
if a < 0 { // arm64:"TBZ"
|
|
*ptr = 0
|
|
}
|
|
}
|
|
|
|
func CmpZero3(a int32, ptr *int) {
|
|
if a >= 0 { // arm64:"TBNZ"
|
|
*ptr = 0
|
|
}
|
|
}
|
|
|
|
func CmpZero4(a int64, ptr *int) {
|
|
if a >= 0 { // arm64:"TBNZ"
|
|
*ptr = 0
|
|
}
|
|
}
|
|
|
|
func CmpToZero(a, b, d int32, e, f int64) int32 {
|
|
// arm:`TST`,-`AND`
|
|
// arm64:`TSTW`,-`AND`
|
|
// 386:`TESTL`,-`ANDL`
|
|
// amd64:`TESTL`,-`ANDL`
|
|
c0 := a&b < 0
|
|
// arm:`CMN`,-`ADD`
|
|
// arm64:`CMNW`,-`ADD`
|
|
c1 := a+b < 0
|
|
// arm:`TEQ`,-`XOR`
|
|
c2 := a^b < 0
|
|
// arm64:`TST`,-`AND`
|
|
// amd64:`TESTQ`,-`ANDQ`
|
|
c3 := e&f < 0
|
|
// arm64:`CMN`,-`ADD`
|
|
c4 := e+f < 0
|
|
// not optimized to single CMNW/CMN due to further use of b+d
|
|
// arm64:`ADD`,-`CMNW`
|
|
// arm:`ADD`,-`CMN`
|
|
c5 := b+d == 0
|
|
// not optimized to single TSTW/TST due to further use of a&d
|
|
// arm64:`AND`,-`TSTW`
|
|
// arm:`AND`,-`TST`
|
|
// 386:`ANDL`
|
|
c6 := a&d >= 0
|
|
// arm64:`TST\sR[0-9]+<<3,\sR[0-9]+`
|
|
c7 := e&(f<<3) < 0
|
|
// arm64:`CMN\sR[0-9]+<<3,\sR[0-9]+`
|
|
c8 := e+(f<<3) < 0
|
|
if c0 {
|
|
return 1
|
|
} else if c1 {
|
|
return 2
|
|
} else if c2 {
|
|
return 3
|
|
} else if c3 {
|
|
return 4
|
|
} else if c4 {
|
|
return 5
|
|
} else if c5 {
|
|
return b + d
|
|
} else if c6 {
|
|
return a & d
|
|
} else if c7 {
|
|
return 7
|
|
} else if c8 {
|
|
return 8
|
|
} else {
|
|
return 0
|
|
}
|
|
}
|