mirror of
https://github.com/golang/go
synced 2024-11-23 12:40:11 -07:00
8dbd9afbb0
This adds some improvements to the rules for PPC64 to eliminate unnecessary zero or sign extends, and fix some rules for truncates which were not always using the correct sign instruction. This reduces the size of many functions by 1 or 2 instructions and can improve performance in cases where the execution time depends on small loops where at least 1 instruction was removed and where that loop contributes a significant amount of the total execution time. Included is a testcase for codegen to verify the sign/zero extend instructions are omitted. An example of the improvement (strings): IndexAnyASCII/256:1-16 392ns ± 0% 369ns ± 0% -5.79% (p=0.000 n=1+10) IndexAnyASCII/256:2-16 397ns ± 0% 376ns ± 0% -5.23% (p=0.000 n=1+9) IndexAnyASCII/256:4-16 405ns ± 0% 384ns ± 0% -5.19% (p=1.714 n=1+6) IndexAnyASCII/256:8-16 427ns ± 0% 403ns ± 0% -5.57% (p=0.000 n=1+10) IndexAnyASCII/256:16-16 441ns ± 0% 418ns ± 1% -5.33% (p=0.000 n=1+10) IndexAnyASCII/4096:1-16 5.62µs ± 0% 5.27µs ± 1% -6.31% (p=0.000 n=1+10) IndexAnyASCII/4096:2-16 5.67µs ± 0% 5.29µs ± 0% -6.67% (p=0.222 n=1+8) IndexAnyASCII/4096:4-16 5.66µs ± 0% 5.28µs ± 1% -6.66% (p=0.000 n=1+10) IndexAnyASCII/4096:8-16 5.66µs ± 0% 5.31µs ± 1% -6.10% (p=0.000 n=1+10) IndexAnyASCII/4096:16-16 5.70µs ± 0% 5.33µs ± 1% -6.43% (p=0.182 n=1+10) Change-Id: I739a6132b505936d39001aada5a978ff2a5f0500 Reviewed-on: https://go-review.googlesource.com/129875 Reviewed-by: David Chase <drchase@google.com>
214 lines
4.7 KiB
Go
214 lines
4.7 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// Package-level arrays of eight elements each, one signed (sval*) and one
// unsigned (val*) array per integer width. The codegen test functions in this
// file load from and store to them.
var (
	sval64 [8]int64
	sval32 [8]int32
	sval16 [8]int16
	sval8  [8]int8
	val64  [8]uint64
	val32  [8]uint32
	val16  [8]uint16
	val8   [8]uint8
)
|
|
|
|
// ----------------------------- //
|
|
// avoid zero/sign extensions //
|
|
// ----------------------------- //
|
|
|
|
func set16(x8 int8, u8 uint8, y8 int8, z8 uint8) {
|
|
// Truncate not needed, load does sign/zero extend
|
|
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
|
|
sval16[0] = int16(x8)
|
|
|
|
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
|
|
val16[0] = uint16(u8)
|
|
|
|
// AND not needed due to size
|
|
// ppc64le:-"ANDCC"
|
|
sval16[1] = 255 & int16(x8+y8)
|
|
|
|
// ppc64le:-"ANDCC"
|
|
val16[1] = 255 & uint16(u8+z8)
|
|
|
|
}
|
|
func shiftidx(x8 int8, u8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) {
|
|
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
|
|
sval16[0] = int16(val16[x8>>1])
|
|
|
|
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
|
|
val16[0] = uint16(sval16[u8>>2])
|
|
|
|
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
|
|
sval16[1] = int16(val16[x16>>1])
|
|
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
val16[1] = uint16(sval16[u16>>2])
|
|
|
|
}
|
|
|
|
func setnox(x8 int8, u8 uint8, y8 int8, z8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) {
|
|
// Truncate not needed due to sign/zero extension on load
|
|
|
|
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
|
|
sval16[0] = int16(x8)
|
|
|
|
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
|
|
val16[0] = uint16(u8)
|
|
|
|
// AND not needed due to size
|
|
// ppc64le:-"ANDCC"
|
|
sval16[1] = 255 & int16(x8+y8)
|
|
|
|
// ppc64le:-"ANDCC"
|
|
val16[1] = 255 & uint16(u8+z8)
|
|
|
|
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
|
|
sval32[0] = int32(x8)
|
|
|
|
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
|
|
sval32[1] = int32(x16)
|
|
|
|
//ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
|
|
val32[0] = uint32(u8)
|
|
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
val32[1] = uint32(u16)
|
|
|
|
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
|
|
sval64[0] = int64(x8)
|
|
|
|
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
|
|
sval64[1] = int64(x16)
|
|
|
|
// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
|
|
sval64[2] = int64(x32)
|
|
|
|
//ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
|
|
val64[0] = uint64(u8)
|
|
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
val64[1] = uint64(u16)
|
|
|
|
// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
|
|
val64[2] = uint64(u32)
|
|
}
|
|
|
|
func cmp16(x8 int8, u8 uint8, x32 int32, u32 uint32, x64 int64, u64 uint64) bool {
|
|
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
|
|
if int16(x8) == sval16[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
|
|
if uint16(u8) == val16[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
if uint16(u32>>16) == val16[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
if uint16(u64>>48) == val16[0] {
|
|
return true
|
|
}
|
|
|
|
// Verify the truncates are using the correct sign.
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
if int16(x32) == sval16[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
|
|
if uint16(u32) == val16[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
if int16(x64) == sval16[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
|
|
if uint16(u64) == val16[0] {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func cmp32(x8 int8, u8 uint8, x16 int16, u16 uint16, x64 int64, u64 uint64) bool {
|
|
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
|
|
if int32(x8) == sval32[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
|
|
if uint32(u8) == val32[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
|
|
if int32(x16) == sval32[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
if uint32(u16) == val32[0] {
|
|
return true
|
|
}
|
|
|
|
// Verify the truncates are using the correct sign.
|
|
// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
|
|
if int32(x64) == sval32[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
|
|
if uint32(u64) == val32[0] {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
|
|
func cmp64(x8 int8, u8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) bool {
|
|
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
|
|
if int64(x8) == sval64[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
|
|
if uint64(u8) == val64[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
|
|
if int64(x16) == sval64[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
|
|
if uint64(u16) == val64[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
|
|
if int64(x32) == sval64[0] {
|
|
return true
|
|
}
|
|
|
|
// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
|
|
if uint64(u32) == val64[0] {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|