1
0
mirror of https://github.com/golang/go synced 2024-11-13 17:30:24 -07:00

cmd/compile: improve rules for PPC64.rules

This adds some improvements to the rules for PPC64 to eliminate
unnecessary zero or sign extends, and fix some rules for truncates
which were not always using the correct sign instruction.

This reduces the size of many functions by 1 or 2 instructions and
can improve performance in cases where the execution time depends
on small loops where at least 1 instruction was removed and where that
loop contributes a significant amount of the total execution time.

Included is a testcase for codegen to verify the sign/zero extend
instructions are omitted.

An example of the improvement (strings):
IndexAnyASCII/256:1-16     392ns ± 0%   369ns ± 0%  -5.79%  (p=0.000 n=1+10)
IndexAnyASCII/256:2-16     397ns ± 0%   376ns ± 0%  -5.23%  (p=0.000 n=1+9)
IndexAnyASCII/256:4-16     405ns ± 0%   384ns ± 0%  -5.19%  (p=1.714 n=1+6)
IndexAnyASCII/256:8-16     427ns ± 0%   403ns ± 0%  -5.57%  (p=0.000 n=1+10)
IndexAnyASCII/256:16-16    441ns ± 0%   418ns ± 1%  -5.33%  (p=0.000 n=1+10)
IndexAnyASCII/4096:1-16   5.62µs ± 0%  5.27µs ± 1%  -6.31%  (p=0.000 n=1+10)
IndexAnyASCII/4096:2-16   5.67µs ± 0%  5.29µs ± 0%  -6.67%  (p=0.222 n=1+8)
IndexAnyASCII/4096:4-16   5.66µs ± 0%  5.28µs ± 1%  -6.66%  (p=0.000 n=1+10)
IndexAnyASCII/4096:8-16   5.66µs ± 0%  5.31µs ± 1%  -6.10%  (p=0.000 n=1+10)
IndexAnyASCII/4096:16-16  5.70µs ± 0%  5.33µs ± 1%  -6.43%  (p=0.182 n=1+10)

Change-Id: I739a6132b505936d39001aada5a978ff2a5f0500
Reviewed-on: https://go-review.googlesource.com/129875
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Lynn Boger 2018-08-15 17:34:06 -04:00
parent 8eb36ae9c7
commit 8dbd9afbb0
3 changed files with 1402 additions and 31 deletions

View File

@ -660,14 +660,51 @@
(MOVWreg y:(AND (MOVDconst [c]) _)) && uint64(c) <= 0x7FFFFFFF -> y (MOVWreg y:(AND (MOVDconst [c]) _)) && uint64(c) <= 0x7FFFFFFF -> y
// small and of zero-extend -> either zero-extend or small and // small and of zero-extend -> either zero-extend or small and
// degenerate-and
(ANDconst [c] y:(MOVBZreg _)) && c&0xFF == 0xFF -> y (ANDconst [c] y:(MOVBZreg _)) && c&0xFF == 0xFF -> y
(ANDconst [0xFF] y:(MOVBreg _)) -> y
(ANDconst [c] y:(MOVHZreg _)) && c&0xFFFF == 0xFFFF -> y (ANDconst [c] y:(MOVHZreg _)) && c&0xFFFF == 0xFFFF -> y
(ANDconst [c] y:(MOVWZreg _)) && c&0xFFFFFFFF == 0xFFFFFFFF -> y (ANDconst [0xFFFF] y:(MOVHreg _)) -> y
// normal case
(ANDconst [c] (MOVBZreg x)) -> (ANDconst [c&0xFF] x) (AND (MOVDconst [c]) y:(MOVWZreg _)) && c&0xFFFFFFFF == 0xFFFFFFFF -> y
(ANDconst [c] (MOVHZreg x)) -> (ANDconst [c&0xFFFF] x) (AND (MOVDconst [0xFFFFFFFF]) y:(MOVWreg x)) -> (MOVWZreg x)
(ANDconst [c] (MOVWZreg x)) -> (ANDconst [c&0xFFFFFFFF] x) // normal case
(ANDconst [c] (MOV(B|BZ)reg x)) -> (ANDconst [c&0xFF] x)
(ANDconst [c] (MOV(H|HZ)reg x)) -> (ANDconst [c&0xFFFF] x)
(ANDconst [c] (MOV(W|WZ)reg x)) -> (ANDconst [c&0xFFFFFFFF] x)
// Eliminate unnecessary sign/zero extend following right shift
(MOV(B|H|W)Zreg (SRWconst [c] (MOVBZreg x))) -> (SRWconst [c] (MOVBZreg x))
(MOV(H|W)Zreg (SRWconst [c] (MOVHZreg x))) -> (SRWconst [c] (MOVHZreg x))
(MOVWZreg (SRWconst [c] (MOVWZreg x))) -> (SRWconst [c] (MOVWZreg x))
(MOV(B|H|W)reg (SRAWconst [c] (MOVBreg x))) -> (SRAWconst [c] (MOVBreg x))
(MOV(H|W)reg (SRAWconst [c] (MOVHreg x))) -> (SRAWconst [c] (MOVHreg x))
(MOVWreg (SRAWconst [c] (MOVWreg x))) -> (SRAWconst [c] (MOVWreg x))
(MOVWZreg (SRWconst [c] x)) && sizeof(x.Type) <= 32 -> (SRWconst [c] x)
(MOVHZreg (SRWconst [c] x)) && sizeof(x.Type) <= 16 -> (SRWconst [c] x)
(MOVBZreg (SRWconst [c] x)) && sizeof(x.Type) == 8 -> (SRWconst [c] x)
(MOVWreg (SRAWconst [c] x)) && sizeof(x.Type) <= 32 -> (SRAWconst [c] x)
(MOVHreg (SRAWconst [c] x)) && sizeof(x.Type) <= 16 -> (SRAWconst [c] x)
(MOVBreg (SRAWconst [c] x)) && sizeof(x.Type) == 8 -> (SRAWconst [c] x)
// initial right shift will handle sign/zero extend
(MOVBZreg (SRDconst [c] x)) && c>=56 -> (SRDconst [c] x)
(MOVBreg (SRDconst [c] x)) && c>56 -> (SRDconst [c] x)
(MOVBreg (SRDconst [c] x)) && c==56 -> (SRADconst [c] x)
(MOVBZreg (SRWconst [c] x)) && c>=24 -> (SRWconst [c] x)
(MOVBreg (SRWconst [c] x)) && c>24 -> (SRWconst [c] x)
(MOVBreg (SRWconst [c] x)) && c==24 -> (SRAWconst [c] x)
(MOVHZreg (SRDconst [c] x)) && c>=48 -> (SRDconst [c] x)
(MOVHreg (SRDconst [c] x)) && c>48 -> (SRDconst [c] x)
(MOVHreg (SRDconst [c] x)) && c==48 -> (SRADconst [c] x)
(MOVHZreg (SRWconst [c] x)) && c>=16 -> (SRWconst [c] x)
(MOVHreg (SRWconst [c] x)) && c>16 -> (SRWconst [c] x)
(MOVHreg (SRWconst [c] x)) && c==16 -> (SRAWconst [c] x)
(MOVWZreg (SRDconst [c] x)) && c>=32 -> (SRDconst [c] x)
(MOVWreg (SRDconst [c] x)) && c>32 -> (SRDconst [c] x)
(MOVWreg (SRDconst [c] x)) && c==32 -> (SRADconst [c] x)
// Various redundant zero/sign extension combinations. // Various redundant zero/sign extension combinations.
(MOVBZreg y:(MOVBZreg _)) -> y // repeat (MOVBZreg y:(MOVBZreg _)) -> y // repeat
@ -851,22 +888,38 @@
(ZeroExt16to(32|64) x) -> (MOVHZreg x) (ZeroExt16to(32|64) x) -> (MOVHZreg x)
(ZeroExt32to64 x) -> (MOVWZreg x) (ZeroExt32to64 x) -> (MOVWZreg x)
(Trunc(16|32|64)to8 x) -> (MOVBreg x) (Trunc(16|32|64)to8 x) && isSigned(x.Type) -> (MOVBreg x)
(Trunc(32|64)to16 x) -> (MOVHreg x) (Trunc(16|32|64)to8 x) -> (MOVBZreg x)
(Trunc64to32 x) -> (MOVWreg x) (Trunc(32|64)to16 x) && isSigned(x.Type) -> (MOVHreg x)
(Trunc(32|64)to16 x) -> (MOVHZreg x)
(Trunc64to32 x) && isSigned(x.Type) -> (MOVWreg x)
(Trunc64to32 x) -> (MOVWZreg x)
(Slicemask <t> x) -> (SRADconst (NEG <t> x) [63]) (Slicemask <t> x) -> (SRADconst (NEG <t> x) [63])
// Note that MOV??reg returns a 64-bit int, x is not necessarily that wide // Note that MOV??reg returns a 64-bit int, x is not necessarily that wide
// This may interact with other patterns in the future. (Compare with arm64) // This may interact with other patterns in the future. (Compare with arm64)
(MOVBZreg x:(MOVBZload _ _)) -> x (MOV(B|H|W)Zreg x:(MOVBZload _ _)) -> x
(MOVHZreg x:(MOVHZload _ _)) -> x (MOV(H|W)Zreg x:(MOVHZload _ _)) -> x
(MOVHreg x:(MOVHload _ _)) -> x (MOV(H|W)reg x:(MOVHload _ _)) -> x
(MOVWZreg x:(MOVWZload _ _)) -> x
(MOVWreg x:(MOVWload _ _)) -> x
// don't extend if argument is already extended
(MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> x
(MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> x
(MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> x
(MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> x
(MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> x
(MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> x
(MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))]) (MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
(MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))]) (MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
(MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))]) (MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
(MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))]) (MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
(MOVWreg (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
(MOVWZreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])
// Lose widening ops fed to to stores // Lose widening ops fed to to stores
(MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) -> (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) -> (MOVBstore [off] {sym} ptr x mem)

File diff suppressed because it is too large Load Diff

213
test/codegen/noextend.go Normal file
View File

@ -0,0 +1,213 @@
// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
var sval64 [8]int64
var sval32 [8]int32
var sval16 [8]int16
var sval8 [8]int8
var val64 [8]uint64
var val32 [8]uint32
var val16 [8]uint16
var val8 [8]uint8
// ----------------------------- //
// avoid zero/sign extensions //
// ----------------------------- //
func set16(x8 int8, u8 uint8, y8 int8, z8 uint8) {
// Truncate not needed, load does sign/zero extend
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
sval16[0] = int16(x8)
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
val16[0] = uint16(u8)
// AND not needed due to size
// ppc64le:-"ANDCC"
sval16[1] = 255 & int16(x8+y8)
// ppc64le:-"ANDCC"
val16[1] = 255 & uint16(u8+z8)
}
func shiftidx(x8 int8, u8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) {
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
sval16[0] = int16(val16[x8>>1])
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
val16[0] = uint16(sval16[u8>>2])
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
sval16[1] = int16(val16[x16>>1])
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
val16[1] = uint16(sval16[u16>>2])
}
func setnox(x8 int8, u8 uint8, y8 int8, z8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) {
// Truncate not needed due to sign/zero extension on load
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
sval16[0] = int16(x8)
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
val16[0] = uint16(u8)
// AND not needed due to size
// ppc64le:-"ANDCC"
sval16[1] = 255 & int16(x8+y8)
// ppc64le:-"ANDCC"
val16[1] = 255 & uint16(u8+z8)
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
sval32[0] = int32(x8)
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
sval32[1] = int32(x16)
//ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
val32[0] = uint32(u8)
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
val32[1] = uint32(u16)
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
sval64[0] = int64(x8)
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
sval64[1] = int64(x16)
// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
sval64[2] = int64(x32)
//ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
val64[0] = uint64(u8)
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
val64[1] = uint64(u16)
// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
val64[2] = uint64(u32)
}
func cmp16(x8 int8, u8 uint8, x32 int32, u32 uint32, x64 int64, u64 uint64) bool {
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
if int16(x8) == sval16[0] {
return true
}
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
if uint16(u8) == val16[0] {
return true
}
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
if uint16(u32>>16) == val16[0] {
return true
}
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
if uint16(u64>>48) == val16[0] {
return true
}
// Verify the truncates are using the correct sign.
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
if int16(x32) == sval16[0] {
return true
}
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
if uint16(u32) == val16[0] {
return true
}
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
if int16(x64) == sval16[0] {
return true
}
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
if uint16(u64) == val16[0] {
return true
}
return false
}
func cmp32(x8 int8, u8 uint8, x16 int16, u16 uint16, x64 int64, u64 uint64) bool {
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
if int32(x8) == sval32[0] {
return true
}
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
if uint32(u8) == val32[0] {
return true
}
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
if int32(x16) == sval32[0] {
return true
}
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
if uint32(u16) == val32[0] {
return true
}
// Verify the truncates are using the correct sign.
// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
if int32(x64) == sval32[0] {
return true
}
// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
if uint32(u64) == val32[0] {
return true
}
return false
}
func cmp64(x8 int8, u8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) bool {
// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
if int64(x8) == sval64[0] {
return true
}
// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
if uint64(u8) == val64[0] {
return true
}
// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
if int64(x16) == sval64[0] {
return true
}
// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
if uint64(u16) == val64[0] {
return true
}
// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
if int64(x32) == sval64[0] {
return true
}
// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
if uint64(u32) == val64[0] {
return true
}
return false
}