1
0
mirror of https://github.com/golang/go synced 2024-11-23 09:50:03 -07:00

cmd/compile: mark amd64 HMUL ops as not commutative

HMUL is commutative. However, it has asymmetric register requirements.
There are existing rewrite rules to place arguments in preferable slots.

Due to a bug, the existing rulegen commutativity engine doesn't generate
the commuted form of the HMUL rules.
The commuted form of those rewrite rules cause infinite loops.
In order to fix the rulegen commutativity bug,
we need to choose between eliminating
those rewrite rules and marking HMUL ops as not commutative.

This change chooses the latter, since doing so yields better
optimization results on std+cmd.

Removing the rewrite rules yields only text size regressions:

file                                before  after   Δ       %
runtime.s                           477257  477269  +12     +0.003%
time.s                              83552   83612   +60     +0.072%
encoding/asn1.s                     57378   57382   +4      +0.007%
cmd/go/internal/modfetch/codehost.s 89822   89829   +7      +0.008%
cmd/internal/test2json.s            9459    9466    +7      +0.074%
cmd/go/internal/test.s              57665   57678   +13     +0.023%

Marking HMUL as not commutative actually yields (mostly) improvements:

file                               before   after    Δ       %
runtime.s                          477257   477247   -10     -0.002%
math.s                             35985    35992    +7      +0.019%
strconv.s                          53486    53462    -24     -0.045%
syscall.s                          82483    82446    -37     -0.045%
time.s                             83552    83561    +9      +0.011%
os.s                               52691    52684    -7      -0.013%
archive/zip.s                      42285    42272    -13     -0.031%
encoding/asn1.s                    57378    57329    -49     -0.085%
encoding/base64.s                  12156    12094    -62     -0.510%
net.s                              296286   296276   -10     -0.003%
encoding/base32.s                  9720     9658     -62     -0.638%
net/http.s                         560931   560907   -24     -0.004%
net/smtp.s                         14421    14411    -10     -0.069%
cmd/vendor/golang.org/x/sys/unix.s 74307    74266    -41     -0.055%

The regressions are minor, and are in functions math.cbrt,
time.Time.String, and time.Date.

Change-Id: I9f6d9ee71654e5b70381cac77b0ac26011f4ea12
Reviewed-on: https://go-review.googlesource.com/c/go/+/213701
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Josh Bleecher Snyder 2020-01-07 09:27:18 -08:00
parent 24fa159a2e
commit 6817210edf
2 changed files with 7 additions and 8 deletions

View File

@ -220,10 +220,13 @@ func init() {
{name: "MULLU", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{ax, 0}, clobbers: dx}, typ: "(UInt32,Flags)", asm: "MULL", commutative: true, clobberFlags: true}, // Let x = arg0*arg1 (full 32x32->64 unsigned multiply). Returns uint32(x), and flags set to overflow if uint32(x) != x. {name: "MULLU", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{ax, 0}, clobbers: dx}, typ: "(UInt32,Flags)", asm: "MULL", commutative: true, clobberFlags: true}, // Let x = arg0*arg1 (full 32x32->64 unsigned multiply). Returns uint32(x), and flags set to overflow if uint32(x) != x.
{name: "MULQU", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{ax, 0}, clobbers: dx}, typ: "(UInt64,Flags)", asm: "MULQ", commutative: true, clobberFlags: true}, // Let x = arg0*arg1 (full 64x64->128 unsigned multiply). Returns uint64(x), and flags set to overflow if uint64(x) != x. {name: "MULQU", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{ax, 0}, clobbers: dx}, typ: "(UInt64,Flags)", asm: "MULQ", commutative: true, clobberFlags: true}, // Let x = arg0*arg1 (full 64x64->128 unsigned multiply). Returns uint64(x), and flags set to overflow if uint64(x) != x.
{name: "HMULQ", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width // HMULx[U] are intentionally not marked as commutative, even though they are.
{name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width // This is because they have asymmetric register requirements.
{name: "HMULQU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULQ", clobberFlags: true}, // (arg0 * arg1) >> width // There are rewrite rules to try to place arguments in preferable slots.
{name: "HMULLU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width {name: "HMULQ", argLength: 2, reg: gp21hmul, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
{name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
{name: "HMULQU", argLength: 2, reg: gp21hmul, asm: "MULQ", clobberFlags: true}, // (arg0 * arg1) >> width
{name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
{name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits {name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits

View File

@ -6696,7 +6696,6 @@ var opcodeTable = [...]opInfo{
{ {
name: "HMULQ", name: "HMULQ",
argLen: 2, argLen: 2,
commutative: true,
clobberFlags: true, clobberFlags: true,
asm: x86.AIMULQ, asm: x86.AIMULQ,
reg: regInfo{ reg: regInfo{
@ -6713,7 +6712,6 @@ var opcodeTable = [...]opInfo{
{ {
name: "HMULL", name: "HMULL",
argLen: 2, argLen: 2,
commutative: true,
clobberFlags: true, clobberFlags: true,
asm: x86.AIMULL, asm: x86.AIMULL,
reg: regInfo{ reg: regInfo{
@ -6730,7 +6728,6 @@ var opcodeTable = [...]opInfo{
{ {
name: "HMULQU", name: "HMULQU",
argLen: 2, argLen: 2,
commutative: true,
clobberFlags: true, clobberFlags: true,
asm: x86.AMULQ, asm: x86.AMULQ,
reg: regInfo{ reg: regInfo{
@ -6747,7 +6744,6 @@ var opcodeTable = [...]opInfo{
{ {
name: "HMULLU", name: "HMULLU",
argLen: 2, argLen: 2,
commutative: true,
clobberFlags: true, clobberFlags: true,
asm: x86.AMULL, asm: x86.AMULL,
reg: regInfo{ reg: regInfo{