From e7307034ccdd3c3e482f80b9ea6d4a69bb04d7b6 Mon Sep 17 00:00:00 2001 From: cuiweixie Date: Fri, 5 Aug 2022 14:01:57 +0000 Subject: [PATCH] cmd/compile: store combine on amd64 Fixes #54120 Change-Id: I6915b6e8d459d9becfdef4fdcba95ee4dea6af05 GitHub-Last-Rev: 03f19942c7a697d3b5e696e700a9827633d709bb GitHub-Pull-Request: golang/go#54126 Reviewed-on: https://go-review.googlesource.com/c/go/+/420115 Reviewed-by: Keith Randall Reviewed-by: Dmitri Shuralyov TryBot-Result: Gopher Robot Reviewed-by: Than McIntosh Run-TryBot: Wayne Zuo --- src/cmd/compile/internal/ssa/gen/AMD64.rules | 13 +++--- src/cmd/compile/internal/ssa/rewrite.go | 3 ++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 44 ++++++++++++-------- test/codegen/memcombine.go | 8 ++++ 4 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index c0a376e3525..d96a37f7ce5 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -2043,15 +2043,18 @@ && clobber(x) => (MOVQstore [i] {s} p0 w0 mem) -(MOVBstore [7] {s} p1 (SHRQconst [56] w) - x1:(MOVWstore [5] {s} p1 (SHRQconst [40] w) - x2:(MOVLstore [1] {s} p1 (SHRQconst [8] w) - x3:(MOVBstore [0] {s} p1 w mem)))) +(MOVBstore [c3] {s} p3 (SHRQconst [56] w) + x1:(MOVWstore [c2] {s} p2 (SHRQconst [40] w) + x2:(MOVLstore [c1] {s} p1 (SHRQconst [8] w) + x3:(MOVBstore [c0] {s} p0 w mem)))) && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 + && sequentialAddresses(p0, p1, int64(1 + c0 - c1)) + && sequentialAddresses(p0, p2, int64(5 + c0 - c2)) + && sequentialAddresses(p0, p3, int64(7 + c0 - c3)) && clobber(x1, x2, x3) - => (MOVQstore {s} p1 w mem) + => (MOVQstore [c0] {s} p0 w mem) (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 05fb2f27764..15a5cf61363 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1764,6 +1764,9 @@ func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 { // sequentialAddresses reports true if it can prove that x + n == y func sequentialAddresses(x, y *Value, n int64) bool { + if x == y && n == 0 { + return true + } if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil && (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 341fcc2f076..434496bd2f1 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -11255,54 +11255,62 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(p0, w0, mem) return true } - // match: (MOVBstore [7] {s} p1 (SHRQconst [56] w) x1:(MOVWstore [5] {s} p1 (SHRQconst [40] w) x2:(MOVLstore [1] {s} p1 (SHRQconst [8] w) x3:(MOVBstore [0] {s} p1 w mem)))) - // cond: x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && clobber(x1, x2, x3) - // result: (MOVQstore {s} p1 w mem) + // match: (MOVBstore [c3] {s} p3 (SHRQconst [56] w) x1:(MOVWstore [c2] {s} p2 (SHRQconst [40] w) x2:(MOVLstore [c1] {s} p1 (SHRQconst [8] w) x3:(MOVBstore [c0] {s} p0 w mem)))) + // cond: x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && sequentialAddresses(p0, p1, int64(1 + c0 - c1)) && sequentialAddresses(p0, p2, int64(5 + c0 - c2)) && sequentialAddresses(p0, p3, int64(7 + c0 - c3)) && clobber(x1, x2, x3) + // result: (MOVQstore [c0] {s} p0 w mem) for { - if auxIntToInt32(v.AuxInt) != 7 { - break - } + c3 := auxIntToInt32(v.AuxInt) s := auxToSym(v.Aux) - p1 := v_0 + p3 := v_0 if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 56 { break } w := v_1.Args[0] x1 := v_2 - if x1.Op != OpAMD64MOVWstore || auxIntToInt32(x1.AuxInt) != 5 || auxToSym(x1.Aux) != s { + if x1.Op != OpAMD64MOVWstore { + break + } + c2 := auxIntToInt32(x1.AuxInt) + if auxToSym(x1.Aux) != s { break } _ = x1.Args[2] - if p1 != x1.Args[0] { - break - } + p2 := x1.Args[0] x1_1 := x1.Args[1] if x1_1.Op != OpAMD64SHRQconst || auxIntToInt8(x1_1.AuxInt) != 40 || w != x1_1.Args[0] { break } x2 := x1.Args[2] - if x2.Op != OpAMD64MOVLstore || auxIntToInt32(x2.AuxInt) != 1 || auxToSym(x2.Aux) != s { + if x2.Op != OpAMD64MOVLstore { + break + } + c1 := auxIntToInt32(x2.AuxInt) + if auxToSym(x2.Aux) != s { break } _ = x2.Args[2] - if p1 != x2.Args[0] { - break - } + p1 := x2.Args[0] x2_1 := x2.Args[1] if x2_1.Op != OpAMD64SHRQconst || auxIntToInt8(x2_1.AuxInt) != 8 || w != x2_1.Args[0] { break } x3 := x2.Args[2] - if x3.Op != OpAMD64MOVBstore || auxIntToInt32(x3.AuxInt) != 0 || auxToSym(x3.Aux) != s { + if x3.Op != OpAMD64MOVBstore { + break + } + c0 := auxIntToInt32(x3.AuxInt) + if auxToSym(x3.Aux) != s { break } mem := x3.Args[2] - if p1 != x3.Args[0] || w != x3.Args[1] || !(x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && clobber(x1, x2, x3)) { + p0 := x3.Args[0] + if w != x3.Args[1] || !(x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && sequentialAddresses(p0, p1, int64(1+c0-c1)) && sequentialAddresses(p0, p2, int64(5+c0-c2)) && sequentialAddresses(p0, p3, int64(7+c0-c3)) && clobber(x1, x2, x3)) { break } v.reset(OpAMD64MOVQstore) + v.AuxInt = int32ToAuxInt(c0) v.Aux = symToAux(s) - v.AddArg3(p1, w, mem) + v.AddArg3(p0, w, mem) return true } // match: (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) mem2:(MOVBstore [i-1] {s} p x2:(MOVBload [j-1] {s2} p2 mem) mem)) diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index 1427f7a7d57..6e0132744ce 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -369,6 +369,14 @@ func store_le64_idx(b []byte, x uint64, idx int) { binary.LittleEndian.PutUint64(b[idx:], x) } +func store_le64_idx2(dst []byte, d, length, offset int) []byte { + a := dst[d : d+length] + b := dst[d-offset:] + // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.` + binary.LittleEndian.PutUint64(a, binary.LittleEndian.Uint64(b)) + return dst +} + func store_le64_load(b []byte, x *[8]byte) { _ = b[8] // amd64:-`MOV[BWL]`