From 14cb41584dbf6b0ced5a6467034c1821210fd01c Mon Sep 17 00:00:00 2001 From: David du Colombier <0intro@gmail.com> Date: Fri, 25 Aug 2017 01:56:50 +0200 Subject: [PATCH] cmd/compile: don't use MOVOstore instruction on plan9/amd64 CL 54410 and CL 56250 recently added use of the MOVOstore instruction to improve performance. However, we can't use the MOVOstore instruction on Plan 9, because floating point operations are not allowed in the note handler. This change adds a configuration flag useSSE to enable the use of SSE instructions for non-floating point operations. This flag is enabled by default and disabled on Plan 9. When this flag is disabled, the MOVOstore instruction is not used and the MOVQstoreconst instruction is used instead. Fixes #21599 Change-Id: Ie609e5d9b82ec0092ae874bab4ce01caa5bc8fb8 Reviewed-on: https://go-review.googlesource.com/58850 Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/config.go | 7 +- src/cmd/compile/internal/ssa/gen/AMD64.rules | 36 +++- src/cmd/compile/internal/ssa/rewriteAMD64.go | 176 ++++++++++++++++--- 3 files changed, 185 insertions(+), 34 deletions(-) diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 6587c40ebc..54704ec60e 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -33,6 +33,7 @@ type Config struct { ctxt *obj.Link // Generic arch information optimize bool // Do optimization noDuffDevice bool // Don't use Duff's device + useSSE bool // Use SSE for non-float operations nacl bool // GOOS=nacl use387 bool // GO386=387 NeedsFpScratch bool // No direct move between GP and FP register sets @@ -264,11 +265,13 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config c.ctxt = ctxt c.optimize = optimize c.nacl = objabi.GOOS == "nacl" + c.useSSE = true - // Don't use Duff's device on Plan 9 AMD64, because floating - // point operations are not allowed in note handler. + // Don't use Duff's device nor SSE on Plan 9 AMD64, because + // floating point operations are not allowed in note handler. if objabi.GOOS == "plan9" && arch == "amd64" { c.noDuffDevice = true + c.useSSE = false } if c.nacl { diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index fff894c571..e648e0856b 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -386,29 +386,48 @@ (MOVLstoreconst [makeValAndOff(0,3)] destptr (MOVLstoreconst [0] destptr mem)) -(Zero [s] destptr mem) && s > 8 && s < 16 -> +// Strip off any fractional word zeroing. +(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE -> + (Zero [s-s%8] (OffPtr destptr [s%8]) + (MOVQstoreconst [0] destptr mem)) + +// Zero small numbers of words directly. +(Zero [16] destptr mem) && !config.useSSE -> + (MOVQstoreconst [makeValAndOff(0,8)] destptr + (MOVQstoreconst [0] destptr mem)) +(Zero [24] destptr mem) && !config.useSSE -> + (MOVQstoreconst [makeValAndOff(0,16)] destptr + (MOVQstoreconst [makeValAndOff(0,8)] destptr + (MOVQstoreconst [0] destptr mem))) +(Zero [32] destptr mem) && !config.useSSE -> + (MOVQstoreconst [makeValAndOff(0,24)] destptr + (MOVQstoreconst [makeValAndOff(0,16)] destptr + (MOVQstoreconst [makeValAndOff(0,8)] destptr + (MOVQstoreconst [0] destptr mem)))) + +(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE -> (MOVQstoreconst [makeValAndOff(0,s-8)] destptr (MOVQstoreconst [0] destptr mem)) // Adjust zeros to be a multiple of 16 bytes. -(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 -> +(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE -> (Zero [s-s%16] (OffPtr destptr [s%16]) (MOVOstore destptr (MOVOconst [0]) mem)) -(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 -> +(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE -> (Zero [s-s%16] (OffPtr destptr [s%16]) (MOVQstoreconst [0] destptr mem)) -(Zero [16] destptr mem) -> +(Zero [16] destptr mem) && config.useSSE -> (MOVOstore destptr (MOVOconst [0]) mem) -(Zero [32] destptr mem) -> +(Zero [32] destptr mem) && config.useSSE -> (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem)) -(Zero [48] destptr mem) -> +(Zero [48] destptr mem) && config.useSSE -> (MOVOstore (OffPtr destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem))) -(Zero [64] destptr mem) -> +(Zero [64] destptr mem) && config.useSSE -> (MOVOstore (OffPtr destptr [48]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) @@ -421,7 +440,7 @@ // Large zeroing uses REP STOSQ. (Zero [s] destptr mem) - && (s > 1024 || (config.noDuffDevice && s > 64)) + && (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0 -> (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem) @@ -2205,6 +2224,7 @@ && clobber(x) -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) + && config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 302812b170..3762931178 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -842,7 +842,7 @@ func rewriteValueAMD64(v *Value) bool { case OpXor8: return rewriteValueAMD64_OpXor8_0(v) case OpZero: - return rewriteValueAMD64_OpZero_0(v) || rewriteValueAMD64_OpZero_10(v) + return rewriteValueAMD64_OpZero_0(v) || rewriteValueAMD64_OpZero_10(v) || rewriteValueAMD64_OpZero_20(v) case OpZeroExt16to32: return rewriteValueAMD64_OpZeroExt16to32_0(v) case OpZeroExt16to64: @@ -9089,6 +9089,8 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool { func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool { b := v.Block _ = b + config := b.Func.Config + _ = config // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) // cond: ValAndOff(sc).canAdd(off) // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) @@ -9218,7 +9220,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool { return true } // match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) + // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) // result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) for { c := v.AuxInt @@ -9238,7 +9240,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool { break } mem := x.Args[1] - if !(x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) { + if !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) { break } v.reset(OpAMD64MOVOstore) @@ -42862,6 +42864,8 @@ func rewriteValueAMD64_OpXor8_0(v *Value) bool { func rewriteValueAMD64_OpZero_0(v *Value) bool { b := v.Block _ = b + config := b.Func.Config + _ = config // match: (Zero [0] _ mem) // cond: // result: mem @@ -43021,14 +43025,126 @@ func rewriteValueAMD64_OpZero_0(v *Value) bool { return true } // match: (Zero [s] destptr mem) - // cond: s > 8 && s < 16 + // cond: s%8 != 0 && s > 8 && !config.useSSE + // result: (Zero [s-s%8] (OffPtr destptr [s%8]) (MOVQstoreconst [0] destptr mem)) + for { + s := v.AuxInt + _ = v.Args[1] + destptr := v.Args[0] + mem := v.Args[1] + if !(s%8 != 0 && s > 8 && !config.useSSE) { + break + } + v.reset(OpZero) + v.AuxInt = s - s%8 + v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type) + v0.AuxInt = s % 8 + v0.AddArg(destptr) + v.AddArg(v0) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v1.AuxInt = 0 + v1.AddArg(destptr) + v1.AddArg(mem) + v.AddArg(v1) + return true + } + return false +} +func rewriteValueAMD64_OpZero_10(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (Zero [16] destptr mem) + // cond: !config.useSSE + // result: (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)) + for { + if v.AuxInt != 16 { + break + } + _ = v.Args[1] + destptr := v.Args[0] + mem := v.Args[1] + if !(!config.useSSE) { + break + } + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = makeValAndOff(0, 8) + v.AddArg(destptr) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = 0 + v0.AddArg(destptr) + v0.AddArg(mem) + v.AddArg(v0) + return true + } + // match: (Zero [24] destptr mem) + // cond: !config.useSSE + // result: (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))) + for { + if v.AuxInt != 24 { + break + } + _ = v.Args[1] + destptr := v.Args[0] + mem := v.Args[1] + if !(!config.useSSE) { + break + } + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = makeValAndOff(0, 16) + v.AddArg(destptr) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = makeValAndOff(0, 8) + v0.AddArg(destptr) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v1.AuxInt = 0 + v1.AddArg(destptr) + v1.AddArg(mem) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (Zero [32] destptr mem) + // cond: !config.useSSE + // result: (MOVQstoreconst [makeValAndOff(0,24)] destptr (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)))) + for { + if v.AuxInt != 32 { + break + } + _ = v.Args[1] + destptr := v.Args[0] + mem := v.Args[1] + if !(!config.useSSE) { + break + } + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = makeValAndOff(0, 24) + v.AddArg(destptr) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = makeValAndOff(0, 16) + v0.AddArg(destptr) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v1.AuxInt = makeValAndOff(0, 8) + v1.AddArg(destptr) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v2.AuxInt = 0 + v2.AddArg(destptr) + v2.AddArg(mem) + v1.AddArg(v2) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (Zero [s] destptr mem) + // cond: s > 8 && s < 16 && config.useSSE // result: (MOVQstoreconst [makeValAndOff(0,s-8)] destptr (MOVQstoreconst [0] destptr mem)) for { s := v.AuxInt _ = v.Args[1] destptr := v.Args[0] mem := v.Args[1] - if !(s > 8 && s < 16) { + if !(s > 8 && s < 16 && config.useSSE) { break } v.reset(OpAMD64MOVQstoreconst) @@ -43041,24 +43157,15 @@ func rewriteValueAMD64_OpZero_0(v *Value) bool { v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpZero_10(v *Value) bool { - b := v.Block - _ = b - config := b.Func.Config - _ = config - typ := &b.Func.Config.Types - _ = typ // match: (Zero [s] destptr mem) - // cond: s%16 != 0 && s > 16 && s%16 > 8 + // cond: s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE // result: (Zero [s-s%16] (OffPtr destptr [s%16]) (MOVOstore destptr (MOVOconst [0]) mem)) for { s := v.AuxInt _ = v.Args[1] destptr := v.Args[0] mem := v.Args[1] - if !(s%16 != 0 && s > 16 && s%16 > 8) { + if !(s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE) { break } v.reset(OpZero) @@ -43077,14 +43184,14 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { return true } // match: (Zero [s] destptr mem) - // cond: s%16 != 0 && s > 16 && s%16 <= 8 + // cond: s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE // result: (Zero [s-s%16] (OffPtr destptr [s%16]) (MOVQstoreconst [0] destptr mem)) for { s := v.AuxInt _ = v.Args[1] destptr := v.Args[0] mem := v.Args[1] - if !(s%16 != 0 && s > 16 && s%16 <= 8) { + if !(s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE) { break } v.reset(OpZero) @@ -43101,7 +43208,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { return true } // match: (Zero [16] destptr mem) - // cond: + // cond: config.useSSE // result: (MOVOstore destptr (MOVOconst [0]) mem) for { if v.AuxInt != 16 { @@ -43110,6 +43217,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { _ = v.Args[1] destptr := v.Args[0] mem := v.Args[1] + if !(config.useSSE) { + break + } v.reset(OpAMD64MOVOstore) v.AddArg(destptr) v0 := b.NewValue0(v.Pos, OpAMD64MOVOconst, types.TypeInt128) @@ -43119,7 +43229,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { return true } // match: (Zero [32] destptr mem) - // cond: + // cond: config.useSSE // result: (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem)) for { if v.AuxInt != 32 { @@ -43128,6 +43238,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { _ = v.Args[1] destptr := v.Args[0] mem := v.Args[1] + if !(config.useSSE) { + break + } v.reset(OpAMD64MOVOstore) v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type) v0.AuxInt = 16 @@ -43146,7 +43259,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { return true } // match: (Zero [48] destptr mem) - // cond: + // cond: config.useSSE // result: (MOVOstore (OffPtr destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem))) for { if v.AuxInt != 48 { @@ -43155,6 +43268,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { _ = v.Args[1] destptr := v.Args[0] mem := v.Args[1] + if !(config.useSSE) { + break + } v.reset(OpAMD64MOVOstore) v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type) v0.AuxInt = 32 @@ -43182,7 +43298,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { return true } // match: (Zero [64] destptr mem) - // cond: + // cond: config.useSSE // result: (MOVOstore (OffPtr destptr [48]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem)))) for { if v.AuxInt != 64 { @@ -43191,6 +43307,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { _ = v.Args[1] destptr := v.Args[0] mem := v.Args[1] + if !(config.useSSE) { + break + } v.reset(OpAMD64MOVOstore) v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type) v0.AuxInt = 48 @@ -43226,6 +43345,15 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { v.AddArg(v2) return true } + return false +} +func rewriteValueAMD64_OpZero_20(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + typ := &b.Func.Config.Types + _ = typ // match: (Zero [s] destptr mem) // cond: s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice // result: (DUFFZERO [s] destptr (MOVOconst [0]) mem) @@ -43247,14 +43375,14 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool { return true } // match: (Zero [s] destptr mem) - // cond: (s > 1024 || (config.noDuffDevice && s > 64)) && s%8 == 0 + // cond: (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0 // result: (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem) for { s := v.AuxInt _ = v.Args[1] destptr := v.Args[0] mem := v.Args[1] - if !((s > 1024 || (config.noDuffDevice && s > 64)) && s%8 == 0) { + if !((s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0) { break } v.reset(OpAMD64REPSTOSQ)