diff --git a/src/cmd/compile/internal/gc/asm_test.go b/src/cmd/compile/internal/gc/asm_test.go
index bfd1f8a784d..232f39d6b48 100644
--- a/src/cmd/compile/internal/gc/asm_test.go
+++ b/src/cmd/compile/internal/gc/asm_test.go
@@ -261,6 +261,11 @@ var allAsmTests = []*asmTests{
 		os:    "linux",
 		tests: linuxPPC64LETests,
 	},
+	{
+		arch:  "amd64",
+		os:    "plan9",
+		tests: plan9AMD64Tests,
+	},
 }
 
 var linuxAMD64Tests = []*asmTest{
@@ -2126,6 +2131,34 @@ var linuxPPC64LETests = []*asmTest{
 	},
 }
 
+var plan9AMD64Tests = []*asmTest{
+	// We should make sure that the compiler doesn't generate floating point
+	// instructions for non-float operations on Plan 9, because floating point
+	// operations are not allowed in the note handler.
+	// Array zeroing.
+	{
+		`
+		func $() [16]byte {
+			var a [16]byte
+			return a
+		}
+		`,
+		[]string{"\tMOVQ\t\\$0, \"\""},
+		[]string{},
+	},
+	// Array copy.
+	{
+		`
+		func $(a [16]byte) (b [16]byte) {
+			b = a
+			return
+		}
+		`,
+		[]string{"\tMOVQ\t\"\"\\.a\\+[0-9]+\\(SP\\), (AX|CX)", "\tMOVQ\t(AX|CX), \"\"\\.b\\+[0-9]+\\(SP\\)"},
+		[]string{},
+	},
+}
+
 // TestLineNumber checks to make sure the generated assembly has line numbers
 // see issue #16214
 func TestLineNumber(t *testing.T) {
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 0e19e5970a5..ff1b97b0a4c 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -319,7 +319,10 @@
 (Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
 (Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
 (Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
-(Move [16] dst src mem) -> (MOVOstore dst (MOVOload src mem) mem)
+(Move [16] dst src mem) && config.useSSE -> (MOVOstore dst (MOVOload src mem) mem)
+(Move [16] dst src mem) && !config.useSSE ->
+	(MOVQstore [8] dst (MOVQload [8] src mem)
+		(MOVQstore dst (MOVQload src mem) mem))
 (Move [3] dst src mem) ->
 	(MOVBstore [2] dst (MOVBload [2] src mem)
 		(MOVWstore dst (MOVWload src mem) mem))
@@ -344,11 +347,18 @@
 		(OffPtr <src.Type> src [s%16])
 		(MOVQstore dst (MOVQload src mem) mem))
 (Move [s] dst src mem)
-	&& s > 16 && s%16 != 0 && s%16 > 8 ->
+	&& s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE ->
 	(Move [s-s%16]
 		(OffPtr <dst.Type> dst [s%16])
 		(OffPtr <src.Type> src [s%16])
 		(MOVOstore dst (MOVOload src mem) mem))
+(Move [s] dst src mem)
+	&& s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE ->
+	(Move [s-s%16]
+		(OffPtr <dst.Type> dst [s%16])
+		(OffPtr <src.Type> src [s%16])
+		(MOVQstore [8] dst (MOVQload [8] src mem)
+			(MOVQstore dst (MOVQload src mem) mem)))
 
 // Medium copying uses a duff device.
 (Move [s] dst src mem)
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index e2971696bb3..399afa6ef9b 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -41824,6 +41824,8 @@ func rewriteValueAMD64_OpMod8u_0(v *Value) bool {
 func rewriteValueAMD64_OpMove_0(v *Value) bool {
 	b := v.Block
 	_ = b
+	config := b.Func.Config
+	_ = config
 	typ := &b.Func.Config.Types
 	_ = typ
 	// match: (Move [0] _ _ mem)
@@ -41921,7 +41923,7 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
 		return true
 	}
 	// match: (Move [16] dst src mem)
-	// cond:
+	// cond: config.useSSE
 	// result: (MOVOstore dst (MOVOload src mem) mem)
 	for {
 		if v.AuxInt != 16 {
@@ -41931,6 +41933,9 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
 		dst := v.Args[0]
 		src := v.Args[1]
 		mem := v.Args[2]
+		if !(config.useSSE) {
+			break
+		}
 		v.reset(OpAMD64MOVOstore)
 		v.AddArg(dst)
 		v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128)
@@ -41940,6 +41945,38 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (Move [16] dst src mem)
+	// cond: !config.useSSE
+	// result: (MOVQstore [8] dst (MOVQload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
+	for {
+		if v.AuxInt != 16 {
+			break
+		}
+		_ = v.Args[2]
+		dst := v.Args[0]
+		src := v.Args[1]
+		mem := v.Args[2]
+		if !(!config.useSSE) {
+			break
+		}
+		v.reset(OpAMD64MOVQstore)
+		v.AuxInt = 8
+		v.AddArg(dst)
+		v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+		v0.AuxInt = 8
+		v0.AddArg(src)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+		v1.AddArg(dst)
+		v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+		v2.AddArg(src)
+		v2.AddArg(mem)
+		v1.AddArg(v2)
+		v1.AddArg(mem)
+		v.AddArg(v1)
+		return true
+	}
 	// match: (Move [3] dst src mem)
 	// cond:
 	// result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
@@ -42027,6 +42064,15 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
 		v.AddArg(v1)
 		return true
 	}
+	return false
+}
+func rewriteValueAMD64_OpMove_10(v *Value) bool {
+	b := v.Block
+	_ = b
+	config := b.Func.Config
+	_ = config
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (Move [7] dst src mem)
 	// cond:
 	// result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem))
@@ -42056,15 +42102,6 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
 		v.AddArg(v1)
 		return true
 	}
-	return false
-}
-func rewriteValueAMD64_OpMove_10(v *Value) bool {
-	b := v.Block
-	_ = b
-	config := b.Func.Config
-	_ = config
-	typ := &b.Func.Config.Types
-	_ = typ
 	// match: (Move [s] dst src mem)
 	// cond: s > 8 && s < 16
 	// result: (MOVQstore [s-8] dst (MOVQload [s-8] src mem) (MOVQstore dst (MOVQload src mem) mem))
@@ -42128,7 +42165,7 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool {
 		return true
 	}
 	// match: (Move [s] dst src mem)
-	// cond: s > 16 && s%16 != 0 && s%16 > 8
+	// cond: s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE
 	// result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVOstore dst (MOVOload src mem) mem))
 	for {
 		s := v.AuxInt
@@ -42136,7 +42173,7 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool {
 		dst := v.Args[0]
 		src := v.Args[1]
 		mem := v.Args[2]
-		if !(s > 16 && s%16 != 0 && s%16 > 8) {
+		if !(s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE) {
 			break
 		}
 		v.reset(OpMove)
@@ -42160,6 +42197,47 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool {
 		return true
 	}
 	// match: (Move [s] dst src mem)
+	// cond: s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE
+	// result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVQstore [8] dst (MOVQload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)))
+	for {
+		s := v.AuxInt
+		_ = v.Args[2]
+		dst := v.Args[0]
+		src := v.Args[1]
+		mem := v.Args[2]
+		if !(s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE) {
+			break
+		}
+		v.reset(OpMove)
+		v.AuxInt = s - s%16
+		v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
+		v0.AuxInt = s % 16
+		v0.AddArg(dst)
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
+		v1.AuxInt = s % 16
+		v1.AddArg(src)
+		v.AddArg(v1)
+		v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+		v2.AuxInt = 8
+		v2.AddArg(dst)
+		v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+		v3.AuxInt = 8
+		v3.AddArg(src)
+		v3.AddArg(mem)
+		v2.AddArg(v3)
+		v4 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+		v4.AddArg(dst)
+		v5 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+		v5.AddArg(src)
+		v5.AddArg(mem)
+		v4.AddArg(v5)
+		v4.AddArg(mem)
+		v2.AddArg(v4)
+		v.AddArg(v2)
+		return true
+	}
+	// match: (Move [s] dst src mem)
 	// cond: s >= 32 && s <= 16*64 && s%16 == 0 && !config.noDuffDevice
 	// result: (DUFFCOPY [14*(64-s/16)] dst src mem)
 	for {