From 0be8280d8d01ef348fede17aef13aecf15cd7091 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Derkacz?=
Date: Thu, 29 Oct 2020 01:10:49 +0100
Subject: [PATCH] cmd/compile: optimize small zeroing/moving on riscv64

Optimize small (s <= 32) zeroing/moving operations on riscv64. Avoid
generating unaligned memory accesses. The code is almost a one-to-one
translation of the corresponding mips64 rules, with an additional rule
for s=32.

Change-Id: I753b0b8e53cb9efcf43c8080cab90f3d03539fb8
Reviewed-on: https://go-review.googlesource.com/c/go/+/266217
Reviewed-by: Joel Sing
Reviewed-by: Cherry Zhang
---
 .../compile/internal/ssa/gen/RISCV64.rules |  129 +++-
 .../compile/internal/ssa/rewriteRISCV64.go |  670 +++++++++++++++++-
 2 files changed, 757 insertions(+), 42 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
index 449f3cad03c..4380a5efef7 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
@@ -9,7 +9,6 @@
 // * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants.
 // * Arrange for non-trivial Zero and Move lowerings to use aligned loads and stores.
 // * Eliminate zero immediate shifts, adds, etc.
-// * Use a Duff's device for some moves and zeros.
 // * Avoid using Neq32 for writeBarrier.enabled checks.
 
 // Lowering arithmetic
@@ -352,18 +351,64 @@
 // with OffPtr -> ADDI.
 (ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x)
 
-// Zeroing
-// TODO: more optimized zeroing, including attempting to use aligned accesses.
-(Zero [0] _ mem) => mem
-(Zero [1] ptr mem) => (MOVBstore ptr (MOVBconst) mem)
-(Zero [2] ptr mem) => (MOVHstore ptr (MOVHconst) mem)
-(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem)
-(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem)
+// Small zeroing
+(Zero [0] _ mem) => mem
+(Zero [1] ptr mem) => (MOVBstore ptr (MOVBconst [0]) mem)
+(Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 =>
+	(MOVHstore ptr (MOVHconst [0]) mem)
+(Zero [2] ptr mem) =>
+	(MOVBstore [1] ptr (MOVBconst [0])
+		(MOVBstore ptr (MOVBconst [0]) mem))
+(Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 =>
+	(MOVWstore ptr (MOVWconst [0]) mem)
+(Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 =>
+	(MOVHstore [2] ptr (MOVHconst [0])
+		(MOVHstore ptr (MOVHconst [0]) mem))
+(Zero [4] ptr mem) =>
+	(MOVBstore [3] ptr (MOVBconst [0])
+		(MOVBstore [2] ptr (MOVBconst [0])
+			(MOVBstore [1] ptr (MOVBconst [0])
+				(MOVBstore ptr (MOVBconst [0]) mem))))
+(Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 =>
+	(MOVDstore ptr (MOVDconst [0]) mem)
+(Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 =>
+	(MOVWstore [4] ptr (MOVWconst [0])
+		(MOVWstore ptr (MOVWconst [0]) mem))
+(Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 =>
+	(MOVHstore [6] ptr (MOVHconst [0])
+		(MOVHstore [4] ptr (MOVHconst [0])
+			(MOVHstore [2] ptr (MOVHconst [0])
+				(MOVHstore ptr (MOVHconst [0]) mem))))
-// Medium zeroing uses a Duff's device
+(Zero [3] ptr mem) =>
+	(MOVBstore [2] ptr (MOVBconst [0])
+		(MOVBstore [1] ptr (MOVBconst [0])
+			(MOVBstore ptr (MOVBconst [0]) mem)))
+(Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 =>
+	(MOVHstore [4] ptr (MOVHconst [0])
+		(MOVHstore [2] ptr (MOVHconst [0])
+			(MOVHstore ptr (MOVHconst [0]) mem)))
+(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
+	(MOVWstore [8] ptr (MOVWconst [0])
+		(MOVWstore [4] ptr (MOVWconst [0])
+			(MOVWstore ptr (MOVWconst [0]) mem)))
+(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
+	(MOVDstore [8] ptr 
(MOVDconst [0]) + (MOVDstore ptr (MOVDconst [0]) mem)) +(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 => + (MOVDstore [16] ptr (MOVDconst [0]) + (MOVDstore [8] ptr (MOVDconst [0]) + (MOVDstore ptr (MOVDconst [0]) mem))) +(Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 => + (MOVDstore [24] ptr (MOVDconst [0]) + (MOVDstore [16] ptr (MOVDconst [0]) + (MOVDstore [8] ptr (MOVDconst [0]) + (MOVDstore ptr (MOVDconst [0]) mem)))) + +// Medium 8-aligned zeroing uses a Duff's device // 8 and 128 are magic constants, see runtime/mkduff.go (Zero [s] {t} ptr mem) - && s%8 == 0 && s >= 16 && s <= 8*128 + && s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice => (DUFFZERO [8 * (128 - s/8)] ptr mem) @@ -377,7 +422,7 @@ (Convert ...) => (MOVconvert ...) // Checks -(IsNonNil p) => (NeqPtr (MOVDconst) p) +(IsNonNil p) => (NeqPtr (MOVDconst [0]) p) (IsInBounds ...) => (Less64U ...) (IsSliceInBounds ...) => (Leq64U ...) @@ -394,18 +439,64 @@ (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) -// Moves -// TODO: more optimized moves, including attempting to use aligned accesses. -(Move [0] _ _ mem) => mem +// Small moves +(Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem) -(Move [2] dst src mem) => (MOVHstore dst (MOVHload src mem) mem) -(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem) -(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem) +(Move [2] {t} dst src mem) && t.Alignment()%2 == 0 => + (MOVHstore dst (MOVHload src mem) mem) +(Move [2] dst src mem) => + (MOVBstore [1] dst (MOVBload [1] src mem) + (MOVBstore dst (MOVBload src mem) mem)) +(Move [4] {t} dst src mem) && t.Alignment()%4 == 0 => + (MOVWstore dst (MOVWload src mem) mem) +(Move [4] {t} dst src mem) && t.Alignment()%2 == 0 => + (MOVHstore [2] dst (MOVHload [2] src mem) + (MOVHstore dst (MOVHload src mem) mem)) +(Move [4] dst src mem) => + (MOVBstore [3] dst (MOVBload [3] src mem) + (MOVBstore [2] dst (MOVBload [2] src mem) + (MOVBstore [1] dst (MOVBload [1] src mem) + (MOVBstore dst (MOVBload src mem) mem)))) +(Move [8] {t} dst src mem) && t.Alignment()%8 == 0 => + (MOVDstore dst (MOVDload src mem) mem) +(Move [8] {t} dst src mem) && t.Alignment()%4 == 0 => + (MOVWstore [4] dst (MOVWload [4] src mem) + (MOVWstore dst (MOVWload src mem) mem)) +(Move [8] {t} dst src mem) && t.Alignment()%2 == 0 => + (MOVHstore [6] dst (MOVHload [6] src mem) + (MOVHstore [4] dst (MOVHload [4] src mem) + (MOVHstore [2] dst (MOVHload [2] src mem) + (MOVHstore dst (MOVHload src mem) mem)))) -// Medium move uses a Duff's device +(Move [3] dst src mem) => + (MOVBstore [2] dst (MOVBload [2] src mem) + (MOVBstore [1] dst (MOVBload [1] src mem) + (MOVBstore dst (MOVBload src mem) mem))) +(Move [6] {t} dst src mem) && t.Alignment()%2 == 0 => + (MOVHstore [4] dst (MOVHload [4] src mem) + (MOVHstore [2] dst (MOVHload [2] src mem) + (MOVHstore dst (MOVHload src mem) mem))) +(Move [12] {t} dst src mem) && t.Alignment()%4 == 0 => + (MOVWstore [8] dst (MOVWload [8] src mem) + (MOVWstore [4] dst (MOVWload [4] src mem) + (MOVWstore dst (MOVWload src mem) mem))) +(Move [16] {t} dst src mem) && t.Alignment()%8 == 0 => + (MOVDstore [8] dst (MOVDload [8] src mem) + (MOVDstore dst (MOVDload src mem) mem)) +(Move [24] {t} dst src mem) && t.Alignment()%8 == 0 => + (MOVDstore [16] dst (MOVDload [16] src mem) + (MOVDstore [8] dst (MOVDload [8] src mem) + (MOVDstore dst 
(MOVDload src mem) mem))) +(Move [32] {t} dst src mem) && t.Alignment()%8 == 0 => + (MOVDstore [24] dst (MOVDload [24] src mem) + (MOVDstore [16] dst (MOVDload [16] src mem) + (MOVDstore [8] dst (MOVDload [8] src mem) + (MOVDstore dst (MOVDload src mem) mem)))) + +// Medium 8-aligned move uses a Duff's device // 16 and 128 are magic constants, see runtime/mkduff.go (Move [s] {t} dst src mem) - && s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 + && s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s) => (DUFFCOPY [16 * (128 - s/8)] dst src mem) diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index a91e8428ef0..fb507b65c4c 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -4,6 +4,7 @@ package ssa import "math" +import "cmd/compile/internal/types" func rewriteValueRISCV64(v *Value) bool { switch v.Op { @@ -1020,11 +1021,12 @@ func rewriteValueRISCV64_OpIsNonNil(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (IsNonNil p) - // result: (NeqPtr (MOVDconst) p) + // result: (NeqPtr (MOVDconst [0]) p) for { p := v_0 v.reset(OpNeqPtr) v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) v.AddArg2(v0, p) return true } @@ -1971,8 +1973,28 @@ func rewriteValueRISCV64_OpMove(v *Value) bool { v.AddArg3(dst, v0, mem) return true } - // match: (Move [2] dst src mem) + // match: (Move [2] {t} dst src mem) + // cond: t.Alignment()%2 == 0 // result: (MOVHstore dst (MOVHload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 2 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%2 == 0) { + break + } + v.reset(OpRISCV64MOVHstore) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [2] dst src mem) + // result: (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem)) for { if auxIntToInt64(v.AuxInt) != 2 { break @@ -1980,14 +2002,66 @@ func rewriteValueRISCV64_OpMove(v *Value) bool { dst := v_0 src := v_1 mem := v_2 - v.reset(OpRISCV64MOVHstore) - v0 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v.reset(OpRISCV64MOVBstore) + v.AuxInt = int32ToAuxInt(1) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v0.AuxInt = int32ToAuxInt(1) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [4] {t} dst src mem) + // cond: t.Alignment()%4 == 0 + // result: (MOVWstore dst (MOVWload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 4 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%4 == 0) { + break + } + v.reset(OpRISCV64MOVWstore) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32) v0.AddArg2(src, mem) v.AddArg3(dst, v0, mem) return true } + // match: (Move [4] {t} dst src mem) + // cond: t.Alignment()%2 == 0 + // result: (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 4 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%2 == 0) { + break + } + v.reset(OpRISCV64MOVHstore) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVHload, 
typ.Int16) + v0.AuxInt = int32ToAuxInt(2) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } // match: (Move [4] dst src mem) - // result: (MOVWstore dst (MOVWload src mem) mem) + // result: (MOVBstore [3] dst (MOVBload [3] src mem) (MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem)))) for { if auxIntToInt64(v.AuxInt) != 4 { break @@ -1995,29 +2069,303 @@ func rewriteValueRISCV64_OpMove(v *Value) bool { dst := v_0 src := v_1 mem := v_2 - v.reset(OpRISCV64MOVWstore) - v0 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32) + v.reset(OpRISCV64MOVBstore) + v.AuxInt = int32ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v0.AuxInt = int32ToAuxInt(3) v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(2) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v2.AuxInt = int32ToAuxInt(2) + v2.AddArg2(src, mem) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v3.AuxInt = int32ToAuxInt(1) + v4 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v4.AuxInt = int32ToAuxInt(1) + v4.AddArg2(src, mem) + v5 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v6 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v6.AddArg2(src, mem) + v5.AddArg3(dst, v6, mem) + v3.AddArg3(dst, v4, v5) + v1.AddArg3(dst, v2, v3) + v.AddArg3(dst, v0, v1) return true } - // match: (Move [8] dst src mem) + // match: (Move [8] {t} dst src mem) + // cond: t.Alignment()%8 == 0 // result: (MOVDstore dst (MOVDload src mem) mem) for { if auxIntToInt64(v.AuxInt) != 8 { break } + t := auxToType(v.Aux) dst := v_0 src := v_1 mem := v_2 + if !(t.Alignment()%8 == 0) { + break + } v.reset(OpRISCV64MOVDstore) v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) v0.AddArg2(src, mem) v.AddArg3(dst, v0, mem) return true } + // match: (Move [8] {t} dst src mem) + // cond: t.Alignment()%4 == 0 + // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 8 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%4 == 0) { + break + } + v.reset(OpRISCV64MOVWstore) + v.AuxInt = int32ToAuxInt(4) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32) + v0.AuxInt = int32ToAuxInt(4) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [8] {t} dst src mem) + // cond: t.Alignment()%2 == 0 + // result: (MOVHstore [6] dst (MOVHload [6] src mem) (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)))) + for { + if auxIntToInt64(v.AuxInt) != 8 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%2 == 0) { + break + } + v.reset(OpRISCV64MOVHstore) + v.AuxInt = int32ToAuxInt(6) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v0.AuxInt = int32ToAuxInt(6) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(4) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v2.AuxInt = 
int32ToAuxInt(4) + v2.AddArg2(src, mem) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v3.AuxInt = int32ToAuxInt(2) + v4 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v4.AuxInt = int32ToAuxInt(2) + v4.AddArg2(src, mem) + v5 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v6 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v6.AddArg2(src, mem) + v5.AddArg3(dst, v6, mem) + v3.AddArg3(dst, v4, v5) + v1.AddArg3(dst, v2, v3) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [3] dst src mem) + // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem))) + for { + if auxIntToInt64(v.AuxInt) != 3 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpRISCV64MOVBstore) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v0.AuxInt = int32ToAuxInt(2) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(1) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v2.AuxInt = int32ToAuxInt(1) + v2.AddArg2(src, mem) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v4 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8) + v4.AddArg2(src, mem) + v3.AddArg3(dst, v4, mem) + v1.AddArg3(dst, v2, v3) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [6] {t} dst src mem) + // cond: t.Alignment()%2 == 0 + // result: (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem))) + for { + if auxIntToInt64(v.AuxInt) != 6 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%2 == 0) { + break + } + v.reset(OpRISCV64MOVHstore) + v.AuxInt = int32ToAuxInt(4) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v0.AuxInt = int32ToAuxInt(4) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(2) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v2.AuxInt = int32ToAuxInt(2) + v2.AddArg2(src, mem) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v4 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16) + v4.AddArg2(src, mem) + v3.AddArg3(dst, v4, mem) + v1.AddArg3(dst, v2, v3) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [12] {t} dst src mem) + // cond: t.Alignment()%4 == 0 + // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem))) + for { + if auxIntToInt64(v.AuxInt) != 12 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%4 == 0) { + break + } + v.reset(OpRISCV64MOVWstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32) + v0.AuxInt = int32ToAuxInt(8) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(4) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32) + v2.AuxInt = int32ToAuxInt(4) + v2.AddArg2(src, mem) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem) + v4 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32) + v4.AddArg2(src, mem) + v3.AddArg3(dst, v4, mem) + v1.AddArg3(dst, v2, v3) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [16] {t} dst src mem) + // cond: t.Alignment()%8 == 0 + // result: (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)) + for { + if 
auxIntToInt64(v.AuxInt) != 16 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%8 == 0) { + break + } + v.reset(OpRISCV64MOVDstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v0.AuxInt = int32ToAuxInt(8) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [24] {t} dst src mem) + // cond: t.Alignment()%8 == 0 + // result: (MOVDstore [16] dst (MOVDload [16] src mem) (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem))) + for { + if auxIntToInt64(v.AuxInt) != 24 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%8 == 0) { + break + } + v.reset(OpRISCV64MOVDstore) + v.AuxInt = int32ToAuxInt(16) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v0.AuxInt = int32ToAuxInt(16) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(8) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v2.AuxInt = int32ToAuxInt(8) + v2.AddArg2(src, mem) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v4 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v4.AddArg2(src, mem) + v3.AddArg3(dst, v4, mem) + v1.AddArg3(dst, v2, v3) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [32] {t} dst src mem) + // cond: t.Alignment()%8 == 0 + // result: (MOVDstore [24] dst (MOVDload [24] src mem) (MOVDstore [16] dst (MOVDload [16] src mem) (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)))) + for { + if auxIntToInt64(v.AuxInt) != 32 { + break + } + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(t.Alignment()%8 == 0) { + break + } + v.reset(OpRISCV64MOVDstore) + v.AuxInt = int32ToAuxInt(24) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v0.AuxInt = int32ToAuxInt(24) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(16) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v2.AuxInt = int32ToAuxInt(16) + v2.AddArg2(src, mem) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v3.AuxInt = int32ToAuxInt(8) + v4 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v4.AuxInt = int32ToAuxInt(8) + v4.AddArg2(src, mem) + v5 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v6 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64) + v6.AddArg2(src, mem) + v5.AddArg3(dst, v6, mem) + v3.AddArg3(dst, v4, v5) + v1.AddArg3(dst, v2, v3) + v.AddArg3(dst, v0, v1) + return true + } // match: (Move [s] {t} dst src mem) - // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s) + // cond: s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s) // result: (DUFFCOPY [16 * (128 - s/8)] dst src mem) for { s := auxIntToInt64(v.AuxInt) @@ -2025,7 +2373,7 @@ func rewriteValueRISCV64_OpMove(v *Value) bool { dst := v_0 src := v_1 mem := v_2 - if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)) { + if !(s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)) { break } v.reset(OpRISCV64DUFFCOPY) @@ -5747,7 +6095,7 @@ func rewriteValueRISCV64_OpZero(v *Value) 
bool { return true } // match: (Zero [1] ptr mem) - // result: (MOVBstore ptr (MOVBconst) mem) + // result: (MOVBstore ptr (MOVBconst [0]) mem) for { if auxIntToInt64(v.AuxInt) != 1 { break @@ -5756,57 +6104,333 @@ func rewriteValueRISCV64_OpZero(v *Value) bool { mem := v_1 v.reset(OpRISCV64MOVBstore) v0 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8) + v0.AuxInt = int8ToAuxInt(0) + v.AddArg3(ptr, v0, mem) + return true + } + // match: (Zero [2] {t} ptr mem) + // cond: t.Alignment()%2 == 0 + // result: (MOVHstore ptr (MOVHconst [0]) mem) + for { + if auxIntToInt64(v.AuxInt) != 2 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%2 == 0) { + break + } + v.reset(OpRISCV64MOVHstore) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16) + v0.AuxInt = int16ToAuxInt(0) v.AddArg3(ptr, v0, mem) return true } // match: (Zero [2] ptr mem) - // result: (MOVHstore ptr (MOVHconst) mem) + // result: (MOVBstore [1] ptr (MOVBconst [0]) (MOVBstore ptr (MOVBconst [0]) mem)) for { if auxIntToInt64(v.AuxInt) != 2 { break } ptr := v_0 mem := v_1 - v.reset(OpRISCV64MOVHstore) - v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16) + v.reset(OpRISCV64MOVBstore) + v.AuxInt = int32ToAuxInt(1) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v1.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, v0, v1) + return true + } + // match: (Zero [4] {t} ptr mem) + // cond: t.Alignment()%4 == 0 + // result: (MOVWstore ptr (MOVWconst [0]) mem) + for { + if auxIntToInt64(v.AuxInt) != 4 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%4 == 0) { + break + } + v.reset(OpRISCV64MOVWstore) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(0) v.AddArg3(ptr, v0, mem) return true } + // match: (Zero [4] {t} ptr mem) + // cond: t.Alignment()%2 == 0 + // result: (MOVHstore [2] ptr (MOVHconst [0]) (MOVHstore ptr (MOVHconst [0]) mem)) + for { + if auxIntToInt64(v.AuxInt) != 4 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%2 == 0) { + break + } + v.reset(OpRISCV64MOVHstore) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16) + v0.AuxInt = int16ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v1.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, v0, v1) + return true + } // match: (Zero [4] ptr mem) - // result: (MOVWstore ptr (MOVWconst) mem) + // result: (MOVBstore [3] ptr (MOVBconst [0]) (MOVBstore [2] ptr (MOVBconst [0]) (MOVBstore [1] ptr (MOVBconst [0]) (MOVBstore ptr (MOVBconst [0]) mem)))) for { if auxIntToInt64(v.AuxInt) != 4 { break } ptr := v_0 mem := v_1 - v.reset(OpRISCV64MOVWstore) - v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32) - v.AddArg3(ptr, v0, mem) + v.reset(OpRISCV64MOVBstore) + v.AuxInt = int32ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(2) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v2.AuxInt = int32ToAuxInt(1) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v3.AddArg3(ptr, v0, mem) + v2.AddArg3(ptr, v0, v3) + v1.AddArg3(ptr, v0, v2) + v.AddArg3(ptr, v0, v1) return true } - // match: (Zero [8] ptr mem) - // result: (MOVDstore ptr (MOVDconst) mem) + // match: (Zero [8] {t} ptr mem) + // cond: t.Alignment()%8 == 0 + // result: 
(MOVDstore ptr (MOVDconst [0]) mem) for { if auxIntToInt64(v.AuxInt) != 8 { break } + t := auxToType(v.Aux) ptr := v_0 mem := v_1 + if !(t.Alignment()%8 == 0) { + break + } v.reset(OpRISCV64MOVDstore) v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) v.AddArg3(ptr, v0, mem) return true } + // match: (Zero [8] {t} ptr mem) + // cond: t.Alignment()%4 == 0 + // result: (MOVWstore [4] ptr (MOVWconst [0]) (MOVWstore ptr (MOVWconst [0]) mem)) + for { + if auxIntToInt64(v.AuxInt) != 8 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%4 == 0) { + break + } + v.reset(OpRISCV64MOVWstore) + v.AuxInt = int32ToAuxInt(4) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem) + v1.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, v0, v1) + return true + } + // match: (Zero [8] {t} ptr mem) + // cond: t.Alignment()%2 == 0 + // result: (MOVHstore [6] ptr (MOVHconst [0]) (MOVHstore [4] ptr (MOVHconst [0]) (MOVHstore [2] ptr (MOVHconst [0]) (MOVHstore ptr (MOVHconst [0]) mem)))) + for { + if auxIntToInt64(v.AuxInt) != 8 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%2 == 0) { + break + } + v.reset(OpRISCV64MOVHstore) + v.AuxInt = int32ToAuxInt(6) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16) + v0.AuxInt = int16ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(4) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v2.AuxInt = int32ToAuxInt(2) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v3.AddArg3(ptr, v0, mem) + v2.AddArg3(ptr, v0, v3) + v1.AddArg3(ptr, v0, v2) + v.AddArg3(ptr, v0, v1) + return true + } + // match: (Zero [3] ptr mem) + // result: (MOVBstore [2] ptr (MOVBconst [0]) (MOVBstore [1] ptr (MOVBconst [0]) (MOVBstore ptr (MOVBconst [0]) mem))) + for { + if auxIntToInt64(v.AuxInt) != 3 { + break + } + ptr := v_0 + mem := v_1 + v.reset(OpRISCV64MOVBstore) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(1) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVBstore, types.TypeMem) + v2.AddArg3(ptr, v0, mem) + v1.AddArg3(ptr, v0, v2) + v.AddArg3(ptr, v0, v1) + return true + } + // match: (Zero [6] {t} ptr mem) + // cond: t.Alignment()%2 == 0 + // result: (MOVHstore [4] ptr (MOVHconst [0]) (MOVHstore [2] ptr (MOVHconst [0]) (MOVHstore ptr (MOVHconst [0]) mem))) + for { + if auxIntToInt64(v.AuxInt) != 6 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%2 == 0) { + break + } + v.reset(OpRISCV64MOVHstore) + v.AuxInt = int32ToAuxInt(4) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16) + v0.AuxInt = int16ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(2) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVHstore, types.TypeMem) + v2.AddArg3(ptr, v0, mem) + v1.AddArg3(ptr, v0, v2) + v.AddArg3(ptr, v0, v1) + return true + } + // match: (Zero [12] {t} ptr mem) + // cond: t.Alignment()%4 == 0 + // result: (MOVWstore [8] ptr (MOVWconst [0]) (MOVWstore [4] ptr (MOVWconst [0]) (MOVWstore ptr (MOVWconst [0]) mem))) + for { + if auxIntToInt64(v.AuxInt) != 12 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%4 == 0) { + break + } + v.reset(OpRISCV64MOVWstore) + v.AuxInt 
= int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(4) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem) + v2.AddArg3(ptr, v0, mem) + v1.AddArg3(ptr, v0, v2) + v.AddArg3(ptr, v0, v1) + return true + } + // match: (Zero [16] {t} ptr mem) + // cond: t.Alignment()%8 == 0 + // result: (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) + for { + if auxIntToInt64(v.AuxInt) != 16 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%8 == 0) { + break + } + v.reset(OpRISCV64MOVDstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v1.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, v0, v1) + return true + } + // match: (Zero [24] {t} ptr mem) + // cond: t.Alignment()%8 == 0 + // result: (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))) + for { + if auxIntToInt64(v.AuxInt) != 24 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%8 == 0) { + break + } + v.reset(OpRISCV64MOVDstore) + v.AuxInt = int32ToAuxInt(16) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(8) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v2.AddArg3(ptr, v0, mem) + v1.AddArg3(ptr, v0, v2) + v.AddArg3(ptr, v0, v1) + return true + } + // match: (Zero [32] {t} ptr mem) + // cond: t.Alignment()%8 == 0 + // result: (MOVDstore [24] ptr (MOVDconst [0]) (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))) + for { + if auxIntToInt64(v.AuxInt) != 32 { + break + } + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(t.Alignment()%8 == 0) { + break + } + v.reset(OpRISCV64MOVDstore) + v.AuxInt = int32ToAuxInt(24) + v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(16) + v2 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v2.AuxInt = int32ToAuxInt(8) + v3 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem) + v3.AddArg3(ptr, v0, mem) + v2.AddArg3(ptr, v0, v3) + v1.AddArg3(ptr, v0, v2) + v.AddArg3(ptr, v0, v1) + return true + } // match: (Zero [s] {t} ptr mem) - // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice + // cond: s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice // result: (DUFFZERO [8 * (128 - s/8)] ptr mem) for { s := auxIntToInt64(v.AuxInt) t := auxToType(v.Aux) ptr := v_0 mem := v_1 - if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice) { + if !(s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice) { break } v.reset(OpRISCV64DUFFZERO)
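
As a hedged illustration of what these rules target (the type and function names below are invented for the example, and the lowerings described are what the rules above imply for GOARCH=riscv64, not verified assembly output): zeroing or copying a small 8-byte-aligned value should now lower to a few MOVDstore/MOVDload operations instead of a DUFFZERO/DUFFCOPY call, while data with only byte alignment should be handled with byte-sized accesses so that no unaligned load or store is emitted.

package main

// buf is 24 bytes with 8-byte alignment, so zeroing or copying it is a
// Zero/Move [24] with t.Alignment()%8 == 0 and should become three
// 8-byte stores (plus loads for the copy) under the rules above.
type buf struct {
	a, b, c uint64
}

// packed keeps a 2-byte payload at byte alignment; copying it is a
// Move [2] with odd alignment and should use two MOVBload/MOVBstore
// pairs rather than an unaligned halfword access.
type packed struct {
	tag  byte
	data [2]byte
}

//go:noinline
func zeroBuf(p *buf) {
	*p = buf{} // Zero [24], 8-byte aligned
}

//go:noinline
func copyBuf(dst, src *buf) {
	*dst = *src // Move [24], 8-byte aligned
}

//go:noinline
func copyData(dst, src *packed) {
	dst.data = src.data // Move [2], 1-byte aligned
}

func main() {
	var a, b buf
	zeroBuf(&a)
	copyBuf(&b, &a)
	var p, q packed
	copyData(&p, &q)
}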