mirror of
https://github.com/golang/go
synced 2024-11-11 18:21:40 -07:00
cmd/compile/internal/ssa: optimize rules Zero and Move on loong64
goos: linux
goarch: loong64
pkg: runtime
cpu: Loongson-3A6000 @ 2500.00MHz
             │  old.bench   │              new.bench              │
             │    sec/op    │    sec/op     vs base               │
ClearFat7      3.6020n ± 0%   0.5087n ± 1%  -85.88% (p=0.000 n=20)
ClearFat8      0.5137n ± 0%   0.8004n ± 0%  +55.81% (p=0.000 n=20)
ClearFat11     5.2030n ± 0%   0.5082n ± 1%  -90.23% (p=0.000 n=20)
ClearFat12     0.8244n ± 0%   0.8004n ± 0%   -2.91% (p=0.000 n=20)
ClearFat13     6.0030n ± 0%   0.5077n ± 1%  -91.54% (p=0.000 n=20)
ClearFat14     6.4030n ± 0%   0.8004n ± 0%  -87.50% (p=0.000 n=20)
ClearFat15     6.8030n ± 0%   0.5065n ± 1%  -92.55% (p=0.000 n=20)
ClearFat16     2.4010n ± 0%   0.8004n ± 0%  -66.66% (p=0.000 n=20)
ClearFat24      3.202n ± 0%    1.601n ± 0%  -50.00% (p=0.000 n=20)
ClearFat32      4.002n ± 0%    2.001n ± 0%  -50.00% (p=0.000 n=20)
ClearFat40      4.802n ± 0%    1.601n ± 0%  -66.66% (p=0.000 n=20)
ClearFat48      5.603n ± 0%    2.001n ± 0%  -64.29% (p=0.000 n=20)
ClearFat56      6.403n ± 0%    2.001n ± 0%  -68.75% (p=0.000 n=20)
ClearFat64      7.204n ± 0%    2.401n ± 0%  -66.67% (p=0.000 n=20)
ClearFat72      8.004n ± 0%    2.001n ± 0%  -75.00% (p=0.000 n=20)
ClearFat128    14.010n ± 0%    3.218n ± 0%  -77.03% (p=0.000 n=20)
ClearFat256    26.810n ± 0%    6.727n ± 0%  -74.91% (p=0.000 n=20)
ClearFat512     52.43n ± 0%    16.40n ± 0%  -68.72% (p=0.000 n=20)
ClearFat1024   103.65n ± 0%    37.49n ± 0%  -63.83% (p=0.000 n=20)
ClearFat1032   104.50n ± 0%    52.83n ± 0%  -49.44% (p=0.000 n=20)
ClearFat1040   105.30n ± 0%    53.23n ± 0%  -49.45% (p=0.000 n=20)
CopyFat7       6.0030n ± 0%   0.6048n ± 0%  -89.93% (p=0.000 n=20)
CopyFat8       0.8004n ± 0%   0.5974n ± 0%  -25.37% (p=0.000 n=20)
CopyFat11      9.2050n ± 0%   0.6057n ± 0%  -93.42% (p=0.000 n=20)
CopyFat12      0.8103n ± 0%   0.6064n ± 0%  -25.16% (p=0.000 n=20)
CopyFat13      6.4030n ± 0%   0.6052n ± 0%  -90.55% (p=0.000 n=20)
CopyFat14      6.8040n ± 0%   0.6064n ± 0%  -91.09% (p=0.000 n=20)
CopyFat15      7.2040n ± 0%   0.6071n ± 0%  -91.57% (p=0.000 n=20)
CopyFat16      2.8010n ± 0%   0.6064n ± 0%  -78.35% (p=0.000 n=20)
CopyFat24       3.602n ± 0%    2.001n ± 0%  -44.45% (p=0.000 n=20)
CopyFat32       4.402n ± 0%    2.001n ± 0%  -54.54% (p=0.000 n=20)
CopyFat64       7.604n ± 0%    2.802n ± 0%  -63.15% (p=0.000 n=20)
CopyFat72       8.405n ± 0%    3.202n ± 0%  -61.90% (p=0.000 n=20)
CopyFat128     14.410n ± 0%    5.480n ± 0%  -61.97% (p=0.000 n=20)
CopyFat256      28.57n ± 0%    12.16n ± 0%  -57.44% (p=0.000 n=20)
CopyFat512      63.63n ± 0%    24.88n ± 0%  -60.90% (p=0.000 n=20)
CopyFat520      67.23n ± 0%    24.11n ± 0%  -64.14% (p=0.000 n=20)
CopyFat1024    125.00n ± 0%    50.60n ± 0%  -59.52% (p=0.000 n=20)
CopyFat1032    121.30n ± 0%    64.32n ± 0%  -46.97% (p=0.000 n=20)
CopyFat1040    124.50n ± 0%    67.23n ± 0%  -46.00% (p=0.000 n=20)
geomean         9.539n         2.779n       -70.87%

Change-Id: Ic04e5f849f20ec3ec748d6763d4c9f8a1f21ee49
Reviewed-on: https://go-review.googlesource.com/c/go/+/592115
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Tim King <taking@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
This commit is contained in:
parent
a9bd84e037
commit
9b88f58099
@ -260,137 +260,138 @@
|
||||
// zeroing
|
||||
(Zero [0] _ mem) => mem
|
||||
(Zero [1] ptr mem) => (MOVBstore ptr (MOVVconst [0]) mem)
|
||||
(Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 =>
|
||||
(MOVHstore ptr (MOVVconst [0]) mem)
|
||||
(Zero [2] ptr mem) =>
|
||||
(MOVBstore [1] ptr (MOVVconst [0])
|
||||
(MOVBstore [0] ptr (MOVVconst [0]) mem))
|
||||
(Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 =>
|
||||
(MOVWstore ptr (MOVVconst [0]) mem)
|
||||
(Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 =>
|
||||
(MOVHstore [2] ptr (MOVVconst [0])
|
||||
(MOVHstore [0] ptr (MOVVconst [0]) mem))
|
||||
(Zero [4] ptr mem) =>
|
||||
(MOVBstore [3] ptr (MOVVconst [0])
|
||||
(MOVBstore [2] ptr (MOVVconst [0])
|
||||
(MOVBstore [1] ptr (MOVVconst [0])
|
||||
(MOVBstore [0] ptr (MOVVconst [0]) mem))))
|
||||
(Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVVstore ptr (MOVVconst [0]) mem)
|
||||
(Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 =>
|
||||
(MOVWstore [4] ptr (MOVVconst [0])
|
||||
(MOVWstore [0] ptr (MOVVconst [0]) mem))
|
||||
(Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 =>
|
||||
(MOVHstore [6] ptr (MOVVconst [0])
|
||||
(MOVHstore [4] ptr (MOVVconst [0])
|
||||
(MOVHstore [2] ptr (MOVVconst [0])
|
||||
(MOVHstore [0] ptr (MOVVconst [0]) mem))))
|
||||
|
||||
(Zero [2] ptr mem) => (MOVHstore ptr (MOVVconst [0]) mem)
|
||||
(Zero [3] ptr mem) =>
|
||||
(MOVBstore [2] ptr (MOVVconst [0])
|
||||
(MOVBstore [1] ptr (MOVVconst [0])
|
||||
(MOVBstore [0] ptr (MOVVconst [0]) mem)))
|
||||
(Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 =>
|
||||
(MOVHstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [4] {t} ptr mem) => (MOVWstore ptr (MOVVconst [0]) mem)
|
||||
(Zero [5] ptr mem) =>
|
||||
(MOVBstore [4] ptr (MOVVconst [0])
|
||||
(MOVWstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [6] ptr mem) =>
|
||||
(MOVHstore [4] ptr (MOVVconst [0])
|
||||
(MOVHstore [2] ptr (MOVVconst [0])
|
||||
(MOVHstore [0] ptr (MOVVconst [0]) mem)))
|
||||
(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
|
||||
(MOVWstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [7] ptr mem) =>
|
||||
(MOVWstore [3] ptr (MOVVconst [0])
|
||||
(MOVWstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [8] {t} ptr mem) => (MOVVstore ptr (MOVVconst [0]) mem)
|
||||
(Zero [9] ptr mem) =>
|
||||
(MOVBstore [8] ptr (MOVVconst [0])
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [10] ptr mem) =>
|
||||
(MOVHstore [8] ptr (MOVVconst [0])
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [11] ptr mem) =>
|
||||
(MOVWstore [7] ptr (MOVVconst [0])
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [12] ptr mem) =>
|
||||
(MOVWstore [8] ptr (MOVVconst [0])
|
||||
(MOVWstore [4] ptr (MOVVconst [0])
|
||||
(MOVWstore [0] ptr (MOVVconst [0]) mem)))
|
||||
(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [13] ptr mem) =>
|
||||
(MOVVstore [5] ptr (MOVVconst [0])
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [14] ptr mem) =>
|
||||
(MOVVstore [6] ptr (MOVVconst [0])
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [15] ptr mem) =>
|
||||
(MOVVstore [7] ptr (MOVVconst [0])
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
(Zero [16] ptr mem) =>
|
||||
(MOVVstore [8] ptr (MOVVconst [0])
|
||||
(MOVVstore [0] ptr (MOVVconst [0]) mem))
|
||||
(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVVstore [16] ptr (MOVVconst [0])
|
||||
(MOVVstore [8] ptr (MOVVconst [0])
|
||||
(MOVVstore [0] ptr (MOVVconst [0]) mem)))
|
||||
(MOVVstore ptr (MOVVconst [0]) mem))
|
||||
|
||||
// strip off fractional word zeroing
|
||||
(Zero [s] ptr mem) && s%8 != 0 && s > 16 =>
|
||||
(Zero [s%8]
|
||||
(OffPtr <ptr.Type> ptr [s-s%8])
|
||||
(Zero [s-s%8] ptr mem))
|
||||
|
||||
// medium zeroing uses a duff device
|
||||
// 8 and 128 are magic constants; see runtime/mkduff.go
|
||||
(Zero [s] {t} ptr mem)
|
||||
&& s%8 == 0 && s > 24 && s <= 8*128
|
||||
&& t.Alignment()%8 == 0 && !config.noDuffDevice =>
|
||||
(Zero [s] ptr mem)
|
||||
&& s%8 == 0 && s > 16 && s <= 8*128
|
||||
&& !config.noDuffDevice =>
|
||||
(DUFFZERO [8 * (128 - s/8)] ptr mem)
|
||||
|
||||
// large or unaligned zeroing uses a loop
|
||||
(Zero [s] {t} ptr mem)
|
||||
&& (s > 8*128 || config.noDuffDevice) || t.Alignment()%8 != 0 =>
|
||||
(LoweredZero [t.Alignment()]
|
||||
// large zeroing uses a loop
|
||||
(Zero [s] ptr mem)
|
||||
&& s%8 == 0 && s > 8*128 =>
|
||||
(LoweredZero
|
||||
ptr
|
||||
(ADDVconst <ptr.Type> ptr [s-moveSize(t.Alignment(), config)])
|
||||
(ADDVconst <ptr.Type> ptr [s-8])
|
||||
mem)
|
||||
|
||||
// moves
|
||||
(Move [0] _ _ mem) => mem
|
||||
(Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem)
|
||||
(Move [2] {t} dst src mem) && t.Alignment()%2 == 0 =>
|
||||
(MOVHstore dst (MOVHload src mem) mem)
|
||||
(Move [2] dst src mem) =>
|
||||
(MOVBstore [1] dst (MOVBload [1] src mem)
|
||||
(MOVBstore dst (MOVBload src mem) mem))
|
||||
(Move [4] {t} dst src mem) && t.Alignment()%4 == 0 =>
|
||||
(MOVWstore dst (MOVWload src mem) mem)
|
||||
(Move [4] {t} dst src mem) && t.Alignment()%2 == 0 =>
|
||||
(MOVHstore [2] dst (MOVHload [2] src mem)
|
||||
(MOVHstore dst (MOVHload src mem) mem))
|
||||
(Move [4] dst src mem) =>
|
||||
(MOVBstore [3] dst (MOVBload [3] src mem)
|
||||
(MOVBstore [2] dst (MOVBload [2] src mem)
|
||||
(MOVBstore [1] dst (MOVBload [1] src mem)
|
||||
(MOVBstore dst (MOVBload src mem) mem))))
|
||||
(Move [8] {t} dst src mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVVstore dst (MOVVload src mem) mem)
|
||||
(Move [8] {t} dst src mem) && t.Alignment()%4 == 0 =>
|
||||
(MOVWstore [4] dst (MOVWload [4] src mem)
|
||||
(MOVWstore dst (MOVWload src mem) mem))
|
||||
(Move [8] {t} dst src mem) && t.Alignment()%2 == 0 =>
|
||||
(MOVHstore [6] dst (MOVHload [6] src mem)
|
||||
(MOVHstore [4] dst (MOVHload [4] src mem)
|
||||
(MOVHstore [2] dst (MOVHload [2] src mem)
|
||||
(MOVHstore dst (MOVHload src mem) mem))))
|
||||
|
||||
(Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem)
|
||||
(Move [2] dst src mem) => (MOVHstore dst (MOVHUload src mem) mem)
|
||||
(Move [3] dst src mem) =>
|
||||
(MOVBstore [2] dst (MOVBload [2] src mem)
|
||||
(MOVBstore [1] dst (MOVBload [1] src mem)
|
||||
(MOVBstore dst (MOVBload src mem) mem)))
|
||||
(Move [6] {t} dst src mem) && t.Alignment()%2 == 0 =>
|
||||
(MOVHstore [4] dst (MOVHload [4] src mem)
|
||||
(MOVHstore [2] dst (MOVHload [2] src mem)
|
||||
(MOVHstore dst (MOVHload src mem) mem)))
|
||||
(Move [12] {t} dst src mem) && t.Alignment()%4 == 0 =>
|
||||
(MOVWstore [8] dst (MOVWload [8] src mem)
|
||||
(MOVWstore [4] dst (MOVWload [4] src mem)
|
||||
(MOVWstore dst (MOVWload src mem) mem)))
|
||||
(Move [16] {t} dst src mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVBstore [2] dst (MOVBUload [2] src mem)
|
||||
(MOVHstore dst (MOVHUload src mem) mem))
|
||||
(Move [4] dst src mem) => (MOVWstore dst (MOVWUload src mem) mem)
|
||||
(Move [5] dst src mem) =>
|
||||
(MOVBstore [4] dst (MOVBUload [4] src mem)
|
||||
(MOVWstore dst (MOVWUload src mem) mem))
|
||||
(Move [6] dst src mem) =>
|
||||
(MOVHstore [4] dst (MOVHUload [4] src mem)
|
||||
(MOVWstore dst (MOVWUload src mem) mem))
|
||||
(Move [7] dst src mem) =>
|
||||
(MOVWstore [3] dst (MOVWUload [3] src mem)
|
||||
(MOVWstore dst (MOVWUload src mem) mem))
|
||||
(Move [8] dst src mem) => (MOVVstore dst (MOVVload src mem) mem)
|
||||
(Move [9] dst src mem) =>
|
||||
(MOVBstore [8] dst (MOVBUload [8] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem))
|
||||
(Move [10] dst src mem) =>
|
||||
(MOVHstore [8] dst (MOVHUload [8] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem))
|
||||
(Move [11] dst src mem) =>
|
||||
(MOVWstore [7] dst (MOVWload [7] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem))
|
||||
(Move [12] dst src mem) =>
|
||||
(MOVWstore [8] dst (MOVWUload [8] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem))
|
||||
(Move [13] dst src mem) =>
|
||||
(MOVVstore [5] dst (MOVVload [5] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem))
|
||||
(Move [14] dst src mem) =>
|
||||
(MOVVstore [6] dst (MOVVload [6] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem))
|
||||
(Move [15] dst src mem) =>
|
||||
(MOVVstore [7] dst (MOVVload [7] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem))
|
||||
(Move [16] dst src mem) =>
|
||||
(MOVVstore [8] dst (MOVVload [8] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem))
|
||||
(Move [24] {t} dst src mem) && t.Alignment()%8 == 0 =>
|
||||
(MOVVstore [16] dst (MOVVload [16] src mem)
|
||||
(MOVVstore [8] dst (MOVVload [8] src mem)
|
||||
(MOVVstore dst (MOVVload src mem) mem)))
|
||||
|
||||
// strip off fractional word move
|
||||
(Move [s] dst src mem) && s%8 != 0 && s > 16 =>
|
||||
(Move [s%8]
|
||||
(OffPtr <dst.Type> dst [s-s%8])
|
||||
(OffPtr <src.Type> src [s-s%8])
|
||||
(Move [s-s%8] dst src mem))
|
||||
|
||||
// medium move uses a duff device
|
||||
(Move [s] {t} dst src mem)
|
||||
&& s%8 == 0 && s >= 24 && s <= 8*128 && t.Alignment()%8 == 0
|
||||
(Move [s] dst src mem)
|
||||
&& s%8 == 0 && s > 16 && s <= 8*128
|
||||
&& !config.noDuffDevice && logLargeCopy(v, s) =>
|
||||
(DUFFCOPY [16 * (128 - s/8)] dst src mem)
|
||||
// 16 and 128 are magic constants. 16 is the number of bytes to encode:
|
||||
// MOVV (R1), R23
|
||||
// ADDV $8, R1
|
||||
// MOVV R23, (R2)
|
||||
// ADDV $8, R2
|
||||
// and 128 is the number of such blocks. See runtime/duff_mips64.s:duffcopy.
|
||||
// MOVV (R20), R30
|
||||
// ADDV $8, R20
|
||||
// MOVV R30, (R21)
|
||||
// ADDV $8, R21
|
||||
// and 128 is the number of such blocks. See runtime/duff_loong64.s:duffcopy.
|
||||
|
||||
// large or unaligned move uses a loop
|
||||
(Move [s] {t} dst src mem)
|
||||
&& s > 24 && logLargeCopy(v, s) || t.Alignment()%8 != 0 =>
|
||||
(LoweredMove [t.Alignment()]
|
||||
// large move uses a loop
|
||||
(Move [s] dst src mem)
|
||||
&& s%8 == 0 && s > 1024 && logLargeCopy(v, s) =>
|
||||
(LoweredMove
|
||||
dst
|
||||
src
|
||||
(ADDVconst <src.Type> src [s-moveSize(t.Alignment(), config)])
|
||||
(ADDVconst <src.Type> src [s-8])
|
||||
mem)
|
||||
|
||||
|
||||
// calls
|
||||
(StaticCall ...) => (CALLstatic ...)
|
||||
(ClosureCall ...) => (CALLclosure ...)
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user