mirror of https://github.com/golang/go synced 2024-11-27 01:01:21 -07:00

cmd/compile: convert more AMD64.rules lines to typed aux mode

Change-Id: Idded860128b1a23680520d8c2b9f6d8620dcfcc7
Reviewed-on: https://go-review.googlesource.com/c/go/+/228077
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Keith Randall 2020-04-12 17:11:25 -07:00
parent 916ecbc731
commit dc9879e8fd
5 changed files with 408 additions and 375 deletions
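
For context on the conversion itself: in the .rules files, "->" marks a rewrite in the old untyped aux mode and "=>" marks the new typed mode, in which AuxInt and Aux are read and written through typed conversion helpers (int32, ValAndOff, *types.Type, and so on) rather than as raw int64/interface{} values. The standalone sketch below uses simplified stand-in types, not code from this CL, to show what that buys for a ValAndOff-carrying AuxInt like the ones the Zero rules below switch to.

package main

import "fmt"

// Sketch only: simplified stand-ins for the compiler's Value and ValAndOff
// (the real ones live in cmd/compile/internal/ssa and carry more state).
type Value struct {
	AuxInt int64
	Aux    interface{}
}

// ValAndOff packs a 32-bit constant value and a 32-bit offset into one
// int64: val in the high half, off in the low half.
type ValAndOff int64

func makeValAndOff32(val, off int32) ValAndOff {
	return ValAndOff(int64(val)<<32 | int64(uint32(off)))
}

func (x ValAndOff) Val() int32 { return int32(int64(x) >> 32) }
func (x ValAndOff) Off() int32 { return int32(x) }

// Typed helpers in the style of the auxIntTo*/…ToAuxInt family in rewrite.go.
func auxIntToValAndOff(i int64) ValAndOff { return ValAndOff(i) }
func valAndOffToAuxInt(v ValAndOff) int64 { return int64(v) }

func main() {
	v := &Value{}

	// Old "->" style: rules and generated code manipulate the raw int64,
	// e.g. (MOVBstoreconst [0] destptr mem) just sets AuxInt to 0.
	v.AuxInt = 0

	// New "=>" style: the value goes through typed helpers, so the
	// generator can check that the aux really is a ValAndOff,
	// e.g. [makeValAndOff32(0,2)] in the Zero [3] rule below.
	v.AuxInt = valAndOffToAuxInt(makeValAndOff32(0, 2))
	vo := auxIntToValAndOff(v.AuxInt)
	fmt.Println(vo.Val(), vo.Off()) // 0 2
}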


@@ -114,191 +114,191 @@
// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to16 ...) -> (MOVBQSX ...)
(SignExt8to32 ...) -> (MOVBQSX ...)
(SignExt8to64 ...) -> (MOVBQSX ...)
(SignExt16to32 ...) -> (MOVWQSX ...)
(SignExt16to64 ...) -> (MOVWQSX ...)
(SignExt32to64 ...) -> (MOVLQSX ...)
(SignExt8to16 ...) => (MOVBQSX ...)
(SignExt8to32 ...) => (MOVBQSX ...)
(SignExt8to64 ...) => (MOVBQSX ...)
(SignExt16to32 ...) => (MOVWQSX ...)
(SignExt16to64 ...) => (MOVWQSX ...)
(SignExt32to64 ...) => (MOVLQSX ...)
(ZeroExt8to16 ...) -> (MOVBQZX ...)
(ZeroExt8to32 ...) -> (MOVBQZX ...)
(ZeroExt8to64 ...) -> (MOVBQZX ...)
(ZeroExt16to32 ...) -> (MOVWQZX ...)
(ZeroExt16to64 ...) -> (MOVWQZX ...)
(ZeroExt32to64 ...) -> (MOVLQZX ...)
(ZeroExt8to16 ...) => (MOVBQZX ...)
(ZeroExt8to32 ...) => (MOVBQZX ...)
(ZeroExt8to64 ...) => (MOVBQZX ...)
(ZeroExt16to32 ...) => (MOVWQZX ...)
(ZeroExt16to64 ...) => (MOVWQZX ...)
(ZeroExt32to64 ...) => (MOVLQZX ...)
(Slicemask <t> x) -> (SARQconst (NEGQ <t> x) [63])
(Slicemask <t> x) => (SARQconst (NEGQ <t> x) [63])
(SpectreIndex <t> x y) -> (CMOVQCC x (MOVQconst [0]) (CMPQ x y))
(SpectreSliceIndex <t> x y) -> (CMOVQHI x (MOVQconst [0]) (CMPQ x y))
(SpectreIndex <t> x y) => (CMOVQCC x (MOVQconst [0]) (CMPQ x y))
(SpectreSliceIndex <t> x y) => (CMOVQHI x (MOVQconst [0]) (CMPQ x y))
// Lowering truncation
// Because we ignore high parts of registers, truncates are just copies.
(Trunc16to8 ...) -> (Copy ...)
(Trunc32to8 ...) -> (Copy ...)
(Trunc32to16 ...) -> (Copy ...)
(Trunc64to8 ...) -> (Copy ...)
(Trunc64to16 ...) -> (Copy ...)
(Trunc64to32 ...) -> (Copy ...)
(Trunc16to8 ...) => (Copy ...)
(Trunc32to8 ...) => (Copy ...)
(Trunc32to16 ...) => (Copy ...)
(Trunc64to8 ...) => (Copy ...)
(Trunc64to16 ...) => (Copy ...)
(Trunc64to32 ...) => (Copy ...)
// Lowering float <-> int
(Cvt32to32F ...) -> (CVTSL2SS ...)
(Cvt32to64F ...) -> (CVTSL2SD ...)
(Cvt64to32F ...) -> (CVTSQ2SS ...)
(Cvt64to64F ...) -> (CVTSQ2SD ...)
(Cvt32to32F ...) => (CVTSL2SS ...)
(Cvt32to64F ...) => (CVTSL2SD ...)
(Cvt64to32F ...) => (CVTSQ2SS ...)
(Cvt64to64F ...) => (CVTSQ2SD ...)
(Cvt32Fto32 ...) -> (CVTTSS2SL ...)
(Cvt32Fto64 ...) -> (CVTTSS2SQ ...)
(Cvt64Fto32 ...) -> (CVTTSD2SL ...)
(Cvt64Fto64 ...) -> (CVTTSD2SQ ...)
(Cvt32Fto32 ...) => (CVTTSS2SL ...)
(Cvt32Fto64 ...) => (CVTTSS2SQ ...)
(Cvt64Fto32 ...) => (CVTTSD2SL ...)
(Cvt64Fto64 ...) => (CVTTSD2SQ ...)
(Cvt32Fto64F ...) -> (CVTSS2SD ...)
(Cvt64Fto32F ...) -> (CVTSD2SS ...)
(Cvt32Fto64F ...) => (CVTSS2SD ...)
(Cvt64Fto32F ...) => (CVTSD2SS ...)
(Round(32|64)F ...) -> (Copy ...)
(Round(32|64)F ...) => (Copy ...)
(CvtBoolToUint8 ...) -> (Copy ...)
(CvtBoolToUint8 ...) => (Copy ...)
// Lowering shifts
// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
// result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
(Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLQ x y)
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLQ x y)
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)
(Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16])))
(Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8])))
(Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16])))
(Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8])))
(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRQ x y)
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRL x y)
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRW x y)
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRB x y)
(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRQ x y)
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRL x y)
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRW x y)
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRB x y)
// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
// We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
(Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64])))))
(Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32])))))
(Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16])))))
(Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8])))))
(Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64])))))
(Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32])))))
(Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16])))))
(Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8])))))
(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARQ x y)
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARL x y)
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARB x y)
(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SARQ x y)
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SARL x y)
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
// Lowering integer comparisons
(Less(64|32|16|8) x y) -> (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) -> (SETB (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8) x y) -> (SETLE (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8)U x y) -> (SETBE (CMP(Q|L|W|B) x y))
(Eq(Ptr|64|32|16|8|B) x y) -> (SETEQ (CMP(Q|Q|L|W|B|B) x y))
(Neq(Ptr|64|32|16|8|B) x y) -> (SETNE (CMP(Q|Q|L|W|B|B) x y))
(Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8) x y) => (SETLE (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8)U x y) => (SETBE (CMP(Q|L|W|B) x y))
(Eq(Ptr|64|32|16|8|B) x y) => (SETEQ (CMP(Q|Q|L|W|B|B) x y))
(Neq(Ptr|64|32|16|8|B) x y) => (SETNE (CMP(Q|Q|L|W|B|B) x y))
// Lowering floating point comparisons
// Note Go assembler gets UCOMISx operand order wrong, but it is right here
// and the operands are reversed when generating assembly language.
(Eq(32|64)F x y) -> (SETEQF (UCOMIS(S|D) x y))
(Neq(32|64)F x y) -> (SETNEF (UCOMIS(S|D) x y))
(Eq(32|64)F x y) => (SETEQF (UCOMIS(S|D) x y))
(Neq(32|64)F x y) => (SETNEF (UCOMIS(S|D) x y))
// Use SETGF/SETGEF with reversed operands to dodge NaN case.
(Less(32|64)F x y) -> (SETGF (UCOMIS(S|D) y x))
(Leq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) y x))
(Less(32|64)F x y) => (SETGF (UCOMIS(S|D) y x))
(Leq(32|64)F x y) => (SETGEF (UCOMIS(S|D) y x))
// Lowering loads
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) -> (MOVLload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) -> (MOVSDload ptr mem)
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVQload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) => (MOVLload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) => (MOVWload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) => (MOVBload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) => (MOVSSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) => (MOVSDload ptr mem)
// Lowering stores
// These more-specific FP versions of Store pattern should come first.
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (MOVSDstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (MOVSSstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (MOVSDstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (MOVSSstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVQstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVLstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 => (MOVQstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 => (MOVLstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 2 => (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem)
// Lowering moves
(Move [0] _ _ mem) -> mem
(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
(Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
(Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
(Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
(Move [16] dst src mem) && config.useSSE -> (MOVOstore dst (MOVOload src mem) mem)
(Move [16] dst src mem) && !config.useSSE ->
(Move [0] _ _ mem) => mem
(Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem)
(Move [2] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
(Move [4] dst src mem) => (MOVLstore dst (MOVLload src mem) mem)
(Move [8] dst src mem) => (MOVQstore dst (MOVQload src mem) mem)
(Move [16] dst src mem) && config.useSSE => (MOVOstore dst (MOVOload src mem) mem)
(Move [16] dst src mem) && !config.useSSE =>
(MOVQstore [8] dst (MOVQload [8] src mem)
(MOVQstore dst (MOVQload src mem) mem))
(Move [32] dst src mem) ->
(Move [32] dst src mem) =>
(Move [16]
(OffPtr <dst.Type> dst [16])
(OffPtr <src.Type> src [16])
(Move [16] dst src mem))
(Move [48] dst src mem) && config.useSSE ->
(Move [48] dst src mem) && config.useSSE =>
(Move [32]
(OffPtr <dst.Type> dst [16])
(OffPtr <src.Type> src [16])
(Move [16] dst src mem))
(Move [64] dst src mem) && config.useSSE ->
(Move [64] dst src mem) && config.useSSE =>
(Move [32]
(OffPtr <dst.Type> dst [32])
(OffPtr <src.Type> src [32])
(Move [32] dst src mem))
(Move [3] dst src mem) ->
(Move [3] dst src mem) =>
(MOVBstore [2] dst (MOVBload [2] src mem)
(MOVWstore dst (MOVWload src mem) mem))
(Move [5] dst src mem) ->
(Move [5] dst src mem) =>
(MOVBstore [4] dst (MOVBload [4] src mem)
(MOVLstore dst (MOVLload src mem) mem))
(Move [6] dst src mem) ->
(Move [6] dst src mem) =>
(MOVWstore [4] dst (MOVWload [4] src mem)
(MOVLstore dst (MOVLload src mem) mem))
(Move [7] dst src mem) ->
(Move [7] dst src mem) =>
(MOVLstore [3] dst (MOVLload [3] src mem)
(MOVLstore dst (MOVLload src mem) mem))
(Move [9] dst src mem) ->
(Move [9] dst src mem) =>
(MOVBstore [8] dst (MOVBload [8] src mem)
(MOVQstore dst (MOVQload src mem) mem))
(Move [10] dst src mem) ->
(Move [10] dst src mem) =>
(MOVWstore [8] dst (MOVWload [8] src mem)
(MOVQstore dst (MOVQload src mem) mem))
(Move [12] dst src mem) ->
(Move [12] dst src mem) =>
(MOVLstore [8] dst (MOVLload [8] src mem)
(MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem) && s == 11 || s >= 13 && s <= 15 ->
(MOVQstore [s-8] dst (MOVQload [s-8] src mem)
(Move [s] dst src mem) && s == 11 || s >= 13 && s <= 15 =>
(MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem)
(MOVQstore dst (MOVQload src mem) mem))
// Adjust moves to be a multiple of 16 bytes.
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 <= 8 ->
&& s > 16 && s%16 != 0 && s%16 <= 8 =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE ->
&& s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(MOVOstore dst (MOVOload src mem) mem))
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE ->
&& s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
@@ -308,7 +308,7 @@
// Medium copying uses a duff device.
(Move [s] dst src mem)
&& s > 64 && s <= 16*64 && s%16 == 0
&& !config.noDuffDevice && logLargeCopy(v, s) ->
&& !config.noDuffDevice && logLargeCopy(v, s) =>
(DUFFCOPY [14*(64-s/16)] dst src mem)
// 14 and 64 are magic constants. 14 is the number of bytes to encode:
// MOVUPS (SI), X0
@@ -318,71 +318,71 @@
// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
// Large copying uses REP MOVSQ.
(Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0 && logLargeCopy(v, s) ->
(Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0 && logLargeCopy(v, s) =>
(REPMOVSQ dst src (MOVQconst [s/8]) mem)
// Lowering Zero instructions
(Zero [0] _ mem) -> mem
(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
(Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
(Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
(Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem)
(Zero [0] _ mem) => mem
(Zero [1] destptr mem) => (MOVBstoreconst [makeValAndOff32(0,0)] destptr mem)
(Zero [2] destptr mem) => (MOVWstoreconst [makeValAndOff32(0,0)] destptr mem)
(Zero [4] destptr mem) => (MOVLstoreconst [makeValAndOff32(0,0)] destptr mem)
(Zero [8] destptr mem) => (MOVQstoreconst [makeValAndOff32(0,0)] destptr mem)
(Zero [3] destptr mem) ->
(MOVBstoreconst [makeValAndOff(0,2)] destptr
(MOVWstoreconst [0] destptr mem))
(Zero [5] destptr mem) ->
(MOVBstoreconst [makeValAndOff(0,4)] destptr
(MOVLstoreconst [0] destptr mem))
(Zero [6] destptr mem) ->
(MOVWstoreconst [makeValAndOff(0,4)] destptr
(MOVLstoreconst [0] destptr mem))
(Zero [7] destptr mem) ->
(MOVLstoreconst [makeValAndOff(0,3)] destptr
(MOVLstoreconst [0] destptr mem))
(Zero [3] destptr mem) =>
(MOVBstoreconst [makeValAndOff32(0,2)] destptr
(MOVWstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [5] destptr mem) =>
(MOVBstoreconst [makeValAndOff32(0,4)] destptr
(MOVLstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [6] destptr mem) =>
(MOVWstoreconst [makeValAndOff32(0,4)] destptr
(MOVLstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [7] destptr mem) =>
(MOVLstoreconst [makeValAndOff32(0,3)] destptr
(MOVLstoreconst [makeValAndOff32(0,0)] destptr mem))
// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE ->
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE =>
(Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
(MOVQstoreconst [0] destptr mem))
(MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))
// Zero small numbers of words directly.
(Zero [16] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem))
(Zero [24] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,16)] destptr
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,24)] destptr
(MOVQstoreconst [makeValAndOff(0,16)] destptr
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem))))
(Zero [16] destptr mem) && !config.useSSE =>
(MOVQstoreconst [makeValAndOff32(0,8)] destptr
(MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [24] destptr mem) && !config.useSSE =>
(MOVQstoreconst [makeValAndOff32(0,16)] destptr
(MOVQstoreconst [makeValAndOff32(0,8)] destptr
(MOVQstoreconst [makeValAndOff32(0,0)] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE =>
(MOVQstoreconst [makeValAndOff32(0,24)] destptr
(MOVQstoreconst [makeValAndOff32(0,16)] destptr
(MOVQstoreconst [makeValAndOff32(0,8)] destptr
(MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))))
(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,s-8)] destptr
(MOVQstoreconst [0] destptr mem))
(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE =>
(MOVQstoreconst [makeValAndOff32(0,int32(s-8))] destptr
(MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))
// Adjust zeros to be a multiple of 16 bytes.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE ->
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE =>
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVOstore destptr (MOVOconst [0]) mem))
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE ->
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE =>
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVQstoreconst [0] destptr mem))
(MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [16] destptr mem) && config.useSSE ->
(Zero [16] destptr mem) && config.useSSE =>
(MOVOstore destptr (MOVOconst [0]) mem)
(Zero [32] destptr mem) && config.useSSE ->
(Zero [32] destptr mem) && config.useSSE =>
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem))
(Zero [48] destptr mem) && config.useSSE ->
(Zero [48] destptr mem) && config.useSSE =>
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem)))
(Zero [64] destptr mem) && config.useSSE ->
(Zero [64] destptr mem) && config.useSSE =>
(MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
@@ -390,13 +390,13 @@
// Medium zeroing uses a duff device.
(Zero [s] destptr mem)
&& s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice ->
&& s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice =>
(DUFFZERO [s] destptr (MOVOconst [0]) mem)
// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
&& (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
&& s%8 == 0 ->
&& s%8 == 0 =>
(REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
// Lowering constants
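
One visible effect of the typed mode in the Store rules above: with "=>" the {t} aux is handed to the rule already converted through the new auxToType helper, so the rule condition can say t.Size() instead of t.(*types.Type).Size(). A small standalone sketch with toy stand-in types (the real *types.Type and Value live in cmd/compile/internal/types and .../ssa; this is not code from the CL):

package main

import "fmt"

// Toy stand-ins, just to mirror the shape of the matching code.
type Type struct{ size int64 }

func (t *Type) Size() int64 { return t.size }

type Value struct{ Aux interface{} }

// auxToType mirrors the helper added to rewrite.go in this CL: with typed
// rules the generator emits this conversion before the rule condition runs.
func auxToType(i interface{}) *Type { return i.(*Type) }

func main() {
	v := &Value{Aux: &Type{size: 8}}

	// Old "->" rule condition, written out in the rule text itself:
	//   t.(*types.Type).Size() == 8
	tOld := v.Aux
	fmt.Println(tOld.(*Type).Size() == 8)

	// New "=>" rule condition; the assertion moves into generated code:
	//   t := auxToType(v.Aux); t.Size() == 8
	tNew := auxToType(v.Aux)
	fmt.Println(tNew.Size() == 8)
}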


@@ -1034,18 +1034,18 @@ func genMatch0(rr *RuleRewrite, arch arch, match, v string, cnt map[string]int,
if !token.IsIdentifier(e.name) || rr.declared(e.name) {
switch e.field {
case "Aux":
rr.add(breakf("auxTo%s(%s.%s) != %s", strings.Title(e.dclType), v, e.field, e.name))
rr.add(breakf("auxTo%s(%s.%s) != %s", title(e.dclType), v, e.field, e.name))
case "AuxInt":
rr.add(breakf("auxIntTo%s(%s.%s) != %s", strings.Title(e.dclType), v, e.field, e.name))
rr.add(breakf("auxIntTo%s(%s.%s) != %s", title(e.dclType), v, e.field, e.name))
case "Type":
rr.add(breakf("%s.%s != %s", v, e.field, e.name))
}
} else {
switch e.field {
case "Aux":
rr.add(declf(e.name, "auxTo%s(%s.%s)", strings.Title(e.dclType), v, e.field))
rr.add(declf(e.name, "auxTo%s(%s.%s)", title(e.dclType), v, e.field))
case "AuxInt":
rr.add(declf(e.name, "auxIntTo%s(%s.%s)", strings.Title(e.dclType), v, e.field))
rr.add(declf(e.name, "auxIntTo%s(%s.%s)", title(e.dclType), v, e.field))
case "Type":
rr.add(declf(e.name, "%s.%s", v, e.field))
}
@@ -1762,7 +1762,8 @@ func (op opData) auxIntType() string {
return "int32"
case "Int64":
return "int64"
//case "Int128":
case "Int128":
return "int128"
case "Float32":
return "float32"
case "Float64":
@@ -1780,6 +1781,16 @@ func (op opData) auxIntType() string {
}
}
func title(s string) string {
if i := strings.Index(s, "."); i >= 0 {
s = s[i+1:]
}
return strings.Title(s)
}
func unTitle(s string) string {
if i := strings.Index(s, "."); i >= 0 {
s = s[i+1:]
}
return strings.ToLower(s[:1]) + s[1:]
}
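
The title/unTitle helpers exist because a typed aux declaration can carry a package qualifier such as "*types.Type"; the qualifier is stripped before casing so the generated helper names line up with those in rewrite.go. A standalone demonstration using local copies of the two functions (the "auxTo" prefix matches the genMatch0 hunk above; the unTitle-based "...ToAux" pairing for the result side is an assumption, since that part of rulegen is not in the visible diff):

package main

import (
	"fmt"
	"strings"
)

// Local copies of the two helpers added above (strings.Title is fine for
// the plain-ASCII type names rulegen feeds it).
func title(s string) string {
	if i := strings.Index(s, "."); i >= 0 {
		s = s[i+1:]
	}
	return strings.Title(s)
}

func unTitle(s string) string {
	if i := strings.Index(s, "."); i >= 0 {
		s = s[i+1:]
	}
	return strings.ToLower(s[:1]) + s[1:]
}

func main() {
	// A package-qualified aux type loses its qualifier before casing,
	// which is how the helper names below line up with rewrite.go.
	fmt.Println("auxTo" + title("*types.Type"))   // auxToType
	fmt.Println(unTitle("*types.Type") + "ToAux") // typeToAux (assumed pairing)
}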


@@ -192,6 +192,10 @@ func (x ValAndOff) add(off int64) int64 {
return makeValAndOff(x.Val(), x.Off()+off)
}
// int128 is a type that stores a 128-bit constant.
// The only allowed constant right now is 0, so we can cheat quite a bit.
type int128 int64
type BoundsKind uint8
const (


@@ -568,6 +568,12 @@ func auxIntToFloat64(i int64) float64 {
func auxIntToValAndOff(i int64) ValAndOff {
return ValAndOff(i)
}
func auxIntToInt128(x int64) int128 {
if x != 0 {
panic("nonzero int128 not allowed")
}
return 0
}
func boolToAuxInt(b bool) int64 {
if b {
@@ -596,6 +602,12 @@ func float64ToAuxInt(f float64) int64 {
func valAndOffToAuxInt(v ValAndOff) int64 {
return int64(v)
}
func int128ToAuxInt(x int128) int64 {
if x != 0 {
panic("nonzero int128 not allowed")
}
return 0
}
func auxToString(i interface{}) string {
return i.(string)
@@ -605,6 +617,9 @@ func auxToSym(i interface{}) Sym {
s, _ := i.(Sym)
return s
}
func auxToType(i interface{}) *types.Type {
return i.(*types.Type)
}
func stringToAux(s string) interface{} {
return s
@@ -612,6 +627,9 @@ func stringToAux(s string) interface{} {
func symToAux(s Sym) interface{} {
return s
}
func typeToAux(t *types.Type) interface{} {
return t
}
// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
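
Finally, a standalone sketch of the int128 plumbing, using local copies of the helpers above. The only 128-bit constant the rules need is the zero behind (MOVOconst [0]) in the SSE Zero rules (that link is inferred from the rules, not stated in this diff), which is why anything nonzero panics.

package main

import "fmt"

// Local copies of the int128 helpers added above. The type is backed by an
// int64 that must stay 0; the only consumer is assumed to be the 128-bit
// zero constant of (MOVOconst [0]).
type int128 int64

func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}

func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}

func main() {
	// Round-trip the only legal value.
	fmt.Println(auxIntToInt128(int128ToAuxInt(0))) // 0

	// Anything else is rejected, which keeps the "cheat" honest.
	defer func() { fmt.Println("recovered:", recover()) }()
	_ = auxIntToInt128(1)
}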

File diff suppressed because it is too large