mirror of
https://github.com/golang/go
synced 2024-11-18 02:04:45 -07:00
cmd/internal/obj/arm64: encode float constants into FMOVS/FMOVD instructions
The current assembler rewrites every float constant except 0.0 into a value loaded from memory, which is slow. This patch instead uses the immediate forms of the FMOVS/FMOVD instructions to move floating-point constants directly into SIMD&FP destination registers whenever the constant can be encoded as an instruction immediate. The chipfloat7() function checks whether a given constant is encodable. go1 benchmark results. name old time/op new time/op delta BinaryTree17-8 6.27s ± 1% 6.27s ± 1% ~ (p=0.762 n=10+8) Fannkuch11-8 5.42s ± 1% 5.38s ± 0% -0.63% (p=0.000 n=10+10) FmtFprintfEmpty-8 92.9ns ± 1% 93.4ns ± 0% +0.47% (p=0.004 n=9+8) FmtFprintfString-8 169ns ± 2% 170ns ± 4% ~ (p=0.378 n=10+10) FmtFprintfInt-8 197ns ± 1% 196ns ± 1% -0.77% (p=0.009 n=10+9) FmtFprintfIntInt-8 284ns ± 1% 286ns ± 1% ~ (p=0.051 n=10+10) FmtFprintfPrefixedInt-8 419ns ± 0% 422ns ± 1% +0.69% (p=0.038 n=6+10) FmtFprintfFloat-8 458ns ± 0% 463ns ± 1% +1.14% (p=0.000 n=10+10) FmtManyArgs-8 1.35µs ± 2% 1.36µs ± 1% +0.91% (p=0.043 n=10+10) GobDecode-8 16.0ms ± 2% 15.5ms ± 1% -3.39% (p=0.000 n=10+10) GobEncode-8 11.9ms ± 3% 11.4ms ± 1% -3.98% (p=0.000 n=10+9) Gzip-8 621ms ± 0% 625ms ± 0% +0.59% (p=0.000 n=9+10) Gunzip-8 74.0ms ± 1% 74.3ms ± 0% ~ (p=0.059 n=9+8) HTTPClientServer-8 116µs ± 1% 116µs ± 1% ~ (p=0.165 n=10+10) JSONEncode-8 29.3ms ± 1% 29.5ms ± 0% +0.72% (p=0.001 n=10+10) JSONDecode-8 145ms ± 1% 148ms ± 2% +2.06% (p=0.000 n=10+10) Mandelbrot200-8 9.67ms ± 0% 9.48ms ± 1% -1.92% (p=0.000 n=8+10) GoParse-8 7.55ms ± 0% 7.60ms ± 0% +0.57% (p=0.000 n=9+10) RegexpMatchEasy0_32-8 234ns ± 0% 210ns ± 0% -10.13% (p=0.000 n=8+10) RegexpMatchEasy0_1K-8 753ns ± 1% 729ns ± 0% -3.17% (p=0.000 n=10+8) RegexpMatchEasy1_32-8 225ns ± 0% 224ns ± 0% -0.44% (p=0.000 n=9+9) RegexpMatchEasy1_1K-8 1.03µs ± 0% 1.04µs ± 1% +1.29% (p=0.000 n=10+10) RegexpMatchMedium_32-8 320ns ± 3% 296ns ± 6% -7.50% (p=0.000 n=10+10) RegexpMatchMedium_1K-8 77.0µs ± 5% 73.6µs ± 1% ~ (p=0.393 n=10+10) RegexpMatchHard_32-8 3.93µs ± 0% 3.89µs ± 1% -0.95% (p=0.000 n=10+9) 
RegexpMatchHard_1K-8 120µs ± 5% 115µs ± 1% ~ (p=0.739 n=10+10) Revcomp-8 1.07s ± 0% 1.08s ± 1% +0.63% (p=0.000 n=10+9) Template-8 165ms ± 1% 163ms ± 1% -1.05% (p=0.001 n=8+10) TimeParse-8 751ns ± 1% 749ns ± 1% ~ (p=0.209 n=10+10) TimeFormat-8 759ns ± 1% 751ns ± 1% -0.96% (p=0.001 n=10+10) name old speed new speed delta GobDecode-8 48.0MB/s ± 2% 49.6MB/s ± 1% +3.50% (p=0.000 n=10+10) GobEncode-8 64.5MB/s ± 3% 67.1MB/s ± 1% +4.08% (p=0.000 n=10+9) Gzip-8 31.2MB/s ± 0% 31.1MB/s ± 0% -0.55% (p=0.000 n=9+8) Gunzip-8 262MB/s ± 1% 261MB/s ± 0% ~ (p=0.059 n=9+8) JSONEncode-8 66.3MB/s ± 1% 65.8MB/s ± 0% -0.72% (p=0.001 n=10+10) JSONDecode-8 13.4MB/s ± 1% 13.2MB/s ± 1% -2.02% (p=0.000 n=10+10) GoParse-8 7.67MB/s ± 0% 7.63MB/s ± 0% -0.57% (p=0.000 n=9+10) RegexpMatchEasy0_32-8 136MB/s ± 0% 152MB/s ± 0% +11.45% (p=0.000 n=10+10) RegexpMatchEasy0_1K-8 1.36GB/s ± 1% 1.40GB/s ± 0% +3.25% (p=0.000 n=10+8) RegexpMatchEasy1_32-8 142MB/s ± 0% 143MB/s ± 0% +0.35% (p=0.000 n=10+9) RegexpMatchEasy1_1K-8 992MB/s ± 0% 980MB/s ± 1% -1.27% (p=0.000 n=10+10) RegexpMatchMedium_32-8 3.12MB/s ± 3% 3.38MB/s ± 6% +8.17% (p=0.000 n=10+10) RegexpMatchMedium_1K-8 13.3MB/s ± 5% 13.9MB/s ± 1% ~ (p=0.362 n=10+10) RegexpMatchHard_32-8 8.14MB/s ± 0% 8.21MB/s ± 1% +0.95% (p=0.000 n=10+9) RegexpMatchHard_1K-8 8.54MB/s ± 5% 8.90MB/s ± 1% ~ (p=0.636 n=10+10) Revcomp-8 238MB/s ± 0% 236MB/s ± 1% -0.63% (p=0.000 n=10+9) Template-8 11.8MB/s ± 1% 11.9MB/s ± 1% +1.07% (p=0.001 n=8+10) Change-Id: I57b372d8dcd47e6aec39893843b20385d5d9c37e Reviewed-on: https://go-review.googlesource.com/129555 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
81957dd58e
commit
c430adf136
10
src/cmd/asm/internal/asm/testdata/arm64.s
vendored
10
src/cmd/asm/internal/asm/testdata/arm64.s
vendored
@ -163,6 +163,12 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
|
||||
MOVB (R29)(R30<<0), R14 // ae7bbe38
|
||||
MOVB (R29)(R30), R14 // MOVB (R29)(R30*1), R14 // ae6bbe38
|
||||
MOVB R4, (R2)(R6.SXTX) // 44e82638
|
||||
FMOVS $(4.0), F0 // 0010221e
|
||||
FMOVD $(4.0), F0 // 0010621e
|
||||
FMOVS $(0.265625), F1 // 01302a1e
|
||||
FMOVD $(0.1796875), F2 // 02f0681e
|
||||
FMOVS $(0.96875), F3 // 03f02d1e
|
||||
FMOVD $(28.0), F4 // 0490671e
|
||||
|
||||
FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
|
||||
FMOVS (R2)(R6<<2), F4 // 447866bc
|
||||
@ -479,14 +485,14 @@ again:
|
||||
// {
|
||||
// outcode($1, &$2, NREG, &$4);
|
||||
// }
|
||||
FADDD $0.5, F1 // FADDD $(0.5), F1
|
||||
// FADDD $0.5, F1 // FADDD $(0.5), F1
|
||||
FADDD F1, F2
|
||||
|
||||
// LTYPEK frcon ',' freg ',' freg
|
||||
// {
|
||||
// outcode($1, &$2, $4.reg, &$6);
|
||||
// }
|
||||
FADDD $0.7, F1, F2 // FADDD $(0.69999999999999996), F1, F2
|
||||
// FADDD $0.7, F1, F2 // FADDD $(0.69999999999999996), F1, F2
|
||||
FADDD F1, F2, F3
|
||||
|
||||
//
|
||||
|
@ -219,8 +219,6 @@ var optab = []Optab{
|
||||
|
||||
{AFADDS, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFADDS, C_FREG, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFADDS, C_FCON, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFADDS, C_FCON, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFMSUBD, C_FREG, C_FREG, C_FREG, C_FREG, 15, 4, 0, 0, 0},
|
||||
{AFCMPS, C_FREG, C_FREG, C_NONE, C_NONE, 56, 4, 0, 0, 0},
|
||||
{AFCMPS, C_FCON, C_FREG, C_NONE, C_NONE, 56, 4, 0, 0, 0},
|
||||
@ -340,9 +338,9 @@ var optab = []Optab{
|
||||
{AFMOVS, C_ADDR, C_NONE, C_NONE, C_FREG, 65, 12, 0, 0, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
|
||||
{AFMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, 65, 12, 0, 0, 0},
|
||||
{AFMOVS, C_FCON, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFMOVS, C_FCON, C_NONE, C_NONE, C_FREG, 55, 4, 0, 0, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFMOVD, C_FCON, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFMOVD, C_FCON, C_NONE, C_NONE, C_FREG, 55, 4, 0, 0, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFMOVS, C_REG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_REG, 29, 4, 0, 0, 0},
|
||||
@ -2461,6 +2459,9 @@ func buildop(ctxt *obj.Link) {
|
||||
}
|
||||
}
|
||||
|
||||
// chipfloat7() checks whether a floating-point constant is available as an immediate in FMOVS/FMOVD instructions.
|
||||
// For details of the range of constants available, see
|
||||
// http://infocenter.arm.com/help/topic/com.arm.doc.dui0473m/dom1359731199385.html.
|
||||
func (c *ctxt7) chipfloat7(e float64) int {
|
||||
ei := math.Float64bits(e)
|
||||
l := uint32(int32(ei))
|
||||
@ -3486,19 +3487,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
|
||||
case 54: /* floating point arith */
|
||||
o1 = c.oprrr(p, p.As)
|
||||
|
||||
var rf int
|
||||
if p.From.Type == obj.TYPE_CONST {
|
||||
rf = c.chipfloat7(p.From.Val.(float64))
|
||||
if rf < 0 || true {
|
||||
c.ctxt.Diag("invalid floating-point immediate\n%v", p)
|
||||
rf = 0
|
||||
}
|
||||
|
||||
rf |= (1 << 3)
|
||||
} else {
|
||||
rf = int(p.From.Reg)
|
||||
}
|
||||
rf := int(p.From.Reg)
|
||||
rt := int(p.To.Reg)
|
||||
r := int(p.Reg)
|
||||
if (o1&(0x1F<<24)) == (0x1E<<24) && (o1&(1<<11)) == 0 { /* monadic */
|
||||
@ -3509,6 +3498,18 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
}
|
||||
o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31)
|
||||
|
||||
case 55: /* floating-point constant */
|
||||
var rf int
|
||||
o1 = 0xf<<25 | 1<<21 | 1<<12
|
||||
rf = c.chipfloat7(p.From.Val.(float64))
|
||||
if rf < 0 {
|
||||
c.ctxt.Diag("invalid floating-point immediate\n%v", p)
|
||||
}
|
||||
if p.As == AFMOVD {
|
||||
o1 |= 1 << 22
|
||||
}
|
||||
o1 |= (uint32(rf&0xff) << 13) | uint32(p.To.Reg&31)
|
||||
|
||||
case 56: /* floating point compare */
|
||||
o1 = c.oprrr(p, p.As)
|
||||
|
||||
|
@ -254,7 +254,11 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
|
||||
switch p.As {
|
||||
case AFMOVS:
|
||||
if p.From.Type == obj.TYPE_FCONST {
|
||||
f32 := float32(p.From.Val.(float64))
|
||||
f64 := p.From.Val.(float64)
|
||||
f32 := float32(f64)
|
||||
if c.chipfloat7(f64) > 0 {
|
||||
break
|
||||
}
|
||||
if math.Float32bits(f32) == 0 {
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = REGZERO
|
||||
@ -269,6 +273,9 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
|
||||
case AFMOVD:
|
||||
if p.From.Type == obj.TYPE_FCONST {
|
||||
f64 := p.From.Val.(float64)
|
||||
if c.chipfloat7(f64) > 0 {
|
||||
break
|
||||
}
|
||||
if math.Float64bits(f64) == 0 {
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = REGZERO
|
||||
|
Loading…
Reference in New Issue
Block a user