mirror of
https://github.com/golang/go
synced 2024-11-18 13:54:59 -07:00
cmd/internal/obj/arm: use new form of MOVW introduced in ARMv7
As discussed in issue #18293, "MOVW $Imm-16, Reg" was introduced in ARMv7. It encodes the 16-bit immediate directly into the instruction instead of putting it in the constant pool. This patch makes the arm assembler choose this form of MOVW when it is available. Besides saving 4 bytes in the constant pool, the go1 benchmark also shows a slight improvement. name old time/op new time/op delta BinaryTree17-4 42.7s ± 1% 42.7s ± 1% ~ (p=0.304 n=50+50) Fannkuch11-4 24.8s ± 1% 24.8s ± 0% ~ (p=0.757 n=50+49) FmtFprintfEmpty-4 875ns ± 1% 873ns ± 2% ~ (p=0.066 n=44+46) FmtFprintfString-4 1.43µs ± 1% 1.45µs ± 1% +1.68% (p=0.000 n=44+44) FmtFprintfInt-4 1.52µs ± 1% 1.52µs ± 1% +0.26% (p=0.009 n=41+45) FmtFprintfIntInt-4 2.19µs ± 1% 2.20µs ± 1% +0.76% (p=0.000 n=43+46) FmtFprintfPrefixedInt-4 2.56µs ± 2% 2.53µs ± 1% -1.03% (p=0.000 n=45+44) FmtFprintfFloat-4 4.41µs ± 1% 4.39µs ± 1% -0.52% (p=0.000 n=44+44) FmtManyArgs-4 9.02µs ± 2% 9.04µs ± 1% +0.27% (p=0.000 n=46+44) GobDecode-4 106ms ± 1% 106ms ± 1% ~ (p=0.310 n=45+43) GobEncode-4 88.1ms ± 2% 88.0ms ± 2% ~ (p=0.648 n=49+50) Gzip-4 4.31s ± 1% 4.27s ± 1% -1.01% (p=0.000 n=50+50) Gunzip-4 618ms ± 1% 608ms ± 1% -1.65% (p=0.000 n=45+47) HTTPClientServer-4 689µs ± 6% 692µs ± 4% +0.52% (p=0.038 n=50+47) JSONEncode-4 282ms ± 2% 280ms ± 1% -0.75% (p=0.000 n=46+43) JSONDecode-4 945ms ± 2% 940ms ± 1% -0.47% (p=0.000 n=47+47) Mandelbrot200-4 49.4ms ± 1% 49.3ms ± 1% ~ (p=0.163 n=45+45) GoParse-4 46.0ms ± 3% 45.5ms ± 2% -0.95% (p=0.000 n=49+40) RegexpMatchEasy0_32-4 1.29µs ± 1% 1.28µs ± 1% -0.14% (p=0.005 n=38+45) RegexpMatchEasy0_1K-4 7.92µs ± 8% 7.75µs ± 6% -2.12% (p=0.000 n=47+50) RegexpMatchEasy1_32-4 1.31µs ± 1% 1.31µs ± 0% ~ (p=0.282 n=45+48) RegexpMatchEasy1_1K-4 10.4µs ± 5% 10.4µs ± 3% ~ (p=0.771 n=50+49) RegexpMatchMedium_32-4 2.06µs ± 1% 2.07µs ± 1% +0.35% (p=0.001 n=44+49) RegexpMatchMedium_1K-4 533µs ± 1% 532µs ± 1% ~ (p=0.710 n=43+47) RegexpMatchHard_32-4 29.7µs ± 1% 29.6µs ± 1% -0.34% (p=0.002 n=43+46) RegexpMatchHard_1K-4 893µs 
± 2% 885µs ± 1% -0.85% (p=0.000 n=50+45) Revcomp-4 85.6ms ± 4% 85.5ms ± 2% ~ (p=0.683 n=50+50) Template-4 1.05s ± 3% 1.04s ± 1% -1.06% (p=0.000 n=50+44) TimeParse-4 7.19µs ± 2% 7.11µs ± 2% -1.10% (p=0.000 n=48+46) TimeFormat-4 13.4µs ± 1% 13.5µs ± 1% ~ (p=0.056 n=46+49) [Geo mean] 747µs 745µs -0.28% name old speed new speed delta GobDecode-4 7.23MB/s ± 1% 7.22MB/s ± 1% ~ (p=0.062 n=45+39) GobEncode-4 8.71MB/s ± 2% 8.72MB/s ± 2% ~ (p=0.656 n=49+50) Gzip-4 4.50MB/s ± 1% 4.55MB/s ± 1% +1.03% (p=0.000 n=50+50) Gunzip-4 31.4MB/s ± 1% 31.9MB/s ± 1% +1.67% (p=0.000 n=45+47) JSONEncode-4 6.89MB/s ± 2% 6.94MB/s ± 1% +0.76% (p=0.000 n=46+43) JSONDecode-4 2.05MB/s ± 2% 2.06MB/s ± 2% +0.32% (p=0.017 n=47+50) GoParse-4 1.26MB/s ± 3% 1.27MB/s ± 1% +0.68% (p=0.000 n=50+48) RegexpMatchEasy0_32-4 24.9MB/s ± 1% 24.9MB/s ± 1% +0.13% (p=0.004 n=38+45) RegexpMatchEasy0_1K-4 129MB/s ± 7% 132MB/s ± 6% +2.34% (p=0.000 n=46+50) RegexpMatchEasy1_32-4 24.5MB/s ± 1% 24.4MB/s ± 1% ~ (p=0.252 n=45+48) RegexpMatchEasy1_1K-4 98.8MB/s ± 4% 98.7MB/s ± 3% ~ (p=0.771 n=50+49) RegexpMatchMedium_32-4 485kB/s ± 3% 480kB/s ± 0% -0.95% (p=0.000 n=50+38) RegexpMatchMedium_1K-4 1.92MB/s ± 1% 1.92MB/s ± 1% ~ (p=0.129 n=43+47) RegexpMatchHard_32-4 1.08MB/s ± 2% 1.08MB/s ± 1% +0.38% (p=0.017 n=46+46) RegexpMatchHard_1K-4 1.15MB/s ± 2% 1.16MB/s ± 1% +0.67% (p=0.001 n=50+49) Revcomp-4 29.7MB/s ± 4% 29.7MB/s ± 2% ~ (p=0.682 n=50+50) Template-4 1.85MB/s ± 3% 1.87MB/s ± 1% +1.04% (p=0.000 n=50+44) [Geo mean] 6.56MB/s 6.60MB/s +0.47% Change-Id: Ic2cca90133c27a08d9f1a23c65b0eed5fbd02684 Reviewed-on: https://go-review.googlesource.com/41190 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
34ee8ec193
commit
a041806335
@ -123,12 +123,17 @@ var optab = []Optab{
|
||||
{AWORD, C_NONE, C_NONE, C_TLS_LE, 103, 4, 0, 0, 0},
|
||||
{AWORD, C_NONE, C_NONE, C_TLS_IE, 104, 4, 0, 0, 0},
|
||||
{AMOVW, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0},
|
||||
{AMOVW, C_SCON, C_NONE, C_REG, 12, 4, 0, 0, 0},
|
||||
{AMOVW, C_LCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
|
||||
{AMOVW, C_LCONADDR, C_NONE, C_REG, 12, 4, 0, LFROM | LPCREL, 4},
|
||||
{AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
||||
{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||
{AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||
{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
|
||||
{AADD, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
|
||||
{AADD, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||
{AMVN, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
|
||||
{ACMP, C_SCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
|
||||
{AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
|
||||
{AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
||||
{AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
|
||||
@ -1123,6 +1128,9 @@ func (c *ctxt5) aclass(a *obj.Addr) int {
|
||||
if immrot(^uint32(c.instoffset)) != 0 {
|
||||
return C_NCON
|
||||
}
|
||||
if uint32(c.instoffset) <= 0xffff && objabi.GOARM == 7 {
|
||||
return C_SCON
|
||||
}
|
||||
return C_LCON
|
||||
|
||||
case obj.NAME_EXTERN,
|
||||
@ -1217,7 +1225,7 @@ func cmp(a int, b int) bool {
|
||||
}
|
||||
switch a {
|
||||
case C_LCON:
|
||||
if b == C_RCON || b == C_NCON {
|
||||
if b == C_RCON || b == C_NCON || b == C_SCON {
|
||||
return true
|
||||
}
|
||||
|
||||
@ -1674,14 +1682,22 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
}
|
||||
|
||||
case 12: /* movw $lcon, reg */
|
||||
o1 = c.omvl(p, &p.From, int(p.To.Reg))
|
||||
if o.a1 == C_SCON {
|
||||
o1 = c.omvs(p, &p.From, int(p.To.Reg))
|
||||
} else {
|
||||
o1 = c.omvl(p, &p.From, int(p.To.Reg))
|
||||
}
|
||||
|
||||
if o.flag&LPCREL != 0 {
|
||||
o2 = c.oprrr(p, AADD, int(p.Scond)) | (uint32(p.To.Reg)&15)<<0 | (REGPC&15)<<16 | (uint32(p.To.Reg)&15)<<12
|
||||
}
|
||||
|
||||
case 13: /* op $lcon, [R], R */
|
||||
o1 = c.omvl(p, &p.From, REGTMP)
|
||||
if o.a1 == C_SCON {
|
||||
o1 = c.omvs(p, &p.From, REGTMP)
|
||||
} else {
|
||||
o1 = c.omvl(p, &p.From, REGTMP)
|
||||
}
|
||||
|
||||
if o1 == 0 {
|
||||
break
|
||||
@ -2827,6 +2843,17 @@ func (c *ctxt5) ofsr(a obj.As, r int, v int32, b int, sc int, p *obj.Prog) uint3
|
||||
return o
|
||||
}
|
||||
|
||||
// MOVW $"lower 16-bit", Reg
|
||||
func (c *ctxt5) omvs(p *obj.Prog, a *obj.Addr, dr int) uint32 {
|
||||
var o1 uint32
|
||||
o1 = ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28
|
||||
o1 |= 0x30 << 20
|
||||
o1 |= (uint32(dr) & 15) << 12
|
||||
o1 |= uint32(a.Offset) & 0x0fff
|
||||
o1 |= (uint32(a.Offset) & 0xf000) << 4
|
||||
return o1
|
||||
}
|
||||
|
||||
func (c *ctxt5) omvl(p *obj.Prog, a *obj.Addr, dr int) uint32 {
|
||||
var o1 uint32
|
||||
if p.Pcond == nil {
|
||||
|
Loading…
Reference in New Issue
Block a user