mirror of
https://github.com/golang/go
synced 2024-11-19 11:14:47 -07:00
cmd/compile,math: improve code generation for math.Abs
Implement int reg <-> fp reg moves on amd64.
If we see a load to int reg followed by an int->fp move, then we can just
load to the fp reg instead. Same for stores.

math.Abs is now:

    MOVQ "".x+8(SP), AX
    SHLQ $1, AX
    SHRQ $1, AX
    MOVQ AX, "".~r1+16(SP)

math.Copysign is now:

    MOVQ "".x+8(SP), AX
    SHLQ $1, AX
    SHRQ $1, AX
    MOVQ "".y+16(SP), CX
    SHRQ $63, CX
    SHLQ $63, CX
    ORQ CX, AX
    MOVQ AX, "".~r2+24(SP)

math.Float64bits is now:

    MOVSD "".x+8(SP), X0
    MOVSD X0, "".~r1+16(SP)

(it would be nicer to use a non-SSE reg for this, nothing is perfect)

And due to the fix for #21440, the inlined version of these improve as well.

    name      old time/op  new time/op  delta
    Abs       1.38ns ± 5%  0.89ns ±10%  -35.54%  (p=0.000 n=10+10)
    Copysign  1.56ns ± 7%  1.35ns ± 6%  -13.77%  (p=0.000 n=9+10)

Fixes #13095

Change-Id: Ibd7f2792412a6668608780b0688a77062e1f1499
Reviewed-on: https://go-review.googlesource.com/58732
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
This commit is contained in:
parent
e11fd00629
commit
fb05948d9e
@ -678,6 +678,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
// Break false dependency on destination register.
|
// Break false dependency on destination register.
|
||||||
opregreg(s, x86.AXORPS, r, r)
|
opregreg(s, x86.AXORPS, r, r)
|
||||||
opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
|
opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
|
||||||
|
case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
|
||||||
|
p := s.Prog(x86.AMOVQ)
|
||||||
|
p.From.Type = obj.TYPE_REG
|
||||||
|
p.From.Reg = v.Args[0].Reg()
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = v.Reg()
|
||||||
|
case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
|
||||||
|
p := s.Prog(x86.AMOVL)
|
||||||
|
p.From.Type = obj.TYPE_REG
|
||||||
|
p.From.Reg = v.Args[0].Reg()
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = v.Reg()
|
||||||
case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
|
case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
|
||||||
ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
|
ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
|
||||||
ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
|
ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
|
||||||
|
@ -214,7 +214,7 @@ var allAsmTests = []*asmTests{
|
|||||||
{
|
{
|
||||||
arch: "amd64",
|
arch: "amd64",
|
||||||
os: "linux",
|
os: "linux",
|
||||||
imports: []string{"encoding/binary", "math/bits", "unsafe"},
|
imports: []string{"encoding/binary", "math", "math/bits", "unsafe"},
|
||||||
tests: linuxAMD64Tests,
|
tests: linuxAMD64Tests,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -990,6 +990,57 @@ var linuxAMD64Tests = []*asmTest{
|
|||||||
`,
|
`,
|
||||||
[]string{"TEXT\t.*, [$]0-8"},
|
[]string{"TEXT\t.*, [$]0-8"},
|
||||||
},
|
},
|
||||||
|
// math.Abs using integer registers
|
||||||
|
{
|
||||||
|
`
|
||||||
|
func $(x float64) float64 {
|
||||||
|
return math.Abs(x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
[]string{"\tSHLQ\t[$]1,", "\tSHRQ\t[$]1,"},
|
||||||
|
},
|
||||||
|
// math.Copysign using integer registers
|
||||||
|
{
|
||||||
|
`
|
||||||
|
func $(x, y float64) float64 {
|
||||||
|
return math.Copysign(x, y)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
[]string{"\tSHLQ\t[$]1,", "\tSHRQ\t[$]1,", "\tSHRQ\t[$]63,", "\tSHLQ\t[$]63,", "\tORQ\t"},
|
||||||
|
},
|
||||||
|
// int <-> fp moves
|
||||||
|
{
|
||||||
|
`
|
||||||
|
func $(x float64) uint64 {
|
||||||
|
return math.Float64bits(x+1) + 1
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
[]string{"\tMOVQ\tX.*, [^X].*"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
func $(x float32) uint32 {
|
||||||
|
return math.Float32bits(x+1) + 1
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
[]string{"\tMOVL\tX.*, [^X].*"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
func $(x uint64) float64 {
|
||||||
|
return math.Float64frombits(x+1) + 1
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
[]string{"\tMOVQ\t[^X].*, X.*"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`
|
||||||
|
func $(x uint32) float32 {
|
||||||
|
return math.Float32frombits(x+1) + 1
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
[]string{"\tMOVL\t[^X].*, X.*"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var linux386Tests = []*asmTest{
|
var linux386Tests = []*asmTest{
|
||||||
|
@ -2456,3 +2456,47 @@
|
|||||||
(MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
|
(MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
|
||||||
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
|
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
|
||||||
(ADDLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
|
(ADDLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
|
||||||
|
|
||||||
|
// float <-> int register moves, with no conversion.
|
||||||
|
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
|
||||||
|
(MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) -> (MOVQf2i val)
|
||||||
|
(MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) -> (MOVLf2i val)
|
||||||
|
(MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) -> (MOVQi2f val)
|
||||||
|
(MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) -> (MOVLi2f val)
|
||||||
|
|
||||||
|
// Other load-like ops.
|
||||||
|
(ADDQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ADDQ x (MOVQf2i y))
|
||||||
|
(ADDLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ADDL x (MOVLf2i y))
|
||||||
|
(SUBQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (SUBQ x (MOVQf2i y))
|
||||||
|
(SUBLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (SUBL x (MOVLf2i y))
|
||||||
|
(ANDQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ANDQ x (MOVQf2i y))
|
||||||
|
(ANDLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ANDL x (MOVLf2i y))
|
||||||
|
( ORQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> ( ORQ x (MOVQf2i y))
|
||||||
|
( ORLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> ( ORL x (MOVLf2i y))
|
||||||
|
(XORQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (XORQ x (MOVQf2i y))
|
||||||
|
(XORLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (XORL x (MOVLf2i y))
|
||||||
|
|
||||||
|
(ADDQconstmem [valOff] {sym} ptr (MOVSDstore [ValAndOff(valOff).Off()] {sym} ptr x _)) ->
|
||||||
|
(ADDQconst [ValAndOff(valOff).Val()] (MOVQf2i x))
|
||||||
|
(ADDLconstmem [valOff] {sym} ptr (MOVSSstore [ValAndOff(valOff).Off()] {sym} ptr x _)) ->
|
||||||
|
(ADDLconst [ValAndOff(valOff).Val()] (MOVLf2i x))
|
||||||
|
|
||||||
|
(ADDSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (ADDSD x (MOVQi2f y))
|
||||||
|
(ADDSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (ADDSS x (MOVLi2f y))
|
||||||
|
(SUBSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (SUBSD x (MOVQi2f y))
|
||||||
|
(SUBSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (SUBSS x (MOVLi2f y))
|
||||||
|
(MULSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (MULSD x (MOVQi2f y))
|
||||||
|
(MULSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (MULSS x (MOVLi2f y))
|
||||||
|
|
||||||
|
// Redirect stores to use the other register set.
|
||||||
|
(MOVQstore [off] {sym} ptr (MOVQf2i val) mem) -> (MOVSDstore [off] {sym} ptr val mem)
|
||||||
|
(MOVLstore [off] {sym} ptr (MOVLf2i val) mem) -> (MOVSSstore [off] {sym} ptr val mem)
|
||||||
|
(MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) -> (MOVQstore [off] {sym} ptr val mem)
|
||||||
|
(MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) -> (MOVLstore [off] {sym} ptr val mem)
|
||||||
|
|
||||||
|
// Load args directly into the register class where it will be used.
|
||||||
|
// We do this by just modifying the type of the Arg.
|
||||||
|
(MOVQf2i <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
|
||||||
|
(MOVLf2i <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
|
||||||
|
(MOVQi2f <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
|
||||||
|
(MOVLi2f <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
|
||||||
|
@ -386,6 +386,13 @@ func init() {
|
|||||||
{name: "CVTSD2SS", argLength: 1, reg: fp11, asm: "CVTSD2SS"}, // convert float64 to float32
|
{name: "CVTSD2SS", argLength: 1, reg: fp11, asm: "CVTSD2SS"}, // convert float64 to float32
|
||||||
{name: "CVTSS2SD", argLength: 1, reg: fp11, asm: "CVTSS2SD"}, // convert float32 to float64
|
{name: "CVTSS2SD", argLength: 1, reg: fp11, asm: "CVTSS2SD"}, // convert float32 to float64
|
||||||
|
|
||||||
|
// Move values between int and float registers, with no conversion.
|
||||||
|
// TODO: should we have generic versions of these?
|
||||||
|
{name: "MOVQi2f", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits from int to float reg
|
||||||
|
{name: "MOVQf2i", argLength: 1, reg: fpgp, typ: "UInt64"}, // move 64 bits from float to int reg
|
||||||
|
{name: "MOVLi2f", argLength: 1, reg: gpfp, typ: "Float32"}, // move 32 bits from int to float reg
|
||||||
|
{name: "MOVLf2i", argLength: 1, reg: fpgp, typ: "UInt32"}, // move 32 bits from float to int reg
|
||||||
|
|
||||||
{name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
|
{name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
|
||||||
|
|
||||||
{name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
|
{name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
|
||||||
|
@ -591,6 +591,10 @@ const (
|
|||||||
OpAMD64CVTSQ2SD
|
OpAMD64CVTSQ2SD
|
||||||
OpAMD64CVTSD2SS
|
OpAMD64CVTSD2SS
|
||||||
OpAMD64CVTSS2SD
|
OpAMD64CVTSS2SD
|
||||||
|
OpAMD64MOVQi2f
|
||||||
|
OpAMD64MOVQf2i
|
||||||
|
OpAMD64MOVLi2f
|
||||||
|
OpAMD64MOVLf2i
|
||||||
OpAMD64PXOR
|
OpAMD64PXOR
|
||||||
OpAMD64LEAQ
|
OpAMD64LEAQ
|
||||||
OpAMD64LEAQ1
|
OpAMD64LEAQ1
|
||||||
@ -7017,6 +7021,54 @@ var opcodeTable = [...]opInfo{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "MOVQi2f",
|
||||||
|
argLen: 1,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "MOVQf2i",
|
||||||
|
argLen: 1,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "MOVLi2f",
|
||||||
|
argLen: 1,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "MOVLf2i",
|
||||||
|
argLen: 1,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PXOR",
|
name: "PXOR",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -10,14 +10,5 @@ package math
|
|||||||
// Abs(±Inf) = +Inf
|
// Abs(±Inf) = +Inf
|
||||||
// Abs(NaN) = NaN
|
// Abs(NaN) = NaN
|
||||||
func Abs(x float64) float64 {
|
func Abs(x float64) float64 {
|
||||||
// TODO: once golang.org/issue/13095 is fixed, change this to:
|
return Float64frombits(Float64bits(x) &^ (1 << 63))
|
||||||
// return Float64frombits(Float64bits(x) &^ (1 << 63))
|
|
||||||
// But for now, this generates better code and can also be inlined:
|
|
||||||
if x < 0 {
|
|
||||||
return -x
|
|
||||||
}
|
|
||||||
if x == 0 {
|
|
||||||
return 0 // return correctly abs(-0)
|
|
||||||
}
|
|
||||||
return x
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user