mirror of
https://github.com/golang/go
synced 2024-11-19 12:04:43 -07:00
cmd/internal/gc: use hardware instruction for math.Sqrt (amd64/arm)
I first prototyped this change in Sept 2011, and I discarded it because it made no difference in the obvious benchmark loop. It still makes no difference in the obvious benchmark loop, but in a less obvious one, doing some extra computation around the calls to Sqrt, not making the call does have a significant effect. benchmark old ns/op new ns/op delta BenchmarkSqrt 4.56 4.57 +0.22% BenchmarkSqrtIndirect 4.56 4.56 +0.00% BenchmarkSqrtGo 69.4 69.4 +0.00% BenchmarkSqrtPrime 4417 3647 -17.43% This is a warmup for using hardware expansions for some calls to 1-line assembly routines in the runtime (for example getg). Change-Id: Ie66be23f8c09d0f7dc4ddd7ca8a93cfce28f55a4 Reviewed-on: https://go-review.googlesource.com/8356 Reviewed-by: Rob Pike <r@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
90c0fefe51
commit
92dba0d278
@ -1055,6 +1055,9 @@ func optoas(op int, t *gc.Type) int {
|
||||
|
||||
case gc.ODIV<<16 | gc.TFLOAT64:
|
||||
a = arm.ADIVD
|
||||
|
||||
case gc.OSQRT<<16 | gc.TFLOAT64:
|
||||
a = arm.ASQRTD
|
||||
}
|
||||
|
||||
return a
|
||||
|
@ -1101,6 +1101,7 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
|
||||
return 0
|
||||
|
||||
case obj.ANOP, /* read,, write */
|
||||
arm.ASQRTD,
|
||||
arm.AMOVW,
|
||||
arm.AMOVF,
|
||||
arm.AMOVD,
|
||||
|
@ -70,16 +70,17 @@ var progtable = [arm.ALAST]obj.ProgInfo{
|
||||
arm.ATST: {gc.SizeL | gc.LeftRead | gc.RightRead, 0, 0, 0},
|
||||
|
||||
// Floating point.
|
||||
arm.AADDD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.AADDF: {gc.SizeF | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ACMPD: {gc.SizeD | gc.LeftRead | gc.RightRead, 0, 0, 0},
|
||||
arm.ACMPF: {gc.SizeF | gc.LeftRead | gc.RightRead, 0, 0, 0},
|
||||
arm.ADIVD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ADIVF: {gc.SizeF | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.AMULD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.AMULF: {gc.SizeF | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ASUBD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ASUBF: {gc.SizeF | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.AADDD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.AADDF: {gc.SizeF | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ACMPD: {gc.SizeD | gc.LeftRead | gc.RightRead, 0, 0, 0},
|
||||
arm.ACMPF: {gc.SizeF | gc.LeftRead | gc.RightRead, 0, 0, 0},
|
||||
arm.ADIVD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ADIVF: {gc.SizeF | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.AMULD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.AMULF: {gc.SizeF | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ASUBD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ASUBF: {gc.SizeF | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
arm.ASQRTD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
|
||||
// Conversions.
|
||||
arm.AMOVWD: {gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv, 0, 0, 0},
|
||||
|
@ -1131,6 +1131,9 @@ func optoas(op int, t *gc.Type) int {
|
||||
|
||||
case gc.ODIV<<16 | gc.TFLOAT64:
|
||||
a = x86.ADIVSD
|
||||
|
||||
case gc.OSQRT<<16 | gc.TFLOAT64:
|
||||
a = x86.ASQRTSD
|
||||
}
|
||||
|
||||
return a
|
||||
|
@ -204,6 +204,7 @@ var progtable = [x86.ALAST]obj.ProgInfo{
|
||||
x86.ASHRL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
|
||||
x86.ASHRQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
|
||||
x86.ASHRW: {gc.SizeW | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
|
||||
x86.ASQRTSD: {gc.SizeD | gc.LeftRead | RightRdwr, 0, 0, 0},
|
||||
x86.ASTOSB: {gc.OK, AX | DI, DI, 0},
|
||||
x86.ASTOSL: {gc.OK, AX | DI, DI, 0},
|
||||
x86.ASTOSQ: {gc.OK, AX | DI, DI, 0},
|
||||
|
@ -409,6 +409,15 @@ func Cgen(n *Node, res *Node) {
|
||||
cgen_norm(n, &n1, res)
|
||||
return
|
||||
|
||||
case OSQRT:
|
||||
var n1 Node
|
||||
Regalloc(&n1, nl.Type, res)
|
||||
Cgen(n.Left, &n1)
|
||||
Thearch.Gins(Thearch.Optoas(OSQRT, nl.Type), &n1, &n1)
|
||||
Thearch.Gmove(&n1, res)
|
||||
Regfree(&n1)
|
||||
return
|
||||
|
||||
// symmetric binary
|
||||
case OAND,
|
||||
OOR,
|
||||
|
@ -1002,6 +1002,9 @@ func gen(n *Node) {
|
||||
case ORETURN, ORETJMP:
|
||||
cgen_ret(n)
|
||||
|
||||
case OSQRT:
|
||||
cgen_discard(n.Left)
|
||||
|
||||
case OCHECKNIL:
|
||||
Cgen_checknil(n.Left)
|
||||
|
||||
|
@ -293,7 +293,7 @@ const (
|
||||
OREGISTER // a register, such as AX.
|
||||
OINDREG // offset plus indirect of a register, such as 8(SP).
|
||||
|
||||
// 386/amd64-specific opcodes
|
||||
// arch-specific opcodes
|
||||
OCMP // compare: ACMP.
|
||||
ODEC // decrement: ADEC.
|
||||
OINC // increment: AINC.
|
||||
@ -303,6 +303,7 @@ const (
|
||||
ORROTC // right rotate-carry: ARCR.
|
||||
ORETJMP // return to other function
|
||||
OPS // compare parity set (for x86 NaN check)
|
||||
OSQRT // sqrt(float64), on systems that have hw support
|
||||
|
||||
OEND
|
||||
)
|
||||
|
@ -622,6 +622,16 @@ func walkexpr(np **Node, init **NodeList) {
|
||||
walkexpr(&n.Left, init)
|
||||
walkexprlist(n.List, init)
|
||||
|
||||
if n.Left.Op == ONAME && n.Left.Sym.Name == "Sqrt" && n.Left.Sym.Pkg.Path == "math" {
|
||||
switch Thearch.Thechar {
|
||||
case '5', '6':
|
||||
n.Op = OSQRT
|
||||
n.Left = n.List.N
|
||||
n.List = nil
|
||||
goto ret
|
||||
}
|
||||
}
|
||||
|
||||
ll := ascompatte(int(n.Op), n, n.Isddd, getinarg(t), n.List, 0, init)
|
||||
n.List = reorder1(ll)
|
||||
goto ret
|
||||
|
@ -2977,15 +2977,56 @@ func BenchmarkSinh(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
var Global float64
|
||||
|
||||
func BenchmarkSqrt(b *testing.B) {
|
||||
x, y := 0.0, 10.0
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sqrt(10)
|
||||
x += Sqrt(y)
|
||||
}
|
||||
Global = x
|
||||
}
|
||||
|
||||
func BenchmarkSqrtIndirect(b *testing.B) {
|
||||
x, y := 0.0, 10.0
|
||||
f := Sqrt
|
||||
for i := 0; i < b.N; i++ {
|
||||
x += f(y)
|
||||
}
|
||||
Global = x
|
||||
}
|
||||
|
||||
func BenchmarkSqrtGo(b *testing.B) {
|
||||
x, y := 0.0, 10.0
|
||||
for i := 0; i < b.N; i++ {
|
||||
SqrtGo(10)
|
||||
x += SqrtGo(y)
|
||||
}
|
||||
Global = x
|
||||
}
|
||||
|
||||
func isPrime(i int) bool {
|
||||
// Yes, this is a dumb way to write this code,
|
||||
// but calling Sqrt repeatedly in this way demonstrates
|
||||
// the benefit of using a direct SQRT instruction on systems
|
||||
// that have one, whereas the obvious loop seems not to
|
||||
// demonstrate such a benefit.
|
||||
for j := 2; float64(j) <= Sqrt(float64(i)); j++ {
|
||||
if i%j == 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func BenchmarkSqrtPrime(b *testing.B) {
|
||||
any := false
|
||||
for i := 0; i < b.N; i++ {
|
||||
if isPrime(100003) {
|
||||
any = true
|
||||
}
|
||||
}
|
||||
if any {
|
||||
Global = 1
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,6 +91,11 @@ package math
|
||||
// Sqrt(NaN) = NaN
|
||||
func Sqrt(x float64) float64
|
||||
|
||||
// Note: Sqrt is implemented in assembly on some systems.
|
||||
// Others have assembly stubs that jump to func sqrt below.
|
||||
// On systems where Sqrt is a single instruction, the compiler
|
||||
// may turn a direct call into a direct use of that instruction instead.
|
||||
|
||||
func sqrt(x float64) float64 {
|
||||
// special cases
|
||||
switch {
|
||||
|
Loading…
Reference in New Issue
Block a user