1
0
mirror of https://github.com/golang/go synced 2024-11-20 09:04:44 -07:00
go/src/pkg/runtime/complex.c
Shenghou Ma 0157c72d13 runtime: inline several float64 routines to speed up complex128 division
Depends on CL 6197045.

Result obtained on Core i7 620M, Darwin/amd64:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           57           28  -50.78%
BenchmarkComplex128DivNisNaN           49           15  -68.90%
BenchmarkComplex128DivDisNaN           49           15  -67.88%
BenchmarkComplex128DivNisInf           40           12  -68.50%
BenchmarkComplex128DivDisInf           33           13  -61.06%

Result obtained on Core i7 620M, Darwin/386:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           89           50  -44.05%
BenchmarkComplex128DivNisNaN          307          802  +161.24%
BenchmarkComplex128DivDisNaN          309          788  +155.02%
BenchmarkComplex128DivNisInf          278          237  -14.75%
BenchmarkComplex128DivDisInf           46           22  -52.46%

Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal         1557          465  -70.13%
BenchmarkComplex128DivNisNaN         1443          220  -84.75%
BenchmarkComplex128DivDisNaN         1481          218  -85.28%
BenchmarkComplex128DivNisInf          952          216  -77.31%
BenchmarkComplex128DivDisInf          861          231  -73.17%

The 386 version has a performance regression, but as we have
decided to use SSE2 instead of x87 FPU for 386 too (issue 3912),
I won't address this issue.

R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
2012-08-07 23:45:50 +08:00

63 lines
1.6 KiB
C

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "runtime.h"
// Complex128 lets the struct tag be used without the struct keyword.
// The struct itself is not defined in this file; presumably it comes from
// runtime.h (confirm). This file only touches its .real and .imag members.
typedef struct Complex128 Complex128;
// complex128div computes the quotient n/d into q.
//
// q arrives by value and is passed to FLUSH once it has been filled in;
// presumably that is how the result is handed back to the caller
// (NOTE(review): confirm against the FLUSH definition in runtime.h).
//
// Special cases, modelled on C99 and tested in this order:
//   1. n or d has a NaN part and no infinite part -> NaN+NaN*i
//   2. exactly one of n, d has an infinite part:
//        n infinite -> +Inf+Inf*i (the sign of the infinity is not tracked)
//        d infinite -> 0+0*i
//   3. d is zero: NaN+NaN*i when n is zero as well, otherwise +Inf+Inf*i
// All remaining inputs, including infinite/infinite, use the general formula.
void
runtime·complex128div(Complex128 n, Complex128 d, Complex128 q)
{
	int32 numinf, deninf, numnan, dennan;
	float64 absre, absim, r, scale;

	// An operand counts as infinite if either part is +Inf or -Inf.
	numinf = n.real == runtime·posinf || n.real == runtime·neginf ||
		n.imag == runtime·posinf || n.imag == runtime·neginf;
	deninf = d.real == runtime·posinf || d.real == runtime·neginf ||
		d.imag == runtime·posinf || d.imag == runtime·neginf;

	// Infinity takes precedence: an operand with an infinite part is
	// never classified as NaN, even if its other part is NaN.
	numnan = !numinf && (ISNAN(n.real) || ISNAN(n.imag));
	dennan = !deninf && (ISNAN(d.real) || ISNAN(d.imag));

	if(numnan || dennan) {
		q.real = runtime·nan;
		q.imag = runtime·nan;
	} else if(numinf != deninf) {
		// Both flags are 0 or 1, so this is "exactly one is infinite".
		if(numinf) {
			q.real = runtime·posinf;
			q.imag = runtime·posinf;
		} else {
			q.real = 0;
			q.imag = 0;
		}
	} else if(d.real == 0 && d.imag == 0) {
		if(n.real == 0 && n.imag == 0) {
			q.real = runtime·nan;
			q.imag = runtime·nan;
		} else {
			q.real = runtime·posinf;
			q.imag = runtime·posinf;
		}
	} else {
		// General case. Scale by the larger-magnitude part of d so
		// that the intermediate products do not overflow needlessly.
		absre = d.real < 0 ? -d.real : d.real;
		absim = d.imag < 0 ? -d.imag : d.imag;
		if(absre <= absim) {
			r = d.real/d.imag;
			scale = d.real*r + d.imag;
			q.real = (n.real*r + n.imag) / scale;
			q.imag = (n.imag*r - n.real) / scale;
		} else {
			r = d.imag/d.real;
			scale = d.imag*r + d.real;
			q.real = (n.imag*r + n.real) / scale;
			q.imag = (n.imag - n.real*r) / scale;
		}
	}
	FLUSH(&q);
}