2010-03-09 13:49:24 -07:00
|
|
|
// Copyright 2010 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
#include "runtime.h"
|
|
|
|
|
2010-06-18 16:46:00 -06:00
|
|
|
typedef struct Complex128 Complex128;
|
|
|
|
|
2010-03-09 13:49:24 -07:00
|
|
|
void
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·complex128div(Complex128 n, Complex128 d, Complex128 q)
|
2010-03-09 13:49:24 -07:00
|
|
|
{
|
2010-06-18 16:46:00 -06:00
|
|
|
int32 ninf, dinf, nnan, dnan;
|
2010-03-09 13:49:24 -07:00
|
|
|
float64 a, b, ratio, denom;
|
|
|
|
|
2010-06-18 16:46:00 -06:00
|
|
|
// Special cases as in C99.
|
runtime: inline several float64 routines to speed up complex128 division
Depends on CL 6197045.
Result obtained on Core i7 620M, Darwin/amd64:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 57 28 -50.78%
BenchmarkComplex128DivNisNaN 49 15 -68.90%
BenchmarkComplex128DivDisNaN 49 15 -67.88%
BenchmarkComplex128DivNisInf 40 12 -68.50%
BenchmarkComplex128DivDisInf 33 13 -61.06%
Result obtained on Core i7 620M, Darwin/386:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 89 50 -44.05%
BenchmarkComplex128DivNisNaN 307 802 +161.24%
BenchmarkComplex128DivDisNaN 309 788 +155.02%
BenchmarkComplex128DivNisInf 278 237 -14.75%
BenchmarkComplex128DivDisInf 46 22 -52.46%
Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 1557 465 -70.13%
BenchmarkComplex128DivNisNaN 1443 220 -84.75%
BenchmarkComplex128DivDisNaN 1481 218 -85.28%
BenchmarkComplex128DivNisInf 952 216 -77.31%
BenchmarkComplex128DivDisInf 861 231 -73.17%
The 386 version has a performance regression, but as we have
decided to use SSE2 instead of x87 FPU for 386 too (issue 3912),
I won't address this issue.
R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
2012-08-07 09:45:50 -06:00
|
|
|
ninf = n.real == runtime·posinf || n.real == runtime·neginf ||
|
|
|
|
n.imag == runtime·posinf || n.imag == runtime·neginf;
|
|
|
|
dinf = d.real == runtime·posinf || d.real == runtime·neginf ||
|
|
|
|
d.imag == runtime·posinf || d.imag == runtime·neginf;
|
2010-06-18 16:46:00 -06:00
|
|
|
|
runtime: inline several float64 routines to speed up complex128 division
Depends on CL 6197045.
Result obtained on Core i7 620M, Darwin/amd64:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 57 28 -50.78%
BenchmarkComplex128DivNisNaN 49 15 -68.90%
BenchmarkComplex128DivDisNaN 49 15 -67.88%
BenchmarkComplex128DivNisInf 40 12 -68.50%
BenchmarkComplex128DivDisInf 33 13 -61.06%
Result obtained on Core i7 620M, Darwin/386:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 89 50 -44.05%
BenchmarkComplex128DivNisNaN 307 802 +161.24%
BenchmarkComplex128DivDisNaN 309 788 +155.02%
BenchmarkComplex128DivNisInf 278 237 -14.75%
BenchmarkComplex128DivDisInf 46 22 -52.46%
Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 1557 465 -70.13%
BenchmarkComplex128DivNisNaN 1443 220 -84.75%
BenchmarkComplex128DivDisNaN 1481 218 -85.28%
BenchmarkComplex128DivNisInf 952 216 -77.31%
BenchmarkComplex128DivDisInf 861 231 -73.17%
The 386 version has a performance regression, but as we have
decided to use SSE2 instead of x87 FPU for 386 too (issue 3912),
I won't address this issue.
R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
2012-08-07 09:45:50 -06:00
|
|
|
nnan = !ninf && (ISNAN(n.real) || ISNAN(n.imag));
|
|
|
|
dnan = !dinf && (ISNAN(d.real) || ISNAN(d.imag));
|
2010-06-18 16:46:00 -06:00
|
|
|
|
|
|
|
if(nnan || dnan) {
|
runtime: inline several float64 routines to speed up complex128 division
Depends on CL 6197045.
Result obtained on Core i7 620M, Darwin/amd64:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 57 28 -50.78%
BenchmarkComplex128DivNisNaN 49 15 -68.90%
BenchmarkComplex128DivDisNaN 49 15 -67.88%
BenchmarkComplex128DivNisInf 40 12 -68.50%
BenchmarkComplex128DivDisInf 33 13 -61.06%
Result obtained on Core i7 620M, Darwin/386:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 89 50 -44.05%
BenchmarkComplex128DivNisNaN 307 802 +161.24%
BenchmarkComplex128DivDisNaN 309 788 +155.02%
BenchmarkComplex128DivNisInf 278 237 -14.75%
BenchmarkComplex128DivDisInf 46 22 -52.46%
Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 1557 465 -70.13%
BenchmarkComplex128DivNisNaN 1443 220 -84.75%
BenchmarkComplex128DivDisNaN 1481 218 -85.28%
BenchmarkComplex128DivNisInf 952 216 -77.31%
BenchmarkComplex128DivDisInf 861 231 -73.17%
The 386 version has a performance regression, but as we have
decided to use SSE2 instead of x87 FPU for 386 too (issue 3912),
I won't address this issue.
R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
2012-08-07 09:45:50 -06:00
|
|
|
q.real = runtime·nan;
|
|
|
|
q.imag = runtime·nan;
|
|
|
|
} else if(ninf && !dinf) {
|
|
|
|
q.real = runtime·posinf;
|
|
|
|
q.imag = runtime·posinf;
|
|
|
|
} else if(!ninf && dinf) {
|
2010-06-18 16:46:00 -06:00
|
|
|
q.real = 0;
|
|
|
|
q.imag = 0;
|
|
|
|
} else if(d.real == 0 && d.imag == 0) {
|
|
|
|
if(n.real == 0 && n.imag == 0) {
|
runtime: inline several float64 routines to speed up complex128 division
Depends on CL 6197045.
Result obtained on Core i7 620M, Darwin/amd64:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 57 28 -50.78%
BenchmarkComplex128DivNisNaN 49 15 -68.90%
BenchmarkComplex128DivDisNaN 49 15 -67.88%
BenchmarkComplex128DivNisInf 40 12 -68.50%
BenchmarkComplex128DivDisInf 33 13 -61.06%
Result obtained on Core i7 620M, Darwin/386:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 89 50 -44.05%
BenchmarkComplex128DivNisNaN 307 802 +161.24%
BenchmarkComplex128DivDisNaN 309 788 +155.02%
BenchmarkComplex128DivNisInf 278 237 -14.75%
BenchmarkComplex128DivDisInf 46 22 -52.46%
Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 1557 465 -70.13%
BenchmarkComplex128DivNisNaN 1443 220 -84.75%
BenchmarkComplex128DivDisNaN 1481 218 -85.28%
BenchmarkComplex128DivNisInf 952 216 -77.31%
BenchmarkComplex128DivDisInf 861 231 -73.17%
The 386 version has a performance regression, but as we have
decided to use SSE2 instead of x87 FPU for 386 too (issue 3912),
I won't address this issue.
R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
2012-08-07 09:45:50 -06:00
|
|
|
q.real = runtime·nan;
|
|
|
|
q.imag = runtime·nan;
|
2010-06-18 16:46:00 -06:00
|
|
|
} else {
|
runtime: inline several float64 routines to speed up complex128 division
Depends on CL 6197045.
Result obtained on Core i7 620M, Darwin/amd64:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 57 28 -50.78%
BenchmarkComplex128DivNisNaN 49 15 -68.90%
BenchmarkComplex128DivDisNaN 49 15 -67.88%
BenchmarkComplex128DivNisInf 40 12 -68.50%
BenchmarkComplex128DivDisInf 33 13 -61.06%
Result obtained on Core i7 620M, Darwin/386:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 89 50 -44.05%
BenchmarkComplex128DivNisNaN 307 802 +161.24%
BenchmarkComplex128DivDisNaN 309 788 +155.02%
BenchmarkComplex128DivNisInf 278 237 -14.75%
BenchmarkComplex128DivDisInf 46 22 -52.46%
Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark old ns/op new ns/op delta
BenchmarkComplex128DivNormal 1557 465 -70.13%
BenchmarkComplex128DivNisNaN 1443 220 -84.75%
BenchmarkComplex128DivDisNaN 1481 218 -85.28%
BenchmarkComplex128DivNisInf 952 216 -77.31%
BenchmarkComplex128DivDisInf 861 231 -73.17%
The 386 version has a performance regression, but as we have
decided to use SSE2 instead of x87 FPU for 386 too (issue 3912),
I won't address this issue.
R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
2012-08-07 09:45:50 -06:00
|
|
|
q.real = runtime·posinf;
|
|
|
|
q.imag = runtime·posinf;
|
2010-06-18 16:46:00 -06:00
|
|
|
}
|
2010-03-09 13:49:24 -07:00
|
|
|
} else {
|
2010-06-18 16:46:00 -06:00
|
|
|
// Standard complex arithmetic, factored to avoid unnecessary overflow.
|
|
|
|
a = d.real;
|
|
|
|
if(a < 0)
|
|
|
|
a = -a;
|
|
|
|
b = d.imag;
|
|
|
|
if(b < 0)
|
|
|
|
b = -b;
|
|
|
|
if(a <= b) {
|
|
|
|
ratio = d.real/d.imag;
|
|
|
|
denom = d.real*ratio + d.imag;
|
|
|
|
q.real = (n.real*ratio + n.imag) / denom;
|
|
|
|
q.imag = (n.imag*ratio - n.real) / denom;
|
|
|
|
} else {
|
|
|
|
ratio = d.imag/d.real;
|
|
|
|
denom = d.imag*ratio + d.real;
|
|
|
|
q.real = (n.imag*ratio + n.real) / denom;
|
|
|
|
q.imag = (n.imag - n.real*ratio) / denom;
|
|
|
|
}
|
2010-03-09 13:49:24 -07:00
|
|
|
}
|
2010-06-18 16:46:00 -06:00
|
|
|
FLUSH(&q);
|
2010-03-09 13:49:24 -07:00
|
|
|
}
|