1
0
mirror of https://github.com/golang/go synced 2024-10-04 20:31:22 -06:00
go/src/cmd/gc/cplx.c

487 lines
8.1 KiB
C
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <u.h>
#include <libc.h>
#include "gg.h"
static void subnode(Node *nr, Node *ni, Node *nc);
static void minus(Node *nl, Node *res);
void complexminus(Node*, Node*);
void complexadd(int op, Node*, Node*, Node*);
void complexmul(Node*, Node*, Node*);
#define CASE(a,b) (((a)<<16)|((b)<<0))
static int
overlap(Node *f, Node *t)
{
// check whether f and t could be overlapping stack references.
// not exact, because it's hard to check for the stack register
// in portable code. close enough: worst case we will allocate
// an extra temporary and the registerizer will clean it up.
return f->op == OINDREG &&
t->op == OINDREG &&
f->xoffset+f->type->width >= t->xoffset &&
t->xoffset+t->type->width >= f->xoffset;
}
/*
* generate:
* res = n;
* simplifies and calls gmove.
*/
void
complexmove(Node *f, Node *t)
{
int ft, tt;
Node n1, n2, n3, n4;
if(debug['g']) {
dump("\ncomplexmove-f", f);
dump("complexmove-t", t);
}
if(!t->addable)
fatal("complexmove: to not addable");
ft = simsimtype(f->type);
tt = simsimtype(t->type);
switch(CASE(ft,tt)) {
default:
fatal("complexmove: unknown conversion: %T -> %T\n",
f->type, t->type);
case CASE(TCOMPLEX64,TCOMPLEX64):
case CASE(TCOMPLEX64,TCOMPLEX128):
case CASE(TCOMPLEX128,TCOMPLEX64):
case CASE(TCOMPLEX128,TCOMPLEX128):
// complex to complex move/convert.
// make f addable.
// also use temporary if possible stack overlap.
if(!f->addable || overlap(f, t)) {
tempname(&n1, f->type);
complexmove(f, &n1);
f = &n1;
}
subnode(&n1, &n2, f);
subnode(&n3, &n4, t);
cgen(&n1, &n3);
cgen(&n2, &n4);
break;
}
}
int
complexop(Node *n, Node *res)
{
if(n != N && n->type != T)
if(iscomplex[n->type->etype]) {
goto maybe;
}
if(res != N && res->type != T)
if(iscomplex[res->type->etype]) {
goto maybe;
}
if(n->op == OREAL || n->op == OIMAG)
goto yes;
goto no;
maybe:
switch(n->op) {
case OCONV: // implemented ops
case OADD:
case OSUB:
case OMUL:
case OMINUS:
case OCOMPLEX:
case OREAL:
case OIMAG:
goto yes;
case ODOT:
case ODOTPTR:
case OINDEX:
case OIND:
case ONAME:
goto yes;
}
no:
//dump("\ncomplex-no", n);
return 0;
yes:
//dump("\ncomplex-yes", n);
return 1;
}
void
complexgen(Node *n, Node *res)
{
Node *nl, *nr;
Node tnl, tnr;
Node n1, n2, tmp;
int tl, tr;
if(debug['g']) {
dump("\ncomplexgen-n", n);
dump("complexgen-res", res);
}
while(n->op == OCONVNOP)
n = n->left;
// pick off float/complex opcodes
switch(n->op) {
case OCOMPLEX:
if(res->addable) {
subnode(&n1, &n2, res);
tempname(&tmp, n1.type);
cgen(n->left, &tmp);
cgen(n->right, &n2);
cgen(&tmp, &n1);
return;
}
break;
case OREAL:
case OIMAG:
nl = n->left;
if(!nl->addable) {
tempname(&tmp, nl->type);
complexgen(nl, &tmp);
nl = &tmp;
}
subnode(&n1, &n2, nl);
if(n->op == OREAL) {
cgen(&n1, res);
return;
}
cgen(&n2, res);
return;
}
// perform conversion from n to res
tl = simsimtype(res->type);
tl = cplxsubtype(tl);
tr = simsimtype(n->type);
tr = cplxsubtype(tr);
if(tl != tr) {
if(!n->addable) {
tempname(&n1, n->type);
complexmove(n, &n1);
n = &n1;
}
complexmove(n, res);
return;
}
if(!res->addable) {
igen(res, &n1, N);
cgen(n, &n1);
regfree(&n1);
return;
}
if(n->addable) {
complexmove(n, res);
return;
}
switch(n->op) {
default:
dump("complexgen: unknown op", n);
fatal("complexgen: unknown op %O", n->op);
case ODOT:
case ODOTPTR:
case OINDEX:
case OIND:
case ONAME: // PHEAP or PPARAMREF var
case OCALLFUNC:
case OCALLMETH:
case OCALLINTER:
igen(n, &n1, res);
complexmove(&n1, res);
regfree(&n1);
return;
case OCONV:
case OADD:
case OSUB:
case OMUL:
case OMINUS:
case OCOMPLEX:
case OREAL:
case OIMAG:
break;
}
nl = n->left;
if(nl == N)
return;
nr = n->right;
// make both sides addable in ullman order
if(nr != N) {
if(nl->ullman > nr->ullman && !nl->addable) {
tempname(&tnl, nl->type);
cgen(nl, &tnl);
nl = &tnl;
}
if(!nr->addable) {
tempname(&tnr, nr->type);
cgen(nr, &tnr);
nr = &tnr;
}
}
if(!nl->addable) {
tempname(&tnl, nl->type);
cgen(nl, &tnl);
nl = &tnl;
}
switch(n->op) {
default:
fatal("complexgen: unknown op %O", n->op);
break;
case OCONV:
complexmove(nl, res);
break;
case OMINUS:
complexminus(nl, res);
break;
case OADD:
case OSUB:
complexadd(n->op, nl, nr, res);
break;
case OMUL:
complexmul(nl, nr, res);
break;
}
}
void
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
complexbool(int op, Node *nl, Node *nr, int true, int likely, Prog *to)
{
Node tnl, tnr;
Node n1, n2, n3, n4;
Node na, nb, nc;
// make both sides addable in ullman order
if(nr != N) {
if(nl->ullman > nr->ullman && !nl->addable) {
tempname(&tnl, nl->type);
cgen(nl, &tnl);
nl = &tnl;
}
if(!nr->addable) {
tempname(&tnr, nr->type);
cgen(nr, &tnr);
nr = &tnr;
}
}
if(!nl->addable) {
tempname(&tnl, nl->type);
cgen(nl, &tnl);
nl = &tnl;
}
// build tree
// real(l) == real(r) && imag(l) == imag(r)
subnode(&n1, &n2, nl);
subnode(&n3, &n4, nr);
memset(&na, 0, sizeof(na));
na.op = OANDAND;
na.left = &nb;
na.right = &nc;
na.type = types[TBOOL];
memset(&nb, 0, sizeof(na));
nb.op = OEQ;
nb.left = &n1;
nb.right = &n3;
nb.type = types[TBOOL];
memset(&nc, 0, sizeof(na));
nc.op = OEQ;
nc.left = &n2;
nc.right = &n4;
nc.type = types[TBOOL];
if(op == ONE)
true = !true;
cmd/gc: contiguous loop layout Drop expecttaken function in favor of extra argument to gbranch and bgen. Mark loop condition as likely to be true, so that loops are generated inline. The main benefit here is contiguous code when trying to read the generated assembly. It has only minor effects on the timing, and they mostly cancel the minor effects that aligning function entry points had. One exception: both changes made Fannkuch faster. Compared to before CL 6244066 (before aligned functions) benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4222117400 4201958800 -0.48% BenchmarkFannkuch11 3462631800 3215908600 -7.13% BenchmarkGobDecode 20887622 20899164 +0.06% BenchmarkGobEncode 9548772 9439083 -1.15% BenchmarkGzip 151687 152060 +0.25% BenchmarkGunzip 8742 8711 -0.35% BenchmarkJSONEncode 62730560 62686700 -0.07% BenchmarkJSONDecode 252569180 252368960 -0.08% BenchmarkMandelbrot200 5267599 5252531 -0.29% BenchmarkRevcomp25M 980813500 985248400 +0.45% BenchmarkTemplate 361259100 357414680 -1.06% Compared to tip (aligned functions): benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4140739800 4201958800 +1.48% BenchmarkFannkuch11 3259914400 3215908600 -1.35% BenchmarkGobDecode 20620222 20899164 +1.35% BenchmarkGobEncode 9384886 9439083 +0.58% BenchmarkGzip 150333 152060 +1.15% BenchmarkGunzip 8741 8711 -0.34% BenchmarkJSONEncode 65210990 62686700 -3.87% BenchmarkJSONDecode 249394860 252368960 +1.19% BenchmarkMandelbrot200 5273394 5252531 -0.40% BenchmarkRevcomp25M 996013800 985248400 -1.08% BenchmarkTemplate 360620840 357414680 -0.89% R=ken2 CC=golang-dev https://golang.org/cl/6245069
2012-05-30 16:07:39 -06:00
bgen(&na, true, likely, to);
}
void
nodfconst(Node *n, Type *t, Mpflt* fval)
{
memset(n, 0, sizeof(*n));
n->op = OLITERAL;
n->addable = 1;
ullmancalc(n);
n->val.u.fval = fval;
n->val.ctype = CTFLT;
n->type = t;
if(!isfloat[t->etype])
fatal("nodfconst: bad type %T", t);
}
// break addable nc-complex into nr-real and ni-imaginary
static void
subnode(Node *nr, Node *ni, Node *nc)
{
int tc;
Type *t;
if(!nc->addable)
fatal("subnode not addable");
tc = simsimtype(nc->type);
tc = cplxsubtype(tc);
t = types[tc];
if(nc->op == OLITERAL) {
nodfconst(nr, t, &nc->val.u.cval->real);
nodfconst(ni, t, &nc->val.u.cval->imag);
return;
}
*nr = *nc;
nr->type = t;
*ni = *nc;
ni->type = t;
ni->xoffset += t->width;
}
// generate code res = -nl
static void
minus(Node *nl, Node *res)
{
Node ra;
memset(&ra, 0, sizeof(ra));
ra.op = OMINUS;
ra.left = nl;
ra.type = nl->type;
cgen(&ra, res);
}
// build and execute tree
// real(res) = -real(nl)
// imag(res) = -imag(nl)
void
complexminus(Node *nl, Node *res)
{
Node n1, n2, n5, n6;
subnode(&n1, &n2, nl);
subnode(&n5, &n6, res);
minus(&n1, &n5);
minus(&n2, &n6);
}
// build and execute tree
// real(res) = real(nl) op real(nr)
// imag(res) = imag(nl) op imag(nr)
void
complexadd(int op, Node *nl, Node *nr, Node *res)
{
Node n1, n2, n3, n4, n5, n6;
Node ra;
subnode(&n1, &n2, nl);
subnode(&n3, &n4, nr);
subnode(&n5, &n6, res);
memset(&ra, 0, sizeof(ra));
ra.op = op;
ra.left = &n1;
ra.right = &n3;
ra.type = n1.type;
cgen(&ra, &n5);
memset(&ra, 0, sizeof(ra));
ra.op = op;
ra.left = &n2;
ra.right = &n4;
ra.type = n2.type;
cgen(&ra, &n6);
}
// build and execute tree
// tmp = real(nl)*real(nr) - imag(nl)*imag(nr)
// imag(res) = real(nl)*imag(nr) + imag(nl)*real(nr)
// real(res) = tmp
void
complexmul(Node *nl, Node *nr, Node *res)
{
Node n1, n2, n3, n4, n5, n6;
Node rm1, rm2, ra, tmp;
subnode(&n1, &n2, nl);
subnode(&n3, &n4, nr);
subnode(&n5, &n6, res);
tempname(&tmp, n5.type);
// real part -> tmp
memset(&rm1, 0, sizeof(ra));
rm1.op = OMUL;
rm1.left = &n1;
rm1.right = &n3;
rm1.type = n1.type;
memset(&rm2, 0, sizeof(ra));
rm2.op = OMUL;
rm2.left = &n2;
rm2.right = &n4;
rm2.type = n2.type;
memset(&ra, 0, sizeof(ra));
ra.op = OSUB;
ra.left = &rm1;
ra.right = &rm2;
ra.type = rm1.type;
cgen(&ra, &tmp);
// imag part
memset(&rm1, 0, sizeof(ra));
rm1.op = OMUL;
rm1.left = &n1;
rm1.right = &n4;
rm1.type = n1.type;
memset(&rm2, 0, sizeof(ra));
rm2.op = OMUL;
rm2.left = &n2;
rm2.right = &n3;
rm2.type = n2.type;
memset(&ra, 0, sizeof(ra));
ra.op = OADD;
ra.left = &rm1;
ra.right = &rm2;
ra.type = rm1.type;
cgen(&ra, &n6);
// tmp ->real part
cgen(&tmp, &n5);
}