mirror of
https://github.com/golang/go
synced 2024-10-04 14:41:25 -06:00
6c7cbf086c
REP MOVSQ and REP STOSQ have a really high startup overhead. Use a Duff's device to do the repetition instead. benchmark old ns/op new ns/op delta BenchmarkClearFat32 7.20 1.60 -77.78% BenchmarkCopyFat32 6.88 2.38 -65.41% BenchmarkClearFat64 7.15 3.20 -55.24% BenchmarkCopyFat64 6.88 3.44 -50.00% BenchmarkClearFat128 9.53 5.34 -43.97% BenchmarkCopyFat128 9.27 5.56 -40.02% BenchmarkClearFat256 13.8 9.53 -30.94% BenchmarkCopyFat256 13.5 10.3 -23.70% BenchmarkClearFat512 22.3 18.0 -19.28% BenchmarkCopyFat512 22.0 19.7 -10.45% BenchmarkCopyFat1024 36.5 38.4 +5.21% BenchmarkClearFat1024 35.1 35.0 -0.28% TODO: use for stack frame zeroing TODO: REP prefixes are still used for "reverse" copying when src/dst regions overlap. Might be worth fixing. LGTM=rsc R=golang-codereviews, rsc CC=golang-codereviews, r https://golang.org/cl/81370046
1306 lines
26 KiB
C
1306 lines
26 KiB
C
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
#undef EXTERN
|
|
#define EXTERN
|
|
#include <u.h>
|
|
#include <libc.h>
|
|
#include "gg.h"
|
|
#include "opt.h"
|
|
|
|
static Prog *appendpp(Prog*, int, int, vlong, int, vlong);
|
|
|
|
void
|
|
defframe(Prog *ptxt)
|
|
{
|
|
uint32 frame;
|
|
Prog *p;
|
|
vlong i;
|
|
NodeList *l;
|
|
Node *n;
|
|
|
|
// fill in argument size
|
|
ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
|
|
|
|
// fill in final stack size
|
|
frame = rnd(stksize+maxarg, widthptr);
|
|
ptxt->to.offset = frame;
|
|
|
|
// insert code to contain ambiguously live variables
|
|
// so that garbage collector only sees initialized values
|
|
// when it looks for pointers.
|
|
//
|
|
// TODO: determine best way to zero the given values.
|
|
// among other problems, AX is initialized to 0 multiple times,
|
|
// but that's really the tip of the iceberg.
|
|
p = ptxt;
|
|
for(l=curfn->dcl; l != nil; l = l->next) {
|
|
n = l->n;
|
|
if(!n->needzero)
|
|
continue;
|
|
if(n->class != PAUTO)
|
|
fatal("needzero class %d", n->class);
|
|
if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
|
|
fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
|
|
if(n->type->width <= 2*widthptr) {
|
|
for(i = 0; i < n->type->width; i += widthptr)
|
|
p = appendpp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame+n->xoffset+i);
|
|
} else if(n->type->width <= 16*widthptr) {
|
|
p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
|
|
for(i = 0; i < n->type->width; i += widthptr)
|
|
p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+n->xoffset+i);
|
|
} else {
|
|
p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
|
|
p = appendpp(p, AMOVL, D_CONST, n->type->width/widthptr, D_CX, 0);
|
|
p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+n->xoffset, D_DI, 0);
|
|
p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
|
|
p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
static Prog*
|
|
appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
|
|
{
|
|
Prog *q;
|
|
q = mal(sizeof(*q));
|
|
clearp(q);
|
|
q->as = as;
|
|
q->lineno = p->lineno;
|
|
q->from.type = ftype;
|
|
q->from.offset = foffset;
|
|
q->to.type = ttype;
|
|
q->to.offset = toffset;
|
|
q->link = p->link;
|
|
p->link = q;
|
|
return q;
|
|
}
|
|
|
|
// Sweep the prog list to mark any used nodes.
|
|
void
|
|
markautoused(Prog* p)
|
|
{
|
|
for (; p; p = p->link) {
|
|
if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
|
|
continue;
|
|
|
|
if (p->from.node)
|
|
p->from.node->used = 1;
|
|
|
|
if (p->to.node)
|
|
p->to.node->used = 1;
|
|
}
|
|
}
|
|
|
|
// Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
|
|
void
|
|
fixautoused(Prog* p)
|
|
{
|
|
Prog **lp;
|
|
|
|
for (lp=&p; (p=*lp) != P; ) {
|
|
if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
|
|
*lp = p->link;
|
|
continue;
|
|
}
|
|
if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
|
|
// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
|
|
// VARDEFs are interspersed with other code, and a jump might be using the
|
|
// VARDEF as a target. Replace with a no-op instead. A later pass will remove
|
|
// the no-ops.
|
|
p->to.type = D_NONE;
|
|
p->to.node = N;
|
|
p->as = ANOP;
|
|
continue;
|
|
}
|
|
|
|
if (p->from.type == D_AUTO && p->from.node)
|
|
p->from.offset += p->from.node->stkdelta;
|
|
|
|
if (p->to.type == D_AUTO && p->to.node)
|
|
p->to.offset += p->to.node->stkdelta;
|
|
|
|
lp = &p->link;
|
|
}
|
|
}
|
|
|
|
void
|
|
clearfat(Node *nl)
|
|
{
|
|
uint32 w, c, q;
|
|
Node n1;
|
|
Prog *p;
|
|
|
|
/* clear a fat object */
|
|
if(debug['g'])
|
|
dump("\nclearfat", nl);
|
|
|
|
w = nl->type->width;
|
|
// Avoid taking the address for simple enough types.
|
|
if(componentgen(N, nl))
|
|
return;
|
|
|
|
c = w % 4; // bytes
|
|
q = w / 4; // quads
|
|
|
|
nodreg(&n1, types[tptr], D_DI);
|
|
agen(nl, &n1);
|
|
gconreg(AMOVL, 0, D_AX);
|
|
|
|
if(q > 128) {
|
|
gconreg(AMOVL, q, D_CX);
|
|
gins(AREP, N, N); // repeat
|
|
gins(ASTOSL, N, N); // STOL AL,*(DI)+
|
|
} else if(q >= 4) {
|
|
p = gins(ADUFFZERO, N, N);
|
|
p->to.type = D_ADDR;
|
|
p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
|
|
// 1 and 128 = magic constants: see ../../pkg/runtime/asm_386.s
|
|
p->to.offset = 1*(128-q);
|
|
} else
|
|
while(q > 0) {
|
|
gins(ASTOSL, N, N); // STOL AL,*(DI)+
|
|
q--;
|
|
}
|
|
|
|
while(c > 0) {
|
|
gins(ASTOSB, N, N); // STOB AL,*(DI)+
|
|
c--;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* generate:
|
|
* call f
|
|
* proc=-1 normal call but no return
|
|
* proc=0 normal call
|
|
* proc=1 goroutine run in new proc
|
|
* proc=2 defer call save away stack
|
|
* proc=3 normal call to C pointer (not Go func value)
|
|
*/
|
|
void
|
|
ginscall(Node *f, int proc)
|
|
{
|
|
int32 arg;
|
|
Prog *p;
|
|
Node reg, r1, con;
|
|
|
|
if(f->type != T)
|
|
setmaxarg(f->type);
|
|
|
|
arg = -1;
|
|
// Most functions have a fixed-size argument block, so traceback uses that during unwind.
|
|
// Not all, though: there are some variadic functions in package runtime,
|
|
// and for those we emit call-specific metadata recorded by caller.
|
|
// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
|
|
// so we do this for all indirect calls as well.
|
|
if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
|
|
arg = f->type->argwid;
|
|
if(proc == 1 || proc == 2)
|
|
arg += 2*widthptr;
|
|
}
|
|
|
|
if(arg != -1)
|
|
gargsize(arg);
|
|
|
|
switch(proc) {
|
|
default:
|
|
fatal("ginscall: bad proc %d", proc);
|
|
break;
|
|
|
|
case 0: // normal call
|
|
case -1: // normal call but no return
|
|
if(f->op == ONAME && f->class == PFUNC) {
|
|
if(f == deferreturn) {
|
|
// Deferred calls will appear to be returning to
|
|
// the CALL deferreturn(SB) that we are about to emit.
|
|
// However, the stack trace code will show the line
|
|
// of the instruction byte before the return PC.
|
|
// To avoid that being an unrelated instruction,
|
|
// insert an x86 NOP that we will have the right line number.
|
|
// x86 NOP 0x90 is really XCHG AX, AX; use that description
|
|
// because the NOP pseudo-instruction will be removed by
|
|
// the linker.
|
|
nodreg(®, types[TINT], D_AX);
|
|
gins(AXCHGL, ®, ®);
|
|
}
|
|
p = gins(ACALL, N, f);
|
|
afunclit(&p->to, f);
|
|
if(proc == -1 || noreturn(p))
|
|
gins(AUNDEF, N, N);
|
|
break;
|
|
}
|
|
nodreg(®, types[tptr], D_DX);
|
|
nodreg(&r1, types[tptr], D_BX);
|
|
gmove(f, ®);
|
|
reg.op = OINDREG;
|
|
gmove(®, &r1);
|
|
reg.op = OREGISTER;
|
|
gins(ACALL, ®, &r1);
|
|
break;
|
|
|
|
case 3: // normal call of c function pointer
|
|
gins(ACALL, N, f);
|
|
break;
|
|
|
|
case 1: // call in new proc (go)
|
|
case 2: // deferred call (defer)
|
|
nodreg(®, types[TINT32], D_CX);
|
|
gins(APUSHL, f, N);
|
|
nodconst(&con, types[TINT32], argsize(f->type));
|
|
gins(APUSHL, &con, N);
|
|
if(proc == 1)
|
|
ginscall(newproc, 0);
|
|
else
|
|
ginscall(deferproc, 0);
|
|
gins(APOPL, N, ®);
|
|
gins(APOPL, N, ®);
|
|
if(proc == 2) {
|
|
nodreg(®, types[TINT64], D_AX);
|
|
gins(ATESTL, ®, ®);
|
|
patch(gbranch(AJNE, T, -1), retpc);
|
|
}
|
|
break;
|
|
}
|
|
|
|
if(arg != -1)
|
|
gargsize(-1);
|
|
}
|
|
|
|
/*
|
|
* n is call to interface method.
|
|
* generate res = n.
|
|
*/
|
|
void
|
|
cgen_callinter(Node *n, Node *res, int proc)
|
|
{
|
|
Node *i, *f;
|
|
Node tmpi, nodi, nodo, nodr, nodsp;
|
|
|
|
i = n->left;
|
|
if(i->op != ODOTINTER)
|
|
fatal("cgen_callinter: not ODOTINTER %O", i->op);
|
|
|
|
f = i->right; // field
|
|
if(f->op != ONAME)
|
|
fatal("cgen_callinter: not ONAME %O", f->op);
|
|
|
|
i = i->left; // interface
|
|
|
|
if(!i->addable) {
|
|
tempname(&tmpi, i->type);
|
|
cgen(i, &tmpi);
|
|
i = &tmpi;
|
|
}
|
|
|
|
genlist(n->list); // assign the args
|
|
|
|
// i is now addable, prepare an indirected
|
|
// register to hold its address.
|
|
igen(i, &nodi, res); // REG = &inter
|
|
|
|
nodindreg(&nodsp, types[tptr], D_SP);
|
|
nodi.type = types[tptr];
|
|
nodi.xoffset += widthptr;
|
|
cgen(&nodi, &nodsp); // 0(SP) = 4(REG) -- i.data
|
|
|
|
regalloc(&nodo, types[tptr], res);
|
|
nodi.type = types[tptr];
|
|
nodi.xoffset -= widthptr;
|
|
cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab
|
|
regfree(&nodi);
|
|
|
|
regalloc(&nodr, types[tptr], &nodo);
|
|
if(n->left->xoffset == BADWIDTH)
|
|
fatal("cgen_callinter: badwidth");
|
|
cgen_checknil(&nodo);
|
|
nodo.op = OINDREG;
|
|
nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
|
|
|
|
if(proc == 0) {
|
|
// plain call: use direct c function pointer - more efficient
|
|
cgen(&nodo, &nodr); // REG = 20+offset(REG) -- i.tab->fun[f]
|
|
proc = 3;
|
|
} else {
|
|
// go/defer. generate go func value.
|
|
gins(ALEAL, &nodo, &nodr); // REG = &(20+offset(REG)) -- i.tab->fun[f]
|
|
}
|
|
|
|
nodr.type = n->left->type;
|
|
ginscall(&nodr, proc);
|
|
|
|
regfree(&nodr);
|
|
regfree(&nodo);
|
|
}
|
|
|
|
/*
|
|
* generate function call;
|
|
* proc=0 normal call
|
|
* proc=1 goroutine run in new proc
|
|
* proc=2 defer call save away stack
|
|
*/
|
|
void
|
|
cgen_call(Node *n, int proc)
|
|
{
|
|
Type *t;
|
|
Node nod, afun;
|
|
|
|
if(n == N)
|
|
return;
|
|
|
|
if(n->left->ullman >= UINF) {
|
|
// if name involves a fn call
|
|
// precompute the address of the fn
|
|
tempname(&afun, types[tptr]);
|
|
cgen(n->left, &afun);
|
|
}
|
|
|
|
genlist(n->list); // assign the args
|
|
t = n->left->type;
|
|
|
|
// call tempname pointer
|
|
if(n->left->ullman >= UINF) {
|
|
regalloc(&nod, types[tptr], N);
|
|
cgen_as(&nod, &afun);
|
|
nod.type = t;
|
|
ginscall(&nod, proc);
|
|
regfree(&nod);
|
|
return;
|
|
}
|
|
|
|
// call pointer
|
|
if(n->left->op != ONAME || n->left->class != PFUNC) {
|
|
regalloc(&nod, types[tptr], N);
|
|
cgen_as(&nod, n->left);
|
|
nod.type = t;
|
|
ginscall(&nod, proc);
|
|
regfree(&nod);
|
|
return;
|
|
}
|
|
|
|
// call direct
|
|
n->left->method = 1;
|
|
ginscall(n->left, proc);
|
|
}
|
|
|
|
/*
|
|
* call to n has already been generated.
|
|
* generate:
|
|
* res = return value from call.
|
|
*/
|
|
void
|
|
cgen_callret(Node *n, Node *res)
|
|
{
|
|
Node nod;
|
|
Type *fp, *t;
|
|
Iter flist;
|
|
|
|
t = n->left->type;
|
|
if(t->etype == TPTR32 || t->etype == TPTR64)
|
|
t = t->type;
|
|
|
|
fp = structfirst(&flist, getoutarg(t));
|
|
if(fp == T)
|
|
fatal("cgen_callret: nil");
|
|
|
|
memset(&nod, 0, sizeof(nod));
|
|
nod.op = OINDREG;
|
|
nod.val.u.reg = D_SP;
|
|
nod.addable = 1;
|
|
|
|
nod.xoffset = fp->width;
|
|
nod.type = fp->type;
|
|
cgen_as(res, &nod);
|
|
}
|
|
|
|
/*
|
|
* call to n has already been generated.
|
|
* generate:
|
|
* res = &return value from call.
|
|
*/
|
|
void
|
|
cgen_aret(Node *n, Node *res)
|
|
{
|
|
Node nod1, nod2;
|
|
Type *fp, *t;
|
|
Iter flist;
|
|
|
|
t = n->left->type;
|
|
if(isptr[t->etype])
|
|
t = t->type;
|
|
|
|
fp = structfirst(&flist, getoutarg(t));
|
|
if(fp == T)
|
|
fatal("cgen_aret: nil");
|
|
|
|
memset(&nod1, 0, sizeof(nod1));
|
|
nod1.op = OINDREG;
|
|
nod1.val.u.reg = D_SP;
|
|
nod1.addable = 1;
|
|
|
|
nod1.xoffset = fp->width;
|
|
nod1.type = fp->type;
|
|
|
|
if(res->op != OREGISTER) {
|
|
regalloc(&nod2, types[tptr], res);
|
|
gins(ALEAL, &nod1, &nod2);
|
|
gins(AMOVL, &nod2, res);
|
|
regfree(&nod2);
|
|
} else
|
|
gins(ALEAL, &nod1, res);
|
|
}
|
|
|
|
/*
|
|
* generate return.
|
|
* n->left is assignments to return values.
|
|
*/
|
|
void
|
|
cgen_ret(Node *n)
|
|
{
|
|
Prog *p;
|
|
|
|
genlist(n->list); // copy out args
|
|
if(retpc) {
|
|
gjmp(retpc);
|
|
return;
|
|
}
|
|
p = gins(ARET, N, N);
|
|
if(n->op == ORETJMP) {
|
|
p->to.type = D_EXTERN;
|
|
p->to.sym = linksym(n->left->sym);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* generate += *= etc.
|
|
*/
|
|
void
|
|
cgen_asop(Node *n)
|
|
{
|
|
Node n1, n2, n3, n4;
|
|
Node *nl, *nr;
|
|
Prog *p1;
|
|
Addr addr;
|
|
int a;
|
|
|
|
nl = n->left;
|
|
nr = n->right;
|
|
|
|
if(nr->ullman >= UINF && nl->ullman >= UINF) {
|
|
tempname(&n1, nr->type);
|
|
cgen(nr, &n1);
|
|
n2 = *n;
|
|
n2.right = &n1;
|
|
cgen_asop(&n2);
|
|
goto ret;
|
|
}
|
|
|
|
if(!isint[nl->type->etype])
|
|
goto hard;
|
|
if(!isint[nr->type->etype])
|
|
goto hard;
|
|
if(is64(nl->type) || is64(nr->type))
|
|
goto hard;
|
|
|
|
switch(n->etype) {
|
|
case OADD:
|
|
if(smallintconst(nr))
|
|
if(mpgetfix(nr->val.u.xval) == 1) {
|
|
a = optoas(OINC, nl->type);
|
|
if(nl->addable) {
|
|
gins(a, N, nl);
|
|
goto ret;
|
|
}
|
|
if(sudoaddable(a, nl, &addr)) {
|
|
p1 = gins(a, N, N);
|
|
p1->to = addr;
|
|
sudoclean();
|
|
goto ret;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case OSUB:
|
|
if(smallintconst(nr))
|
|
if(mpgetfix(nr->val.u.xval) == 1) {
|
|
a = optoas(ODEC, nl->type);
|
|
if(nl->addable) {
|
|
gins(a, N, nl);
|
|
goto ret;
|
|
}
|
|
if(sudoaddable(a, nl, &addr)) {
|
|
p1 = gins(a, N, N);
|
|
p1->to = addr;
|
|
sudoclean();
|
|
goto ret;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
switch(n->etype) {
|
|
case OADD:
|
|
case OSUB:
|
|
case OXOR:
|
|
case OAND:
|
|
case OOR:
|
|
a = optoas(n->etype, nl->type);
|
|
if(nl->addable) {
|
|
if(smallintconst(nr)) {
|
|
gins(a, nr, nl);
|
|
goto ret;
|
|
}
|
|
regalloc(&n2, nr->type, N);
|
|
cgen(nr, &n2);
|
|
gins(a, &n2, nl);
|
|
regfree(&n2);
|
|
goto ret;
|
|
}
|
|
if(nr->ullman < UINF)
|
|
if(sudoaddable(a, nl, &addr)) {
|
|
if(smallintconst(nr)) {
|
|
p1 = gins(a, nr, N);
|
|
p1->to = addr;
|
|
sudoclean();
|
|
goto ret;
|
|
}
|
|
regalloc(&n2, nr->type, N);
|
|
cgen(nr, &n2);
|
|
p1 = gins(a, &n2, N);
|
|
p1->to = addr;
|
|
regfree(&n2);
|
|
sudoclean();
|
|
goto ret;
|
|
}
|
|
}
|
|
|
|
hard:
|
|
n2.op = 0;
|
|
n1.op = 0;
|
|
if(nr->ullman >= nl->ullman || nl->addable) {
|
|
mgen(nr, &n2, N);
|
|
nr = &n2;
|
|
} else {
|
|
tempname(&n2, nr->type);
|
|
cgen(nr, &n2);
|
|
nr = &n2;
|
|
}
|
|
if(!nl->addable) {
|
|
igen(nl, &n1, N);
|
|
nl = &n1;
|
|
}
|
|
|
|
n3 = *n;
|
|
n3.left = nl;
|
|
n3.right = nr;
|
|
n3.op = n->etype;
|
|
|
|
mgen(&n3, &n4, N);
|
|
gmove(&n4, nl);
|
|
|
|
if(n1.op)
|
|
regfree(&n1);
|
|
mfree(&n2);
|
|
mfree(&n4);
|
|
|
|
ret:
|
|
;
|
|
}
|
|
|
|
int
|
|
samereg(Node *a, Node *b)
|
|
{
|
|
if(a->op != OREGISTER)
|
|
return 0;
|
|
if(b->op != OREGISTER)
|
|
return 0;
|
|
if(a->val.u.reg != b->val.u.reg)
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* generate division.
|
|
* caller must set:
|
|
* ax = allocated AX register
|
|
* dx = allocated DX register
|
|
* generates one of:
|
|
* res = nl / nr
|
|
* res = nl % nr
|
|
* according to op.
|
|
*/
|
|
void
|
|
dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
|
|
{
|
|
int check;
|
|
Node n1, t1, t2, t3, t4, n4, nz;
|
|
Type *t, *t0;
|
|
Prog *p1, *p2;
|
|
|
|
// Have to be careful about handling
|
|
// most negative int divided by -1 correctly.
|
|
// The hardware will trap.
|
|
// Also the byte divide instruction needs AH,
|
|
// which we otherwise don't have to deal with.
|
|
// Easiest way to avoid for int8, int16: use int32.
|
|
// For int32 and int64, use explicit test.
|
|
// Could use int64 hw for int32.
|
|
t = nl->type;
|
|
t0 = t;
|
|
check = 0;
|
|
if(issigned[t->etype]) {
|
|
check = 1;
|
|
if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
|
|
check = 0;
|
|
else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
|
|
check = 0;
|
|
}
|
|
if(t->width < 4) {
|
|
if(issigned[t->etype])
|
|
t = types[TINT32];
|
|
else
|
|
t = types[TUINT32];
|
|
check = 0;
|
|
}
|
|
|
|
tempname(&t1, t);
|
|
tempname(&t2, t);
|
|
if(t0 != t) {
|
|
tempname(&t3, t0);
|
|
tempname(&t4, t0);
|
|
cgen(nl, &t3);
|
|
cgen(nr, &t4);
|
|
// Convert.
|
|
gmove(&t3, &t1);
|
|
gmove(&t4, &t2);
|
|
} else {
|
|
cgen(nl, &t1);
|
|
cgen(nr, &t2);
|
|
}
|
|
|
|
if(!samereg(ax, res) && !samereg(dx, res))
|
|
regalloc(&n1, t, res);
|
|
else
|
|
regalloc(&n1, t, N);
|
|
gmove(&t2, &n1);
|
|
gmove(&t1, ax);
|
|
p2 = P;
|
|
if(nacl) {
|
|
// Native Client does not relay the divide-by-zero trap
|
|
// to the executing program, so we must insert a check
|
|
// for ourselves.
|
|
nodconst(&n4, t, 0);
|
|
gins(optoas(OCMP, t), &n1, &n4);
|
|
p1 = gbranch(optoas(ONE, t), T, +1);
|
|
if(panicdiv == N)
|
|
panicdiv = sysfunc("panicdivide");
|
|
ginscall(panicdiv, -1);
|
|
patch(p1, pc);
|
|
}
|
|
if(check) {
|
|
nodconst(&n4, t, -1);
|
|
gins(optoas(OCMP, t), &n1, &n4);
|
|
p1 = gbranch(optoas(ONE, t), T, +1);
|
|
if(op == ODIV) {
|
|
// a / (-1) is -a.
|
|
gins(optoas(OMINUS, t), N, ax);
|
|
gmove(ax, res);
|
|
} else {
|
|
// a % (-1) is 0.
|
|
nodconst(&n4, t, 0);
|
|
gmove(&n4, res);
|
|
}
|
|
p2 = gbranch(AJMP, T, 0);
|
|
patch(p1, pc);
|
|
}
|
|
if(!issigned[t->etype]) {
|
|
nodconst(&nz, t, 0);
|
|
gmove(&nz, dx);
|
|
} else
|
|
gins(optoas(OEXTEND, t), N, N);
|
|
gins(optoas(op, t), &n1, N);
|
|
regfree(&n1);
|
|
|
|
if(op == ODIV)
|
|
gmove(ax, res);
|
|
else
|
|
gmove(dx, res);
|
|
if(check)
|
|
patch(p2, pc);
|
|
}
|
|
|
|
static void
|
|
savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
|
|
{
|
|
int r;
|
|
|
|
r = reg[dr];
|
|
nodreg(x, types[TINT32], dr);
|
|
|
|
// save current ax and dx if they are live
|
|
// and not the destination
|
|
memset(oldx, 0, sizeof *oldx);
|
|
if(r > 0 && !samereg(x, res)) {
|
|
tempname(oldx, types[TINT32]);
|
|
gmove(x, oldx);
|
|
}
|
|
|
|
regalloc(x, t, x);
|
|
}
|
|
|
|
static void
|
|
restx(Node *x, Node *oldx)
|
|
{
|
|
regfree(x);
|
|
|
|
if(oldx->op != 0) {
|
|
x->type = types[TINT32];
|
|
gmove(oldx, x);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* generate division according to op, one of:
|
|
* res = nl / nr
|
|
* res = nl % nr
|
|
*/
|
|
void
|
|
cgen_div(int op, Node *nl, Node *nr, Node *res)
|
|
{
|
|
Node ax, dx, oldax, olddx;
|
|
Type *t;
|
|
|
|
if(is64(nl->type))
|
|
fatal("cgen_div %T", nl->type);
|
|
|
|
if(issigned[nl->type->etype])
|
|
t = types[TINT32];
|
|
else
|
|
t = types[TUINT32];
|
|
savex(D_AX, &ax, &oldax, res, t);
|
|
savex(D_DX, &dx, &olddx, res, t);
|
|
dodiv(op, nl, nr, res, &ax, &dx);
|
|
restx(&dx, &olddx);
|
|
restx(&ax, &oldax);
|
|
}
|
|
|
|
/*
|
|
* generate shift according to op, one of:
|
|
* res = nl << nr
|
|
* res = nl >> nr
|
|
*/
|
|
void
|
|
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
|
|
{
|
|
Node n1, n2, nt, cx, oldcx, hi, lo;
|
|
int a, w;
|
|
Prog *p1, *p2;
|
|
uvlong sc;
|
|
|
|
if(nl->type->width > 4)
|
|
fatal("cgen_shift %T", nl->type);
|
|
|
|
w = nl->type->width * 8;
|
|
|
|
a = optoas(op, nl->type);
|
|
|
|
if(nr->op == OLITERAL) {
|
|
tempname(&n2, nl->type);
|
|
cgen(nl, &n2);
|
|
regalloc(&n1, nl->type, res);
|
|
gmove(&n2, &n1);
|
|
sc = mpgetfix(nr->val.u.xval);
|
|
if(sc >= nl->type->width*8) {
|
|
// large shift gets 2 shifts by width-1
|
|
gins(a, ncon(w-1), &n1);
|
|
gins(a, ncon(w-1), &n1);
|
|
} else
|
|
gins(a, nr, &n1);
|
|
gmove(&n1, res);
|
|
regfree(&n1);
|
|
return;
|
|
}
|
|
|
|
memset(&oldcx, 0, sizeof oldcx);
|
|
nodreg(&cx, types[TUINT32], D_CX);
|
|
if(reg[D_CX] > 1 && !samereg(&cx, res)) {
|
|
tempname(&oldcx, types[TUINT32]);
|
|
gmove(&cx, &oldcx);
|
|
}
|
|
|
|
if(nr->type->width > 4) {
|
|
tempname(&nt, nr->type);
|
|
n1 = nt;
|
|
} else {
|
|
nodreg(&n1, types[TUINT32], D_CX);
|
|
regalloc(&n1, nr->type, &n1); // to hold the shift type in CX
|
|
}
|
|
|
|
if(samereg(&cx, res))
|
|
regalloc(&n2, nl->type, N);
|
|
else
|
|
regalloc(&n2, nl->type, res);
|
|
if(nl->ullman >= nr->ullman) {
|
|
cgen(nl, &n2);
|
|
cgen(nr, &n1);
|
|
} else {
|
|
cgen(nr, &n1);
|
|
cgen(nl, &n2);
|
|
}
|
|
|
|
// test and fix up large shifts
|
|
if(bounded) {
|
|
if(nr->type->width > 4) {
|
|
// delayed reg alloc
|
|
nodreg(&n1, types[TUINT32], D_CX);
|
|
regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
|
|
split64(&nt, &lo, &hi);
|
|
gmove(&lo, &n1);
|
|
splitclean();
|
|
}
|
|
} else {
|
|
if(nr->type->width > 4) {
|
|
// delayed reg alloc
|
|
nodreg(&n1, types[TUINT32], D_CX);
|
|
regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
|
|
split64(&nt, &lo, &hi);
|
|
gmove(&lo, &n1);
|
|
gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
|
|
p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
|
|
gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
|
|
p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
|
|
splitclean();
|
|
patch(p2, pc);
|
|
} else {
|
|
gins(optoas(OCMP, nr->type), &n1, ncon(w));
|
|
p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
|
|
}
|
|
if(op == ORSH && issigned[nl->type->etype]) {
|
|
gins(a, ncon(w-1), &n2);
|
|
} else {
|
|
gmove(ncon(0), &n2);
|
|
}
|
|
patch(p1, pc);
|
|
}
|
|
gins(a, &n1, &n2);
|
|
|
|
if(oldcx.op != 0)
|
|
gmove(&oldcx, &cx);
|
|
|
|
gmove(&n2, res);
|
|
|
|
regfree(&n1);
|
|
regfree(&n2);
|
|
}
|
|
|
|
/*
|
|
* generate byte multiply:
|
|
* res = nl * nr
|
|
* there is no 2-operand byte multiply instruction so
|
|
* we do a full-width multiplication and truncate afterwards.
|
|
*/
|
|
void
|
|
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
|
|
{
|
|
Node n1, n2, nt, *tmp;
|
|
Type *t;
|
|
int a;
|
|
|
|
// copy from byte to full registers
|
|
t = types[TUINT32];
|
|
if(issigned[nl->type->etype])
|
|
t = types[TINT32];
|
|
|
|
// largest ullman on left.
|
|
if(nl->ullman < nr->ullman) {
|
|
tmp = nl;
|
|
nl = nr;
|
|
nr = tmp;
|
|
}
|
|
|
|
tempname(&nt, nl->type);
|
|
cgen(nl, &nt);
|
|
regalloc(&n1, t, res);
|
|
cgen(nr, &n1);
|
|
regalloc(&n2, t, N);
|
|
gmove(&nt, &n2);
|
|
a = optoas(op, t);
|
|
gins(a, &n2, &n1);
|
|
regfree(&n2);
|
|
gmove(&n1, res);
|
|
regfree(&n1);
|
|
}
|
|
|
|
/*
|
|
* generate high multiply:
|
|
* res = (nl*nr) >> width
|
|
*/
|
|
void
|
|
cgen_hmul(Node *nl, Node *nr, Node *res)
|
|
{
|
|
Type *t;
|
|
int a;
|
|
Node n1, n2, ax, dx;
|
|
|
|
t = nl->type;
|
|
a = optoas(OHMUL, t);
|
|
// gen nl in n1.
|
|
tempname(&n1, t);
|
|
cgen(nl, &n1);
|
|
// gen nr in n2.
|
|
regalloc(&n2, t, res);
|
|
cgen(nr, &n2);
|
|
|
|
// multiply.
|
|
nodreg(&ax, t, D_AX);
|
|
gmove(&n2, &ax);
|
|
gins(a, &n1, N);
|
|
regfree(&n2);
|
|
|
|
if(t->width == 1) {
|
|
// byte multiply behaves differently.
|
|
nodreg(&ax, t, D_AH);
|
|
nodreg(&dx, t, D_DL);
|
|
gmove(&ax, &dx);
|
|
}
|
|
nodreg(&dx, t, D_DX);
|
|
gmove(&dx, res);
|
|
}
|
|
|
|
static void cgen_float387(Node *n, Node *res);
|
|
static void cgen_floatsse(Node *n, Node *res);
|
|
|
|
/*
|
|
* generate floating-point operation.
|
|
*/
|
|
void
|
|
cgen_float(Node *n, Node *res)
|
|
{
|
|
Node *nl;
|
|
Node n1, n2;
|
|
Prog *p1, *p2, *p3;
|
|
|
|
nl = n->left;
|
|
switch(n->op) {
|
|
case OEQ:
|
|
case ONE:
|
|
case OLT:
|
|
case OLE:
|
|
case OGE:
|
|
p1 = gbranch(AJMP, T, 0);
|
|
p2 = pc;
|
|
gmove(nodbool(1), res);
|
|
p3 = gbranch(AJMP, T, 0);
|
|
patch(p1, pc);
|
|
bgen(n, 1, 0, p2);
|
|
gmove(nodbool(0), res);
|
|
patch(p3, pc);
|
|
return;
|
|
|
|
case OPLUS:
|
|
cgen(nl, res);
|
|
return;
|
|
|
|
case OCONV:
|
|
if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
|
|
cgen(nl, res);
|
|
return;
|
|
}
|
|
|
|
tempname(&n2, n->type);
|
|
mgen(nl, &n1, res);
|
|
gmove(&n1, &n2);
|
|
gmove(&n2, res);
|
|
mfree(&n1);
|
|
return;
|
|
}
|
|
|
|
if(use_sse)
|
|
cgen_floatsse(n, res);
|
|
else
|
|
cgen_float387(n, res);
|
|
}
|
|
|
|
// floating-point. 387 (not SSE2)
|
|
static void
|
|
cgen_float387(Node *n, Node *res)
|
|
{
|
|
Node f0, f1;
|
|
Node *nl, *nr;
|
|
|
|
nl = n->left;
|
|
nr = n->right;
|
|
nodreg(&f0, nl->type, D_F0);
|
|
nodreg(&f1, n->type, D_F0+1);
|
|
if(nr != N)
|
|
goto flt2;
|
|
|
|
// unary
|
|
cgen(nl, &f0);
|
|
if(n->op != OCONV && n->op != OPLUS)
|
|
gins(foptoas(n->op, n->type, 0), N, N);
|
|
gmove(&f0, res);
|
|
return;
|
|
|
|
flt2: // binary
|
|
if(nl->ullman >= nr->ullman) {
|
|
cgen(nl, &f0);
|
|
if(nr->addable)
|
|
gins(foptoas(n->op, n->type, 0), nr, &f0);
|
|
else {
|
|
cgen(nr, &f0);
|
|
gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
|
|
}
|
|
} else {
|
|
cgen(nr, &f0);
|
|
if(nl->addable)
|
|
gins(foptoas(n->op, n->type, Frev), nl, &f0);
|
|
else {
|
|
cgen(nl, &f0);
|
|
gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
|
|
}
|
|
}
|
|
gmove(&f0, res);
|
|
return;
|
|
|
|
}
|
|
|
|
static void
|
|
cgen_floatsse(Node *n, Node *res)
|
|
{
|
|
Node *nl, *nr, *r;
|
|
Node n1, n2, nt;
|
|
int a;
|
|
|
|
nl = n->left;
|
|
nr = n->right;
|
|
switch(n->op) {
|
|
default:
|
|
dump("cgen_floatsse", n);
|
|
fatal("cgen_floatsse %O", n->op);
|
|
return;
|
|
|
|
case OMINUS:
|
|
case OCOM:
|
|
nr = nodintconst(-1);
|
|
convlit(&nr, n->type);
|
|
a = foptoas(OMUL, nl->type, 0);
|
|
goto sbop;
|
|
|
|
// symmetric binary
|
|
case OADD:
|
|
case OMUL:
|
|
a = foptoas(n->op, nl->type, 0);
|
|
goto sbop;
|
|
|
|
// asymmetric binary
|
|
case OSUB:
|
|
case OMOD:
|
|
case ODIV:
|
|
a = foptoas(n->op, nl->type, 0);
|
|
goto abop;
|
|
}
|
|
|
|
sbop: // symmetric binary
|
|
if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
|
|
r = nl;
|
|
nl = nr;
|
|
nr = r;
|
|
}
|
|
|
|
abop: // asymmetric binary
|
|
if(nl->ullman >= nr->ullman) {
|
|
tempname(&nt, nl->type);
|
|
cgen(nl, &nt);
|
|
mgen(nr, &n2, N);
|
|
regalloc(&n1, nl->type, res);
|
|
gmove(&nt, &n1);
|
|
gins(a, &n2, &n1);
|
|
gmove(&n1, res);
|
|
regfree(&n1);
|
|
mfree(&n2);
|
|
} else {
|
|
regalloc(&n2, nr->type, res);
|
|
cgen(nr, &n2);
|
|
regalloc(&n1, nl->type, N);
|
|
cgen(nl, &n1);
|
|
gins(a, &n2, &n1);
|
|
regfree(&n2);
|
|
gmove(&n1, res);
|
|
regfree(&n1);
|
|
}
|
|
return;
|
|
}
|
|
|
|
void
|
|
bgen_float(Node *n, int true, int likely, Prog *to)
|
|
{
|
|
int et, a;
|
|
Node *nl, *nr, *r;
|
|
Node n1, n2, n3, tmp, t1, t2, ax;
|
|
Prog *p1, *p2;
|
|
|
|
nl = n->left;
|
|
nr = n->right;
|
|
a = n->op;
|
|
if(!true) {
|
|
// brcom is not valid on floats when NaN is involved.
|
|
p1 = gbranch(AJMP, T, 0);
|
|
p2 = gbranch(AJMP, T, 0);
|
|
patch(p1, pc);
|
|
// No need to avoid re-genning ninit.
|
|
bgen_float(n, 1, -likely, p2);
|
|
patch(gbranch(AJMP, T, 0), to);
|
|
patch(p2, pc);
|
|
return;
|
|
}
|
|
|
|
if(use_sse)
|
|
goto sse;
|
|
else
|
|
goto x87;
|
|
|
|
x87:
|
|
a = brrev(a); // because the args are stacked
|
|
if(a == OGE || a == OGT) {
|
|
// only < and <= work right with NaN; reverse if needed
|
|
r = nr;
|
|
nr = nl;
|
|
nl = r;
|
|
a = brrev(a);
|
|
}
|
|
|
|
nodreg(&tmp, nr->type, D_F0);
|
|
nodreg(&n2, nr->type, D_F0 + 1);
|
|
nodreg(&ax, types[TUINT16], D_AX);
|
|
et = simsimtype(nr->type);
|
|
if(et == TFLOAT64) {
|
|
if(nl->ullman > nr->ullman) {
|
|
cgen(nl, &tmp);
|
|
cgen(nr, &tmp);
|
|
gins(AFXCHD, &tmp, &n2);
|
|
} else {
|
|
cgen(nr, &tmp);
|
|
cgen(nl, &tmp);
|
|
}
|
|
gins(AFUCOMIP, &tmp, &n2);
|
|
gins(AFMOVDP, &tmp, &tmp); // annoying pop but still better than STSW+SAHF
|
|
} else {
|
|
// TODO(rsc): The moves back and forth to memory
|
|
// here are for truncating the value to 32 bits.
|
|
// This handles 32-bit comparison but presumably
|
|
// all the other ops have the same problem.
|
|
// We need to figure out what the right general
|
|
// solution is, besides telling people to use float64.
|
|
tempname(&t1, types[TFLOAT32]);
|
|
tempname(&t2, types[TFLOAT32]);
|
|
cgen(nr, &t1);
|
|
cgen(nl, &t2);
|
|
gmove(&t2, &tmp);
|
|
gins(AFCOMFP, &t1, &tmp);
|
|
gins(AFSTSW, N, &ax);
|
|
gins(ASAHF, N, N);
|
|
}
|
|
|
|
goto ret;
|
|
|
|
sse:
|
|
if(!nl->addable) {
|
|
tempname(&n1, nl->type);
|
|
cgen(nl, &n1);
|
|
nl = &n1;
|
|
}
|
|
if(!nr->addable) {
|
|
tempname(&tmp, nr->type);
|
|
cgen(nr, &tmp);
|
|
nr = &tmp;
|
|
}
|
|
regalloc(&n2, nr->type, N);
|
|
gmove(nr, &n2);
|
|
nr = &n2;
|
|
|
|
if(nl->op != OREGISTER) {
|
|
regalloc(&n3, nl->type, N);
|
|
gmove(nl, &n3);
|
|
nl = &n3;
|
|
}
|
|
|
|
if(a == OGE || a == OGT) {
|
|
// only < and <= work right with NaN; reverse if needed
|
|
r = nr;
|
|
nr = nl;
|
|
nl = r;
|
|
a = brrev(a);
|
|
}
|
|
|
|
gins(foptoas(OCMP, nr->type, 0), nl, nr);
|
|
if(nl->op == OREGISTER)
|
|
regfree(nl);
|
|
regfree(nr);
|
|
|
|
ret:
|
|
if(a == OEQ) {
|
|
// neither NE nor P
|
|
p1 = gbranch(AJNE, T, -likely);
|
|
p2 = gbranch(AJPS, T, -likely);
|
|
patch(gbranch(AJMP, T, 0), to);
|
|
patch(p1, pc);
|
|
patch(p2, pc);
|
|
} else if(a == ONE) {
|
|
// either NE or P
|
|
patch(gbranch(AJNE, T, likely), to);
|
|
patch(gbranch(AJPS, T, likely), to);
|
|
} else
|
|
patch(gbranch(optoas(a, nr->type), T, likely), to);
|
|
|
|
}
|
|
|
|
// Called after regopt and peep have run.
|
|
// Expand CHECKNIL pseudo-op into actual nil pointer check.
|
|
void
|
|
expandchecks(Prog *firstp)
|
|
{
|
|
Prog *p, *p1, *p2;
|
|
|
|
for(p = firstp; p != P; p = p->link) {
|
|
if(p->as != ACHECKNIL)
|
|
continue;
|
|
if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
|
|
warnl(p->lineno, "generated nil check");
|
|
// check is
|
|
// CMP arg, $0
|
|
// JNE 2(PC) (likely)
|
|
// MOV AX, 0
|
|
p1 = mal(sizeof *p1);
|
|
p2 = mal(sizeof *p2);
|
|
clearp(p1);
|
|
clearp(p2);
|
|
p1->link = p2;
|
|
p2->link = p->link;
|
|
p->link = p1;
|
|
p1->lineno = p->lineno;
|
|
p2->lineno = p->lineno;
|
|
p1->pc = 9999;
|
|
p2->pc = 9999;
|
|
p->as = ACMPL;
|
|
p->to.type = D_CONST;
|
|
p->to.offset = 0;
|
|
p1->as = AJNE;
|
|
p1->from.type = D_CONST;
|
|
p1->from.offset = 1; // likely
|
|
p1->to.type = D_BRANCH;
|
|
p1->to.u.branch = p2->link;
|
|
// crash by write to memory address 0.
|
|
// if possible, since we know arg is 0, use 0(arg),
|
|
// which will be shorter to encode than plain 0.
|
|
p2->as = AMOVL;
|
|
p2->from.type = D_AX;
|
|
if(regtyp(&p->from))
|
|
p2->to.type = p->from.type + D_INDIR;
|
|
else
|
|
p2->to.type = D_INDIR+D_NONE;
|
|
p2->to.offset = 0;
|
|
}
|
|
}
|