1
0
mirror of https://github.com/golang/go synced 2024-11-18 12:44:49 -07:00

better 64-bit handling in 8g.

fewer moves, fewer stupid LEALs.
powser1 runs (with evaln commented out).
beginnings of floating point.

R=ken
OCL=29540
CL=29543
This commit is contained in:
Russ Cox 2009-05-28 15:48:47 -07:00
parent 63e1b714de
commit 3aa006b8cd
7 changed files with 705 additions and 315 deletions

View File

@ -505,7 +505,7 @@ gmove(Node *f, Node *t)
switch(CASE(ft, tt)) {
default:
fatal("gmove %T -> %T", f, t);
fatal("gmove %lT -> %lT", f->type, t->type);
/*
* integer copy and truncate

View File

@ -57,7 +57,7 @@ Bconv(Fmt *fp)
if(str[0])
strcat(str, " ");
if(var[i].sym == S) {
sprint(ss, "$%ld", var[i].offset);
sprint(ss, "$%d", var[i].offset);
s = ss;
} else
s = var[i].sym->name;
@ -108,7 +108,7 @@ Dconv(Fmt *fp)
i = a->type;
if(i >= D_INDIR) {
if(a->offset)
sprint(str, "%ld(%R)", a->offset, i-D_INDIR);
sprint(str, "%d(%R)", a->offset, i-D_INDIR);
else
sprint(str, "(%R)", i-D_INDIR);
goto brk;
@ -117,7 +117,7 @@ Dconv(Fmt *fp)
default:
if(a->offset)
sprint(str, "$%ld,%R", a->offset, i);
sprint(str, "$%d,%R", a->offset, i);
else
sprint(str, "%R", i);
break;
@ -127,35 +127,35 @@ Dconv(Fmt *fp)
break;
case D_BRANCH:
sprint(str, "%ld(PC)", a->offset-pc);
sprint(str, "%d(PC)", a->offset-pc);
break;
case D_EXTERN:
sprint(str, "%s+%ld(SB)", a->sym->name, a->offset);
sprint(str, "%s+%d(SB)", a->sym->name, a->offset);
break;
case D_STATIC:
sprint(str, "%s<>+%ld(SB)", a->sym->name,
sprint(str, "%s<>+%d(SB)", a->sym->name,
a->offset);
break;
case D_AUTO:
sprint(str, "%s+%ld(SP)", a->sym->name, a->offset);
sprint(str, "%s+%d(SP)", a->sym->name, a->offset);
break;
case D_PARAM:
if(a->sym)
sprint(str, "%s+%ld(FP)", a->sym->name, a->offset);
sprint(str, "%s+%d(FP)", a->sym->name, a->offset);
else
sprint(str, "%ld(FP)", a->offset);
sprint(str, "%d(FP)", a->offset);
break;
case D_CONST:
sprint(str, "$%ld", a->offset);
sprint(str, "$%d", a->offset);
break;
case D_CONST2:
sprint(str, "$%ld-%ld", a->offset, a->offset2);
sprint(str, "$%d-%d", a->offset, a->offset2);
break;
case D_FCONST:
@ -185,7 +185,7 @@ conv:
char* regstr[] =
{
"AL", /*[D_AL]*/
"AL", /*[D_AL]*/
"CL",
"DL",
"BL",

View File

@ -27,6 +27,42 @@ is64(Type *t)
return 0;
}
int
noconv(Type *t1, Type *t2)
{
int e1, e2;
e1 = simtype[t1->etype];
e2 = simtype[t2->etype];
switch(e1) {
case TINT8:
case TUINT8:
return e2 == TINT8 || e2 == TUINT8;
case TINT16:
case TUINT16:
return e2 == TINT16 || e2 == TUINT16;
case TINT32:
case TUINT32:
case TPTR32:
return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32;
case TINT64:
case TUINT64:
case TPTR64:
return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64;
case TFLOAT32:
return e2 == TFLOAT32;
case TFLOAT64:
return e2 == TFLOAT64;
}
return 0;
}
/*
* generate:
* res = n;
@ -38,7 +74,7 @@ is64(Type *t)
void
cgen(Node *n, Node *res)
{
Node *nl, *nr, *r, n1, n2, rr;
Node *nl, *nr, *r, n1, n2, rr, f0, f1;
Prog *p1, *p2, *p3;
int a;
@ -65,13 +101,13 @@ cgen(Node *n, Node *res)
sgen(n, res, n->type->width);
return;
}
// if both are addressable, move
if(n->addable && res->addable) {
gmove(n, res);
return;
}
// if both are not addressable, use a temporary.
if(!n->addable && !res->addable) {
tempalloc(&n1, n->type);
@ -96,7 +132,7 @@ cgen(Node *n, Node *res)
// 64-bit ops are hard on 32-bit machine.
if(is64(n->type) && cancgen64(n, res))
return;
// use ullman to pick operand to eval first.
nl = n->left;
nr = n->right;
@ -112,12 +148,15 @@ cgen(Node *n, Node *res)
return;
}
if(isfloat[n->type->etype] && isfloat[nl->type->etype])
goto flt;
switch(n->op) {
default:
dump("cgen", n);
fatal("cgen %O", n->op);
break;
// these call bgen to get a bool value
case OOROR:
case OANDAND:
@ -162,7 +201,7 @@ cgen(Node *n, Node *res)
goto abop;
case OCONV:
if(eqtype(n->type, nl->type)) {
if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
cgen(nl, res);
break;
}
@ -236,7 +275,7 @@ cgen(Node *n, Node *res)
case OADDR:
agen(nl, res);
break;
case OCALLMETH:
cgen_callmeth(n, 0);
cgen_callret(n, res);
@ -303,6 +342,29 @@ uop: // unary
gmove(&n1, res);
tempfree(&n1);
return;
flt: // floating-point. 387 (not SSE2) to interoperate with 6c
nodreg(&f0, n->type, D_F0);
nodreg(&f1, n->type, D_F0+1);
if(nl->ullman >= nr->ullman) {
cgen(nl, &f0);
if(nr->addable)
gins(foptoas(n->op, n->type, 0), nr, &f0);
else {
cgen(nr, &f0);
gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
}
} else {
cgen(nr, &f0);
if(nl->addable)
gins(foptoas(n->op, n->type, Frev), nl, &f0);
else {
cgen(nl, &f0);
gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
}
}
gmove(&f0, res);
return;
}
/*
@ -334,21 +396,21 @@ agen(Node *n, Node *res)
regfree(&n1);
return;
}
// let's compute
nl = n->left;
nr = n->right;
switch(n->op) {
default:
fatal("agen %O", n->op);
case OCONV:
if(!eqtype(n->type, nl->type))
fatal("agen: non-trivial OCONV");
agen(nl, res);
break;
case OCALLMETH:
cgen_callmeth(n, 0);
cgen_aret(n, res);
@ -506,11 +568,11 @@ agen(Node *n, Node *res)
gins(optoas(OADD, types[tptr]), &n1, res);
}
break;
case OIND:
cgen(nl, res);
break;
case ODOT:
t = nl->type;
agen(nl, res);
@ -719,7 +781,7 @@ bgen(Node *n, int true, Prog *to)
regfree(&n1);
break;
}
if(is64(nr->type)) {
if(!nl->addable) {
tempalloc(&n1, nl->type);
@ -916,7 +978,8 @@ sgen(Node *n, Node *res, int w)
static int
cancgen64(Node *n, Node *res)
{
Node adr1, adr2, t1, t2, r1, r2, r3, r4, r5, nod, *l, *r;
Node t1, t2, ax, dx, cx, ex, fx, zero, *l, *r;
Node lo1, lo2, hi1, hi2;
Prog *p1, *p2;
if(n->op == OCALL)
@ -936,14 +999,13 @@ cancgen64(Node *n, Node *res)
return 1;
case OMINUS:
nodconst(&zero, types[TINT32], 0);
cgen(n->left, res);
gins(ANEGL, N, res);
res->xoffset += 4;
regalloc(&nod, types[TINT32], N);
gins(AXORL, &nod, &nod);
gins(ASBBL, res, &nod);
gins(AMOVL, &nod, res);
regfree(&nod);
split64(res, &lo1, &hi1);
gins(ANEGL, N, &lo1);
gins(AADCL, &zero, &hi1);
gins(ANEGL, N, &hi1);
splitclean();
return 1;
case OADD:
@ -951,7 +1013,7 @@ cancgen64(Node *n, Node *res)
case OMUL:
break;
}
l = n->left;
r = n->right;
if(!l->addable) {
@ -963,97 +1025,73 @@ cancgen64(Node *n, Node *res)
tempalloc(&t2, r->type);
cgen(r, &t2);
r = &t2;
}
}
// Setup for binary operation.
tempalloc(&adr1, types[TPTR32]);
agen(l, &adr1);
tempalloc(&adr2, types[TPTR32]);
agen(r, &adr2);
split64(l, &lo1, &hi1);
split64(r, &lo2, &hi2);
nodreg(&r1, types[TPTR32], D_AX);
nodreg(&r2, types[TPTR32], D_DX);
nodreg(&r3, types[TPTR32], D_CX);
nodreg(&ax, types[TPTR32], D_AX);
nodreg(&cx, types[TPTR32], D_CX);
nodreg(&dx, types[TPTR32], D_DX);
// Do op. Leave result in DX:AX.
switch(n->op) {
case OADD:
case OSUB:
gmove(&adr1, &r3);
r3.op = OINDREG;
r3.xoffset = 0;
gins(AMOVL, &r3, &r1);
r3.xoffset = 4;
gins(AMOVL, &r3, &r2);
r3.xoffset = 0;
r3.op = OREGISTER;
gmove(&adr2, &r3);
r3.op = OINDREG;
if(n->op == OADD)
gins(AADDL, &r3, &r1);
else
gins(ASUBL, &r3, &r1);
r3.xoffset = 4;
if(n->op == OADD)
gins(AADCL, &r3, &r2);
else
gins(ASBBL, &r3, &r2);
gins(AMOVL, &lo1, &ax);
gins(AMOVL, &hi1, &dx);
gins(AADDL, &lo2, &ax);
gins(AADCL, &hi2, &dx);
break;
case OMUL:
regalloc(&r4, types[TPTR32], N);
regalloc(&r5, types[TPTR32], N);
// load args into r2:r1 and r4:r3.
// leave result in r2:r1 (DX:AX)
gmove(&adr1, &r5);
r5.op = OINDREG;
r5.xoffset = 0;
gmove(&r5, &r1);
r5.xoffset = 4;
gmove(&r5, &r2);
r5.xoffset = 0;
r5.op = OREGISTER;
gmove(&adr2, &r5);
r5.op = OINDREG;
gmove(&r5, &r3);
r5.xoffset = 4;
gmove(&r5, &r4);
r5.xoffset = 0;
r5.op = OREGISTER;
case OSUB:
gins(AMOVL, &lo1, &ax);
gins(AMOVL, &hi1, &dx);
gins(ASUBL, &lo2, &ax);
gins(ASBBL, &hi2, &dx);
break;
// if r2|r4 == 0, use one 32 x 32 -> 64 unsigned multiply
gmove(&r2, &r5);
gins(AORL, &r4, &r5);
case OMUL:
// let's call the next two EX and FX.
regalloc(&ex, types[TPTR32], N);
regalloc(&fx, types[TPTR32], N);
// load args into DX:AX and EX:CX.
gins(AMOVL, &lo1, &ax);
gins(AMOVL, &hi1, &dx);
gins(AMOVL, &lo2, &cx);
gins(AMOVL, &hi2, &ex);
// if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
gins(AMOVL, &dx, &fx);
gins(AORL, &ex, &fx);
p1 = gbranch(AJNE, T);
gins(AMULL, &r3, N); // AX (=r1) is implied
gins(AMULL, &cx, N); // implicit &ax
p2 = gbranch(AJMP, T);
patch(p1, pc);
// full 64x64 -> 64, from 32 x 32 -> 64.
gins(AIMULL, &r3, &r2);
gins(AMOVL, &r1, &r5);
gins(AIMULL, &r4, &r5);
gins(AADDL, &r2, &r5);
gins(AMOVL, &r3, &r2);
gins(AMULL, &r2, N); // AX (=r1) is implied
gins(AADDL, &r5, &r2);
patch(p2, pc);
regfree(&r4);
regfree(&r5);
break;
}
tempfree(&adr2);
tempfree(&adr1);
// Store result.
gins(AMOVL, &r1, res);
res->xoffset += 4;
gins(AMOVL, &r2, res);
res->xoffset -= 4;
// full 64x64 -> 64, from 32x32 -> 64.
gins(AIMULL, &cx, &dx);
gins(AMOVL, &ax, &fx);
gins(AIMULL, &ex, &fx);
gins(AADDL, &dx, &fx);
gins(AMOVL, &cx, &dx);
gins(AMULL, &dx, N); // implicit &ax
gins(AADDL, &fx, &dx);
patch(p2, pc);
regfree(&ex);
regfree(&fx);
break;
}
splitclean();
splitclean();
split64(res, &lo1, &hi1);
gins(AMOVL, &ax, &lo1);
gins(AMOVL, &dx, &hi1);
splitclean();
if(r == &t2)
tempfree(&t2);
if(l == &t1)
@ -1068,47 +1106,23 @@ cancgen64(Node *n, Node *res)
void
cmp64(Node *nl, Node *nr, int op, Prog *to)
{
int64 x;
Node adr1, adr2, rr;
Prog *br, *p;
Node lo1, hi1, lo2, hi2, rr;
Prog *br;
Type *t;
t = nr->type;
memset(&adr1, 0, sizeof adr1);
memset(&adr2, 0, sizeof adr2);
regalloc(&adr1, types[TPTR32], N);
agen(nl, &adr1);
adr1.op = OINDREG;
nl = &adr1;
x = 0;
if(nr->op == OLITERAL) {
if(!isconst(nr, CTINT))
fatal("bad const in cmp64");
x = mpgetfix(nr->val.u.xval);
} else {
regalloc(&adr2, types[TPTR32], N);
agen(nr, &adr2);
adr2.op = OINDREG;
nr = &adr2;
}
split64(nl, &lo1, &hi1);
split64(nr, &lo2, &hi2);
// compare most significant word
nl->xoffset += 4;
if(nr->op == OLITERAL) {
p = gins(ACMPL, nl, nodintconst((uint32)(x>>32)));
} else {
regalloc(&rr, types[TUINT32], N);
nr->xoffset += 4;
gins(AMOVL, nr, &rr);
gins(ACMPL, nl, &rr);
nr->xoffset -= 4;
t = hi1.type;
if(nl->op == OLITERAL || nr->op == OLITERAL)
gins(ACMPL, &hi1, &hi2);
else {
regalloc(&rr, types[TINT32], N);
gins(AMOVL, &hi1, &rr);
gins(ACMPL, &rr, &hi2);
regfree(&rr);
}
nl->xoffset -= 4;
br = P;
switch(op) {
default:
@ -1149,39 +1163,28 @@ cmp64(Node *nl, Node *nr, int op, Prog *to)
// L:
patch(gbranch(optoas(OLT, t), T), to);
br = gbranch(optoas(OGT, t), T);
break;
break;
}
// compare least significant word
if(nr->op == OLITERAL) {
p = gins(ACMPL, nl, nodintconst((uint32)x));
} else {
regalloc(&rr, types[TUINT32], N);
gins(AMOVL, nr, &rr);
gins(ACMPL, nl, &rr);
t = lo1.type;
if(nl->op == OLITERAL || nr->op == OLITERAL)
gins(ACMPL, &lo1, &lo2);
else {
regalloc(&rr, types[TINT32], N);
gins(AMOVL, &lo1, &rr);
gins(ACMPL, &rr, &lo2);
regfree(&rr);
}
// jump again
switch(op) {
default:
fatal("cmp64 %O %T", op, nr->type);
case OEQ:
case ONE:
case OGE:
case OGT:
case OLE:
case OLT:
patch(gbranch(optoas(op, t), T), to);
break;
}
patch(gbranch(optoas(op, t), T), to);
// point first branch down here if appropriate
if(br != P)
patch(br, pc);
regfree(&adr1);
if(nr == &adr2)
regfree(&adr2);
splitclean();
splitclean();
}

View File

@ -43,6 +43,14 @@ struct Prog
void* reg; // pointer to containing Reg struct
};
/*
 * Flag bits for the third argument of foptoas.
 * The bits are distinct and may be or'ed together
 * (foptoas has cases for Fpop|Frev).
 */
enum
{
	Frev = 0x1,	// pick the reversed opcode (e.g. AFSUBRF, AFDIVRD); foptoas drops it for OADD and OMUL
	Fpop = 0x2,	// pick the P-suffixed opcode (e.g. AFADDDP, AFCOMFP)
	Fpop2 = 0x4,	// only matched with OCMP on TFLOAT64, giving AFCOMDPP
};
EXTERN Biobuf* bout;
EXTERN int32 dynloc;
EXTERN uchar reg[D_NONE];
@ -114,6 +122,7 @@ Prog* gop(int, Node*, Node*, Node*);
void setconst(Addr*, vlong);
void setaddr(Addr*, Node*);
int optoas(int, Type*);
int foptoas(int, Type*, int);
void ginit(void);
void gclean(void);
void regalloc(Node*, Type*, Node*);
@ -131,7 +140,10 @@ Plist* newplist(void);
int isfat(Type*);
void sudoclean(void);
int sudoaddable(int, Node*, Addr*);
int dotaddable(Node*, Node*);
void afunclit(Addr*);
void split64(Node*, Node*, Node*);
void splitclean(void);
/*
* list.c

View File

@ -468,7 +468,7 @@ cgen_asop(Node *n)
hard:
if(nr->ullman > nl->ullman) {
regalloc(&n2, nr->type, N);
tempalloc(&n2, nr->type);
cgen(nr, &n2);
igen(nl, &n1, N);
} else {

View File

@ -571,6 +571,81 @@ optoas(int op, Type *t)
return a;
}
#define FCASE(a, b, c)  (((a)<<16)|((b)<<8)|(c))

/*
 * Return the floating-point opcode for operation op on operands of
 * type t, adjusted by the Frev/Fpop/Fpop2 flag bits in flg.
 *
 * Frev is ignored for OADD and OMUL.  Any (op, t->etype, flg)
 * combination that is not listed in the table below is reported
 * through fatal.
 */
int
foptoas(int op, Type *t, int flg)
{
	// key is FCASE(op, etype, flags); as is the opcode to return.
	static struct {
		int	key;
		int	as;
	} optab[] = {
		{ FCASE(OADD, TFLOAT32, 0),		AFADDF },
		{ FCASE(OADD, TFLOAT64, 0),		AFADDD },
		{ FCASE(OADD, TFLOAT64, Fpop),		AFADDDP },

		{ FCASE(OSUB, TFLOAT32, 0),		AFSUBF },
		{ FCASE(OSUB, TFLOAT32, Frev),		AFSUBRF },
		{ FCASE(OSUB, TFLOAT64, 0),		AFSUBD },
		{ FCASE(OSUB, TFLOAT64, Frev),		AFSUBRD },
		{ FCASE(OSUB, TFLOAT64, Fpop),		AFSUBDP },
		{ FCASE(OSUB, TFLOAT64, Fpop|Frev),	AFSUBRDP },

		{ FCASE(OMUL, TFLOAT32, 0),		AFMULF },
		{ FCASE(OMUL, TFLOAT64, 0),		AFMULD },
		{ FCASE(OMUL, TFLOAT64, Fpop),		AFMULDP },

		{ FCASE(ODIV, TFLOAT32, 0),		AFDIVF },
		{ FCASE(ODIV, TFLOAT32, Frev),		AFDIVRF },
		{ FCASE(ODIV, TFLOAT64, 0),		AFDIVD },
		{ FCASE(ODIV, TFLOAT64, Frev),		AFDIVRD },
		{ FCASE(ODIV, TFLOAT64, Fpop),		AFDIVDP },
		{ FCASE(ODIV, TFLOAT64, Fpop|Frev),	AFDIVRDP },

		{ FCASE(OCMP, TFLOAT32, 0),		AFCOMF },
		{ FCASE(OCMP, TFLOAT32, Fpop),		AFCOMFP },
		{ FCASE(OCMP, TFLOAT64, 0),		AFCOMD },
		{ FCASE(OCMP, TFLOAT64, Fpop),		AFCOMDP },
		{ FCASE(OCMP, TFLOAT64, Fpop2),		AFCOMDPP },
	};
	int i, et, key;

	et = t->etype;

	// clear Frev if unneeded
	if(op == OADD || op == OMUL)
		flg &= ~Frev;

	key = FCASE(op, et, flg);
	for(i=0; i<nelem(optab); i++)
		if(optab[i].key == key)
			return optab[i].as;

	fatal("foptoas %O %T %#x", op, t, flg);
	return 0;
}
static int resvd[] =
{
// D_DI, // for movstring
@ -600,6 +675,8 @@ ginit(void)
reg[resvd[i]]++;
}
ulong regpc[D_NONE];
void
gclean(void)
{
@ -610,14 +687,12 @@ gclean(void)
for(i=D_AX; i<=D_DI; i++)
if(reg[i])
yyerror("reg %R left allocated\n", i);
yyerror("reg %R left allocated at %lux\n", i, regpc[i]);
for(i=D_F0; i<=D_F7; i++)
if(reg[i])
yyerror("reg %R left allocated\n", i);
}
ulong regpc[D_NONE];
/*
* allocate register of type t, leave in n.
* if o != N, o is desired fixed register.
@ -681,7 +756,7 @@ err:
out:
if(reg[i] == 0) {
regpc[i] = getcallerpc(&n);
regpc[i] = (ulong)__builtin_return_address(0);
if(i == D_AX || i == D_CX || i == D_DX || i == D_SP) {
dump("regalloc-o", o);
fatal("regalloc %R", i);
@ -837,168 +912,445 @@ gconreg(int as, vlong c, int reg)
gins(as, &n1, &n2);
}
/*
* generate move:
* t = f
* f may be in memory,
* t is known to be a 32-bit register.
* Is this node a memory operand?
*/
void
gload(Node *f, Node *t)
int
ismem(Node *n)
{
int a, ft;
ft = simtype[f->type->etype];
switch(ft) {
default:
fatal("gload %T", f->type);
case TINT8:
a = AMOVBLSX;
if(isconst(f, CTINT) || isconst(f, CTBOOL))
a = AMOVL;
break;
case TBOOL:
case TUINT8:
a = AMOVBLZX;
if(isconst(f, CTINT) || isconst(f, CTBOOL))
a = AMOVL;
break;
case TINT16:
a = AMOVWLSX;
if(isconst(f, CTINT) || isconst(f, CTBOOL))
a = AMOVL;
break;
case TUINT16:
a = AMOVWLZX;
if(isconst(f, CTINT))
a = AMOVL;
break;
case TINT32:
case TUINT32:
case TPTR32:
a = AMOVL;
break;
case TINT64:
case TUINT64:
a = AMOVL; // truncating
break;
switch(n->op) {
case OINDREG:
case ONAME:
case OPARAM:
return 1;
}
gins(a, f, t);
return 0;
}
Node sclean[10];
int nsclean;
/*
 * n is a 64-bit value.  fill in lo and hi to refer to its 32-bit halves.
 *
 * lo is always typed uint32; hi is typed int32 when n is TINT64 and
 * uint32 otherwise.
 *
 * Every call pushes one entry onto the sclean stack, so each call
 * must be paired with a later splitclean().  When n is neither a
 * name, an indirect-register operand, a literal, nor something
 * dotaddable can resolve, igen is used to make it addressable and
 * the resulting node is saved in the sclean entry so that
 * splitclean can regfree it.
 */
void
split64(Node *n, Node *lo, Node *hi)
{
	Node n1;
	int64 i;

	if(!is64(n->type))
		fatal("split64 %T", n->type);

	// Make sure there is a free slot before writing to it;
	// otherwise a full stack would be written one element
	// past the end of sclean.
	if(nsclean >= nelem(sclean))
		fatal("split64 clean");
	sclean[nsclean].op = OEMPTY;
	nsclean++;

	switch(n->op) {
	default:
		if(!dotaddable(n, &n1)) {
			igen(n, &n1, N);
			// remember the igen result for splitclean.
			sclean[nsclean-1] = n1;
		}
		n = &n1;
		// fall through
	case ONAME:
	case OINDREG:
		// both halves start as copies of n; hi is the word 4 bytes further on.
		*lo = *n;
		*hi = *n;
		lo->type = types[TUINT32];
		if(n->type->etype == TINT64)
			hi->type = types[TINT32];
		else
			hi->type = types[TUINT32];
		hi->xoffset += 4;
		break;

	case OLITERAL:
		// convert the constant to n's type, then split its value into two 32-bit constants.
		convconst(&n1, n->type, &n->val);
		i = mpgetfix(n1.val.u.xval);
		nodconst(lo, types[TUINT32], (uint32)i);
		i >>= 32;
		if(n->type->etype == TINT64)
			nodconst(hi, types[TINT32], (int32)i);
		else
			nodconst(hi, types[TUINT32], (uint32)i);
		break;
	}
}
void
splitclean(void)
{
if(nsclean <= 0)
fatal("splitclean");
nsclean--;
if(sclean[nsclean].op != OEMPTY)
regfree(&sclean[nsclean]);
}
void
gmove(Node *f, Node *t)
{
int ft, tt, t64, a;
Node nod;
ft = simtype[f->type->etype];
tt = simtype[t->type->etype];
a = AGOK;
t64 = 0;
if(tt == TINT64 || tt == TUINT64 || tt == TPTR64)
t64 = 1;
int a, ft, tt;
Type *cvt;
Node r1, r2, flo, fhi, tlo, thi, con;
if(debug['M'])
print("gop: %O %O[%E],%O[%E]\n", OAS,
f->op, ft, t->op, tt);
if(isfloat[ft] && f->op == OCONST) {
fatal("fp");
/* TO DO: pick up special constants, possibly preloaded */
//F
/*
if(mpgetflt(f->val.u.fval) == 0.0) {
regalloc(&nod, t->type, t);
gins(AXORPD, &nod, &nod);
gmove(&nod, t);
regfree(&nod);
return;
print("gmove %N -> %N\n", f, t);
ft = simsimtype(f->type);
tt = simsimtype(t->type);
cvt = t->type;
// cannot have two memory operands;
// except 64-bit, which always copies via registers anyway.
if(ismem(f) && ismem(t) && !is64(f->type) && !is64(t->type))
goto hard;
// convert constant to desired type
if(f->op == OLITERAL) {
convconst(&con, t->type, &f->val);
f = &con;
ft = tt; // so big switch will choose a simple mov
// some constants can't move directly to memory.
if(ismem(t)) {
// float constants come from memory.
if(isfloat[tt])
goto hard;
}
}
// value -> value copy, only one memory operand.
// figure out the instruction to use.
// break out of switch for one-instruction gins.
// goto rdst for "destination must be register".
// goto hard for "convert to cvt type first".
// otherwise handle and return.
switch(CASE(ft, tt)) {
default:
fatal("gmove %N -> %N", f, t);
/*
* integer copy and truncate
*/
case CASE(TINT8, TINT8): // same size
case CASE(TINT8, TUINT8):
case CASE(TUINT8, TINT8):
case CASE(TUINT8, TUINT8):
case CASE(TINT16, TINT8): // truncate
case CASE(TUINT16, TINT8):
case CASE(TINT32, TINT8):
case CASE(TUINT32, TINT8):
// case CASE(TINT64, TINT8):
// case CASE(TUINT64, TINT8):
case CASE(TINT16, TUINT8):
case CASE(TUINT16, TUINT8):
case CASE(TINT32, TUINT8):
case CASE(TUINT32, TUINT8):
// case CASE(TINT64, TUINT8):
// case CASE(TUINT64, TUINT8):
a = AMOVB;
break;
case CASE(TINT16, TINT16): // same size
case CASE(TINT16, TUINT16):
case CASE(TUINT16, TINT16):
case CASE(TUINT16, TUINT16):
case CASE(TINT32, TINT16): // truncate
case CASE(TUINT32, TINT16):
// case CASE(TINT64, TINT16):
// case CASE(TUINT64, TINT16):
case CASE(TINT32, TUINT16):
case CASE(TUINT32, TUINT16):
// case CASE(TINT64, TUINT16):
// case CASE(TUINT64, TUINT16):
a = AMOVW;
break;
case CASE(TINT32, TINT32): // same size
case CASE(TINT32, TUINT32):
case CASE(TUINT32, TINT32):
case CASE(TUINT32, TUINT32):
// case CASE(TINT64, TINT32): // truncate
// case CASE(TUINT64, TINT32):
// case CASE(TINT64, TUINT32):
// case CASE(TUINT64, TUINT32):
a = AMOVL;
break;
case CASE(TINT64, TINT64): // same size
case CASE(TINT64, TUINT64):
case CASE(TUINT64, TINT64):
case CASE(TUINT64, TUINT64):
split64(f, &flo, &fhi);
split64(t, &tlo, &thi);
if(f->op == OLITERAL) {
gins(AMOVL, &flo, &tlo);
gins(AMOVL, &fhi, &thi);
} else {
regalloc(&r1, types[TUINT32], N);
regalloc(&r2, types[TUINT32], N);
gins(AMOVL, &flo, &r1);
gins(AMOVL, &fhi, &r2);
gins(AMOVL, &r1, &tlo);
gins(AMOVL, &r2, &thi);
regfree(&r2);
regfree(&r1);
}
splitclean();
splitclean();
return;
/*
* integer up-conversions
*/
case CASE(TINT8, TINT16): // sign extend int8
case CASE(TINT8, TUINT16):
a = AMOVBWSX;
goto rdst;
case CASE(TINT8, TINT32):
case CASE(TINT8, TUINT32):
a = AMOVBLSX;
goto rdst;
// case CASE(TINT8, TINT64):
// case CASE(TINT8, TUINT64):
// a = AMOVBQSX;
// goto rdst;
case CASE(TUINT8, TINT16): // zero extend uint8
case CASE(TUINT8, TUINT16):
a = AMOVBWZX;
goto rdst;
case CASE(TUINT8, TINT32):
case CASE(TUINT8, TUINT32):
a = AMOVBLZX;
goto rdst;
// case CASE(TUINT8, TINT64):
// case CASE(TUINT8, TUINT64):
// a = AMOVBQZX;
// goto rdst;
case CASE(TINT16, TINT32): // sign extend int16
case CASE(TINT16, TUINT32):
a = AMOVWLSX;
goto rdst;
// case CASE(TINT16, TINT64):
// case CASE(TINT16, TUINT64):
// a = AMOVWQSX;
// goto rdst;
case CASE(TUINT16, TINT32): // zero extend uint16
case CASE(TUINT16, TUINT32):
a = AMOVWLZX;
goto rdst;
// case CASE(TUINT16, TINT64):
// case CASE(TUINT16, TUINT64):
// a = AMOVWQZX;
// goto rdst;
case CASE(TINT32, TINT64): // sign extend int32
case CASE(TINT32, TUINT64):
split64(t, &tlo, &thi);
nodreg(&flo, tlo.type, D_AX);
nodreg(&fhi, thi.type, D_DX);
gmove(f, &flo);
gins(ACDQ, N, N);
gins(AMOVL, &flo, &tlo);
gins(AMOVL, &fhi, &thi);
return;
// case CASE(TUINT32, TINT64): // zero extend uint32
// case CASE(TUINT32, TUINT64):
// // AMOVL into a register zeros the top of the register,
// // so this is not always necessary, but if we rely on AMOVL
// // the optimizer is almost certain to screw with us.
// a = AMOVLQZX;
// goto rdst;
/*
* float to integer
*
case CASE(TFLOAT32, TINT16):
case CASE(TFLOAT32, TINT32):
case CASE(TFLOAT32, TINT64):
case CASE(TFLOAT64, TINT16):
case CASE(TFLOAT64, TINT32):
case CASE(TFLOAT64, TINT64):
if(ft == TFLOAT32)
gins(AFMOVF, f, &f0);
else
gins(AFMOVD, f, &f0);
if(tt == TINT16)
gins(AFMOVWP, &f0, t);
else if(tt == TINT32)
gins(AFMOVLP, &f0, t);
else
gins(AFMOVVP, &f0, t);
return;
case CASE(TFLOAT32, TINT8):
case CASE(TFLOAT32, TUINT16):
case CASE(TFLOAT32, TUINT8):
case CASE(TFLOAT64, TINT8):
case CASE(TFLOAT64, TUINT16):
case CASE(TFLOAT64, TUINT8):
// convert via int32.
cvt = types[TINT32];
goto hard;
case CASE(TFLOAT32, TUINT32):
case CASE(TFLOAT64, TUINT32):
// could potentially convert via int64.
cvt = types[TINT64];
goto hard;
case CASE(TFLOAT32, TUINT64):
case CASE(TFLOAT64, TUINT64):
if(ft == TFLOAT32)
gins(AFMOVF, f, &f0);
else
gins(AFMOVD, f, &f0);
// algorithm is:
// if small enough, use native float64 -> int64 conversion.
// otherwise, subtract 2^63, convert, and add it back.
bignodes();
regalloc(&r1, types[ft], N);
regalloc(&r2, types[ft], N);
gins(optoas(OCMP, f->type), &bigf, &r1);
p1 = gbranch(optoas(OLE, f->type), T);
gins(a, &r1, &r2);
p2 = gbranch(AJMP, T);
patch(p1, pc);
gins(optoas(OAS, f->type), &bigf, &r3);
gins(optoas(OSUB, f->type), &r3, &r1);
gins(a, &r1, &r2);
gins(AMOVQ, &bigi, &r4);
gins(AXORQ, &r4, &r2);
patch(p2, pc);
gmove(&r2, t);
regfree(&r4);
regfree(&r3);
regfree(&r2);
regfree(&r1);
fatal("lazy");
return;
*/
/*
* integer to float
*
case CASE(TINT32, TFLOAT32):
a = ACVTSL2SS;
goto rdst;
case CASE(TINT32, TFLOAT64):
a = ACVTSL2SD;
goto rdst;
case CASE(TINT64, TFLOAT32):
a = ACVTSQ2SS;
goto rdst;
case CASE(TINT64, TFLOAT64):
a = ACVTSQ2SD;
goto rdst;
case CASE(TINT16, TFLOAT32):
case CASE(TINT16, TFLOAT64):
case CASE(TINT8, TFLOAT32):
case CASE(TINT8, TFLOAT64):
case CASE(TUINT16, TFLOAT32):
case CASE(TUINT16, TFLOAT64):
case CASE(TUINT8, TFLOAT32):
case CASE(TUINT8, TFLOAT64):
// convert via int32
cvt = types[TINT32];
goto hard;
case CASE(TUINT32, TFLOAT32):
case CASE(TUINT32, TFLOAT64):
// convert via int64.
cvt = types[TINT64];
goto hard;
case CASE(TUINT64, TFLOAT32):
case CASE(TUINT64, TFLOAT64):
// algorithm is:
// if small enough, use native int64 -> uint64 conversion.
// otherwise, halve (rounding to odd?), convert, and double.
a = ACVTSQ2SS;
if(tt == TFLOAT64)
a = ACVTSQ2SD;
nodconst(&zero, types[TUINT64], 0);
nodconst(&one, types[TUINT64], 1);
regalloc(&r1, f->type, f);
regalloc(&r2, t->type, t);
regalloc(&r3, f->type, N);
regalloc(&r4, f->type, N);
gmove(f, &r1);
gins(ACMPQ, &r1, &zero);
p1 = gbranch(AJLT, T);
gins(a, &r1, &r2);
p2 = gbranch(AJMP, T);
patch(p1, pc);
gmove(&r1, &r3);
gins(ASHRQ, &one, &r3);
gmove(&r1, &r4);
gins(AANDL, &one, &r4);
gins(AORQ, &r4, &r3);
gins(a, &r3, &r2);
gins(optoas(OADD, t->type), &r2, &r2);
patch(p2, pc);
gmove(&r2, t);
regfree(&r4);
regfree(&r3);
regfree(&r2);
regfree(&r1);
return;
*/
/*
* float to float
*
case CASE(TFLOAT32, TFLOAT32):
a = AMOVSS;
break;
case CASE(TFLOAT64, TFLOAT64):
a = AMOVSD;
break;
case CASE(TFLOAT32, TFLOAT64):
a = ACVTSS2SD;
goto rdst;
case CASE(TFLOAT64, TFLOAT32):
a = ACVTSD2SS;
goto rdst;
*/
}
if(is64(types[ft]) && isconst(f, CTINT)) {
f->type = types[TINT32]; // XXX check constant value, choose correct type
ft = TINT32;
}
gins(a, f, t);
return;
if(is64(types[ft]) && is64(types[tt])) {
sgen(f, t, 8);
return;
}
rdst:
// requires register destination
regalloc(&r1, t->type, t);
gins(a, f, &r1);
gmove(&r1, t);
regfree(&r1);
return;
regalloc(&nod, types[TINT32], t);
gload(f, &nod);
gstore(&nod, t);
regfree(&nod);
hard:
// requires register intermediate
regalloc(&r1, cvt, t);
gmove(f, &r1);
gmove(&r1, t);
regfree(&r1);
return;
}
int
@ -1025,9 +1377,13 @@ gins(int as, Node *f, Node *t)
{
Prog *p;
// generating AMOVL BX, BX is just dumb.
if(f != N && t != N && samaddr(f, t) && as == AMOVL)
return nil;
switch(as) {
case AMOVB:
case AMOVW:
case AMOVL:
if(f != N && t != N && samaddr(f, t))
return nil;
}
p = prog(as);
if(f != N)
@ -1173,6 +1529,25 @@ naddr(Node *n, Addr *a)
}
}
/*
 * Try to express the ODOT node n as a directly addressable operand.
 *
 * On success, *n1 is a copy of the node returned by dotoffset,
 * retyped to n->type and with oary[0] added to its xoffset, and the
 * result is 1.  The result is 0 (with *n1 untouched) when n is not
 * an ODOT, when dotoffset yields no node or a non-addable one, or
 * when dotoffset's result is not exactly 1 with a non-negative
 * first offset.
 *
 * NOTE(review): dotoffset's return value looks like the number of
 * offsets it stored in the array -- confirm against its definition.
 */
int
dotaddable(Node *n, Node *n1)
{
	int cnt, offs[10];
	Node *base;

	if(n->op != ODOT)
		return 0;

	cnt = dotoffset(n, offs, &base);
	if(base == N || !base->addable)
		return 0;
	if(cnt != 1 || offs[0] < 0)
		return 0;

	*n1 = *base;
	n1->type = n->type;
	n1->xoffset += offs[0];
	return 1;
}
void
sudoclean(void)
{

View File

@ -857,7 +857,7 @@ convconst(Node *con, Type *t, Val *val)
con->val.u.xval = mal(sizeof *con->val.u.xval);
switch(val->ctype) {
default:
fatal("convconst ctype=%d %lT", val->ctype, t->type);
fatal("convconst ctype=%d %lT", val->ctype, t);
case CTINT:
i = mpgetfix(val->u.xval);
break;