1
0
mirror of https://github.com/golang/go synced 2024-11-25 22:57:58 -07:00
* floating point -> integer conversions.
    x86 defines that overflow/underflow
    results in 1<<15, 1<<31, 1<<63 for
    int16, int32, int64.  when building the
    unsigned conversions out of the native signed
    ones, 8g turns overflow/underflow into zero.
    the spec does not say what should happen.

  * many tiny bug fixes.  can run a large number
    of files from go/test now, and can fmt.Printf.

  * struggling with byte register allocation
    and float32 computation.

R=ken
OCL=29642
CL=29811
This commit is contained in:
Russ Cox 2009-06-02 23:25:17 -07:00
parent 7f9d2c8264
commit a00bfb5b49
3 changed files with 483 additions and 248 deletions

View File

@ -7,58 +7,6 @@
#include "gg.h"
static int cancgen64(Node *n, Node *res);
int
is64(Type *t)
{
if(t == T)
return 0;
switch(simtype[t->etype]) {
case TINT64:
case TUINT64:
case TPTR64:
return 1;
}
return 0;
}
int
noconv(Type *t1, Type *t2)
{
int e1, e2;
e1 = simtype[t1->etype];
e2 = simtype[t2->etype];
switch(e1) {
case TINT8:
case TUINT8:
return e2 == TINT8 || e2 == TUINT8;
case TINT16:
case TUINT16:
return e2 == TINT16 || e2 == TUINT16;
case TINT32:
case TUINT32:
case TPTR32:
return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32;
case TINT64:
case TUINT64:
case TPTR64:
return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64;
case TFLOAT32:
return e2 == TFLOAT32;
case TFLOAT64:
return e2 == TFLOAT64;
}
return 0;
}
/*
* generate:
* res = n;
@ -84,11 +32,16 @@ cgen(Node *n, Node *res)
if(res == N || res->type == T)
fatal("cgen: res nil");
// static initializations
if(initflag && gen_as_init(n, res))
return;
// function calls on both sides? introduce temporary
if(n->ullman >= UINF && res->ullman >= UINF) {
tempname(&n1, n->type);
tempalloc(&n1, n->type);
cgen(n, &n1);
cgen(&n1, res);
tempfree(&n1);
return;
}
@ -125,10 +78,6 @@ cgen(Node *n, Node *res)
// otherwise, the result is addressable but n is not.
// let's do some computation.
// 64-bit ops are hard on 32-bit machine.
if(is64(n->type) && cancgen64(n, res))
return;
// use ullman to pick operand to eval first.
nl = n->left;
nr = n->right;
@ -144,6 +93,25 @@ cgen(Node *n, Node *res)
return;
}
// 64-bit ops are hard on 32-bit machine.
if(is64(n->type) || is64(res->type) || n->left != N && is64(n->left->type)) {
switch(n->op) {
// math goes to cgen64.
case OMINUS:
case OCOM:
case OADD:
case OSUB:
case OMUL:
case OLSH:
case ORSH:
case OAND:
case OOR:
case OXOR:
cgen64(n, res);
return;
}
}
if(isfloat[n->type->etype] && isfloat[nl->type->etype])
goto flt;
@ -178,6 +146,7 @@ cgen(Node *n, Node *res)
return;
case OMINUS:
case OCOM:
a = optoas(n->op, nl->type);
goto uop;
@ -218,8 +187,8 @@ cgen(Node *n, Node *res)
break;
case OLEN:
if(istype(nl->type, TSTRING) || istype(nl->type, TMAP)) {
// both string and map have len in the first 32-bit word.
if(istype(nl->type, TMAP)) {
// map has len in the first 32-bit word.
// a zero pointer means zero length
tempalloc(&n1, types[tptr]);
cgen(nl, &n1);
@ -243,7 +212,9 @@ cgen(Node *n, Node *res)
regfree(&n1);
break;
}
if(isslice(nl->type)) {
if(istype(nl->type, TSTRING) || isslice(nl->type)) {
// both slice and string have len one pointer into the struct.
// a zero pointer means zero length
igen(nl, &n1, res);
n1.op = OINDREG;
n1.type = types[TUINT32];
@ -289,10 +260,6 @@ cgen(Node *n, Node *res)
case OMOD:
case ODIV:
if(isfloat[n->type->etype]) {
a = optoas(n->op, nl->type);
goto abop;
}
cgen_div(n->op, nl, nr, res);
break;
@ -340,8 +307,19 @@ uop: // unary
return;
flt: // floating-point. 387 (not SSE2) to interoperate with 6c
nodreg(&f0, n->type, D_F0);
nodreg(&f0, nl->type, D_F0);
nodreg(&f1, n->type, D_F0+1);
if(nr != N)
goto flt2;
// unary
cgen(nl, &f0);
if(n->op != OCONV)
gins(foptoas(n->op, n->type, 0), &f0, &f0);
gmove(&f0, res);
return;
flt2: // binary
if(nl->ullman >= nr->ullman) {
cgen(nl, &f0);
if(nr->addable)
@ -402,7 +380,7 @@ agen(Node *n, Node *res)
fatal("agen %O", n->op);
case OCONV:
if(!eqtype(n->type, nl->type))
if(!cvttype(n->type, nl->type))
fatal("agen: non-trivial OCONV");
agen(nl, res);
break;
@ -427,8 +405,11 @@ agen(Node *n, Node *res)
if(nr->addable) {
agenr(nl, &n3, res);
if(!isconst(nr, CTINT)) {
tempalloc(&tmp, nr->type);
cgen(nr, &tmp);
regalloc(&n1, nr->type, N);
cgen(nr, &n1);
gmove(&tmp, &n1);
tempfree(&tmp);
}
} else if(nl->addable) {
if(!isconst(nr, CTINT)) {
@ -640,7 +621,7 @@ bgen(Node *n, int true, Prog *to)
{
int et, a;
Node *nl, *nr, *r;
Node n1, n2, tmp;
Node n1, n2, tmp, t1, t2, ax;
Prog *p1, *p2;
if(debug['g']) {
@ -778,6 +759,37 @@ bgen(Node *n, int true, Prog *to)
break;
}
if(isfloat[nr->type->etype]) {
nodreg(&tmp, nr->type, D_F0);
nodreg(&n2, nr->type, D_F0 + 1);
nodreg(&ax, types[TUINT16], D_AX);
et = simsimtype(nr->type);
if(et == TFLOAT64) {
// easy - do in FPU
cgen(nr, &tmp);
cgen(nl, &tmp);
gins(AFUCOMPP, &tmp, &n2);
} else {
// NOTE(rsc): This is wrong.
// It's right for comparison but presumably all the
// other ops have the same problem. We need to
// figure out what the right solution is, besides
// tell people to use float64.
tempalloc(&t1, types[TFLOAT32]);
tempalloc(&t2, types[TFLOAT32]);
cgen(nr, &t1);
cgen(nl, &t2);
gmove(&t1, &tmp);
gins(AFCOMFP, &t1, &tmp);
tempfree(&t2);
tempfree(&t1);
}
gins(AFSTSW, N, &ax);
gins(ASAHF, N, N);
patch(gbranch(optoas(brrev(a), nr->type), T), to);
break;
}
if(is64(nr->type)) {
if(!nl->addable) {
tempalloc(&n1, nl->type);
@ -800,45 +812,43 @@ bgen(Node *n, int true, Prog *to)
a = optoas(a, nr->type);
if(nr->ullman >= UINF) {
regalloc(&n1, nr->type, N);
cgen(nr, &n1);
tempalloc(&tmp, nr->type);
cgen(nr, &tmp);
tempname(&tmp, nr->type);
gmove(&n1, &tmp);
regfree(&n1);
regalloc(&n1, nl->type, N);
tempalloc(&n1, nl->type);
cgen(nl, &n1);
regalloc(&n2, nr->type, &n2);
regalloc(&n2, nr->type, N);
cgen(&tmp, &n2);
gins(optoas(OCMP, nr->type), &n1, &n2);
patch(gbranch(a, nr->type), to);
regfree(&n1);
tempfree(&n1);
tempfree(&tmp);
regfree(&n2);
break;
}
regalloc(&n1, nl->type, N);
tempalloc(&n1, nl->type);
cgen(nl, &n1);
if(smallintconst(nr)) {
gins(optoas(OCMP, nr->type), &n1, nr);
patch(gbranch(a, nr->type), to);
regfree(&n1);
tempfree(&n1);
break;
}
tempalloc(&tmp, nr->type);
cgen(nr, &tmp);
regalloc(&n2, nr->type, N);
cgen(nr, &n2);
gmove(&tmp, &n2);
tempfree(&tmp);
gins(optoas(OCMP, nr->type), &n1, &n2);
patch(gbranch(a, nr->type), to);
regfree(&n1);
regfree(&n2);
tempfree(&n1);
break;
}
}
@ -883,7 +893,7 @@ stkof(Node *n)
void
sgen(Node *n, Node *res, int w)
{
Node nodl, nodr;
Node dst, src, tdst, tsrc;
int32 c, q, odst, osrc;
if(debug['g']) {
@ -904,22 +914,29 @@ sgen(Node *n, Node *res, int w)
osrc = stkof(n);
odst = stkof(res);
// TODO(rsc): Should these be tempalloc instead?
nodreg(&nodl, types[tptr], D_DI);
nodreg(&nodr, types[tptr], D_SI);
nodreg(&dst, types[tptr], D_DI);
nodreg(&src, types[tptr], D_SI);
if(n->ullman >= res->ullman) {
agen(n, &nodr);
agen(res, &nodl);
} else {
agen(res, &nodl);
agen(n, &nodr);
}
tempalloc(&tsrc, types[tptr]);
tempalloc(&tdst, types[tptr]);
if(!n->addable)
agen(n, &tsrc);
if(!res->addable)
agen(res, &tdst);
if(n->addable)
agen(n, &src);
else
gmove(&tsrc, &src);
if(res->addable)
agen(res, &dst);
else
gmove(&tdst, &dst);
tempfree(&tdst);
tempfree(&tsrc);
c = w % 4; // bytes
q = w / 4; // doublewords
gins(ACLD, N, N);
// if we are copying forward on the stack and
// the src and dst overlap, then reverse direction
if(osrc < odst && odst < osrc+w) {
@ -949,6 +966,7 @@ sgen(Node *n, Node *res, int w)
// we leave with the flag clear
gins(ACLD, N, N);
} else {
gins(ACLD, N, N); // paranoia. TODO(rsc): remove?
// normal direction
if(q >= 4) {
gconreg(AMOVL, q, D_CX);
@ -966,34 +984,13 @@ sgen(Node *n, Node *res, int w)
}
}
void
nswap(Node *a, Node *b)
{
Node t;
t = *a;
*a = *b;
*b = t;
}
Node*
ncon(uint32 i)
{
static Node n;
if(n.type == T)
nodconst(&n, types[TUINT32], 0);
mpmovecfix(n.val.u.xval, i);
return &n;
}
/*
* attempt to generate 64-bit
* res = n
* return 1 on success, 0 if op not handled.
*/
static int
cancgen64(Node *n, Node *res)
void
cgen64(Node *n, Node *res)
{
Node t1, t2, ax, dx, cx, ex, fx, *l, *r;
Node lo1, lo2, lo3, hi1, hi2, hi3;
@ -1001,8 +998,6 @@ cancgen64(Node *n, Node *res)
uint64 v;
uint32 lv, hv;
if(n->op == OCALL)
return 0;
if(res->op != OINDREG && res->op != ONAME) {
dump("n", n);
dump("res", res);
@ -1010,12 +1005,7 @@ cancgen64(Node *n, Node *res)
}
switch(n->op) {
default:
return 0;
case ONAME:
case ODOT:
gmove(n, res);
return 1;
fatal("cgen64 %O", n->op);
case OMINUS:
cgen(n->left, res);
@ -1024,7 +1014,7 @@ cancgen64(Node *n, Node *res)
gins(AADCL, ncon(0), &hi1);
gins(ANEGL, N, &hi1);
splitclean();
return 1;
return;
case OCOM:
cgen(n->left, res);
@ -1032,7 +1022,7 @@ cancgen64(Node *n, Node *res)
gins(ANOTL, N, &lo1);
gins(ANOTL, N, &hi1);
splitclean();
return 1;
return;
case OADD:
case OSUB:
@ -1408,7 +1398,6 @@ out:
tempfree(&t2);
if(l == &t1)
tempfree(&t1);
return 1;
}
/*

View File

@ -2,7 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <u.h>
#include <libc.h>
@ -68,7 +67,7 @@ EXTERN Node* throwreturn;
EXTERN int maxstksize;
/*
* gen.c
* ggen.c
*/
void compile(Node*);
void proglist(void);
@ -90,7 +89,7 @@ void checklabels();
void ginscall(Node*, int);
/*
* cgen
* cgen.c
*/
void agen(Node*, Node*);
void agenr(Node *n, Node *a, Node *res);
@ -103,10 +102,14 @@ Prog* gins(int, Node*, Node*);
int samaddr(Node*, Node*);
void naddr(Node*, Addr*);
void cgen_aret(Node*, Node*);
int is64(Type*);
void cmp64(Node*, Node*, int, Prog*);
Node* ncon(uint32);
/*
* cgen64.c
*/
void cmp64(Node*, Node*, int, Prog*);
void cgen64(Node*, Node*);
/*
* gsubr.c
*/
@ -133,9 +136,10 @@ void tempfree(Node*);
Node* nodarg(Type*, int);
void nodreg(Node*, Type*, int);
void nodindreg(Node*, Type*, int);
void nodconst(Node*, Type*, vlong);
void nodconst(Node*, Type*, int64);
void gconreg(int, vlong, int);
void datagostring(Strlit*, Addr*);
void datastring(char*, int, Addr*);
void buildtxt(void);
Plist* newplist(void);
int isfat(Type*);
@ -145,6 +149,7 @@ int dotaddable(Node*, Node*);
void afunclit(Addr*);
void split64(Node*, Node*, Node*);
void splitclean(void);
void nswap(Node*, Node*);
/*
* list.c

View File

@ -407,6 +407,22 @@ optoas(int op, Type *t)
a = ADECL;
break;
case CASE(OCOM, TINT8):
case CASE(OCOM, TUINT8):
a = ANOTB;
break;
case CASE(OCOM, TINT16):
case CASE(OCOM, TUINT16):
a = ANOTW;
break;
case CASE(OCOM, TINT32):
case CASE(OCOM, TUINT32):
case CASE(OCOM, TPTR32):
a = ANOTL;
break;
case CASE(OMINUS, TINT8):
case CASE(OMINUS, TUINT8):
a = ANEGB;
@ -560,6 +576,10 @@ optoas(int op, Type *t)
a = ADIVL;
break;
case CASE(OEXTEND, TINT8):
a = ACBW;
break;
case CASE(OEXTEND, TINT16):
a = ACWD;
break;
@ -577,7 +597,13 @@ foptoas(int op, Type *t, int flg)
{
int et;
et = t->etype;
et = simtype[t->etype];
// If we need Fpop, it means we're working on
// two different floating-point registers, not memory.
// There the instruction only has a float64 form.
if(flg & Fpop)
et = TFLOAT64;
// clear Frev if unneeded
switch(op) {
@ -655,6 +681,9 @@ static int resvd[] =
D_CX, // for shift
D_DX, // for divide
D_SP, // for stack
D_BL, // because D_BX can be allocated
D_BH,
};
void
@ -664,7 +693,7 @@ ginit(void)
for(i=0; i<nelem(reg); i++)
reg[i] = 1;
for(i=D_AX; i<=D_DI; i++)
for(i=D_AL; i<=D_DI; i++)
reg[i] = 0;
// TODO: Use MMX ?
@ -685,7 +714,7 @@ gclean(void)
for(i=0; i<nelem(resvd); i++)
reg[resvd[i]]--;
for(i=D_AX; i<=D_DI; i++)
for(i=D_AL; i<=D_DI; i++)
if(reg[i])
yyerror("reg %R left allocated at %lux\n", i, regpc[i]);
for(i=D_F0; i<=D_F7; i++)
@ -701,7 +730,7 @@ gclean(void)
void
regalloc(Node *n, Type *t, Node *o)
{
int i, et;
int i, et, min, max;
if(t == T)
fatal("regalloc: t nil");
@ -710,6 +739,13 @@ regalloc(Node *n, Type *t, Node *o)
switch(et) {
case TINT8:
case TUINT8:
// This is going to come back to bite us;
// we're not tracking tiny registers vs big ones.
// The hope is that because we use temporaries
// everywhere instead of registers, this will be okay.
min = D_AL;
max = D_BH;
goto try;
case TINT16:
case TUINT16:
case TINT32:
@ -719,17 +755,20 @@ regalloc(Node *n, Type *t, Node *o)
case TPTR32:
case TPTR64:
case TBOOL:
min = D_AX;
max = D_DI;
try:
if(o != N && o->op == OREGISTER) {
i = o->val.u.reg;
if(i >= D_AX && i <= D_DI)
if(i >= D_AX && i <= max)
goto out;
}
for(i=D_AX; i<=D_DI; i++)
for(i=min; i<=max; i++)
if(reg[i] == 0)
goto out;
fprint(2, "registers allocated at\n");
for(i=D_AX; i<=D_DI; i++)
for(i=min; i<=max; i++)
fprint(2, "\t%R\t%#lux\n", i, regpc[i]);
yyerror("out of fixed registers");
goto err;
@ -805,6 +844,7 @@ tempalloc(Node *n, Type *t)
stksize += w;
stksize = rnd(stksize, w);
n->xoffset = -stksize;
//print("tempalloc %d -> %d from %p\n", n->ostk, n->xoffset, __builtin_return_address(0));
if(stksize > maxstksize)
maxstksize = stksize;
}
@ -812,6 +852,7 @@ tempalloc(Node *n, Type *t)
void
tempfree(Node *n)
{
//print("tempfree %d\n", n->xoffset);
if(n->xoffset != -stksize)
fatal("tempfree %lld %d", -n->xoffset, stksize);
stksize = n->ostk;
@ -912,6 +953,33 @@ gconreg(int as, vlong c, int reg)
gins(as, &n1, &n2);
}
/*
* swap node contents
*/
void
nswap(Node *a, Node *b)
{
Node t;
t = *a;
*a = *b;
*b = t;
}
/*
* return constant i node.
* overwritten by next call, but useful in calls to gins.
*/
Node*
ncon(uint32 i)
{
static Node n;
if(n.type == T)
nodconst(&n, types[TUINT32], 0);
mpmovecfix(n.val.u.xval, i);
return &n;
}
/*
* Is this node a memory operand?
@ -954,9 +1022,17 @@ split64(Node *n, Node *lo, Node *hi)
sclean[nsclean-1] = n1;
}
n = &n1;
// fall through
goto common;
case ONAME:
if(n->class == PPARAMREF) {
cgen(n->heapaddr, &n1);
sclean[nsclean-1] = n1;
// fall through.
n = &n1;
}
goto common;
case OINDREG:
common:
*lo = *n;
*hi = *n;
lo->type = types[TUINT32];
@ -990,12 +1066,44 @@ splitclean(void)
regfree(&sclean[nsclean]);
}
/*
* set up nodes representing fp constants
*/
Node zerof;
Node two64f;
Node two63f;
void
bignodes(void)
{
static int did;
if(did)
return;
did = 1;
two64f = *ncon(0);
two64f.type = types[TFLOAT64];
two64f.val.ctype = CTFLT;
two64f.val.u.fval = mal(sizeof *two64f.val.u.fval);
mpmovecflt(two64f.val.u.fval, 18446744073709551616.);
two63f = two64f;
two63f.val.u.fval = mal(sizeof *two63f.val.u.fval);
mpmovecflt(two63f.val.u.fval, 9223372036854775808.);
zerof = two64f;
zerof.val.u.fval = mal(sizeof *zerof.val.u.fval);
mpmovecflt(zerof.val.u.fval, 0);
}
void
gmove(Node *f, Node *t)
{
int a, ft, tt;
Type *cvt;
Node r1, r2, flo, fhi, tlo, thi, con;
Node r1, r2, t1, t2, flo, fhi, tlo, thi, con, f0, f1, ax, dx, cx;
Prog *p1, *p2, *p3;
if(debug['M'])
print("gmove %N -> %N\n", f, t);
@ -1004,16 +1112,19 @@ gmove(Node *f, Node *t)
tt = simsimtype(t->type);
cvt = t->type;
// cannot have two memory operands;
// cannot have two integer memory operands;
// except 64-bit, which always copies via registers anyway.
if(ismem(f) && ismem(t) && !is64(f->type) && !is64(t->type))
if(isint[ft] && isint[tt] && !is64(f->type) && !is64(t->type) && ismem(f) && ismem(t))
goto hard;
// convert constant to desired type
if(f->op == OLITERAL) {
convconst(&con, t->type, &f->val);
if(tt == TFLOAT32)
convconst(&con, types[TFLOAT64], &f->val);
else
convconst(&con, t->type, &f->val);
f = &con;
ft = tt; // so big switch will choose a simple mov
ft = simsimtype(con.type);
// some constants can't move directly to memory.
if(ismem(t)) {
@ -1032,7 +1143,7 @@ gmove(Node *f, Node *t)
switch(CASE(ft, tt)) {
default:
fatal("gmove %N -> %N", f, t);
goto fatal;
/*
* integer copy and truncate
@ -1057,10 +1168,9 @@ gmove(Node *f, Node *t)
case CASE(TINT64, TUINT8):
case CASE(TUINT64, TUINT8):
split64(f, &flo, &fhi);
regalloc(&r1, t->type, t);
nodreg(&r1, t->type, D_AX);
gins(AMOVB, &flo, &r1);
gins(AMOVB, &r1, t);
regfree(&r1);
splitclean();
return;
@ -1080,10 +1190,9 @@ gmove(Node *f, Node *t)
case CASE(TINT64, TUINT16):
case CASE(TUINT64, TUINT16):
split64(f, &flo, &fhi);
regalloc(&r1, t->type, t);
nodreg(&r1, t->type, D_AX);
gins(AMOVW, &flo, &r1);
gins(AMOVW, &r1, t);
regfree(&r1);
splitclean();
return;
@ -1099,10 +1208,9 @@ gmove(Node *f, Node *t)
case CASE(TINT64, TUINT32):
case CASE(TUINT64, TUINT32):
split64(f, &flo, &fhi);
regalloc(&r1, t->type, t);
nodreg(&r1, t->type, D_AX);
gins(AMOVL, &flo, &r1);
gins(AMOVL, &r1, t);
regfree(&r1);
splitclean();
return;
@ -1116,14 +1224,12 @@ gmove(Node *f, Node *t)
gins(AMOVL, &flo, &tlo);
gins(AMOVL, &fhi, &thi);
} else {
regalloc(&r1, types[TUINT32], N);
regalloc(&r2, types[TUINT32], N);
nodreg(&r1, t->type, D_AX);
nodreg(&r2, t->type, D_DX);
gins(AMOVL, &flo, &r1);
gins(AMOVL, &fhi, &r2);
gins(AMOVL, &r1, &tlo);
gins(AMOVL, &r2, &thi);
regfree(&r2);
regfree(&r1);
}
splitclean();
splitclean();
@ -1198,23 +1304,36 @@ gmove(Node *f, Node *t)
/*
* float to integer
*
*/
case CASE(TFLOAT32, TINT16):
case CASE(TFLOAT32, TINT32):
case CASE(TFLOAT32, TINT64):
case CASE(TFLOAT64, TINT16):
case CASE(TFLOAT64, TINT32):
case CASE(TFLOAT64, TINT64):
if(t->op == OREGISTER)
goto hardmem;
nodreg(&r1, types[ft], D_F0);
if(ft == TFLOAT32)
gins(AFMOVF, f, &f0);
gins(AFMOVF, f, &r1);
else
gins(AFMOVD, f, &f0);
gins(AFMOVD, f, &r1);
// set round to zero mode during conversion
tempalloc(&t1, types[TUINT16]);
tempalloc(&t2, types[TUINT16]);
gins(AFSTCW, N, &t1);
gins(AMOVW, ncon(0xf7f), &t2);
gins(AFLDCW, &t2, N);
if(tt == TINT16)
gins(AFMOVWP, &f0, t);
gins(AFMOVWP, &r1, t);
else if(tt == TINT32)
gins(AFMOVLP, &f0, t);
gins(AFMOVLP, &r1, t);
else
gins(AFMOVVP, &f0, t);
gins(AFMOVVP, &r1, t);
gins(AFLDCW, &t1, N);
tempfree(&t2);
tempfree(&t1);
return;
case CASE(TFLOAT32, TINT8):
@ -1224,139 +1343,249 @@ gmove(Node *f, Node *t)
case CASE(TFLOAT64, TUINT16):
case CASE(TFLOAT64, TUINT8):
// convert via int32.
cvt = types[TINT32];
goto hard;
tempalloc(&t1, types[TINT32]);
gmove(f, &t1);
switch(tt) {
default:
fatal("gmove %T", t);
case TINT8:
gins(ACMPL, &t1, ncon(-0x80));
p1 = gbranch(optoas(OLT, types[TINT32]), T);
gins(ACMPL, &t1, ncon(0x7f));
p2 = gbranch(optoas(OGT, types[TINT32]), T);
p3 = gbranch(AJMP, T);
patch(p1, pc);
patch(p2, pc);
gmove(ncon(-0x80), &t1);
patch(p3, pc);
gmove(&t1, t);
break;
case TUINT8:
gins(ATESTL, ncon(0xffffff00), &t1);
p1 = gbranch(AJEQ, T);
gins(AMOVB, ncon(0), &t1);
patch(p1, pc);
gmove(&t1, t);
break;
case TUINT16:
gins(ATESTL, ncon(0xffff0000), &t1);
p1 = gbranch(AJEQ, T);
gins(AMOVW, ncon(0), &t1);
patch(p1, pc);
gmove(&t1, t);
break;
}
tempfree(&t1);
return;
case CASE(TFLOAT32, TUINT32):
case CASE(TFLOAT64, TUINT32):
// could potentially convert via int64.
cvt = types[TINT64];
goto hard;
// convert via int64.
tempalloc(&t1, types[TINT64]);
gmove(f, &t1);
split64(&t1, &tlo, &thi);
gins(ACMPL, &thi, ncon(0));
p1 = gbranch(AJEQ, T);
gins(AMOVL, ncon(0), &tlo);
patch(p1, pc);
gmove(&tlo, t);
splitclean();
tempfree(&t1);
return;
case CASE(TFLOAT32, TUINT64):
case CASE(TFLOAT64, TUINT64):
bignodes();
nodreg(&f0, types[ft], D_F0);
nodreg(&f1, types[ft], D_F0 + 1);
nodreg(&ax, types[TUINT16], D_AX);
if(ft == TFLOAT32)
gins(AFMOVF, f, &f0);
else
gins(AFMOVD, f, &f0);
// algorithm is:
// if 0 > v { answer = 0 }
gmove(&zerof, &f0);
gins(AFUCOMP, &f0, &f1);
gins(AFSTSW, N, &ax);
gins(ASAHF, N, N);
p1 = gbranch(optoas(OGT, types[tt]), T);
// if 1<<64 <= v { answer = 0 too }
gmove(&two64f, &f0);
gins(AFUCOMP, &f0, &f1);
gins(AFSTSW, N, &ax);
gins(ASAHF, N, N);
p2 = gbranch(optoas(OGT, types[tt]), T);
patch(p1, pc);
gins(AFMOVVP, &f0, t); // don't care about t, but will pop the stack
split64(t, &tlo, &thi);
gins(AMOVL, ncon(0), &tlo);
gins(AMOVL, ncon(0), &thi);
splitclean();
p1 = gbranch(AJMP, T);
patch(p2, pc);
// in range; algorithm is:
// if small enough, use native float64 -> int64 conversion.
// otherwise, subtract 2^63, convert, and add it back.
bignodes();
regalloc(&r1, types[ft], N);
regalloc(&r2, types[ft], N);
gins(optoas(OCMP, f->type), &bigf, &r1);
p1 = gbranch(optoas(OLE, f->type), T);
gins(a, &r1, &r2);
p2 = gbranch(AJMP, T);
patch(p1, pc);
gins(optoas(OAS, f->type), &bigf, &r3);
gins(optoas(OSUB, f->type), &r3, &r1);
gins(a, &r1, &r2);
gins(AMOVQ, &bigi, &r4);
gins(AXORQ, &r4, &r2);
// set round to zero mode during conversion
tempalloc(&t1, types[TUINT16]);
tempalloc(&t2, types[TUINT16]);
gins(AFSTCW, N, &t1);
gins(AMOVW, ncon(0xf7f), &t2);
gins(AFLDCW, &t2, N);
tempfree(&t2);
// actual work
gmove(&two63f, &f0);
gins(AFUCOMP, &f0, &f1);
gins(AFSTSW, N, &ax);
gins(ASAHF, N, N);
p2 = gbranch(optoas(OLE, types[tt]), T);
gins(AFMOVVP, &f0, t);
p3 = gbranch(AJMP, T);
patch(p2, pc);
gmove(&r2, t);
regfree(&r4);
regfree(&r3);
regfree(&r2);
regfree(&r1);
fatal("lazy");
gmove(&two63f, &f0);
gins(AFSUBDP, &f0, &f1);
gins(AFMOVVP, &f0, t);
split64(t, &tlo, &thi);
gins(AXORL, ncon(0x80000000), &thi); // + 2^63
patch(p3, pc);
patch(p1, pc);
splitclean();
// restore rounding mode
gins(AFLDCW, &t1, N);
tempfree(&t1);
return;
*/
/*
* integer to float
*
case CASE(TINT32, TFLOAT32):
a = ACVTSL2SS;
goto rdst;
case CASE(TINT32, TFLOAT64):
a = ACVTSL2SD;
goto rdst;
case CASE(TINT64, TFLOAT32):
a = ACVTSQ2SS;
goto rdst;
case CASE(TINT64, TFLOAT64):
a = ACVTSQ2SD;
goto rdst;
*/
case CASE(TINT16, TFLOAT32):
case CASE(TINT16, TFLOAT64):
case CASE(TINT32, TFLOAT32):
case CASE(TINT32, TFLOAT64):
case CASE(TINT64, TFLOAT32):
case CASE(TINT64, TFLOAT64):
if(t->op != OREGISTER)
goto hard;
if(f->op == OREGISTER) {
cvt = f->type;
goto hardmem;
}
switch(ft) {
case TINT16:
a = AFMOVW;
break;
case TINT32:
a = AFMOVL;
break;
default:
a = AFMOVV;
break;
}
break;
case CASE(TINT8, TFLOAT32):
case CASE(TINT8, TFLOAT64):
case CASE(TUINT16, TFLOAT32):
case CASE(TUINT16, TFLOAT64):
case CASE(TUINT8, TFLOAT32):
case CASE(TUINT8, TFLOAT64):
// convert via int32
// convert via int32 memory
cvt = types[TINT32];
goto hard;
goto hardmem;
case CASE(TUINT32, TFLOAT32):
case CASE(TUINT32, TFLOAT64):
// convert via int64.
// convert via int64 memory
cvt = types[TINT64];
goto hard;
goto hardmem;
case CASE(TUINT64, TFLOAT32):
case CASE(TUINT64, TFLOAT64):
// algorithm is:
// if small enough, use native int64 -> uint64 conversion.
// otherwise, halve (rounding to odd?), convert, and double.
a = ACVTSQ2SS;
if(tt == TFLOAT64)
a = ACVTSQ2SD;
nodconst(&zero, types[TUINT64], 0);
nodconst(&one, types[TUINT64], 1);
regalloc(&r1, f->type, f);
regalloc(&r2, t->type, t);
regalloc(&r3, f->type, N);
regalloc(&r4, f->type, N);
gmove(f, &r1);
gins(ACMPQ, &r1, &zero);
nodreg(&ax, types[TUINT32], D_AX);
nodreg(&dx, types[TUINT32], D_DX);
nodreg(&cx, types[TUINT32], D_CX);
tempalloc(&t1, f->type);
split64(&t1, &tlo, &thi);
gmove(f, &t1);
gins(ACMPL, &thi, ncon(0));
p1 = gbranch(AJLT, T);
gins(a, &r1, &r2);
// native
t1.type = types[TINT64];
gmove(&t1, t);
p2 = gbranch(AJMP, T);
// simulated
patch(p1, pc);
gmove(&r1, &r3);
gins(ASHRQ, &one, &r3);
gmove(&r1, &r4);
gins(AANDL, &one, &r4);
gins(AORQ, &r4, &r3);
gins(a, &r3, &r2);
gins(optoas(OADD, t->type), &r2, &r2);
gmove(&tlo, &ax);
gmove(&thi, &dx);
p1 = gins(ASHRL, ncon(1), &ax);
p1->from.index = D_DX; // double-width shift DX -> AX
p1->from.scale = 0;
gins(ASETCC, N, &cx);
gins(AORB, &cx, &ax);
gins(ASHRL, ncon(1), &dx);
gmove(&dx, &thi);
gmove(&ax, &tlo);
nodreg(&r1, types[tt], D_F0);
nodreg(&r2, types[tt], D_F0 + 1);
gmove(&t1, &r1); // t1.type is TINT64 now, set above
gins(AFMOVD, &r1, &r1);
gins(AFADDDP, &r1, &r2);
gmove(&r1, t);
patch(p2, pc);
gmove(&r2, t);
regfree(&r4);
regfree(&r3);
regfree(&r2);
regfree(&r1);
splitclean();
tempfree(&t1);
return;
*/
/*
* float to float
*/
case CASE(TFLOAT32, TFLOAT32):
a = AFMOVF;
break;
case CASE(TFLOAT64, TFLOAT64):
a = AFMOVD;
// The way the code generator uses floating-point
// registers, a move from F0 to F0 is intended as a no-op.
// On the x86, it's not: it pushes a second copy of F0
// on the floating point stack. So toss it away here.
// Also, F0 is the *only* register we ever evaluate
// into, so we should only see register/register as F0/F0.
if(f->op == OREGISTER && t->op == OREGISTER) {
if(f->val.u.reg != D_F0 || t->val.u.reg != D_F0)
goto fatal;
return;
}
if(ismem(f) && ismem(t))
goto hard;
a = AFMOVF;
if(ft == TFLOAT64)
a = AFMOVD;
if(ismem(t)) {
a = AFMOVFP;
if(ft == TFLOAT64)
a = AFMOVDP;
}
break;
/*
case CASE(TFLOAT32, TFLOAT64):
a = ACVTSS2SD;
goto rdst;
if(f->op == OREGISTER)
gins(AFMOVD, f, t);
else
gins(AFMOVF, f, t);
return;
case CASE(TFLOAT64, TFLOAT32):
a = ACVTSD2SS;
goto rdst;
*/
if(f->op == OREGISTER)
gins(AFMOVF, f, t);
else
gins(AFMOVD, f, t);
return;
}
gins(a, f, t);
@ -1377,6 +1606,18 @@ hard:
gmove(&r1, t);
regfree(&r1);
return;
hardmem:
// requires memory intermediate
tempalloc(&r1, cvt);
gmove(f, &r1);
gmove(&r1, t);
tempfree(&r1);
return;
fatal:
// should not happen
fatal("gmove %N -> %N", f, t);
}
int