1
0
mirror of https://github.com/golang/go synced 2024-11-20 11:04:56 -07:00

more work on divide by constant.

no real change -- disabled because of bugs

R=rsc
OCL=32923
CL=32923
This commit is contained in:
Ken Thompson 2009-08-07 20:21:04 -07:00
parent 4b90833803
commit 8b8a2bd945
3 changed files with 363 additions and 76 deletions

View File

@ -42,6 +42,23 @@ struct Prog
void* reg; // pointer to containing Reg struct
};
typedef struct Magic Magic;
struct Magic
{
int w; // input for both - width
int s; // output for both - shift
int bad; // output for both - unexpected failure
// magic multiplier for signed literal divisors
int64 sd; // input - literal divisor
int64 sm; // output - multiplier
// magic multiplier for unsigned literal divisors
uint64 ud; // input - literal divisor
uint64 um; // output - multiplier
int ua; // output - adder
};
EXTERN Biobuf* bout;
EXTERN int32 dynloc;
EXTERN uchar reg[D_NONE];
@ -127,6 +144,8 @@ void afunclit(Addr*);
void datagostring(Strlit*, Addr*);
int powtwo(Node*);
Type* tounsigned(Type*);
void smagic(Magic*);
void umagic(Magic*);
/*
* obj.c

View File

@ -525,6 +525,36 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
gmove(dx, res);
}
static void
savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
{
int r;
r = reg[dr];
nodreg(x, types[TINT64], dr);
regalloc(x, t, x);
// save current ax and dx if they are live
// and not the destination
memset(oldx, 0, sizeof *oldx);
if(r > 0 && !samereg(x, res)) {
regalloc(oldx, t, N);
gmove(x, oldx);
}
}
static void
restx(Node *x, Node *oldx)
{
regfree(x);
if(oldx->op != 0) {
gmove(oldx, x);
regfree(oldx);
}
}
/*
* generate division according to op, one of:
* res = nl / nr
@ -533,18 +563,20 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
void
cgen_div(int op, Node *nl, Node *nr, Node *res)
{
Node ax, dx, oldax, olddx, n1, n2, n3;
int rax, rdx, n, w;
Node ax, dx, cx, oldax, olddx, oldcx;
Node n1, n2, n3, savl, savr;
int n, w, s;
Magic m;
if(nl->ullman >= UINF) {
tempname(&n1, nl->type);
cgen(nl, &n1);
nl = &n1;
tempname(&savl, nl->type);
cgen(nl, &savl);
nl = &savl;
}
if(nr->ullman >= UINF) {
tempname(&n2, nr->type);
cgen(nr, &n2);
nr = &n2;
tempname(&savr, nr->type);
cgen(nr, &savr);
nr = &savr;
}
if(nr->op != OLITERAL)
@ -552,8 +584,14 @@ cgen_div(int op, Node *nl, Node *nr, Node *res)
// special cases of mod/div
// by a constant
n = powtwo(nr);
w = nl->type->width*8;
s = 0;
n = powtwo(nr);
if(n >= 1000) {
// negative power of 2
s = 1;
n -= 1000;
}
if(n+1 >= w) {
// just sign bit
@ -571,97 +609,132 @@ cgen_div(int op, Node *nl, Node *nr, Node *res)
switch(n) {
case 0:
// divide by 1
cgen(nl, res);
regalloc(&n1, nl->type, res);
cgen(nl, &n1);
if(s)
gins(optoas(OMINUS, nl->type), N, &n1);
gmove(&n1, res);
regfree(&n1);
return;
case 1:
// divide by 2
regalloc(&n1, nl->type, res);
cgen(nl, &n1);
if(issigned[nl->type->etype]) {
// develop -1 iff nl is negative
regalloc(&n2, nl->type, N);
gmove(&n1, &n2);
nodconst(&n3, nl->type, w-1);
gins(optoas(ORSH, nl->type), &n3, &n2);
gins(optoas(OSUB, nl->type), &n2, &n1);
regfree(&n2);
}
nodconst(&n2, nl->type, n);
gins(optoas(ORSH, nl->type), &n2, &n1);
gmove(&n1, res);
regfree(&n1);
return;
if(!issigned[nl->type->etype])
break;
// develop -1 iff nl is negative
regalloc(&n2, nl->type, N);
gmove(&n1, &n2);
nodconst(&n3, nl->type, w-1);
gins(optoas(ORSH, nl->type), &n3, &n2);
gins(optoas(OSUB, nl->type), &n2, &n1);
regfree(&n2);
break;
default:
regalloc(&n1, nl->type, res);
cgen(nl, &n1);
if(issigned[nl->type->etype]) {
// develop (2^k)-1 iff nl is negative
regalloc(&n2, nl->type, N);
gmove(&n1, &n2);
nodconst(&n3, nl->type, w-1);
gins(optoas(ORSH, nl->type), &n3, &n2);
nodconst(&n3, nl->type, w-n);
gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
gins(optoas(OADD, nl->type), &n2, &n1);
regfree(&n2);
}
nodconst(&n2, nl->type, n);
gins(optoas(ORSH, nl->type), &n2, &n1);
gmove(&n1, res);
regfree(&n1);
if(!issigned[nl->type->etype])
break;
// develop (2^k)-1 iff nl is negative
regalloc(&n2, nl->type, N);
gmove(&n1, &n2);
nodconst(&n3, nl->type, w-1);
gins(optoas(ORSH, nl->type), &n3, &n2);
nodconst(&n3, nl->type, w-n);
gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
gins(optoas(OADD, nl->type), &n2, &n1);
regfree(&n2);
break;
}
nodconst(&n2, nl->type, n);
gins(optoas(ORSH, nl->type), &n2, &n1);
if(s)
gins(optoas(OMINUS, nl->type), N, &n1);
gmove(&n1, res);
regfree(&n1);
return;
divbymul:
goto longdiv;
switch(simtype[nl->type->etype]) {
default:
goto longdiv;
case TINT32:
case TUINT16:
case TUINT32:
case TINT64:
case TUINT64:
m.w = w;
m.ud = mpgetfix(nr->val.u.xval);
umagic(&m);
if(m.bad)
break;
if(op == OMOD) {
// todo
break;
}
if(m.ua != 0) {
// todo fixup
break;
}
break;
case TINT16:
case TINT32:
case TINT64:
m.w = w;
m.sd = mpgetfix(nr->val.u.xval);
smagic(&m);
if(m.bad)
break;
if(op == OMOD) {
// todo
break;
}
if(m.sm < 0) {
// todo fixup
break;
}
savex(D_AX, &ax, &oldax, res, nl->type);
savex(D_DX, &dx, &olddx, res, nl->type);
savex(D_CX, &cx, &oldcx, res, nl->type);
regalloc(&n1, nl->type, N);
cgen(nl, &n1); // num -> reg(n1)
nodconst(&n2, nl->type, m.sm);
gmove(&n2, &ax); // const->ax
gins(optoas(OMUL, nl->type), &n1, N); // imul reg
nodconst(&n2, nl->type, m.s);
gins(optoas(ORSH, nl->type), &n2, &dx); // shift dx
nodconst(&n2, nl->type, w-1);
gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg
gins(optoas(OSUB, nl->type), &n1, &dx); // added
if(m.sd < 0)
gins(optoas(OMINUS, nl->type), N, &dx);
regfree(&n1);
gmove(&dx, res);
restx(&ax, &oldax);
restx(&dx, &olddx);
restx(&cx, &oldcx);
return;
}
// todo
goto longdiv;
longdiv:
rax = reg[D_AX];
rdx = reg[D_DX];
nodreg(&ax, types[TINT64], D_AX);
nodreg(&dx, types[TINT64], D_DX);
regalloc(&ax, nl->type, &ax);
regalloc(&dx, nl->type, &dx);
// save current ax and dx if they are live
// and not the destination
memset(&oldax, 0, sizeof oldax);
memset(&olddx, 0, sizeof olddx);
if(rax > 0 && !samereg(&ax, res)) {
regalloc(&oldax, nl->type, N);
gmove(&ax, &oldax);
}
if(rdx > 0 && !samereg(&dx, res)) {
regalloc(&olddx, nl->type, N);
gmove(&dx, &olddx);
}
savex(D_AX, &ax, &oldax, res, nl->type);
savex(D_DX, &dx, &olddx, res, nl->type);
dodiv(op, nl, nr, res, &ax, &dx);
regfree(&ax);
regfree(&dx);
if(oldax.op != 0) {
gmove(&oldax, &ax);
regfree(&oldax);
}
if(olddx.op != 0) {
gmove(&olddx, &dx);
regfree(&olddx);
}
restx(&ax, &oldax);
restx(&dx, &olddx);
}
/*

View File

@ -1881,6 +1881,17 @@ powtwo(Node *n)
b = b<<1;
}
if(!issigned[n->type->etype])
goto no;
v = -v;
b = 1ULL;
for(i=0; i<64; i++) {
if(b == v)
return i+1000;
b = b<<1;
}
no:
return -1;
}
@ -1894,6 +1905,7 @@ tounsigned(Type *t)
switch(t->etype) {
default:
print("tounsigned: unknown type %T\n", t);
t = T;
break;
case TINT:
t = types[TUINT];
@ -1913,3 +1925,186 @@ tounsigned(Type *t)
}
return t;
}
void
smagic(Magic *m)
{
int p;
uint64 ad, anc, delta, q1, r1, q2, r2, t, two31;
uint64 mask;
m->bad = 0;
switch(m->w) {
default:
m->bad = 1;
return;
case 8:
mask = 0xffLL;
break;
case 16:
mask = 0xffffLL;
break;
case 32:
mask = 0xffffffffLL;
break;
case 64:
mask = 0xffffffffffffffffLL;
break;
}
two31 = mask ^ (mask>>1);
p = m->w-1;
ad = m->sd;
if(m->sd < 0)
ad = -m->sd;
// bad denominators
if(ad == 0 || ad == 1 || ad == two31) {
m->bad = 1;
return;
}
t = two31;
ad &= mask;
anc = t - 1 - t%ad;
anc &= mask;
q1 = two31/anc;
r1 = two31 - q1*anc;
q1 &= mask;
r1 &= mask;
q2 = two31/ad;
r2 = two31 - q2*ad;
q2 &= mask;
r2 &= mask;
for(;;) {
p++;
q1 <<= 1;
r1 <<= 1;
q1 &= mask;
r1 &= mask;
if(r1 >= anc) {
q1++;
r1 -= anc;
q1 &= mask;
r1 &= mask;
}
q2 <<= 1;
r2 <<= 1;
q2 &= mask;
r2 &= mask;
if(r2 >= ad) {
q2++;
r2 -= ad;
q2 &= mask;
r2 &= mask;
}
delta = ad - r2;
delta &= mask;
if(q1 < delta || (q1 == delta && r1 == 0)) {
continue;
}
break;
}
m->sm = q2+1;
m->s = p-m->w;
}
void
umagic(Magic *m)
{
int p;
uint64 nc, delta, q1, r1, q2, r2, two31;
uint64 mask;
m->bad = 0;
m->ua = 0;
switch(m->w) {
default:
m->bad = 1;
return;
case 8:
mask = 0xffLL;
break;
case 16:
mask = 0xffffLL;
break;
case 32:
mask = 0xffffffffLL;
break;
case 64:
mask = 0xffffffffffffffffLL;
break;
}
two31 = mask ^ (mask>>1);
m->ud &= mask;
if(m->ud == 0 || m->ud == two31) {
m->bad = 1;
return;
}
nc = mask - (-m->ud&mask)%m->ud;
p = m->w-1;
q1 = two31/nc;
r1 = two31 - q1*nc;
q1 &= mask;
r1 &= mask;
q2 = (two31-1) / m->ud;
r2 = (two31-1) - q2*m->ud;
q2 &= mask;
r2 &= mask;
for(;;) {
p++;
if(r1 >= nc-r1) {
q1 <<= 1;
q1++;
r1 <<= 1;
r1 -= nc;
} else {
q1 <<= 1;
r1 <<= 1;
}
q1 &= mask;
r1 &= mask;
if(r2+1 >= m->ud-r2) {
if(q2 >= two31-1) {
m->ua = 1;
}
q2 <<= 1;
q2++;
r2 <<= 1;
r2++;
r2 -= m->ud;
} else {
if(q2 >= two31) {
m->ua = 1;
}
q2 <<= 1;
r2 <<= 1;
r2++;
}
q2 &= mask;
r2 &= mask;
delta = m->ud - 1 - r2;
delta &= mask;
if(p < m->w+m->w)
if(q1 < delta || (q1 == delta && r1 == 0)) {
continue;
}
break;
}
m->um = q2+1;
m->s = p-m->w;
}