From 3aa006b8cd82dfbc5ac4eac03db6518c4821da4d Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 28 May 2009 15:48:47 -0700 Subject: [PATCH] better 64-bit handling in 8g. fewer moves, fewer stupid LEALs. powser1 runs (with evaln commented out). beginnings of floating point. R=ken OCL=29540 CL=29543 --- src/cmd/6g/gsubr.c | 2 +- src/cmd/8c/list.c | 24 +- src/cmd/8g/cgen.c | 321 +++++++++++----------- src/cmd/8g/gg.h | 12 + src/cmd/8g/ggen.c | 2 +- src/cmd/8g/gsubr.c | 657 +++++++++++++++++++++++++++++++++++---------- src/cmd/gc/const.c | 2 +- 7 files changed, 705 insertions(+), 315 deletions(-) diff --git a/src/cmd/6g/gsubr.c b/src/cmd/6g/gsubr.c index d8bd0767f0..f9b092039c 100644 --- a/src/cmd/6g/gsubr.c +++ b/src/cmd/6g/gsubr.c @@ -505,7 +505,7 @@ gmove(Node *f, Node *t) switch(CASE(ft, tt)) { default: - fatal("gmove %T -> %T", f, t); + fatal("gmove %lT -> %lT", f->type, t->type); /* * integer copy and truncate diff --git a/src/cmd/8c/list.c b/src/cmd/8c/list.c index ec5ac9d60f..c2ce5b2951 100644 --- a/src/cmd/8c/list.c +++ b/src/cmd/8c/list.c @@ -57,7 +57,7 @@ Bconv(Fmt *fp) if(str[0]) strcat(str, " "); if(var[i].sym == S) { - sprint(ss, "$%ld", var[i].offset); + sprint(ss, "$%d", var[i].offset); s = ss; } else s = var[i].sym->name; @@ -108,7 +108,7 @@ Dconv(Fmt *fp) i = a->type; if(i >= D_INDIR) { if(a->offset) - sprint(str, "%ld(%R)", a->offset, i-D_INDIR); + sprint(str, "%d(%R)", a->offset, i-D_INDIR); else sprint(str, "(%R)", i-D_INDIR); goto brk; @@ -117,7 +117,7 @@ Dconv(Fmt *fp) default: if(a->offset) - sprint(str, "$%ld,%R", a->offset, i); + sprint(str, "$%d,%R", a->offset, i); else sprint(str, "%R", i); break; @@ -127,35 +127,35 @@ Dconv(Fmt *fp) break; case D_BRANCH: - sprint(str, "%ld(PC)", a->offset-pc); + sprint(str, "%d(PC)", a->offset-pc); break; case D_EXTERN: - sprint(str, "%s+%ld(SB)", a->sym->name, a->offset); + sprint(str, "%s+%d(SB)", a->sym->name, a->offset); break; case D_STATIC: - sprint(str, "%s<>+%ld(SB)", a->sym->name, + sprint(str, "%s<>+%d(SB)", a->sym->name, a->offset); break; case D_AUTO: - sprint(str, "%s+%ld(SP)", a->sym->name, a->offset); + sprint(str, "%s+%d(SP)", a->sym->name, a->offset); break; case D_PARAM: if(a->sym) - sprint(str, "%s+%ld(FP)", a->sym->name, a->offset); + sprint(str, "%s+%d(FP)", a->sym->name, a->offset); else - sprint(str, "%ld(FP)", a->offset); + sprint(str, "%d(FP)", a->offset); break; case D_CONST: - sprint(str, "$%ld", a->offset); + sprint(str, "$%d", a->offset); break; case D_CONST2: - sprint(str, "$%ld-%ld", a->offset, a->offset2); + sprint(str, "$%d-%d", a->offset, a->offset2); break; case D_FCONST: @@ -185,7 +185,7 @@ conv: char* regstr[] = { - "AL", /*[D_AL]*/ + "AL", /*[D_AL]*/ "CL", "DL", "BL", diff --git a/src/cmd/8g/cgen.c b/src/cmd/8g/cgen.c index 609d900b04..faa81d3300 100644 --- a/src/cmd/8g/cgen.c +++ b/src/cmd/8g/cgen.c @@ -27,6 +27,42 @@ is64(Type *t) return 0; } +int +noconv(Type *t1, Type *t2) +{ + int e1, e2; + + e1 = simtype[t1->etype]; + e2 = simtype[t2->etype]; + + switch(e1) { + case TINT8: + case TUINT8: + return e2 == TINT8 || e2 == TUINT8; + + case TINT16: + case TUINT16: + return e2 == TINT16 || e2 == TUINT16; + + case TINT32: + case TUINT32: + case TPTR32: + return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32; + + case TINT64: + case TUINT64: + case TPTR64: + return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64; + + case TFLOAT32: + return e2 == TFLOAT32; + + case TFLOAT64: + return e2 == TFLOAT64; + } + return 0; +} + /* * generate: * res = n; @@ -38,7 +74,7 @@ is64(Type *t) void cgen(Node *n, Node *res) { - Node *nl, *nr, *r, n1, n2, rr; + Node *nl, *nr, *r, n1, n2, rr, f0, f1; Prog *p1, *p2, *p3; int a; @@ -65,13 +101,13 @@ cgen(Node *n, Node *res) sgen(n, res, n->type->width); return; } - + // if both are addressable, move if(n->addable && res->addable) { gmove(n, res); return; } - + // if both are not addressable, use a temporary. if(!n->addable && !res->addable) { tempalloc(&n1, n->type); @@ -96,7 +132,7 @@ cgen(Node *n, Node *res) // 64-bit ops are hard on 32-bit machine. if(is64(n->type) && cancgen64(n, res)) return; - + // use ullman to pick operand to eval first. nl = n->left; nr = n->right; @@ -112,12 +148,15 @@ cgen(Node *n, Node *res) return; } + if(isfloat[n->type->etype] && isfloat[nl->type->etype]) + goto flt; + switch(n->op) { default: dump("cgen", n); fatal("cgen %O", n->op); break; - + // these call bgen to get a bool value case OOROR: case OANDAND: @@ -162,7 +201,7 @@ cgen(Node *n, Node *res) goto abop; case OCONV: - if(eqtype(n->type, nl->type)) { + if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) { cgen(nl, res); break; } @@ -236,7 +275,7 @@ cgen(Node *n, Node *res) case OADDR: agen(nl, res); break; - + case OCALLMETH: cgen_callmeth(n, 0); cgen_callret(n, res); @@ -303,6 +342,29 @@ uop: // unary gmove(&n1, res); tempfree(&n1); return; + +flt: // floating-point. 387 (not SSE2) to interoperate with 6c + nodreg(&f0, n->type, D_F0); + nodreg(&f1, n->type, D_F0+1); + if(nl->ullman >= nr->ullman) { + cgen(nl, &f0); + if(nr->addable) + gins(foptoas(n->op, n->type, 0), nr, &f0); + else { + cgen(nr, &f0); + gins(foptoas(n->op, n->type, Fpop), &f0, &f1); + } + } else { + cgen(nr, &f0); + if(nl->addable) + gins(foptoas(n->op, n->type, Frev), nl, &f0); + else { + cgen(nl, &f0); + gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1); + } + } + gmove(&f0, res); + return; } /* @@ -334,21 +396,21 @@ agen(Node *n, Node *res) regfree(&n1); return; } - + // let's compute nl = n->left; nr = n->right; - + switch(n->op) { default: fatal("agen %O", n->op); - + case OCONV: if(!eqtype(n->type, nl->type)) fatal("agen: non-trivial OCONV"); agen(nl, res); break; - + case OCALLMETH: cgen_callmeth(n, 0); cgen_aret(n, res); @@ -506,11 +568,11 @@ agen(Node *n, Node *res) gins(optoas(OADD, types[tptr]), &n1, res); } break; - + case OIND: cgen(nl, res); break; - + case ODOT: t = nl->type; agen(nl, res); @@ -719,7 +781,7 @@ bgen(Node *n, int true, Prog *to) regfree(&n1); break; } - + if(is64(nr->type)) { if(!nl->addable) { tempalloc(&n1, nl->type); @@ -916,7 +978,8 @@ sgen(Node *n, Node *res, int w) static int cancgen64(Node *n, Node *res) { - Node adr1, adr2, t1, t2, r1, r2, r3, r4, r5, nod, *l, *r; + Node t1, t2, ax, dx, cx, ex, fx, zero, *l, *r; + Node lo1, lo2, hi1, hi2; Prog *p1, *p2; if(n->op == OCALL) @@ -936,14 +999,13 @@ cancgen64(Node *n, Node *res) return 1; case OMINUS: + nodconst(&zero, types[TINT32], 0); cgen(n->left, res); - gins(ANEGL, N, res); - res->xoffset += 4; - regalloc(&nod, types[TINT32], N); - gins(AXORL, &nod, &nod); - gins(ASBBL, res, &nod); - gins(AMOVL, &nod, res); - regfree(&nod); + split64(res, &lo1, &hi1); + gins(ANEGL, N, &lo1); + gins(AADCL, &zero, &hi1); + gins(ANEGL, N, &hi1); + splitclean(); return 1; case OADD: @@ -951,7 +1013,7 @@ cancgen64(Node *n, Node *res) case OMUL: break; } - + l = n->left; r = n->right; if(!l->addable) { @@ -963,97 +1025,73 @@ cancgen64(Node *n, Node *res) tempalloc(&t2, r->type); cgen(r, &t2); r = &t2; - } + } // Setup for binary operation. - tempalloc(&adr1, types[TPTR32]); - agen(l, &adr1); - tempalloc(&adr2, types[TPTR32]); - agen(r, &adr2); + split64(l, &lo1, &hi1); + split64(r, &lo2, &hi2); - nodreg(&r1, types[TPTR32], D_AX); - nodreg(&r2, types[TPTR32], D_DX); - nodreg(&r3, types[TPTR32], D_CX); + nodreg(&ax, types[TPTR32], D_AX); + nodreg(&cx, types[TPTR32], D_CX); + nodreg(&dx, types[TPTR32], D_DX); + // Do op. Leave result in DX:AX. switch(n->op) { case OADD: - case OSUB: - gmove(&adr1, &r3); - r3.op = OINDREG; - r3.xoffset = 0; - gins(AMOVL, &r3, &r1); - r3.xoffset = 4; - gins(AMOVL, &r3, &r2); - - r3.xoffset = 0; - r3.op = OREGISTER; - gmove(&adr2, &r3); - r3.op = OINDREG; - if(n->op == OADD) - gins(AADDL, &r3, &r1); - else - gins(ASUBL, &r3, &r1); - r3.xoffset = 4; - if(n->op == OADD) - gins(AADCL, &r3, &r2); - else - gins(ASBBL, &r3, &r2); + gins(AMOVL, &lo1, &ax); + gins(AMOVL, &hi1, &dx); + gins(AADDL, &lo2, &ax); + gins(AADCL, &hi2, &dx); break; - case OMUL: - regalloc(&r4, types[TPTR32], N); - regalloc(&r5, types[TPTR32], N); - - // load args into r2:r1 and r4:r3. - // leave result in r2:r1 (DX:AX) - gmove(&adr1, &r5); - r5.op = OINDREG; - r5.xoffset = 0; - gmove(&r5, &r1); - r5.xoffset = 4; - gmove(&r5, &r2); - r5.xoffset = 0; - r5.op = OREGISTER; - gmove(&adr2, &r5); - r5.op = OINDREG; - gmove(&r5, &r3); - r5.xoffset = 4; - gmove(&r5, &r4); - r5.xoffset = 0; - r5.op = OREGISTER; + case OSUB: + gins(AMOVL, &lo1, &ax); + gins(AMOVL, &hi1, &dx); + gins(ASUBL, &lo2, &ax); + gins(ASBBL, &hi2, &dx); + break; - // if r2|r4 == 0, use one 32 x 32 -> 64 unsigned multiply - gmove(&r2, &r5); - gins(AORL, &r4, &r5); + case OMUL: + // let's call the next two EX and FX. + regalloc(&ex, types[TPTR32], N); + regalloc(&fx, types[TPTR32], N); + + // load args into DX:AX and EX:CX. + gins(AMOVL, &lo1, &ax); + gins(AMOVL, &hi1, &dx); + gins(AMOVL, &lo2, &cx); + gins(AMOVL, &hi2, &ex); + + // if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply. + gins(AMOVL, &dx, &fx); + gins(AORL, &ex, &fx); p1 = gbranch(AJNE, T); - gins(AMULL, &r3, N); // AX (=r1) is implied + gins(AMULL, &cx, N); // implicit &ax p2 = gbranch(AJMP, T); patch(p1, pc); - - // full 64x64 -> 64, from 32 x 32 -> 64. - gins(AIMULL, &r3, &r2); - gins(AMOVL, &r1, &r5); - gins(AIMULL, &r4, &r5); - gins(AADDL, &r2, &r5); - gins(AMOVL, &r3, &r2); - gins(AMULL, &r2, N); // AX (=r1) is implied - gins(AADDL, &r5, &r2); - patch(p2, pc); - regfree(&r4); - regfree(&r5); - break; - - } - - tempfree(&adr2); - tempfree(&adr1); - // Store result. - gins(AMOVL, &r1, res); - res->xoffset += 4; - gins(AMOVL, &r2, res); - res->xoffset -= 4; - + // full 64x64 -> 64, from 32x32 -> 64. + gins(AIMULL, &cx, &dx); + gins(AMOVL, &ax, &fx); + gins(AIMULL, &ex, &fx); + gins(AADDL, &dx, &fx); + gins(AMOVL, &cx, &dx); + gins(AMULL, &dx, N); // implicit &ax + gins(AADDL, &fx, &dx); + patch(p2, pc); + + regfree(&ex); + regfree(&fx); + break; + } + splitclean(); + splitclean(); + + split64(res, &lo1, &hi1); + gins(AMOVL, &ax, &lo1); + gins(AMOVL, &dx, &hi1); + splitclean(); + if(r == &t2) tempfree(&t2); if(l == &t1) @@ -1068,47 +1106,23 @@ cancgen64(Node *n, Node *res) void cmp64(Node *nl, Node *nr, int op, Prog *to) { - int64 x; - Node adr1, adr2, rr; - Prog *br, *p; + Node lo1, hi1, lo2, hi2, rr; + Prog *br; Type *t; - - t = nr->type; - - memset(&adr1, 0, sizeof adr1); - memset(&adr2, 0, sizeof adr2); - regalloc(&adr1, types[TPTR32], N); - agen(nl, &adr1); - adr1.op = OINDREG; - nl = &adr1; - - x = 0; - if(nr->op == OLITERAL) { - if(!isconst(nr, CTINT)) - fatal("bad const in cmp64"); - x = mpgetfix(nr->val.u.xval); - } else { - regalloc(&adr2, types[TPTR32], N); - agen(nr, &adr2); - adr2.op = OINDREG; - nr = &adr2; - } - + split64(nl, &lo1, &hi1); + split64(nr, &lo2, &hi2); + // compare most significant word - nl->xoffset += 4; - if(nr->op == OLITERAL) { - p = gins(ACMPL, nl, nodintconst((uint32)(x>>32))); - } else { - regalloc(&rr, types[TUINT32], N); - nr->xoffset += 4; - gins(AMOVL, nr, &rr); - gins(ACMPL, nl, &rr); - nr->xoffset -= 4; + t = hi1.type; + if(nl->op == OLITERAL || nr->op == OLITERAL) + gins(ACMPL, &hi1, &hi2); + else { + regalloc(&rr, types[TINT32], N); + gins(AMOVL, &hi1, &rr); + gins(ACMPL, &rr, &hi2); regfree(&rr); } - nl->xoffset -= 4; - br = P; switch(op) { default: @@ -1149,39 +1163,28 @@ cmp64(Node *nl, Node *nr, int op, Prog *to) // L: patch(gbranch(optoas(OLT, t), T), to); br = gbranch(optoas(OGT, t), T); - break; + break; } // compare least significant word - if(nr->op == OLITERAL) { - p = gins(ACMPL, nl, nodintconst((uint32)x)); - } else { - regalloc(&rr, types[TUINT32], N); - gins(AMOVL, nr, &rr); - gins(ACMPL, nl, &rr); + t = lo1.type; + if(nl->op == OLITERAL || nr->op == OLITERAL) + gins(ACMPL, &lo1, &lo2); + else { + regalloc(&rr, types[TINT32], N); + gins(AMOVL, &lo1, &rr); + gins(ACMPL, &rr, &lo2); regfree(&rr); } // jump again - switch(op) { - default: - fatal("cmp64 %O %T", op, nr->type); - case OEQ: - case ONE: - case OGE: - case OGT: - case OLE: - case OLT: - patch(gbranch(optoas(op, t), T), to); - break; - } + patch(gbranch(optoas(op, t), T), to); // point first branch down here if appropriate if(br != P) patch(br, pc); - regfree(&adr1); - if(nr == &adr2) - regfree(&adr2); + splitclean(); + splitclean(); } diff --git a/src/cmd/8g/gg.h b/src/cmd/8g/gg.h index d7a9851f40..9943c2b607 100644 --- a/src/cmd/8g/gg.h +++ b/src/cmd/8g/gg.h @@ -43,6 +43,14 @@ struct Prog void* reg; // pointer to containing Reg struct }; +// foptoas flags +enum +{ + Frev = 1<<0, + Fpop = 1<<1, + Fpop2 = 1<<2, +}; + EXTERN Biobuf* bout; EXTERN int32 dynloc; EXTERN uchar reg[D_NONE]; @@ -114,6 +122,7 @@ Prog* gop(int, Node*, Node*, Node*); void setconst(Addr*, vlong); void setaddr(Addr*, Node*); int optoas(int, Type*); +int foptoas(int, Type*, int); void ginit(void); void gclean(void); void regalloc(Node*, Type*, Node*); @@ -131,7 +140,10 @@ Plist* newplist(void); int isfat(Type*); void sudoclean(void); int sudoaddable(int, Node*, Addr*); +int dotaddable(Node*, Node*); void afunclit(Addr*); +void split64(Node*, Node*, Node*); +void splitclean(void); /* * list.c diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c index 47f21bcb6d..249c9fe8c7 100644 --- a/src/cmd/8g/ggen.c +++ b/src/cmd/8g/ggen.c @@ -468,7 +468,7 @@ cgen_asop(Node *n) hard: if(nr->ullman > nl->ullman) { - regalloc(&n2, nr->type, N); + tempalloc(&n2, nr->type); cgen(nr, &n2); igen(nl, &n1, N); } else { diff --git a/src/cmd/8g/gsubr.c b/src/cmd/8g/gsubr.c index 99c2b8af12..6e82890d33 100755 --- a/src/cmd/8g/gsubr.c +++ b/src/cmd/8g/gsubr.c @@ -571,6 +571,81 @@ optoas(int op, Type *t) return a; } +#define FCASE(a, b, c) (((a)<<16)|((b)<<8)|(c)) +int +foptoas(int op, Type *t, int flg) +{ + int et; + + et = t->etype; + + // clear Frev if unneeded + switch(op) { + case OADD: + case OMUL: + flg &= ~Frev; + break; + } + + switch(FCASE(op, et, flg)) { + case FCASE(OADD, TFLOAT32, 0): + return AFADDF; + case FCASE(OADD, TFLOAT64, 0): + return AFADDD; + case FCASE(OADD, TFLOAT64, Fpop): + return AFADDDP; + + case FCASE(OSUB, TFLOAT32, 0): + return AFSUBF; + case FCASE(OSUB, TFLOAT32, Frev): + return AFSUBRF; + + case FCASE(OSUB, TFLOAT64, 0): + return AFSUBD; + case FCASE(OSUB, TFLOAT64, Frev): + return AFSUBRD; + case FCASE(OSUB, TFLOAT64, Fpop): + return AFSUBDP; + case FCASE(OSUB, TFLOAT64, Fpop|Frev): + return AFSUBRDP; + + case FCASE(OMUL, TFLOAT32, 0): + return AFMULF; + case FCASE(OMUL, TFLOAT64, 0): + return AFMULD; + case FCASE(OMUL, TFLOAT64, Fpop): + return AFMULDP; + + case FCASE(ODIV, TFLOAT32, 0): + return AFDIVF; + case FCASE(ODIV, TFLOAT32, Frev): + return AFDIVRF; + + case FCASE(ODIV, TFLOAT64, 0): + return AFDIVD; + case FCASE(ODIV, TFLOAT64, Frev): + return AFDIVRD; + case FCASE(ODIV, TFLOAT64, Fpop): + return AFDIVDP; + case FCASE(ODIV, TFLOAT64, Fpop|Frev): + return AFDIVRDP; + + case FCASE(OCMP, TFLOAT32, 0): + return AFCOMF; + case FCASE(OCMP, TFLOAT32, Fpop): + return AFCOMFP; + case FCASE(OCMP, TFLOAT64, 0): + return AFCOMD; + case FCASE(OCMP, TFLOAT64, Fpop): + return AFCOMDP; + case FCASE(OCMP, TFLOAT64, Fpop2): + return AFCOMDPP; + } + + fatal("foptoas %O %T %#x", op, t, flg); + return 0; +} + static int resvd[] = { // D_DI, // for movstring @@ -600,6 +675,8 @@ ginit(void) reg[resvd[i]]++; } +ulong regpc[D_NONE]; + void gclean(void) { @@ -610,14 +687,12 @@ gclean(void) for(i=D_AX; i<=D_DI; i++) if(reg[i]) - yyerror("reg %R left allocated\n", i); + yyerror("reg %R left allocated at %lux\n", i, regpc[i]); for(i=D_F0; i<=D_F7; i++) if(reg[i]) yyerror("reg %R left allocated\n", i); } -ulong regpc[D_NONE]; - /* * allocate register of type t, leave in n. * if o != N, o is desired fixed register. @@ -681,7 +756,7 @@ err: out: if(reg[i] == 0) { - regpc[i] = getcallerpc(&n); + regpc[i] = (ulong)__builtin_return_address(0); if(i == D_AX || i == D_CX || i == D_DX || i == D_SP) { dump("regalloc-o", o); fatal("regalloc %R", i); @@ -837,168 +912,445 @@ gconreg(int as, vlong c, int reg) gins(as, &n1, &n2); } + /* - * generate move: - * t = f - * f may be in memory, - * t is known to be a 32-bit register. + * Is this node a memory operand? */ -void -gload(Node *f, Node *t) +int +ismem(Node *n) { - int a, ft; - - ft = simtype[f->type->etype]; - - switch(ft) { - default: - fatal("gload %T", f->type); - case TINT8: - a = AMOVBLSX; - if(isconst(f, CTINT) || isconst(f, CTBOOL)) - a = AMOVL; - break; - case TBOOL: - case TUINT8: - a = AMOVBLZX; - if(isconst(f, CTINT) || isconst(f, CTBOOL)) - a = AMOVL; - break; - case TINT16: - a = AMOVWLSX; - if(isconst(f, CTINT) || isconst(f, CTBOOL)) - a = AMOVL; - break; - case TUINT16: - a = AMOVWLZX; - if(isconst(f, CTINT)) - a = AMOVL; - break; - case TINT32: - case TUINT32: - case TPTR32: - a = AMOVL; - break; - case TINT64: - case TUINT64: - a = AMOVL; // truncating - break; + switch(n->op) { + case OINDREG: + case ONAME: + case OPARAM: + return 1; } - - gins(a, f, t); + return 0; } +Node sclean[10]; +int nsclean; + /* - * generate move: - * t = f - * f is known to be a 32-bit register. - * t may be in memory. + * n is a 64-bit value. fill in lo and hi to refer to its 32-bit halves. */ void -gstore(Node *f, Node *t) +split64(Node *n, Node *lo, Node *hi) { - int a, ft, tt; - Node nod, adr; + Node n1; + int64 i; - ft = simtype[f->type->etype]; - tt = simtype[t->type->etype]; + if(!is64(n->type)) + fatal("split64 %T", n->type); - switch(tt) { + sclean[nsclean].op = OEMPTY; + if(nsclean >= nelem(sclean)) + fatal("split64 clean"); + nsclean++; + switch(n->op) { default: - fatal("gstore %T", t->type); - case TINT8: - case TBOOL: - case TUINT8: - a = AMOVB; - break; - case TINT16: - case TUINT16: - a = AMOVW; - break; - case TINT32: - case TUINT32: - case TPTR32: - a = AMOVL; - break; - case TINT64: - case TUINT64: - if(t->op == OREGISTER) - fatal("gstore %T %O", t->type, t->op); - memset(&adr, 0, sizeof adr); - igen(t, &adr, N); - t = &adr; - t->xoffset += 4; - switch(ft) { - default: - fatal("gstore %T -> %T", f, t); - break; - case TINT32: - nodreg(&nod, types[TINT32], D_AX); - gins(AMOVL, f, &nod); - gins(ACDQ, N, N); - nodreg(&nod, types[TINT32], D_DX); - gins(AMOVL, &nod, t); - break; - case TUINT32: - gins(AMOVL, nodintconst(0), t); - break; + if(!dotaddable(n, &n1)) { + igen(n, &n1, N); + sclean[nsclean-1] = n1; } - t->xoffset -= 4; - a = AMOVL; - } + n = &n1; + // fall through + case ONAME: + case OINDREG: + *lo = *n; + *hi = *n; + lo->type = types[TUINT32]; + if(n->type->etype == TINT64) + hi->type = types[TINT32]; + else + hi->type = types[TUINT32]; + hi->xoffset += 4; + break; - gins(a, f, t); - if(t == &adr) - regfree(&adr); + case OLITERAL: + convconst(&n1, n->type, &n->val); + i = mpgetfix(n1.val.u.xval); + nodconst(lo, types[TUINT32], (uint32)i); + i >>= 32; + if(n->type->etype == TINT64) + nodconst(hi, types[TINT32], (int32)i); + else + nodconst(hi, types[TUINT32], (uint32)i); + break; + } +} + +void +splitclean(void) +{ + if(nsclean <= 0) + fatal("splitclean"); + nsclean--; + if(sclean[nsclean].op != OEMPTY) + regfree(&sclean[nsclean]); } void gmove(Node *f, Node *t) { - int ft, tt, t64, a; - Node nod; - - ft = simtype[f->type->etype]; - tt = simtype[t->type->etype]; - - a = AGOK; - - t64 = 0; - if(tt == TINT64 || tt == TUINT64 || tt == TPTR64) - t64 = 1; + int a, ft, tt; + Type *cvt; + Node r1, r2, flo, fhi, tlo, thi, con; if(debug['M']) - print("gop: %O %O[%E],%O[%E]\n", OAS, - f->op, ft, t->op, tt); - if(isfloat[ft] && f->op == OCONST) { - fatal("fp"); - /* TO DO: pick up special constants, possibly preloaded */ - //F - /* - if(mpgetflt(f->val.u.fval) == 0.0) { - regalloc(&nod, t->type, t); - gins(AXORPD, &nod, &nod); - gmove(&nod, t); - regfree(&nod); - return; + print("gmove %N -> %N\n", f, t); + + ft = simsimtype(f->type); + tt = simsimtype(t->type); + cvt = t->type; + + // cannot have two memory operands; + // except 64-bit, which always copies via registers anyway. + if(ismem(f) && ismem(t) && !is64(f->type) && !is64(t->type)) + goto hard; + + // convert constant to desired type + if(f->op == OLITERAL) { + convconst(&con, t->type, &f->val); + f = &con; + ft = tt; // so big switch will choose a simple mov + + // some constants can't move directly to memory. + if(ismem(t)) { + // float constants come from memory. + if(isfloat[tt]) + goto hard; } + } + + // value -> value copy, only one memory operand. + // figure out the instruction to use. + // break out of switch for one-instruction gins. + // goto rdst for "destination must be register". + // goto hard for "convert to cvt type first". + // otherwise handle and return. + + switch(CASE(ft, tt)) { + default: + fatal("gmove %N -> %N", f, t); + + /* + * integer copy and truncate + */ + case CASE(TINT8, TINT8): // same size + case CASE(TINT8, TUINT8): + case CASE(TUINT8, TINT8): + case CASE(TUINT8, TUINT8): + case CASE(TINT16, TINT8): // truncate + case CASE(TUINT16, TINT8): + case CASE(TINT32, TINT8): + case CASE(TUINT32, TINT8): +// case CASE(TINT64, TINT8): +// case CASE(TUINT64, TINT8): + case CASE(TINT16, TUINT8): + case CASE(TUINT16, TUINT8): + case CASE(TINT32, TUINT8): + case CASE(TUINT32, TUINT8): +// case CASE(TINT64, TUINT8): +// case CASE(TUINT64, TUINT8): + a = AMOVB; + break; + + case CASE(TINT16, TINT16): // same size + case CASE(TINT16, TUINT16): + case CASE(TUINT16, TINT16): + case CASE(TUINT16, TUINT16): + case CASE(TINT32, TINT16): // truncate + case CASE(TUINT32, TINT16): +// case CASE(TINT64, TINT16): +// case CASE(TUINT64, TINT16): + case CASE(TINT32, TUINT16): + case CASE(TUINT32, TUINT16): +// case CASE(TINT64, TUINT16): +// case CASE(TUINT64, TUINT16): + a = AMOVW; + break; + + case CASE(TINT32, TINT32): // same size + case CASE(TINT32, TUINT32): + case CASE(TUINT32, TINT32): + case CASE(TUINT32, TUINT32): +// case CASE(TINT64, TINT32): // truncate +// case CASE(TUINT64, TINT32): +// case CASE(TINT64, TUINT32): +// case CASE(TUINT64, TUINT32): + a = AMOVL; + break; + + case CASE(TINT64, TINT64): // same size + case CASE(TINT64, TUINT64): + case CASE(TUINT64, TINT64): + case CASE(TUINT64, TUINT64): + split64(f, &flo, &fhi); + split64(t, &tlo, &thi); + if(f->op == OLITERAL) { + gins(AMOVL, &flo, &tlo); + gins(AMOVL, &fhi, &thi); + } else { + regalloc(&r1, types[TUINT32], N); + regalloc(&r2, types[TUINT32], N); + gins(AMOVL, &flo, &r1); + gins(AMOVL, &fhi, &r2); + gins(AMOVL, &r1, &tlo); + gins(AMOVL, &r2, &thi); + regfree(&r2); + regfree(&r1); + } + splitclean(); + splitclean(); + return; + + /* + * integer up-conversions + */ + case CASE(TINT8, TINT16): // sign extend int8 + case CASE(TINT8, TUINT16): + a = AMOVBWSX; + goto rdst; + case CASE(TINT8, TINT32): + case CASE(TINT8, TUINT32): + a = AMOVBLSX; + goto rdst; +// case CASE(TINT8, TINT64): +// case CASE(TINT8, TUINT64): +// a = AMOVBQSX; +// goto rdst; + + case CASE(TUINT8, TINT16): // zero extend uint8 + case CASE(TUINT8, TUINT16): + a = AMOVBWZX; + goto rdst; + case CASE(TUINT8, TINT32): + case CASE(TUINT8, TUINT32): + a = AMOVBLZX; + goto rdst; +// case CASE(TUINT8, TINT64): +// case CASE(TUINT8, TUINT64): +// a = AMOVBQZX; +// goto rdst; + + case CASE(TINT16, TINT32): // sign extend int16 + case CASE(TINT16, TUINT32): + a = AMOVWLSX; + goto rdst; +// case CASE(TINT16, TINT64): +// case CASE(TINT16, TUINT64): +// a = AMOVWQSX; +// goto rdst; + + case CASE(TUINT16, TINT32): // zero extend uint16 + case CASE(TUINT16, TUINT32): + a = AMOVWLZX; + goto rdst; +// case CASE(TUINT16, TINT64): +// case CASE(TUINT16, TUINT64): +// a = AMOVWQZX; +// goto rdst; + + case CASE(TINT32, TINT64): // sign extend int32 + case CASE(TINT32, TUINT64): + split64(t, &tlo, &thi); + nodreg(&flo, tlo.type, D_AX); + nodreg(&fhi, thi.type, D_DX); + gmove(f, &flo); + gins(ACDQ, N, N); + gins(AMOVL, &flo, &tlo); + gins(AMOVL, &fhi, &thi); + return; + +// case CASE(TUINT32, TINT64): // zero extend uint32 +// case CASE(TUINT32, TUINT64): +// // AMOVL into a register zeros the top of the register, +// // so this is not always necessary, but if we rely on AMOVL +// // the optimizer is almost certain to screw with us. +// a = AMOVLQZX; +// goto rdst; + + /* + * float to integer + * + case CASE(TFLOAT32, TINT16): + case CASE(TFLOAT32, TINT32): + case CASE(TFLOAT32, TINT64): + case CASE(TFLOAT64, TINT16): + case CASE(TFLOAT64, TINT32): + case CASE(TFLOAT64, TINT64): + if(ft == TFLOAT32) + gins(AFMOVF, f, &f0); + else + gins(AFMOVD, f, &f0); + if(tt == TINT16) + gins(AFMOVWP, &f0, t); + else if(tt == TINT32) + gins(AFMOVLP, &f0, t); + else + gins(AFMOVVP, &f0, t); + return; + + case CASE(TFLOAT32, TINT8): + case CASE(TFLOAT32, TUINT16): + case CASE(TFLOAT32, TUINT8): + case CASE(TFLOAT64, TINT8): + case CASE(TFLOAT64, TUINT16): + case CASE(TFLOAT64, TUINT8): + // convert via int32. + cvt = types[TINT32]; + goto hard; + + case CASE(TFLOAT32, TUINT32): + case CASE(TFLOAT64, TUINT32): + // could potentially convert via int64. + cvt = types[TINT64]; + goto hard; + + case CASE(TFLOAT32, TUINT64): + case CASE(TFLOAT64, TUINT64): + if(ft == TFLOAT32) + gins(AFMOVF, f, &f0); + else + gins(AFMOVD, f, &f0); + // algorithm is: + // if small enough, use native float64 -> int64 conversion. + // otherwise, subtract 2^63, convert, and add it back. + bignodes(); + regalloc(&r1, types[ft], N); + regalloc(&r2, types[ft], N); + gins(optoas(OCMP, f->type), &bigf, &r1); + p1 = gbranch(optoas(OLE, f->type), T); + gins(a, &r1, &r2); + p2 = gbranch(AJMP, T); + patch(p1, pc); + gins(optoas(OAS, f->type), &bigf, &r3); + gins(optoas(OSUB, f->type), &r3, &r1); + gins(a, &r1, &r2); + gins(AMOVQ, &bigi, &r4); + gins(AXORQ, &r4, &r2); + patch(p2, pc); + gmove(&r2, t); + regfree(&r4); + regfree(&r3); + regfree(&r2); + regfree(&r1); + fatal("lazy"); + return; + */ + /* + * integer to float + * + case CASE(TINT32, TFLOAT32): + a = ACVTSL2SS; + goto rdst; + + + case CASE(TINT32, TFLOAT64): + a = ACVTSL2SD; + goto rdst; + + case CASE(TINT64, TFLOAT32): + a = ACVTSQ2SS; + goto rdst; + + case CASE(TINT64, TFLOAT64): + a = ACVTSQ2SD; + goto rdst; + + case CASE(TINT16, TFLOAT32): + case CASE(TINT16, TFLOAT64): + case CASE(TINT8, TFLOAT32): + case CASE(TINT8, TFLOAT64): + case CASE(TUINT16, TFLOAT32): + case CASE(TUINT16, TFLOAT64): + case CASE(TUINT8, TFLOAT32): + case CASE(TUINT8, TFLOAT64): + // convert via int32 + cvt = types[TINT32]; + goto hard; + + case CASE(TUINT32, TFLOAT32): + case CASE(TUINT32, TFLOAT64): + // convert via int64. + cvt = types[TINT64]; + goto hard; + + case CASE(TUINT64, TFLOAT32): + case CASE(TUINT64, TFLOAT64): + // algorithm is: + // if small enough, use native int64 -> uint64 conversion. + // otherwise, halve (rounding to odd?), convert, and double. + a = ACVTSQ2SS; + if(tt == TFLOAT64) + a = ACVTSQ2SD; + nodconst(&zero, types[TUINT64], 0); + nodconst(&one, types[TUINT64], 1); + regalloc(&r1, f->type, f); + regalloc(&r2, t->type, t); + regalloc(&r3, f->type, N); + regalloc(&r4, f->type, N); + gmove(f, &r1); + gins(ACMPQ, &r1, &zero); + p1 = gbranch(AJLT, T); + gins(a, &r1, &r2); + p2 = gbranch(AJMP, T); + patch(p1, pc); + gmove(&r1, &r3); + gins(ASHRQ, &one, &r3); + gmove(&r1, &r4); + gins(AANDL, &one, &r4); + gins(AORQ, &r4, &r3); + gins(a, &r3, &r2); + gins(optoas(OADD, t->type), &r2, &r2); + patch(p2, pc); + gmove(&r2, t); + regfree(&r4); + regfree(&r3); + regfree(&r2); + regfree(&r1); + return; + */ + /* + * float to float + * + case CASE(TFLOAT32, TFLOAT32): + a = AMOVSS; + break; + + case CASE(TFLOAT64, TFLOAT64): + a = AMOVSD; + break; + + case CASE(TFLOAT32, TFLOAT64): + a = ACVTSS2SD; + goto rdst; + + case CASE(TFLOAT64, TFLOAT32): + a = ACVTSD2SS; + goto rdst; */ } - if(is64(types[ft]) && isconst(f, CTINT)) { - f->type = types[TINT32]; // XXX check constant value, choose correct type - ft = TINT32; - } + gins(a, f, t); + return; - if(is64(types[ft]) && is64(types[tt])) { - sgen(f, t, 8); - return; - } +rdst: + // requires register destination + regalloc(&r1, t->type, t); + gins(a, f, &r1); + gmove(&r1, t); + regfree(&r1); + return; - regalloc(&nod, types[TINT32], t); - gload(f, &nod); - gstore(&nod, t); - regfree(&nod); +hard: + // requires register intermediate + regalloc(&r1, cvt, t); + gmove(f, &r1); + gmove(&r1, t); + regfree(&r1); + return; } int @@ -1025,9 +1377,13 @@ gins(int as, Node *f, Node *t) { Prog *p; - // generating AMOVL BX, BX is just dumb. - if(f != N && t != N && samaddr(f, t) && as == AMOVL) - return nil; + switch(as) { + case AMOVB: + case AMOVW: + case AMOVL: + if(f != N && t != N && samaddr(f, t)) + return nil; + } p = prog(as); if(f != N) @@ -1173,6 +1529,25 @@ naddr(Node *n, Addr *a) } } +int +dotaddable(Node *n, Node *n1) +{ + int o, oary[10]; + Node *nn; + + if(n->op != ODOT) + return 0; + + o = dotoffset(n, oary, &nn); + if(nn != N && nn->addable && o == 1 && oary[0] >= 0) { + *n1 = *nn; + n1->type = n->type; + n1->xoffset += oary[0]; + return 1; + } + return 0; +} + void sudoclean(void) { diff --git a/src/cmd/gc/const.c b/src/cmd/gc/const.c index 72cf684adf..d672ec9f09 100644 --- a/src/cmd/gc/const.c +++ b/src/cmd/gc/const.c @@ -857,7 +857,7 @@ convconst(Node *con, Type *t, Val *val) con->val.u.xval = mal(sizeof *con->val.u.xval); switch(val->ctype) { default: - fatal("convconst ctype=%d %lT", val->ctype, t->type); + fatal("convconst ctype=%d %lT", val->ctype, t); case CTINT: i = mpgetfix(val->u.xval); break;