From f50e7b156e5bb038cd979da87b8df8eb19ed056f Mon Sep 17 00:00:00 2001 From: Kai Backman Date: Fri, 21 Aug 2009 16:29:19 -0700 Subject: [PATCH] mostly 64 bit support. - fixed a number of places where we tried to allocate 64bit regs. added honeypot in regalloc to catch these in the future. - implemented quad copying in sgen - cgen64.c, add, mul - clearfat done - D_REGREG output from 5g (linker already knew about them) - gmove done - optoas almost done, last bit probably not needed - D_SHIFT support in list.c R=rsc APPROVED=rsc DELTA=963 (711 added, 112 deleted, 140 changed) OCL=33619 CL=33688 --- src/cmd/5g/Makefile | 13 +- src/cmd/5g/cgen.c | 177 ++++++++++------ src/cmd/5g/cgen64.c | 500 ++++++++++++++++++++++++++++++++++++++++++++ src/cmd/5g/gg.h | 10 +- src/cmd/5g/ggen.c | 25 ++- src/cmd/5g/gobj.c | 4 + src/cmd/5g/gsubr.c | 357 ++++++++++++++++--------------- src/cmd/5g/list.c | 13 ++ 8 files changed, 849 insertions(+), 250 deletions(-) create mode 100644 src/cmd/5g/cgen64.c diff --git a/src/cmd/5g/Makefile b/src/cmd/5g/Makefile index 736d7c10cc..2d4831ba9e 100644 --- a/src/cmd/5g/Makefile +++ b/src/cmd/5g/Makefile @@ -14,13 +14,14 @@ HFILES=\ opt.h\ OFILES=\ - list.$O\ - gobj.$O\ - galign.$O\ - ggen.$O\ - cgen.$O\ - gsubr.$O\ ../5l/enam.$O\ + list.$O\ + galign.$O\ + gobj.$O\ + ggen.$O\ + gsubr.$O\ + cgen.$O\ + cgen64.$O LIB=\ ../gc/gc.a$O diff --git a/src/cmd/5g/cgen.c b/src/cmd/5g/cgen.c index c988882559..b58d491ac9 100644 --- a/src/cmd/5g/cgen.c +++ b/src/cmd/5g/cgen.c @@ -4,6 +4,35 @@ #include "gg.h" +void +mgen(Node *n, Node *n1, Node *rg) +{ + n1->ostk = 0; + n1->op = OEMPTY; + + if(n->addable) { + *n1 = *n; + n1->ostk = 0; + if(n1->op == OREGISTER || n1->op == OINDREG) + reg[n->val.u.reg]++; + return; + } + if(n->type->width > widthptr) + tempalloc(n1, n->type); + else + regalloc(n1, n->type, rg); + cgen(n, n1); +} + +void +mfree(Node *n) +{ + if(n->ostk) + tempfree(n); + else if(n->op == OREGISTER) + regfree(n); +} + /* * generate: * res = n; @@ -124,22 +153,42 @@ cgen(Node *n, Node *res) goto ret; } - a = optoas(OAS, n->type); - if(sudoaddable(a, n, &addr, &w)) { - if(res->op == OREGISTER) { - p1 = gins(a, N, res); - p1->from = addr; - p1->reg = w; - } else { - regalloc(&n2, n->type, N); - p1 = gins(a, N, &n2); - p1->from = addr; - p1->reg = w; - gins(a, &n2, res); - regfree(&n2); + // 64-bit ops are hard on 32-bit machine. + if(is64(n->type) || is64(res->type) || n->left != N && is64(n->left->type)) { + print("64 bit op %O\n", n->op); + switch(n->op) { + // math goes to cgen64. + case OMINUS: + case OCOM: + case OADD: + case OSUB: + case OMUL: + case OLSH: + case ORSH: + case OAND: + case OOR: + case OXOR: + cgen64(n, res); + return; + } + } else { + a = optoas(OAS, n->type); + if(sudoaddable(a, n, &addr, &w)) { + if(res->op == OREGISTER) { + p1 = gins(a, N, res); + p1->from = addr; + p1->reg = w; + } else { + regalloc(&n2, n->type, N); + p1 = gins(a, N, &n2); + p1->from = addr; + p1->reg = w; + gins(a, &n2, res); + regfree(&n2); + } + sudoclean(); + goto ret; } - sudoclean(); - goto ret; } switch(n->op) { @@ -207,10 +256,13 @@ cgen(Node *n, Node *res) goto abop; case OCONV: - regalloc(&n1, nl->type, res); - cgen(nl, &n1); + if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) { + cgen(nl, res); + break; + } + mgen(nl, &n1, res); gmove(&n1, res); - regfree(&n1); + mfree(&n1); break; case ODOT: @@ -358,11 +410,8 @@ agen(Node *n, Node *res) dump("\nagen-res", res); dump("agen-r", n); } - if(n == N || n->type == T) - return; - - if(!isptr[res->type->etype]) - fatal("agen: not tptr: %T", res->type); + if(n == N || n->type == T || res == N || res->type == T) + fatal("agen"); while(n->op == OCONVNOP) n = n->left; @@ -828,7 +877,7 @@ sgen(Node *n, Node *res, int32 w) { Node dst, src, tmp, nend; int32 c, q, odst, osrc; - Prog *p; + Prog *p, *ploop; if(debug['g']) { print("\nsgen w=%d\n", w); @@ -848,6 +897,9 @@ sgen(Node *n, Node *res, int32 w) osrc = stkof(n); odst = stkof(res); + if(osrc % 4 != 0 || odst %4 != 0) + fatal("sgen: non word(4) aligned offset src %d or dst %d", osrc, odst); + regalloc(&dst, types[tptr], N); regalloc(&src, types[tptr], N); regalloc(&tmp, types[TUINT32], N); @@ -866,44 +918,54 @@ sgen(Node *n, Node *res, int32 w) // if we are copying forward on the stack and // the src and dst overlap, then reverse direction if(osrc < odst && odst < osrc+w) { - fatal("sgen reverse copy not implemented"); -// // reverse direction -// gins(ASTD, N, N); // set direction flag -// if(c > 0) { -// gconreg(AADDQ, w-1, D_SI); -// gconreg(AADDQ, w-1, D_DI); + if(c != 0) + fatal("sgen: reverse character copy not implemented"); + if(q >= 4) { + regalloc(&nend, types[TUINT32], N); + // set up end marker to 4 bytes before source + p = gins(AMOVW, &src, &nend); + p->from.type = D_CONST; + p->from.offset = -4; -// gconreg(AMOVQ, c, D_CX); -// gins(AREP, N, N); // repeat -// gins(AMOVSB, N, N); // MOVB *(SI)-,*(DI)- -// } + // move src and dest to the end of block + p = gins(AMOVW, &src, &src); + p->from.type = D_CONST; + p->from.offset = (q-1)*4; -// if(q > 0) { -// if(c > 0) { -// gconreg(AADDQ, -7, D_SI); -// gconreg(AADDQ, -7, D_DI); -// } else { -// gconreg(AADDQ, w-8, D_SI); -// gconreg(AADDQ, w-8, D_DI); -// } -// gconreg(AMOVQ, q, D_CX); -// gins(AREP, N, N); // repeat -// gins(AMOVSQ, N, N); // MOVQ *(SI)-,*(DI)- -// } -// // we leave with the flag clear -// gins(ACLD, N, N); + p = gins(AMOVW, &dst, &dst); + p->from.type = D_CONST; + p->from.offset = (q-1)*4; + + p = gins(AMOVW, &src, &tmp); + p->from.type = D_OREG; + p->from.offset = -4; + p->scond |= C_PBIT; + ploop = p; + + p = gins(AMOVW, &tmp, &dst); + p->to.type = D_OREG; + p->to.offset = -4; + p->scond |= C_PBIT; + + gins(ACMP, &src, &nend); + + patch(gbranch(ABNE, T), ploop); + + regfree(&nend); + } } else { // normal direction if(q >= 4) { regalloc(&nend, types[TUINT32], N); p = gins(AMOVW, &src, &nend); p->from.type = D_CONST; - p->from.offset = q; + p->from.offset = q*4; p = gins(AMOVW, &src, &tmp); p->from.type = D_OREG; p->from.offset = 4; p->scond |= C_PBIT; + ploop = p; p = gins(AMOVW, &tmp, &dst); p->to.type = D_OREG; @@ -911,9 +973,9 @@ sgen(Node *n, Node *res, int32 w) p->scond |= C_PBIT; gins(ACMP, &src, &nend); - fatal("sgen loop not implemented"); - p = gins(ABNE, N, N); - // TODO(PC offset) + + patch(gbranch(ABNE, T), ploop); + regfree(&nend); } else while(q > 0) { @@ -931,16 +993,7 @@ sgen(Node *n, Node *res, int32 w) } if (c != 0) - fatal("sgen character copy not implemented"); -// if(c >= 4) { - -// gins(AMOVSL, N, N); // MOVL *(SI)+,*(DI)+ -// c -= 4; -// } -// while(c > 0) { -// gins(AMOVSB, N, N); // MOVB *(SI)+,*(DI)+ -// c--; -// } + fatal("sgen: character copy not implemented"); } regfree(&dst); regfree(&src); diff --git a/src/cmd/5g/cgen64.c b/src/cmd/5g/cgen64.c new file mode 100644 index 0000000000..9499742d37 --- /dev/null +++ b/src/cmd/5g/cgen64.c @@ -0,0 +1,500 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "gg.h" + +/* + * attempt to generate 64-bit + * res = n + * return 1 on success, 0 if op not handled. + */ +void +cgen64(Node *n, Node *res) +{ + Node t1, t2, *l, *r; + Node lo1, lo2, hi1, hi2; + Node al, ah, bl, bh, cl, ch; //, s1, s2; + Prog *p1; + //, *p2; +// uint64 v; +// uint32 lv, hv; + + if(res->op != OINDREG && res->op != ONAME) { + dump("n", n); + dump("res", res); + fatal("cgen64 %O of %O", n->op, res->op); + } + switch(n->op) { + default: + fatal("cgen64 %O", n->op); + +// case OMINUS: +// cgen(n->left, res); +// split64(res, &lo1, &hi1); +// gins(ANEGL, N, &lo1); +// gins(AADCL, ncon(0), &hi1); +// gins(ANEGL, N, &hi1); +// splitclean(); +// return; + +// case OCOM: +// cgen(n->left, res); +// split64(res, &lo1, &hi1); +// gins(ANOTL, N, &lo1); +// gins(ANOTL, N, &hi1); +// splitclean(); +// return; + + case OADD: + case OSUB: + case OMUL: + case OLSH: + case ORSH: + case OAND: + case OOR: + case OXOR: + // binary operators. + // common setup below. + break; + } + + l = n->left; + r = n->right; + if(!l->addable) { + tempalloc(&t1, l->type); + cgen(l, &t1); + l = &t1; + } + if(r != N && !r->addable) { + tempalloc(&t2, r->type); + cgen(r, &t2); + r = &t2; + } + + // Setup for binary operation. + split64(l, &lo1, &hi1); + if(is64(r->type)) + split64(r, &lo2, &hi2); + + regalloc(&al, lo1.type, N); + regalloc(&ah, hi1.type, N); + // Do op. Leave result in ah:al. + switch(n->op) { + default: + fatal("cgen64: not implemented: %N\n", n); + + case OADD: + // TODO: Constants + regalloc(&bl, types[TPTR32], N); + regalloc(&bh, types[TPTR32], N); + gins(AMOVW, &hi1, &ah); + gins(AMOVW, &lo1, &al); + gins(AMOVW, &hi2, &bh); + gins(AMOVW, &lo2, &bl); + gins(AADD, &bl, &al); + gins(AADC, &bh, &ah); + regfree(&bl); + regfree(&bh); + break; + +// case OSUB: +// // TODO: Constants. +// gins(AMOVL, &lo1, &ax); +// gins(AMOVL, &hi1, &dx); +// gins(ASUBL, &lo2, &ax); +// gins(ASBBL, &hi2, &dx); +// break; + + case OMUL: + // TODO(kaib): this can be done with 4 regs and does not need 6 + regalloc(&bh, types[TPTR32], N); + regalloc(&bl, types[TPTR32], N); + regalloc(&ch, types[TPTR32], N); + regalloc(&cl, types[TPTR32], N); + + // load args into bh:bl and bh:bl. + gins(AMOVW, &hi1, &bh); + gins(AMOVW, &lo1, &bl); + gins(AMOVW, &hi2, &ch); + gins(AMOVW, &lo2, &cl); + + // bl * cl + p1 = gins(AMULLU, N, N); + p1->from.type = D_REG; + p1->from.reg = bl.val.u.reg; + p1->reg = cl.val.u.reg; + p1->to.type = D_REGREG; + p1->to.reg = al.val.u.reg; + p1->to.offset = ah.val.u.reg; +//print("%P\n", p1); + + // bl * ch + p1 = gins(AMULALU, N, N); + p1->from.type = D_REG; + p1->from.reg = ah.val.u.reg; + p1->reg = bl.val.u.reg; + p1->to.type = D_REGREG; + p1->to.reg = ch.val.u.reg; + p1->to.offset = ah.val.u.reg; +//print("%P\n", p1); + + // bh * cl + p1 = gins(AMULALU, N, N); + p1->from.type = D_REG; + p1->from.reg = ah.val.u.reg; + p1->reg = bh.val.u.reg; + p1->to.type = D_REGREG; + p1->to.reg = cl.val.u.reg; + p1->to.offset = ah.val.u.reg; +//print("%P\n", p1); + + regfree(&bh); + regfree(&bl); + regfree(&ch); + regfree(&cl); + + break; + +// case OLSH: + // TODO(kaib): optimize for OLITERAL +// regalloc(&s1, types[TPTR32], N); +// regalloc(&s2, types[TPTR32], N); + +// gins(AMOVW, &lo1, &al); +// gins(AMOVW, &hi1, &ah); +// if(is64(r->type)) { +// gins(AMOVW, &lo2, &s1); +// gins(AMOVW, &hi2, &s2); +// p1 = gins(AOR, &s2, &s1); +// p1->from.type = D_SHIFT; +// p1->from.offset = 5 << 7 | s2.val.u.reg; // s2<<7 +// p1->from.reg = NREG; +// } else +// gins(AMOVW, r, &s1 +// p1 = gins(AMOVW, &s1, &s2); +// p1->from.offset = -32; + +// // MOVW ah<from.offset = ah.val.u.reg | 1<<4 | s1.val.u.reg <<8; + + // OR al<from.offset = al.val.u.reg | 1<<4 | s2.val.u.reg << 8; + + // MOVW al<from.offset = al.val.u.reg | 1<<4 | s1.val.u.reg <<8; + +// regfree(&s1); +// regfree(&s2); +// break; + +// case ORSH: +// if(r->op == OLITERAL) { +// fatal("cgen64 ORSH, OLITERAL not implemented"); +// v = mpgetfix(r->val.u.xval); +// if(v >= 64) { +// if(is64(r->type)) +// splitclean(); +// splitclean(); +// split64(res, &lo2, &hi2); +// if(hi1.type->etype == TINT32) { +// gmove(&hi1, &lo2); +// gins(ASARL, ncon(31), &lo2); +// gmove(&hi1, &hi2); +// gins(ASARL, ncon(31), &hi2); +// } else { +// gins(AMOVL, ncon(0), &lo2); +// gins(AMOVL, ncon(0), &hi2); +// } +// splitclean(); +// goto out; +// } +// if(v >= 32) { +// if(is64(r->type)) +// splitclean(); +// split64(res, &lo2, &hi2); +// gmove(&hi1, &lo2); +// if(v > 32) +// gins(optoas(ORSH, hi1.type), ncon(v-32), &lo2); +// if(hi1.type->etype == TINT32) { +// gmove(&hi1, &hi2); +// gins(ASARL, ncon(31), &hi2); +// } else +// gins(AMOVL, ncon(0), &hi2); +// splitclean(); +// splitclean(); +// goto out; +// } + +// // general shift +// gins(AMOVL, &lo1, &ax); +// gins(AMOVL, &hi1, &dx); +// p1 = gins(ASHRL, ncon(v), &ax); +// p1->from.index = D_DX; // double-width shift +// p1->from.scale = 0; +// gins(optoas(ORSH, hi1.type), ncon(v), &dx); +// break; +// } +// fatal("cgen64 ORSH, !OLITERAL not implemented"); + +// // load value into DX:AX. +// gins(AMOVL, &lo1, &ax); +// gins(AMOVL, &hi1, &dx); + +// // load shift value into register. +// // if high bits are set, zero value. +// p1 = P; +// if(is64(r->type)) { +// gins(ACMPL, &hi2, ncon(0)); +// p1 = gbranch(AJNE, T); +// gins(AMOVL, &lo2, &cx); +// } else +// gins(AMOVL, r, &cx); + +// // if shift count is >=64, zero or sign-extend value +// gins(ACMPL, &cx, ncon(64)); +// p2 = gbranch(optoas(OLT, types[TUINT32]), T); +// if(p1 != P) +// patch(p1, pc); +// if(hi1.type->etype == TINT32) { +// gins(ASARL, ncon(31), &dx); +// gins(AMOVL, &dx, &ax); +// } else { +// gins(AXORL, &dx, &dx); +// gins(AXORL, &ax, &ax); +// } +// patch(p2, pc); + +// // if shift count is >= 32, sign-extend hi. +// gins(ACMPL, &cx, ncon(32)); +// p1 = gbranch(optoas(OLT, types[TUINT32]), T); +// gins(AMOVL, &dx, &ax); +// if(hi1.type->etype == TINT32) { +// gins(ASARL, &cx, &ax); // SARL only uses bottom 5 bits of count +// gins(ASARL, ncon(31), &dx); +// } else { +// gins(ASHRL, &cx, &ax); +// gins(AXORL, &dx, &dx); +// } +// p2 = gbranch(AJMP, T); +// patch(p1, pc); + +// // general shift +// p1 = gins(ASHRL, &cx, &ax); +// p1->from.index = D_DX; // double-width shift +// p1->from.scale = 0; +// gins(optoas(ORSH, hi1.type), &cx, &dx); +// patch(p2, pc); +// break; + +// case OXOR: +// case OAND: +// case OOR: +// // make constant the right side (it usually is anyway). +// if(lo1.op == OLITERAL) { +// nswap(&lo1, &lo2); +// nswap(&hi1, &hi2); +// } +// if(lo2.op == OLITERAL) { +// // special cases for constants. +// lv = mpgetfix(lo2.val.u.xval); +// hv = mpgetfix(hi2.val.u.xval); +// splitclean(); // right side +// split64(res, &lo2, &hi2); +// switch(n->op) { +// case OXOR: +// gmove(&lo1, &lo2); +// gmove(&hi1, &hi2); +// switch(lv) { +// case 0: +// break; +// case 0xffffffffu: +// gins(ANOTL, N, &lo2); +// break; +// default: +// gins(AXORL, ncon(lv), &lo2); +// break; +// } +// switch(hv) { +// case 0: +// break; +// case 0xffffffffu: +// gins(ANOTL, N, &hi2); +// break; +// default: +// gins(AXORL, ncon(hv), &hi2); +// break; +// } +// break; + +// case OAND: +// switch(lv) { +// case 0: +// gins(AMOVL, ncon(0), &lo2); +// break; +// default: +// gmove(&lo1, &lo2); +// if(lv != 0xffffffffu) +// gins(AANDL, ncon(lv), &lo2); +// break; +// } +// switch(hv) { +// case 0: +// gins(AMOVL, ncon(0), &hi2); +// break; +// default: +// gmove(&hi1, &hi2); +// if(hv != 0xffffffffu) +// gins(AANDL, ncon(hv), &hi2); +// break; +// } +// break; + +// case OOR: +// switch(lv) { +// case 0: +// gmove(&lo1, &lo2); +// break; +// case 0xffffffffu: +// gins(AMOVL, ncon(0xffffffffu), &lo2); +// break; +// default: +// gmove(&lo1, &lo2); +// gins(AORL, ncon(lv), &lo2); +// break; +// } +// switch(hv) { +// case 0: +// gmove(&hi1, &hi2); +// break; +// case 0xffffffffu: +// gins(AMOVL, ncon(0xffffffffu), &hi2); +// break; +// default: +// gmove(&hi1, &hi2); +// gins(AORL, ncon(hv), &hi2); +// break; +// } +// break; +// } +// splitclean(); +// splitclean(); +// goto out; +// } +// gins(AMOVL, &lo1, &ax); +// gins(AMOVL, &hi1, &dx); +// gins(optoas(n->op, lo1.type), &lo2, &ax); +// gins(optoas(n->op, lo1.type), &hi2, &dx); +// break; + } + if(is64(r->type)) + splitclean(); + splitclean(); + + split64(res, &lo1, &hi1); + gins(AMOVW, &al, &lo1); + gins(AMOVW, &ah, &hi1); + splitclean(); + +//out: + if(r == &t2) + tempfree(&t2); + if(l == &t1) + tempfree(&t1); + regfree(&al); + regfree(&ah); +} + +/* + * generate comparison of nl, nr, both 64-bit. + * nl is memory; nr is constant or memory. + */ +void +cmp64(Node *nl, Node *nr, int op, Prog *to) +{ + fatal("cmp64 not implemented"); +// Node lo1, hi1, lo2, hi2, rr; +// Prog *br; +// Type *t; + +// split64(nl, &lo1, &hi1); +// split64(nr, &lo2, &hi2); + +// // compare most significant word; +// // if they differ, we're done. +// t = hi1.type; +// if(nl->op == OLITERAL || nr->op == OLITERAL) +// gins(ACMPL, &hi1, &hi2); +// else { +// regalloc(&rr, types[TINT32], N); +// gins(AMOVL, &hi1, &rr); +// gins(ACMPL, &rr, &hi2); +// regfree(&rr); +// } +// br = P; +// switch(op) { +// default: +// fatal("cmp64 %O %T", op, t); +// case OEQ: +// // cmp hi +// // jne L +// // cmp lo +// // jeq to +// // L: +// br = gbranch(AJNE, T); +// break; +// case ONE: +// // cmp hi +// // jne to +// // cmp lo +// // jne to +// patch(gbranch(AJNE, T), to); +// break; +// case OGE: +// case OGT: +// // cmp hi +// // jgt to +// // jlt L +// // cmp lo +// // jge to (or jgt to) +// // L: +// patch(gbranch(optoas(OGT, t), T), to); +// br = gbranch(optoas(OLT, t), T); +// break; +// case OLE: +// case OLT: +// // cmp hi +// // jlt to +// // jgt L +// // cmp lo +// // jle to (or jlt to) +// // L: +// patch(gbranch(optoas(OLT, t), T), to); +// br = gbranch(optoas(OGT, t), T); +// break; +// } + +// // compare least significant word +// t = lo1.type; +// if(nl->op == OLITERAL || nr->op == OLITERAL) +// gins(ACMPL, &lo1, &lo2); +// else { +// regalloc(&rr, types[TINT32], N); +// gins(AMOVL, &lo1, &rr); +// gins(ACMPL, &rr, &lo2); +// regfree(&rr); +// } + +// // jump again +// patch(gbranch(optoas(op, t), T), to); + +// // point first branch down here if appropriate +// if(br != P) +// patch(br, pc); + +// splitclean(); +// splitclean(); +} diff --git a/src/cmd/5g/gg.h b/src/cmd/5g/gg.h index e40be11af5..ded072f34c 100644 --- a/src/cmd/5g/gg.h +++ b/src/cmd/5g/gg.h @@ -99,6 +99,12 @@ void raddr(Node *n, Prog *p); void naddr(Node*, Addr*); void cgen_aret(Node*, Node*); +/* + * cgen64.c + */ +void cmp64(Node*, Node*, int, Prog*); +void cgen64(Node*, Node*); + /* * gsubr.c */ @@ -124,14 +130,16 @@ void tempfree(Node*); Node* nodarg(Type*, int); void nodreg(Node*, Type*, int); void nodindreg(Node*, Type*, int); -void gconreg(int, vlong, int); void buildtxt(void); Plist* newplist(void); int isfat(Type*); +int dotaddable(Node*, Node*); void sudoclean(void); int sudoaddable(int, Node*, Addr*, int*); void afunclit(Addr*); void datagostring(Strlit*, Addr*); +void split64(Node*, Node*, Node*); +void splitclean(void); /* * obj.c diff --git a/src/cmd/5g/ggen.c b/src/cmd/5g/ggen.c index 6eeb17bfcd..4b4a5e6b6e 100644 --- a/src/cmd/5g/ggen.c +++ b/src/cmd/5g/ggen.c @@ -435,8 +435,8 @@ void clearfat(Node *nl) { uint32 w, c, q; - Node dst, nc, nz; - Prog *p; + Node dst, nc, nz, end; + Prog *p, *pl; /* clear a fat object */ if(debug['g']) @@ -453,10 +453,21 @@ clearfat(Node *nl) cgen(&nc, &nz); if(q >= 4) { - fatal("clearfat q >=4 not implemented"); -// gconreg(AMOVQ, q, D_CX); -// gins(AREP, N, N); // repeat -// gins(ASTOSQ, N, N); // STOQ AL,*(DI)+ + regalloc(&end, types[tptr], N); + p = gins(AMOVW, &dst, &end); + p->from.type = D_CONST; + p->from.offset = q*4; + + p = gins(AMOVW, &nz, &dst); + p->to.type = D_OREG; + p->to.offset = 4; + p->scond |= C_PBIT; + pl = p; + + gins(ACMP, &dst, &end); + patch(gbranch(ABNE, T), pl); + + regfree(&end); } else while(q > 0) { p = gins(AMOVW, &nz, &dst); @@ -468,7 +479,7 @@ clearfat(Node *nl) } while(c > 0) { - gins(AMOVBU, &nz, &dst); + p = gins(AMOVBU, &nz, &dst); p->to.type = D_OREG; p->to.offset = 1; p->scond |= C_PBIT; diff --git a/src/cmd/5g/gobj.c b/src/cmd/5g/gobj.c index 4606a0e380..3ab5dcae43 100644 --- a/src/cmd/5g/gobj.c +++ b/src/cmd/5g/gobj.c @@ -141,6 +141,10 @@ zaddr(Biobuf *b, Addr *a, int s) } break; + case D_REGREG: + Bputc(b, a->offset); + break; + case D_FCONST: fatal("zaddr D_FCONST not implemented"); //ieeedtod(&e, a->dval); diff --git a/src/cmd/5g/gsubr.c b/src/cmd/5g/gsubr.c index 3587703160..49997640d0 100644 --- a/src/cmd/5g/gsubr.c +++ b/src/cmd/5g/gsubr.c @@ -206,6 +206,8 @@ regalloc(Node *n, Type *t, Node *o) if(t == T) fatal("regalloc: t nil"); et = simtype[t->etype]; + if(is64(t)) + fatal("regalloc: 64 bit type %T"); switch(et) { case TINT8: @@ -214,10 +216,7 @@ regalloc(Node *n, Type *t, Node *o) case TUINT16: case TINT32: case TUINT32: - case TINT64: - case TUINT64: case TPTR32: - case TPTR64: case TBOOL: if(o != N && o->op == OREGISTER) { i = o->val.u.reg; @@ -385,17 +384,18 @@ fp: } /* - * generate - * as $c, reg + * return constant i node. + * overwritten by next call, but useful in calls to gins. */ -void -gconreg(int as, vlong c, int reg) +Node* +ncon(uint32 i) { - Node n1, n2; + static Node n; - nodconst(&n1, types[TINT32], c); - nodreg(&n2, types[TINT32], reg); - gins(as, &n1, &n2); + if(n.type == T) + nodconst(&n, types[TUINT32], 0); + mpmovecfix(n.val.u.xval, i); + return &n; } /* @@ -413,6 +413,76 @@ ismem(Node *n) return 0; } +Node sclean[10]; +int nsclean; + +/* + * n is a 64-bit value. fill in lo and hi to refer to its 32-bit halves. + */ +void +split64(Node *n, Node *lo, Node *hi) +{ + Node n1; + int64 i; + + if(!is64(n->type)) + fatal("split64 %T", n->type); + + sclean[nsclean].op = OEMPTY; + if(nsclean >= nelem(sclean)) + fatal("split64 clean"); + nsclean++; + switch(n->op) { + default: + if(!dotaddable(n, &n1)) { + igen(n, &n1, N); + sclean[nsclean-1] = n1; + } + n = &n1; + goto common; + case ONAME: + if(n->class == PPARAMREF) { + cgen(n->heapaddr, &n1); + sclean[nsclean-1] = n1; + // fall through. + n = &n1; + } + goto common; + case OINDREG: + common: + *lo = *n; + *hi = *n; + lo->type = types[TUINT32]; + if(n->type->etype == TINT64) + hi->type = types[TINT32]; + else + hi->type = types[TUINT32]; + hi->xoffset += 4; + break; + + case OLITERAL: + convconst(&n1, n->type, &n->val); + i = mpgetfix(n1.val.u.xval); + nodconst(lo, types[TUINT32], (uint32)i); + i >>= 32; + if(n->type->etype == TINT64) + nodconst(hi, types[TINT32], (int32)i); + else + nodconst(hi, types[TUINT32], (uint32)i); + break; + } +} + +void +splitclean(void) +{ + if(nsclean <= 0) + fatal("splitclean"); + nsclean--; + if(sclean[nsclean].op != OEMPTY) + regfree(&sclean[nsclean]); +} + #define CASE(a,b) (((a)<<16)|((b)<<0)) void @@ -420,9 +490,8 @@ gmove(Node *f, Node *t) { int a, ft, tt; Type *cvt; - Node r1, con; -// Node r1, r2, t1, t2, flo, fhi, tlo, thi, con, f0, f1, ax, dx, cx; -// Prog *p1, *p2, *p3; + Node r1, r2, flo, fhi, tlo, thi, con; + Prog *p1; if(debug['M']) print("gmove %N -> %N\n", f, t); @@ -433,9 +502,8 @@ gmove(Node *f, Node *t) // cannot have two integer memory operands; // except 64-bit, which always copies via registers anyway. - // TODO(kaib): re-enable check -// if(isint[ft] && isint[tt] && !is64(f->type) && !is64(t->type) && ismem(f) && ismem(t)) -// goto hard; + if(isint[ft] && isint[tt] && !is64(f->type) && !is64(t->type) && ismem(f) && ismem(t)) + goto hard; // convert constant to desired type if(f->op == OLITERAL) { @@ -471,7 +539,7 @@ gmove(Node *f, Node *t) ft = simsimtype(con.type); // constants can't move directly to memory - if(ismem(t)) goto hard; + if(ismem(t) && !is64(t->type)) goto hard; } // value -> value copy, only one memory operand. @@ -508,15 +576,13 @@ gmove(Node *f, Node *t) case CASE(TINT64, TINT8): // truncate low word case CASE(TUINT64, TINT8): + a = AMOVB; + goto trunc64; + case CASE(TINT64, TUINT8): case CASE(TUINT64, TUINT8): - fatal("gmove INT64,INT8 not implemented"); -// split64(f, &flo, &fhi); -// nodreg(&r1, t->type, D_AX); -// gins(AMOVB, &flo, &r1); -// gins(AMOVB, &r1, t); -// splitclean(); - return; + a = AMOVBU; + goto trunc64; case CASE(TINT16, TINT16): // same size case CASE(TUINT16, TINT16): @@ -534,15 +600,13 @@ gmove(Node *f, Node *t) case CASE(TINT64, TINT16): // truncate low word case CASE(TUINT64, TINT16): + a = AMOVH; + goto trunc64; + case CASE(TINT64, TUINT16): case CASE(TUINT64, TUINT16): - fatal("gmove INT64,INT16 not implemented"); -// split64(f, &flo, &fhi); -// nodreg(&r1, t->type, D_AX); -// gins(AMOVW, &flo, &r1); -// gins(AMOVW, &r1, t); -// splitclean(); - return; + a = AMOVHU; + goto trunc64; case CASE(TINT32, TINT32): // same size case CASE(TINT32, TUINT32): @@ -555,34 +619,35 @@ gmove(Node *f, Node *t) case CASE(TUINT64, TINT32): case CASE(TINT64, TUINT32): case CASE(TUINT64, TUINT32): - fatal("gmove INT64,INT32 not implemented"); -// split64(f, &flo, &fhi); -// nodreg(&r1, t->type, D_AX); -// gins(AMOVL, &flo, &r1); -// gins(AMOVL, &r1, t); -// splitclean(); + split64(f, &flo, &fhi); + regalloc(&r1, t->type, N); + gins(AMOVW, &flo, &r1); + gins(AMOVW, &r1, t); + regfree(&r1); + splitclean(); return; case CASE(TINT64, TINT64): // same size case CASE(TINT64, TUINT64): case CASE(TUINT64, TINT64): case CASE(TUINT64, TUINT64): - fatal("gmove INT64,INT64 not implemented"); -// split64(f, &flo, &fhi); -// split64(t, &tlo, &thi); -// if(f->op == OLITERAL) { -// gins(AMOVL, &flo, &tlo); -// gins(AMOVL, &fhi, &thi); -// } else { -// nodreg(&r1, t->type, D_AX); -// nodreg(&r2, t->type, D_DX); -// gins(AMOVL, &flo, &r1); -// gins(AMOVL, &fhi, &r2); -// gins(AMOVL, &r1, &tlo); -// gins(AMOVL, &r2, &thi); -// } -// splitclean(); -// splitclean(); + split64(f, &flo, &fhi); + split64(t, &tlo, &thi); + if(f->op == OLITERAL) { + gins(AMOVW, &flo, &tlo); + gins(AMOVW, &fhi, &thi); + } else { + regalloc(&r1, flo.type, N); + regalloc(&r2, fhi.type, N); + gins(AMOVW, &flo, &r1); + gins(AMOVW, &fhi, &r2); + gins(AMOVW, &r1, &tlo); + gins(AMOVW, &r2, &thi); + regfree(&r1); + regfree(&r2); + } + splitclean(); + splitclean(); return; /* @@ -594,10 +659,10 @@ gmove(Node *f, Node *t) case CASE(TINT8, TUINT32): a = AMOVB; goto rdst; -// case CASE(TINT8, TINT64): // convert via int32 -// case CASE(TINT8, TUINT64): -// cvt = types[TINT32]; -// goto hard; + case CASE(TINT8, TINT64): // convert via int32 + case CASE(TINT8, TUINT64): + cvt = types[TINT32]; + goto hard; case CASE(TUINT8, TINT16): // zero extend uint8 case CASE(TUINT8, TUINT16): @@ -605,49 +670,53 @@ gmove(Node *f, Node *t) case CASE(TUINT8, TUINT32): a = AMOVBU; goto rdst; -// case CASE(TUINT8, TINT64): // convert via uint32 -// case CASE(TUINT8, TUINT64): -// cvt = types[TUINT32]; -// goto hard; + case CASE(TUINT8, TINT64): // convert via uint32 + case CASE(TUINT8, TUINT64): + cvt = types[TUINT32]; + goto hard; case CASE(TINT16, TINT32): // sign extend int16 case CASE(TINT16, TUINT32): a = AMOVH; goto rdst; -// case CASE(TINT16, TINT64): // convert via int32 -// case CASE(TINT16, TUINT64): -// cvt = types[TINT32]; -// goto hard; + case CASE(TINT16, TINT64): // convert via int32 + case CASE(TINT16, TUINT64): + cvt = types[TINT32]; + goto hard; case CASE(TUINT16, TINT32): // zero extend uint16 case CASE(TUINT16, TUINT32): a = AMOVHU; goto rdst; -// case CASE(TUINT16, TINT64): // convert via uint32 -// case CASE(TUINT16, TUINT64): -// cvt = types[TUINT32]; -// goto hard; + case CASE(TUINT16, TINT64): // convert via uint32 + case CASE(TUINT16, TUINT64): + cvt = types[TUINT32]; + goto hard; case CASE(TINT32, TINT64): // sign extend int32 case CASE(TINT32, TUINT64): - fatal("gmove TINT32,INT64 not implemented"); -// split64(t, &tlo, &thi); -// nodreg(&flo, tlo.type, D_AX); -// nodreg(&fhi, thi.type, D_DX); -// gmove(f, &flo); -// gins(ACDQ, N, N); -// gins(AMOVL, &flo, &tlo); -// gins(AMOVL, &fhi, &thi); -// splitclean(); + split64(t, &tlo, &thi); + regalloc(&r1, tlo.type, N); + regalloc(&r2, thi.type, N); + gmove(f, &r1); + p1 = gins(AMOVW, &r1, &r2); + p1->from.type = D_SHIFT; + p1->from.offset = 2 << 5 | 31 << 7 | r1.val.u.reg; // r1->31 + p1->from.reg = NREG; +//print("gmove: %P\n", p1); + gins(AMOVW, &r1, &tlo); + gins(AMOVW, &r2, &thi); + regfree(&r1); + regfree(&r2); + splitclean(); return; case CASE(TUINT32, TINT64): // zero extend uint32 case CASE(TUINT32, TUINT64): - fatal("gmove TUINT32,INT64 not implemented"); -// split64(t, &tlo, &thi); -// gmove(f, &tlo); -// gins(AMOVL, ncon(0), &thi); -// splitclean(); + split64(t, &tlo, &thi); + gmove(f, &tlo); + gins(AMOVW, ncon(0), &thi); + splitclean(); return; /* @@ -742,18 +811,20 @@ rdst: hard: // requires register intermediate - regalloc(&r1, cvt, t); + regalloc(&r1, cvt, T); gmove(f, &r1); gmove(&r1, t); regfree(&r1); return; -//hardmem: - // requires memory intermediate - tempalloc(&r1, cvt); - gmove(f, &r1); - gmove(&r1, t); - tempfree(&r1); +trunc64: + // truncate 64 bit integer + split64(f, &flo, &fhi); + regalloc(&r1, t->type, N); + gins(a, &flo, &r1); + gins(a, &r1, t); + regfree(&r1); + splitclean(); return; fatal: @@ -1118,26 +1189,14 @@ optoas(int op, Type *t) case CASE(OCMP, TBOOL): case CASE(OCMP, TINT8): case CASE(OCMP, TUINT8): - a = ACMP; - break; - -// case CASE(OCMP, TINT16): -// case CASE(OCMP, TUINT16): -// a = ACMPW; -// break; - + case CASE(OCMP, TINT16): + case CASE(OCMP, TUINT16): case CASE(OCMP, TINT32): case CASE(OCMP, TUINT32): case CASE(OCMP, TPTR32): a = ACMP; break; -// case CASE(OCMP, TINT64): -// case CASE(OCMP, TUINT64): -// case CASE(OCMP, TPTR64): -// a = ACMPQ; -// break; - case CASE(OCMP, TFLOAT32): a = ACMPF; break; @@ -1163,12 +1222,6 @@ optoas(int op, Type *t) a = AMOVW; break; -// case CASE(OAS, TINT64): -// case CASE(OAS, TUINT64): -// case CASE(OAS, TPTR64): -// a = AMOVQ; -// break; - case CASE(OAS, TFLOAT32): a = AMOVF; break; @@ -1187,12 +1240,6 @@ optoas(int op, Type *t) a = AADD; break; -// case CASE(OADD, TINT64): -// case CASE(OADD, TUINT64): -// case CASE(OADD, TPTR64): -// a = AADDQ; -// break; - case CASE(OADD, TFLOAT32): a = AADDF; break; @@ -1211,12 +1258,6 @@ optoas(int op, Type *t) a = ASUB; break; -// case CASE(OSUB, TINT64): -// case CASE(OSUB, TUINT64): -// case CASE(OSUB, TPTR64): -// a = ASUBQ; -// break; - case CASE(OSUB, TFLOAT32): a = ASUBF; break; @@ -1235,12 +1276,6 @@ optoas(int op, Type *t) a = AAND; break; -// case CASE(OAND, TINT64): -// case CASE(OAND, TUINT64): -// case CASE(OAND, TPTR64): -// a = AANDQ; -// break; - case CASE(OOR, TINT8): case CASE(OOR, TUINT8): case CASE(OOR, TINT16): @@ -1251,12 +1286,6 @@ optoas(int op, Type *t) a = AORR; break; -// case CASE(OOR, TINT64): -// case CASE(OOR, TUINT64): -// case CASE(OOR, TPTR64): -// a = AORQ; -// break; - case CASE(OXOR, TINT8): case CASE(OXOR, TUINT8): case CASE(OXOR, TINT16): @@ -1267,12 +1296,6 @@ optoas(int op, Type *t) a = AEOR; break; -// case CASE(OXOR, TINT64): -// case CASE(OXOR, TUINT64): -// case CASE(OXOR, TPTR64): -// a = AXORQ; -// break; - case CASE(OLSH, TINT8): case CASE(OLSH, TUINT8): case CASE(OLSH, TINT16): @@ -1283,12 +1306,6 @@ optoas(int op, Type *t) a = ASLL; break; -// case CASE(OLSH, TINT64): -// case CASE(OLSH, TUINT64): -// case CASE(OLSH, TPTR64): -// a = ASHLQ; -// break; - case CASE(ORSH, TUINT8): case CASE(ORSH, TUINT16): case CASE(ORSH, TUINT32): @@ -1296,21 +1313,12 @@ optoas(int op, Type *t) a = ASRL; break; -// case CASE(ORSH, TUINT64): -// case CASE(ORSH, TPTR64): -// a = ASHRQ; -// break; - case CASE(ORSH, TINT8): case CASE(ORSH, TINT16): case CASE(ORSH, TINT32): a = ASRA; break; -// case CASE(ORSH, TINT64): -// a = ASARQ; -// break; - case CASE(OMUL, TUINT8): case CASE(OMUL, TUINT16): case CASE(OMUL, TUINT32): @@ -1324,12 +1332,6 @@ optoas(int op, Type *t) a = AMUL; break; -// case CASE(OMUL, TINT64): -// case CASE(OMUL, TUINT64): -// case CASE(OMUL, TPTR64): -// a = AIMULQ; -// break; - case CASE(OMUL, TFLOAT32): a = AMULF; break; @@ -1364,18 +1366,6 @@ optoas(int op, Type *t) a = AMOD; break; -// case CASE(ODIV, TINT64): -// case CASE(OMOD, TINT64): -// a = AIDIVQ; -// break; - -// case CASE(ODIV, TUINT64): -// case CASE(ODIV, TPTR64): -// case CASE(OMOD, TUINT64): -// case CASE(OMOD, TPTR64): -// a = ADIVQ; -// break; - // case CASE(OEXTEND, TINT16): // a = ACWD; // break; @@ -1419,6 +1409,25 @@ sudoclean(void) cleani -= 2; } +int +dotaddable(Node *n, Node *n1) +{ + int o, oary[10]; + Node *nn; + + if(n->op != ODOT) + return 0; + + o = dotoffset(n, oary, &nn); + if(nn != N && nn->addable && o == 1 && oary[0] >= 0) { + *n1 = *nn; + n1->type = n->type; + n1->xoffset += oary[0]; + return 1; + } + return 0; +} + /* * generate code to compute address of n, * a reference to a (perhaps nested) field inside diff --git a/src/cmd/5g/list.c b/src/cmd/5g/list.c index d1437733d2..a7dde13d49 100644 --- a/src/cmd/5g/list.c +++ b/src/cmd/5g/list.c @@ -77,8 +77,10 @@ int Dconv(Fmt *fp) { char str[100]; //, s[100]; + char *op; Addr *a; int i; + int32 v; // uint32 d1, d2; a = va_arg(fp->args, Addr*); @@ -111,6 +113,17 @@ Dconv(Fmt *fp) sprint(str, "$%d-%d", a->offset, a->offset2); break; + case D_SHIFT: + v = a->offset; + op = "<<>>->@>" + (((v>>5) & 3) << 1); + if(v & (1<<4)) + sprint(str, "R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15); + else + sprint(str, "R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31); + if(a->reg != NREG) + sprint(str+strlen(str), "(R%d)", a->reg); + break; + case D_FCONST: snprint(str, sizeof(str), "$(%.17e)", a->dval); break;