// Inferno utils/5c/reg.c // http://code.google.com/p/inferno-os/source/browse/utils/5g/reg.c // // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) // Portions Copyright © 1997-1999 Vita Nuova Limited // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) // Portions Copyright © 2004,2006 Bruce Ellis // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others // Portions Copyright © 2009 The Go Authors. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include #include #include "gg.h" #include "opt.h" #define NREGVAR 24 #define REGBITS ((uint32)0xffffff) #define P2R(p) (Reg*)(p->reg) void addsplits(void); int noreturn(Prog *p); static int first = 0; static void fixjmp(Prog*); Reg* rega(void) { Reg *r; r = freer; if(r == R) { r = mal(sizeof(*r)); } else freer = r->link; *r = zreg; return r; } int rcmp(const void *a1, const void *a2) { Rgn *p1, *p2; int c1, c2; p1 = (Rgn*)a1; p2 = (Rgn*)a2; c1 = p2->cost; c2 = p1->cost; if(c1 -= c2) return c1; return p2->varno - p1->varno; } static void setoutvar(void) { Type *t; Node *n; Addr a; Iter save; Bits bit; int z; t = structfirst(&save, getoutarg(curfn->type)); while(t != T) { n = nodarg(t, 1); a = zprog.from; naddr(n, &a, 0); bit = mkvar(R, &a); for(z=0; zprog; p->as = ANOP; p->scond = zprog.scond; p->from = zprog.from; p->to = zprog.to; p->reg = zprog.reg; } static void setaddrs(Bits bit) { int i, n; Var *v; Node *node; while(bany(&bit)) { // convert each bit to a variable i = bnum(bit); node = var[i].node; n = var[i].name; bit.b[i/32] &= ~(1L<<(i%32)); // disable all pieces of that variable for(i=0; inode == node && v->name == n) v->addr = 2; } } } static char* regname[] = { ".R0", ".R1", ".R2", ".R3", ".R4", ".R5", ".R6", ".R7", ".R8", ".R9", ".R10", ".R11", ".R12", ".R13", ".R14", ".R15", ".F0", ".F1", ".F2", ".F3", ".F4", ".F5", ".F6", ".F7", }; void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; int i, z, nr; uint32 vreg; Bits bit; if(first == 0) { fmtinstall('Q', Qconv); } fixjmp(firstp); first++; if(debug['K']) { if(first != 13) return; // debug['R'] = 2; // debug['P'] = 2; print("optimizing %S\n", curfn->nname->sym); } // count instructions nr = 0; for(p=firstp; p!=P; p=p->link) nr++; // if too big dont bother if(nr >= 10000) { // print("********** %S is too big (%d)\n", curfn->nname->sym, nr); return; } r1 = R; firstr = R; lastr = R; /* * control flow is more complicated in generated go code * than in generated c code. define pseudo-variables for * registers, so we have complete register usage information. */ nvar = NREGVAR; memset(var, 0, NREGVAR*sizeof var[0]); for(i=0; ilink) { switch(p->as) { case ADATA: case AGLOBL: case ANAME: case ASIGNAME: continue; } r = rega(); nr++; if(firstr == R) { firstr = r; lastr = r; } else { lastr->link = r; r->p1 = lastr; lastr->s1 = r; lastr = r; } r->prog = p; p->regp = r; r1 = r->p1; if(r1 != R) { switch(r1->prog->as) { case ARET: case AB: case ARFE: r->p1 = R; r1->s1 = R; } } /* * left side always read */ bit = mkvar(r, &p->from); for(z=0; zuse1.b[z] |= bit.b[z]; /* * middle always read when present */ if(p->reg != NREG) { if(p->from.type != D_FREG) r->use1.b[0] |= RtoB(p->reg); else r->use1.b[0] |= FtoB(p->reg); } /* * right side depends on opcode */ bit = mkvar(r, &p->to); if(bany(&bit)) switch(p->as) { default: yyerror("reg: unknown op: %A", p->as); break; /* * right side read */ case ATST: case ATEQ: case ACMP: case ACMN: case ACMPD: case ACMPF: rightread: for(z=0; zuse2.b[z] |= bit.b[z]; break; /* * right side read or read+write, depending on middle * ADD x, z => z += x * ADD x, y, z => z = x + y */ case AADD: case AAND: case AEOR: case ASUB: case ARSB: case AADC: case ASBC: case ARSC: case AORR: case ABIC: case ASLL: case ASRL: case ASRA: case AMUL: case AMULU: case ADIV: case AMOD: case AMODU: case ADIVU: if(p->reg != NREG) goto rightread; // fall through /* * right side read+write */ case AADDF: case AADDD: case ASUBF: case ASUBD: case AMULF: case AMULD: case ADIVF: case ADIVD: case AMULA: case AMULAL: case AMULALU: for(z=0; zuse2.b[z] |= bit.b[z]; r->set.b[z] |= bit.b[z]; } break; /* * right side write */ case ANOP: case AMOVB: case AMOVBU: case AMOVD: case AMOVDF: case AMOVDW: case AMOVF: case AMOVFW: case AMOVH: case AMOVHU: case AMOVW: case AMOVWD: case AMOVWF: case AMVN: case AMULL: case AMULLU: if((p->scond & C_SCOND) != C_SCOND_NONE) for(z=0; zuse2.b[z] |= bit.b[z]; for(z=0; zset.b[z] |= bit.b[z]; break; /* * funny */ case ABL: setaddrs(bit); break; } if(p->as == AMOVM) { z = p->to.offset; if(p->from.type == D_CONST) z = p->from.offset; for(i=0; z; i++) { if(z&1) regbits |= RtoB(i); z >>= 1; } } } if(firstr == R) return; for(i=0; iaddr) { bit = blsh(i); for(z=0; zaddr, v->etype, v->width, v->sym, v->offset); } /* * pass 2 * turn branch references to pointers * build back pointers */ for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->to.type == D_BRANCH) { if(p->to.branch == P) fatal("pnil %P", p); r1 = p->to.branch->regp; if(r1 == R) fatal("rnil %P", p); if(r1 == r) { //fatal("ref to self %P", p); continue; } r->s2 = r1; r->p2link = r1->p2; r1->p2 = r; } } if(debug['R']) { p = firstr->prog; print("\n%L %D\n", p->lineno, &p->from); print(" addr = %Q\n", addrs); } /* * pass 2.5 * find looping structure */ for(r = firstr; r != R; r = r->link) r->active = 0; change = 0; loopit(firstr, nr); /* * pass 3 * iterate propagating usage * back until flow graph is complete */ loop1: change = 0; for(r = firstr; r != R; r = r->link) r->active = 0; for(r = firstr; r != R; r = r->link) if(r->prog->as == ARET) prop(r, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; for(r = firstr; r != R; r = r1) { r1 = r->link; if(r1 && r1->active && !r->active) { prop(r, zbits, zbits); i = 1; } } if(i) goto loop11; if(change) goto loop1; /* * pass 4 * iterate propagating register/variable synchrony * forward until graph is complete */ loop2: change = 0; for(r = firstr; r != R; r = r->link) r->active = 0; synch(firstr, zbits); if(change) goto loop2; addsplits(); if(debug['R'] > 1) { print("\nprop structure:\n"); for(r = firstr; r != R; r = r->link) { print("%d:%P", r->loop, r->prog); for(z=0; zset.b[z] | r->refahead.b[z] | r->calahead.b[z] | r->refbehind.b[z] | r->calbehind.b[z] | r->use1.b[z] | r->use2.b[z]; bit.b[z] &= ~addrs.b[z]; } if(bany(&bit)) { print("\t"); if(bany(&r->use1)) print(" u1=%Q", r->use1); if(bany(&r->use2)) print(" u2=%Q", r->use2); if(bany(&r->set)) print(" st=%Q", r->set); if(bany(&r->refahead)) print(" ra=%Q", r->refahead); if(bany(&r->calahead)) print(" ca=%Q", r->calahead); if(bany(&r->refbehind)) print(" rb=%Q", r->refbehind); if(bany(&r->calbehind)) print(" cb=%Q", r->calbehind); } print("\n"); } } /* * pass 4.5 * move register pseudo-variables into regu. */ for(r = firstr; r != R; r = r->link) { r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->set.b[0] &= ~REGBITS; r->use1.b[0] &= ~REGBITS; r->use2.b[0] &= ~REGBITS; r->refbehind.b[0] &= ~REGBITS; r->refahead.b[0] &= ~REGBITS; r->calbehind.b[0] &= ~REGBITS; r->calahead.b[0] &= ~REGBITS; r->regdiff.b[0] &= ~REGBITS; r->act.b[0] &= ~REGBITS; } /* * pass 5 * isolate regions * calculate costs (paint1) */ r = firstr; if(r) { for(z=0; zrefahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); if(bany(&bit) & !r->refset) { // should never happen - all variables are preset if(debug['w']) print("%L: used and not set: %Q\n", r->prog->lineno, bit); r->refset = 1; } } for(r = firstr; r != R; r = r->link) r->act = zbits; rgp = region; nregion = 0; for(r = firstr; r != R; r = r->link) { for(z=0; zset.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); if(bany(&bit) && !r->refset) { if(debug['w']) print("%L: set and not used: %Q\n", r->prog->lineno, bit); r->refset = 1; excise(r); } for(z=0; zact.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); rgp->enter = r; rgp->varno = i; change = 0; if(debug['R'] > 1) print("\n"); paint1(r, i); bit.b[i/32] &= ~(1L<<(i%32)); if(change <= 0) { if(debug['R']) print("%L $%d: %Q\n", r->prog->lineno, change, blsh(i)); continue; } rgp->cost = change; nregion++; if(nregion >= NRGN) { if(debug['R'] > 1) print("too many regions\n"); goto brk; } rgp++; } } brk: qsort(region, nregion, sizeof(region[0]), rcmp); /* * pass 6 * determine used registers (paint2) * replace code (paint3) */ rgp = region; for(i=0; ivarno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); if(debug['R']) { if(rgp->regno >= NREG) print("%L $%d F%d: %Q\n", rgp->enter->prog->lineno, rgp->cost, rgp->regno-NREG, bit); else print("%L $%d R%d: %Q\n", rgp->enter->prog->lineno, rgp->cost, rgp->regno, bit); } if(rgp->regno != 0) paint3(rgp->enter, rgp->varno, vreg, rgp->regno); rgp++; } /* * pass 7 * peep-hole on basic block */ if(!debug['R'] || debug['P']) { peep(); } /* * last pass * eliminate nops * free aux structures * adjust the stack pointer * MOVW.W R1,-12(R13) <<- start * MOVW R0,R1 * MOVW R1,8(R13) * MOVW $0,R1 * MOVW R1,4(R13) * BL ,runtime.newproc+0(SB) * MOVW &ft+-32(SP),R7 <<- adjust * MOVW &j+-40(SP),R6 <<- adjust * MOVW autotmp_0003+-24(SP),R5 <<- adjust * MOVW $12(R13),R13 <<- finish */ vreg = 0; for(p = firstp; p != P; p = p->link) { while(p->link != P && p->link->as == ANOP) p->link = p->link->link; if(p->to.type == D_BRANCH) while(p->to.branch != P && p->to.branch->as == ANOP) p->to.branch = p->to.branch->link; if(p->as == AMOVW && p->to.reg == 13) { if(p->scond & C_WBIT) { vreg = -p->to.offset; // in adjust region // print("%P adjusting %d\n", p, vreg); continue; } if(p->from.type == D_CONST && p->to.type == D_REG) { if(p->from.offset != vreg) print("in and out different\n"); // print("%P finish %d\n", p, vreg); vreg = 0; // done adjust region continue; } // print("%P %d %d from type\n", p, p->from.type, D_CONST); // print("%P %d %d to type\n\n", p, p->to.type, D_REG); } if(p->as == AMOVW && vreg != 0) { if(p->from.sym != S) if(p->from.name == D_AUTO || p->from.name == D_PARAM) { p->from.offset += vreg; // print("%P adjusting from %d %d\n", p, vreg, p->from.type); } if(p->to.sym != S) if(p->to.name == D_AUTO || p->to.name == D_PARAM) { p->to.offset += vreg; // print("%P adjusting to %d %d\n", p, vreg, p->from.type); } } } if(r1 != R) { r1->link = freer; freer = firstr; } } void addsplits(void) { Reg *r, *r1; int z, i; Bits bit; for(r = firstr; r != R; r = r->link) { if(r->loop > 1) continue; if(r->prog->as == ABL) continue; for(r1 = r->p2; r1 != R; r1 = r1->p2link) { if(r1->loop <= 1) continue; for(z=0; zcalbehind.b[z] & (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & ~(r->calahead.b[z] & addrs.b[z]); while(bany(&bit)) { i = bnum(bit); bit.b[i/32] &= ~(1L << (i%32)); } } } } /* * add mov b,rn * just after r */ void addmove(Reg *r, int bn, int rn, int f) { Prog *p, *p1, *p2; Adr *a; Var *v; p1 = mal(sizeof(*p1)); *p1 = zprog; p = r->prog; // If there's a stack fixup coming (after BL newproc or BL deferproc), // delay the load until after the fixup. p2 = p->link; if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG) p = p2; p1->link = p->link; p->link = p1; p1->lineno = p->lineno; v = var + bn; a = &p1->to; a->name = v->name; a->node = v->node; a->sym = v->node->sym; a->offset = v->offset; a->etype = v->etype; a->type = D_OREG; if(a->etype == TARRAY || a->sym == S) a->type = D_CONST; if(v->addr) fatal("addmove: shouldnt be doing this %A\n", a); switch(v->etype) { default: print("What is this %E\n", v->etype); case TINT8: p1->as = AMOVB; break; case TBOOL: case TUINT8: //print("movbu %E %d %S\n", v->etype, bn, v->sym); p1->as = AMOVBU; break; case TINT16: p1->as = AMOVH; break; case TUINT16: p1->as = AMOVHU; break; case TINT32: case TUINT32: case TPTR32: p1->as = AMOVW; break; case TFLOAT32: p1->as = AMOVF; break; case TFLOAT64: p1->as = AMOVD; break; } p1->from.type = D_REG; p1->from.reg = rn; if(rn >= NREG) { p1->from.type = D_FREG; p1->from.reg = rn-NREG; } if(!f) { p1->from = *a; *a = zprog.from; a->type = D_REG; a->reg = rn; if(rn >= NREG) { a->type = D_FREG; a->reg = rn-NREG; } if(v->etype == TUINT8 || v->etype == TBOOL) p1->as = AMOVBU; if(v->etype == TUINT16) p1->as = AMOVHU; } if(debug['R']) print("%P\t.a%P\n", p, p1); } static int overlap(int32 o1, int w1, int32 o2, int w2) { int32 t1, t2; t1 = o1+w1; t2 = o2+w2; if(!(t1 > o2 && t2 > o1)) return 0; return 1; } Bits mkvar(Reg *r, Adr *a) { Var *v; int i, t, n, et, z, w, flag; int32 o; Bits bit; Node *node; // mark registers used t = a->type; flag = 0; switch(t) { default: print("type %d %d %D\n", t, a->name, a); goto none; case D_NONE: case D_FCONST: case D_BRANCH: break; case D_CONST: flag = 1; goto onereg; case D_REGREG: bit = zbits; if(a->offset != NREG) bit.b[0] |= RtoB(a->offset); if(a->reg != NREG) bit.b[0] |= RtoB(a->reg); return bit; case D_REG: case D_SHIFT: onereg: if(a->reg != NREG) { bit = zbits; bit.b[0] = RtoB(a->reg); return bit; } break; case D_OREG: if(a->reg != NREG) { if(a == &r->prog->from) r->use1.b[0] |= RtoB(a->reg); else r->use2.b[0] |= RtoB(a->reg); if(r->prog->scond & (C_PBIT|C_WBIT)) r->set.b[0] |= RtoB(a->reg); } break; case D_FREG: if(a->reg != NREG) { bit = zbits; bit.b[0] = FtoB(a->reg); return bit; } break; } switch(a->name) { default: goto none; case D_EXTERN: case D_STATIC: case D_AUTO: case D_PARAM: n = a->name; break; } node = a->node; if(node == N || node->op != ONAME || node->orig == N) goto none; node = node->orig; if(node->orig != node) fatal("%D: bad node", a); if(node->sym == S || node->sym->name[0] == '.') goto none; et = a->etype; o = a->offset; w = a->width; for(i=0; inode == node && v->name == n) { if(v->offset == o) if(v->etype == et) if(v->width == w) if(!flag) return blsh(i); // if they overlap, disable both if(overlap(v->offset, v->width, o, w)) { v->addr = 1; flag = 1; } } } switch(et) { case 0: case TFUNC: case TARRAY: case TSTRING: goto none; } if(nvar >= NVAR) { if(debug['w'] > 1 && node) fatal("variable not optimized: %D", a); goto none; } i = nvar; nvar++; //print("var %d %E %D %S\n", i, et, a, s); v = var+i; v->offset = o; v->name = n; // v->gotype = a->gotype; v->etype = et; v->width = w; v->addr = flag; // funny punning v->node = node; if(debug['R']) print("bit=%2d et=%2d w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); bit = blsh(i); if(n == D_EXTERN || n == D_STATIC) for(z=0; zp1) { for(z=0; zrefahead.b[z]; if(ref.b[z] != r1->refahead.b[z]) { r1->refahead.b[z] = ref.b[z]; change++; } cal.b[z] |= r1->calahead.b[z]; if(cal.b[z] != r1->calahead.b[z]) { r1->calahead.b[z] = cal.b[z]; change++; } } switch(r1->prog->as) { case ABL: if(noreturn(r1->prog)) break; for(z=0; zset.b[z]) | r1->use1.b[z] | r1->use2.b[z]; cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); r1->refbehind.b[z] = ref.b[z]; r1->calbehind.b[z] = cal.b[z]; } if(r1->active) break; r1->active = 1; } for(; r != r1; r = r->p1) for(r2 = r->p2; r2 != R; r2 = r2->p2link) prop(r2, r->refbehind, r->calbehind); } /* * find looping structure * * 1) find reverse postordering * 2) find approximate dominators, * the actual dominators if the flow graph is reducible * otherwise, dominators plus some other non-dominators. * See Matthew S. Hecht and Jeffrey D. Ullman, * "Analysis of a Simple Algorithm for Global Data Flow Problems", * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, * Oct. 1-3, 1973, pp. 207-217. * 3) find all nodes with a predecessor dominated by the current node. * such a node is a loop head. * recursively, all preds with a greater rpo number are in the loop */ int32 postorder(Reg *r, Reg **rpo2r, int32 n) { Reg *r1; r->rpo = 1; r1 = r->s1; if(r1 && !r1->rpo) n = postorder(r1, rpo2r, n); r1 = r->s2; if(r1 && !r1->rpo) n = postorder(r1, rpo2r, n); rpo2r[n] = r; n++; return n; } int32 rpolca(int32 *idom, int32 rpo1, int32 rpo2) { int32 t; if(rpo1 == -1) return rpo2; while(rpo1 != rpo2){ if(rpo1 > rpo2){ t = rpo2; rpo2 = rpo1; rpo1 = t; } while(rpo1 < rpo2){ t = idom[rpo2]; if(t >= rpo2) fatal("bad idom"); rpo2 = t; } } return rpo1; } int doms(int32 *idom, int32 r, int32 s) { while(s > r) s = idom[s]; return s == r; } int loophead(int32 *idom, Reg *r) { int32 src; src = r->rpo; if(r->p1 != R && doms(idom, src, r->p1->rpo)) return 1; for(r = r->p2; r != R; r = r->p2link) if(doms(idom, src, r->rpo)) return 1; return 0; } void loopmark(Reg **rpo2r, int32 head, Reg *r) { if(r->rpo < head || r->active == head) return; r->active = head; r->loop += LOOP; if(r->p1 != R) loopmark(rpo2r, head, r->p1); for(r = r->p2; r != R; r = r->p2link) loopmark(rpo2r, head, r); } void loopit(Reg *r, int32 nr) { Reg *r1; int32 i, d, me; if(nr > maxnr) { rpo2r = mal(nr * sizeof(Reg*)); idom = mal(nr * sizeof(int32)); maxnr = nr; } d = postorder(r, rpo2r, 0); if(d > nr) fatal("too many reg nodes"); nr = d; for(i = 0; i < nr / 2; i++){ r1 = rpo2r[i]; rpo2r[i] = rpo2r[nr - 1 - i]; rpo2r[nr - 1 - i] = r1; } for(i = 0; i < nr; i++) rpo2r[i]->rpo = i; idom[0] = 0; for(i = 0; i < nr; i++){ r1 = rpo2r[i]; me = r1->rpo; d = -1; // rpo2r[r->rpo] == r protects against considering dead code, // which has r->rpo == 0. if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) d = r1->p1->rpo; for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) if(rpo2r[r1->rpo] == r1 && r1->rpo < me) d = rpolca(idom, d, r1->rpo); idom[i] = d; } for(i = 0; i < nr; i++){ r1 = rpo2r[i]; r1->loop++; if(r1->p2 != R && loophead(idom, r1)) loopmark(rpo2r, i, r1); } } void synch(Reg *r, Bits dif) { Reg *r1; int z; for(r1 = r; r1 != R; r1 = r1->s1) { for(z=0; zrefbehind.b[z] & r1->refahead.b[z])) | r1->set.b[z] | r1->regdiff.b[z]; if(dif.b[z] != r1->regdiff.b[z]) { r1->regdiff.b[z] = dif.b[z]; change++; } } if(r1->active) break; r1->active = 1; for(z=0; zcalbehind.b[z] & r1->calahead.b[z]); if(r1->s2 != R) synch(r1->s2, dif); } } uint32 allreg(uint32 b, Rgn *r) { Var *v; int i; v = var + r->varno; r->regno = 0; switch(v->etype) { default: fatal("unknown etype %d/%E", bitno(b), v->etype); break; case TINT8: case TUINT8: case TINT16: case TUINT16: case TINT32: case TUINT32: case TINT: case TUINT: case TUINTPTR: case TBOOL: case TPTR32: i = BtoR(~b); if(i && r->cost >= 0) { r->regno = i; return RtoB(i); } break; case TFLOAT32: case TFLOAT64: i = BtoF(~b); if(i && r->cost >= 0) { r->regno = i+NREG; return FtoB(i); } break; case TINT64: case TUINT64: case TPTR64: case TINTER: case TSTRUCT: case TARRAY: break; } return 0; } void paint1(Reg *r, int bn) { Reg *r1; Prog *p; int z; uint32 bb; z = bn/32; bb = 1L<<(bn%32); if(r->act.b[z] & bb) return; for(;;) { if(!(r->refbehind.b[z] & bb)) break; r1 = r->p1; if(r1 == R) break; if(!(r1->refahead.b[z] & bb)) break; if(r1->act.b[z] & bb) break; r = r1; } if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { change -= CLOAD * r->loop; if(debug['R'] > 1) print("%d%P\td %Q $%d\n", r->loop, r->prog, blsh(bn), change); } for(;;) { r->act.b[z] |= bb; p = r->prog; if(r->use1.b[z] & bb) { change += CREF * r->loop; if(debug['R'] > 1) print("%d%P\tu1 %Q $%d\n", r->loop, p, blsh(bn), change); } if((r->use2.b[z]|r->set.b[z]) & bb) { change += CREF * r->loop; if(debug['R'] > 1) print("%d%P\tu2 %Q $%d\n", r->loop, p, blsh(bn), change); } if(STORE(r) & r->regdiff.b[z] & bb) { change -= CLOAD * r->loop; if(debug['R'] > 1) print("%d%P\tst %Q $%d\n", r->loop, p, blsh(bn), change); } if(r->refbehind.b[z] & bb) for(r1 = r->p2; r1 != R; r1 = r1->p2link) if(r1->refahead.b[z] & bb) paint1(r1, bn); if(!(r->refahead.b[z] & bb)) break; r1 = r->s2; if(r1 != R) if(r1->refbehind.b[z] & bb) paint1(r1, bn); r = r->s1; if(r == R) break; if(r->act.b[z] & bb) break; if(!(r->refbehind.b[z] & bb)) break; } } uint32 paint2(Reg *r, int bn) { Reg *r1; int z; uint32 bb, vreg; z = bn/32; bb = 1L << (bn%32); vreg = regbits; if(!(r->act.b[z] & bb)) return vreg; for(;;) { if(!(r->refbehind.b[z] & bb)) break; r1 = r->p1; if(r1 == R) break; if(!(r1->refahead.b[z] & bb)) break; if(!(r1->act.b[z] & bb)) break; r = r1; } for(;;) { r->act.b[z] &= ~bb; vreg |= r->regu; if(r->refbehind.b[z] & bb) for(r1 = r->p2; r1 != R; r1 = r1->p2link) if(r1->refahead.b[z] & bb) vreg |= paint2(r1, bn); if(!(r->refahead.b[z] & bb)) break; r1 = r->s2; if(r1 != R) if(r1->refbehind.b[z] & bb) vreg |= paint2(r1, bn); r = r->s1; if(r == R) break; if(!(r->act.b[z] & bb)) break; if(!(r->refbehind.b[z] & bb)) break; } return vreg; } void paint3(Reg *r, int bn, int32 rb, int rn) { Reg *r1; Prog *p; int z; uint32 bb; z = bn/32; bb = 1L << (bn%32); if(r->act.b[z] & bb) return; for(;;) { if(!(r->refbehind.b[z] & bb)) break; r1 = r->p1; if(r1 == R) break; if(!(r1->refahead.b[z] & bb)) break; if(r1->act.b[z] & bb) break; r = r1; } if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) addmove(r, bn, rn, 0); for(;;) { r->act.b[z] |= bb; p = r->prog; if(r->use1.b[z] & bb) { if(debug['R']) print("%P", p); addreg(&p->from, rn); if(debug['R']) print("\t.c%P\n", p); } if((r->use2.b[z]|r->set.b[z]) & bb) { if(debug['R']) print("%P", p); addreg(&p->to, rn); if(debug['R']) print("\t.c%P\n", p); } if(STORE(r) & r->regdiff.b[z] & bb) addmove(r, bn, rn, 1); r->regu |= rb; if(r->refbehind.b[z] & bb) for(r1 = r->p2; r1 != R; r1 = r1->p2link) if(r1->refahead.b[z] & bb) paint3(r1, bn, rb, rn); if(!(r->refahead.b[z] & bb)) break; r1 = r->s2; if(r1 != R) if(r1->refbehind.b[z] & bb) paint3(r1, bn, rb, rn); r = r->s1; if(r == R) break; if(r->act.b[z] & bb) break; if(!(r->refbehind.b[z] & bb)) break; } } void addreg(Adr *a, int rn) { a->sym = 0; a->name = D_NONE; a->type = D_REG; a->reg = rn; if(rn >= NREG) { a->type = D_FREG; a->reg = rn-NREG; } } /* * bit reg * 0 R0 * 1 R1 * ... ... * 10 R10 */ int32 RtoB(int r) { if(r >= REGTMP-2) // excluded R9 and R10 for m and g return 0; return 1L << r; } int BtoR(int32 b) { b &= 0x01fcL; // excluded R9 and R10 for m and g if(b == 0) return 0; return bitno(b); } /* * bit reg * 18 F2 * 19 F3 * ... ... * 23 F7 */ int32 FtoB(int f) { if(f < 2 || f > NFREG-1) return 0; return 1L << (f + 16); } int BtoF(int32 b) { b &= 0xfc0000L; if(b == 0) return 0; return bitno(b) - 16; } static Sym* symlist[10]; int noreturn(Prog *p) { Sym *s; int i; if(symlist[0] == S) { symlist[0] = pkglookup("panicindex", runtimepkg); symlist[1] = pkglookup("panicslice", runtimepkg); symlist[2] = pkglookup("throwinit", runtimepkg); symlist[3] = pkglookup("panic", runtimepkg); symlist[4] = pkglookup("panicwrap", runtimepkg); } s = p->to.sym; if(s == S) return 0; for(i=0; symlist[i]!=S; i++) if(s == symlist[i]) return 1; return 0; } void dumpone(Reg *r) { int z; Bits bit; print("%d:%P", r->loop, r->prog); for(z=0; zset.b[z] | r->use1.b[z] | r->use2.b[z] | r->refbehind.b[z] | r->refahead.b[z] | r->calbehind.b[z] | r->calahead.b[z] | r->regdiff.b[z] | r->act.b[z] | 0; if(bany(&bit)) { print("\t"); if(bany(&r->set)) print(" s:%Q", r->set); if(bany(&r->use1)) print(" u1:%Q", r->use1); if(bany(&r->use2)) print(" u2:%Q", r->use2); if(bany(&r->refbehind)) print(" rb:%Q ", r->refbehind); if(bany(&r->refahead)) print(" ra:%Q ", r->refahead); if(bany(&r->calbehind)) print(" cb:%Q ", r->calbehind); if(bany(&r->calahead)) print(" ca:%Q ", r->calahead); if(bany(&r->regdiff)) print(" d:%Q ", r->regdiff); if(bany(&r->act)) print(" a:%Q ", r->act); } print("\n"); } void dumpit(char *str, Reg *r0) { Reg *r, *r1; print("\n%s\n", str); for(r = r0; r != R; r = r->link) { dumpone(r); r1 = r->p2; if(r1 != R) { print(" pred:"); for(; r1 != R; r1 = r1->p2link) print(" %.4ud", r1->prog->loc); print("\n"); } // r1 = r->s1; // if(r1 != R) { // print(" succ:"); // for(; r1 != R; r1 = r1->s1) // print(" %.4ud", r1->prog->loc); // print("\n"); // } } } /* * the code generator depends on being able to write out JMP (B) * instructions that it can jump to now but fill in later. * the linker will resolve them nicely, but they make the code * longer and more difficult to follow during debugging. * remove them. */ /* what instruction does a JMP to p eventually land on? */ static Prog* chasejmp(Prog *p, int *jmploop) { int n; n = 0; while(p != P && p->as == AB && p->to.type == D_BRANCH) { if(++n > 10) { *jmploop = 1; break; } p = p->to.branch; } return p; } /* * reuse reg pointer for mark/sweep state. * leave reg==nil at end because alive==nil. */ #define alive ((void*)0) #define dead ((void*)1) /* mark all code reachable from firstp as alive */ static void mark(Prog *firstp) { Prog *p; for(p=firstp; p; p=p->link) { if(p->regp != dead) break; p->regp = alive; if(p->as != ABL && p->to.type == D_BRANCH && p->to.branch) mark(p->to.branch); if(p->as == AB || p->as == ARET || (p->as == ABL && noreturn(p))) break; } } static void fixjmp(Prog *firstp) { int jmploop; Prog *p, *last; if(debug['R'] && debug['v']) print("\nfixjmp\n"); // pass 1: resolve jump to B, mark all code as dead. jmploop = 0; for(p=firstp; p; p=p->link) { if(debug['R'] && debug['v']) print("%P\n", p); if(p->as != ABL && p->to.type == D_BRANCH && p->to.branch && p->to.branch->as == AB) { p->to.branch = chasejmp(p->to.branch, &jmploop); if(debug['R'] && debug['v']) print("->%P\n", p); } p->regp = dead; } if(debug['R'] && debug['v']) print("\n"); // pass 2: mark all reachable code alive mark(firstp); // pass 3: delete dead code (mostly JMPs). last = nil; for(p=firstp; p; p=p->link) { if(p->regp == dead) { if(p->link == P && p->as == ARET && last && last->as != ARET) { // This is the final ARET, and the code so far doesn't have one. // Let it stay. } else { if(debug['R'] && debug['v']) print("del %P\n", p); continue; } } if(last) last->link = p; last = p; } last->link = P; // pass 4: elide JMP to next instruction. // only safe if there are no jumps to JMPs anymore. if(!jmploop) { last = nil; for(p=firstp; p; p=p->link) { if(p->as == AB && p->to.type == D_BRANCH && p->to.branch == p->link) { if(debug['R'] && debug['v']) print("del %P\n", p); continue; } if(last) last->link = p; last = p; } last->link = P; } if(debug['R'] && debug['v']) { print("\n"); for(p=firstp; p; p=p->link) print("%P\n", p); print("\n"); } }