diff --git a/src/cmd/5g/peep.c b/src/cmd/5g/peep.c index c9910d1134..fe0969be4b 100644 --- a/src/cmd/5g/peep.c +++ b/src/cmd/5g/peep.c @@ -65,7 +65,7 @@ peep(Prog *firstp) Prog *p; int t; - g = flowstart(firstp, sizeof(Flow)); + g = flowstart(firstp, 0); if(g == nil) return; gactive = 0; diff --git a/src/cmd/6g/peep.c b/src/cmd/6g/peep.c index 279b60d4e4..1140a3d4bb 100644 --- a/src/cmd/6g/peep.c +++ b/src/cmd/6g/peep.c @@ -97,7 +97,7 @@ peep(Prog *firstp) Prog *p, *p1; int t; - g = flowstart(firstp, sizeof(Flow)); + g = flowstart(firstp, 0); if(g == nil) return; gactive = 0; diff --git a/src/cmd/8g/peep.c b/src/cmd/8g/peep.c index 9b514a8964..0652c0af02 100644 --- a/src/cmd/8g/peep.c +++ b/src/cmd/8g/peep.c @@ -94,7 +94,7 @@ peep(Prog *firstp) Prog *p, *p1; int t; - g = flowstart(firstp, sizeof(Flow)); + g = flowstart(firstp, 0); if(g == nil) return; gactive = 0; diff --git a/src/cmd/9g/peep.c b/src/cmd/9g/peep.c index 1ca28dde05..0980039c9d 100644 --- a/src/cmd/9g/peep.c +++ b/src/cmd/9g/peep.c @@ -55,7 +55,7 @@ peep(Prog *firstp) Prog *p, *p1; int t; - g = flowstart(firstp, sizeof(Flow)); + g = flowstart(firstp, 0); if(g == nil) return; gactive = 0; diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index b53655b412..93eba2e80d 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -1557,9 +1557,12 @@ struct Flow { int32 active; // usable by client + int32 id; // sequence number in flow graph int32 rpo; // reverse post ordering uint16 loop; // x5 for every loop uchar refset; // diagnostic generated + + void* data; // for use by client }; struct Graph diff --git a/src/cmd/gc/popt.c b/src/cmd/gc/popt.c index f71702431a..afe2b078b8 100644 --- a/src/cmd/gc/popt.c +++ b/src/cmd/gc/popt.c @@ -202,7 +202,7 @@ fixjmp(Prog *firstp) // Control flow analysis. The Flow structures hold predecessor and successor // information as well as basic loop analysis. // -// graph = flowstart(firstp, sizeof(Flow)); +// graph = flowstart(firstp, 0); // ... use flow graph ... // flowend(graph); // free graph // @@ -214,19 +214,20 @@ fixjmp(Prog *firstp) // f->p1 and this list: // // for(f2 = f->p2; f2 != nil; f2 = f2->p2link) -// -// Often the Flow struct is embedded as the first field inside a larger struct S. -// In that case casts are needed to convert Flow* to S* in many places but the -// idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart. +// +// The size argument to flowstart specifies an amount of zeroed memory +// to allocate in every f->data field, for use by the client. +// If size == 0, f->data will be nil. Graph* flowstart(Prog *firstp, int size) { - int nf; + int id, nf; Flow *f, *f1, *start, *last; Graph *graph; Prog *p; ProgInfo info; + char *data; // Count and mark instructions to annotate. nf = 0; @@ -248,12 +249,16 @@ flowstart(Prog *firstp, int size) } // Allocate annotations and assign to instructions. - graph = calloc(sizeof *graph + size*nf, 1); + graph = calloc(sizeof *graph + sizeof(Flow)*nf + size*nf, 1); if(graph == nil) fatal("out of memory"); start = (Flow*)(graph+1); last = nil; f = start; + data = (char*)(f+nf); + if(size == 0) + data = nil; + id = 0; for(p = firstp; p != P; p = p->link) { if(p->opt == nil) continue; @@ -262,8 +267,11 @@ flowstart(Prog *firstp, int size) if(last) last->link = f; last = f; - - f = (Flow*)((uchar*)f + size); + f->data = data; + f->id = id; + f++; + id++; + data += size; } // Fill in pred/succ information. @@ -498,13 +506,12 @@ uniqs(Flow *r) // ACM TOPLAS 1999. typedef struct TempVar TempVar; -typedef struct TempFlow TempFlow; struct TempVar { Node *node; - TempFlow *def; // definition of temp var - TempFlow *use; // use list, chained through TempFlow.uselink + Flow *def; // definition of temp var + Flow *use; // use list, chained through Flow.data TempVar *freelink; // next free temp in Type.opt list TempVar *merge; // merge var with this one vlong start; // smallest Prog.pc in live range @@ -513,12 +520,6 @@ struct TempVar uchar removed; // removed from program }; -struct TempFlow -{ - Flow f; - TempFlow *uselink; -}; - static int startcmp(const void *va, const void *vb) { @@ -541,15 +542,15 @@ canmerge(Node *n) return n->class == PAUTO && strncmp(n->sym->name, "autotmp", 7) == 0; } -static void mergewalk(TempVar*, TempFlow*, uint32); -static void varkillwalk(TempVar*, TempFlow*, uint32); +static void mergewalk(TempVar*, Flow*, uint32); +static void varkillwalk(TempVar*, Flow*, uint32); void mergetemp(Prog *firstp) { int i, j, nvar, ninuse, nfree, nkill; TempVar *var, *v, *v1, **bystart, **inuse; - TempFlow *r; + Flow *f; NodeList *l, **lp; Node *n; Prog *p, *p1; @@ -560,7 +561,7 @@ mergetemp(Prog *firstp) enum { Debug = 0 }; - g = flowstart(firstp, sizeof(TempFlow)); + g = flowstart(firstp, 0); if(g == nil) return; @@ -585,8 +586,8 @@ mergetemp(Prog *firstp) // We assume that the earliest reference to a temporary is its definition. // This is not true of variables in general but our temporaries are all // single-use (that's why we have so many!). - for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) { - p = r->f.prog; + for(f = g->start; f != nil; f = f->link) { + p = f->prog; arch.proginfo(&info, p); if(p->from.node != N && ((Node*)(p->from.node))->opt && p->to.node != N && ((Node*)(p->to.node))->opt) @@ -598,9 +599,9 @@ mergetemp(Prog *firstp) v = n->opt; if(v != nil) { if(v->def == nil) - v->def = r; - r->uselink = v->use; - v->use = r; + v->def = f; + f->data = v->use; + v->use = f; if(n == p->from.node && (info.flags & LeftAddr)) v->addr = 1; } @@ -616,8 +617,8 @@ mergetemp(Prog *firstp) if(v->addr) continue; // Used in only one instruction, which had better be a write. - if((r = v->use) != nil && r->uselink == nil) { - p = r->f.prog; + if((f = v->use) != nil && (Flow*)f->data == nil) { + p = f->prog; arch.proginfo(&info, p); if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) { p->as = ANOP; @@ -633,10 +634,10 @@ mergetemp(Prog *firstp) // Written in one instruction, read in the next, otherwise unused, // no jumps to the next instruction. Happens mainly in 386 compiler. - if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) { - p = r->f.prog; + if((f = v->use) != nil && f->link == (Flow*)f->data && (Flow*)((Flow*)f->data)->data == nil && uniqp(f->link) == f) { + p = f->prog; arch.proginfo(&info, p); - p1 = r->f.link->prog; + p1 = f->link->prog; arch.proginfo(&info1, p1); enum { SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD, @@ -645,7 +646,7 @@ mergetemp(Prog *firstp) !((info.flags|info1.flags) & (LeftAddr|RightAddr)) && (info.flags & SizeAny) == (info1.flags & SizeAny)) { p1->from = p->from; - arch.excise(&r->f); + arch.excise(f); v->removed = 1; if(Debug) print("drop immediate-use %S\n", v->node->sym); @@ -657,16 +658,16 @@ mergetemp(Prog *firstp) // Traverse live range of each variable to set start, end. // Each flood uses a new value of gen so that we don't have - // to clear all the r->f.active words after each variable. + // to clear all the r->active words after each variable. gen = 0; for(v = var; v < var+nvar; v++) { gen++; - for(r = v->use; r != nil; r = r->uselink) - mergewalk(v, r, gen); + for(f = v->use; f != nil; f = (Flow*)f->data) + mergewalk(v, f, gen); if(v->addr) { gen++; - for(r = v->use; r != nil; r = r->uselink) - varkillwalk(v, r, gen); + for(f = v->use; f != nil; f = (Flow*)f->data) + varkillwalk(v, f, gen); } } @@ -736,7 +737,7 @@ mergetemp(Prog *firstp) if(v->merge) print(" merge %#N", v->merge->node); if(v->start == v->end) - print(" %P", v->def->f.prog); + print(" %P", v->def->prog); print("\n"); } @@ -745,8 +746,8 @@ mergetemp(Prog *firstp) } // Update node references to use merged temporaries. - for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) { - p = r->f.prog; + for(f = g->start; f != nil; f = f->link) { + p = f->prog; if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil) p->from.node = v->merge->node; if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil) @@ -775,40 +776,40 @@ mergetemp(Prog *firstp) } static void -mergewalk(TempVar *v, TempFlow *r0, uint32 gen) +mergewalk(TempVar *v, Flow *f0, uint32 gen) { Prog *p; - TempFlow *r1, *r, *r2; + Flow *f1, *f, *f2; - for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.p1) { - if(r1->f.active == gen) + for(f1 = f0; f1 != nil; f1 = f1->p1) { + if(f1->active == gen) break; - r1->f.active = gen; - p = r1->f.prog; + f1->active = gen; + p = f1->prog; if(v->end < p->pc) v->end = p->pc; - if(r1 == v->def) { + if(f1 == v->def) { v->start = p->pc; break; } } - for(r = r0; r != r1; r = (TempFlow*)r->f.p1) - for(r2 = (TempFlow*)r->f.p2; r2 != nil; r2 = (TempFlow*)r2->f.p2link) - mergewalk(v, r2, gen); + for(f = f0; f != f1; f = f->p1) + for(f2 = f->p2; f2 != nil; f2 = f2->p2link) + mergewalk(v, f2, gen); } static void -varkillwalk(TempVar *v, TempFlow *r0, uint32 gen) +varkillwalk(TempVar *v, Flow *f0, uint32 gen) { Prog *p; - TempFlow *r1, *r; + Flow *f1, *f; - for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.s1) { - if(r1->f.active == gen) + for(f1 = f0; f1 != nil; f1 = f1->s1) { + if(f1->active == gen) break; - r1->f.active = gen; - p = r1->f.prog; + f1->active = gen; + p = f1->prog; if(v->end < p->pc) v->end = p->pc; if(v->start > p->pc) @@ -817,8 +818,8 @@ varkillwalk(TempVar *v, TempFlow *r0, uint32 gen) break; } - for(r = r0; r != r1; r = (TempFlow*)r->f.s1) - varkillwalk(v, (TempFlow*)r->f.s2, gen); + for(f = f0; f != f1; f = f->s1) + varkillwalk(v, f->s2, gen); } // Eliminate redundant nil pointer checks. @@ -836,25 +837,21 @@ varkillwalk(TempVar *v, TempFlow *r0, uint32 gen) // each load. typedef struct NilVar NilVar; -typedef struct NilFlow NilFlow; -struct NilFlow { - Flow f; - int kill; -}; +static void nilwalkback(Flow *rcheck); +static void nilwalkfwd(Flow *rcheck); -static void nilwalkback(NilFlow *rcheck); -static void nilwalkfwd(NilFlow *rcheck); +static int killed; // f->data is either nil or &killed void nilopt(Prog *firstp) { - NilFlow *r; + Flow *f; Prog *p; Graph *g; int ncheck, nkill; - g = flowstart(firstp, sizeof(NilFlow)); + g = flowstart(firstp, 0); if(g == nil) return; @@ -863,35 +860,35 @@ nilopt(Prog *firstp) ncheck = 0; nkill = 0; - for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) { - p = r->f.prog; + for(f = g->start; f != nil; f = f->link) { + p = f->prog; if(p->as != ACHECKNIL || !arch.regtyp(&p->from)) continue; ncheck++; if(arch.stackaddr(&p->from)) { if(debug_checknil && p->lineno > 1) warnl(p->lineno, "removed nil check of SP address"); - r->kill = 1; + f->data = &killed; continue; } - nilwalkfwd(r); - if(r->kill) { + nilwalkfwd(f); + if(f->data != nil) { if(debug_checknil && p->lineno > 1) warnl(p->lineno, "removed nil check before indirect"); continue; } - nilwalkback(r); - if(r->kill) { + nilwalkback(f); + if(f->data != nil) { if(debug_checknil && p->lineno > 1) warnl(p->lineno, "removed repeated nil check"); continue; } } - for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) { - if(r->kill) { + for(f = g->start; f != nil; f = f->link) { + if(f->data != nil) { nkill++; - arch.excise(&r->f); + arch.excise(f); } } @@ -902,72 +899,72 @@ nilopt(Prog *firstp) } static void -nilwalkback(NilFlow *rcheck) +nilwalkback(Flow *fcheck) { Prog *p; ProgInfo info; - NilFlow *r; + Flow *f; - for(r = rcheck; r != nil; r = (NilFlow*)uniqp(&r->f)) { - p = r->f.prog; + for(f = fcheck; f != nil; f = uniqp(f)) { + p = f->prog; arch.proginfo(&info, p); - if((info.flags & RightWrite) && arch.sameaddr(&p->to, &rcheck->f.prog->from)) { + if((info.flags & RightWrite) && arch.sameaddr(&p->to, &fcheck->prog->from)) { // Found initialization of value we're checking for nil. // without first finding the check, so this one is unchecked. return; } - if(r != rcheck && p->as == ACHECKNIL && arch.sameaddr(&p->from, &rcheck->f.prog->from)) { - rcheck->kill = 1; + if(f != fcheck && p->as == ACHECKNIL && arch.sameaddr(&p->from, &fcheck->prog->from)) { + fcheck->data = &killed; return; } } // Here is a more complex version that scans backward across branches. - // It assumes rcheck->kill = 1 has been set on entry, and its job is to find a reason - // to keep the check (setting rcheck->kill = 0). + // It assumes fcheck->kill = 1 has been set on entry, and its job is to find a reason + // to keep the check (setting fcheck->kill = 0). // It doesn't handle copying of aggregates as well as I would like, // nor variables with their address taken, // and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3. /* - for(r1 = r0; r1 != nil; r1 = (NilFlow*)r1->f.p1) { - if(r1->f.active == gen) + for(f1 = f0; f1 != nil; f1 = f1->p1) { + if(f1->active == gen) break; - r1->f.active = gen; - p = r1->f.prog; + f1->active = gen; + p = f1->prog; // If same check, stop this loop but still check // alternate predecessors up to this point. - if(r1 != rcheck && p->as == ACHECKNIL && arch.sameaddr(&p->from, &rcheck->f.prog->from)) + if(f1 != fcheck && p->as == ACHECKNIL && arch.sameaddr(&p->from, &fcheck->prog->from)) break; arch.proginfo(&info, p); - if((info.flags & RightWrite) && arch.sameaddr(&p->to, &rcheck->f.prog->from)) { + if((info.flags & RightWrite) && arch.sameaddr(&p->to, &fcheck->prog->from)) { // Found initialization of value we're checking for nil. // without first finding the check, so this one is unchecked. - rcheck->kill = 0; + fcheck->kill = 0; return; } - if(r1->f.p1 == nil && r1->f.p2 == nil) { - print("lost pred for %P\n", rcheck->f.prog); - for(r1=r0; r1!=nil; r1=(NilFlow*)r1->f.p1) { - arch.proginfo(&info, r1->f.prog); - print("\t%P %d %d %D %D\n", r1->f.prog, info.flags&RightWrite, arch.sameaddr(&r1->f.prog->to, &rcheck->f.prog->from), &r1->f.prog->to, &rcheck->f.prog->from); + if(f1->p1 == nil && f1->p2 == nil) { + print("lost pred for %P\n", fcheck->prog); + for(f1=f0; f1!=nil; f1=f1->p1) { + arch.proginfo(&info, f1->prog); + print("\t%P %d %d %D %D\n", r1->prog, info.flags&RightWrite, arch.sameaddr(&f1->prog->to, &fcheck->prog->from), &f1->prog->to, &fcheck->prog->from); } fatal("lost pred trail"); } } - for(r = r0; r != r1; r = (NilFlow*)r->f.p1) - for(r2 = (NilFlow*)r->f.p2; r2 != nil; r2 = (NilFlow*)r2->f.p2link) - nilwalkback(rcheck, r2, gen); + for(f = f0; f != f1; f = f->p1) + for(f2 = f->p2; f2 != nil; f2 = f2->p2link) + nilwalkback(fcheck, f2, gen); */ } static void -nilwalkfwd(NilFlow *rcheck) +nilwalkfwd(Flow *fcheck) { - NilFlow *r, *last; + Flow *f, *last; Prog *p; ProgInfo info; @@ -979,16 +976,16 @@ nilwalkfwd(NilFlow *rcheck) // _ = *x // should panic // for {} // no writes but infinite loop may be considered visible last = nil; - for(r = (NilFlow*)uniqs(&rcheck->f); r != nil; r = (NilFlow*)uniqs(&r->f)) { - p = r->f.prog; + for(f = uniqs(fcheck); f != nil; f = uniqs(f)) { + p = f->prog; arch.proginfo(&info, p); - if((info.flags & LeftRead) && arch.smallindir(&p->from, &rcheck->f.prog->from)) { - rcheck->kill = 1; + if((info.flags & LeftRead) && arch.smallindir(&p->from, &fcheck->prog->from)) { + fcheck->data = &killed; return; } - if((info.flags & (RightRead|RightWrite)) && arch.smallindir(&p->to, &rcheck->f.prog->from)) { - rcheck->kill = 1; + if((info.flags & (RightRead|RightWrite)) && arch.smallindir(&p->to, &fcheck->prog->from)) { + fcheck->data = &killed; return; } @@ -996,17 +993,14 @@ nilwalkfwd(NilFlow *rcheck) if(p->as == ACHECKNIL) return; // Stop if value is lost. - if((info.flags & RightWrite) && arch.sameaddr(&p->to, &rcheck->f.prog->from)) + if((info.flags & RightWrite) && arch.sameaddr(&p->to, &fcheck->prog->from)) return; // Stop if memory write. if((info.flags & RightWrite) && !arch.regtyp(&p->to)) return; // Stop if we jump backward. - // This test is valid because all the NilFlow* are pointers into - // a single contiguous array. We will need to add an explicit - // numbering when the code is converted to Go. - if(last != nil && r <= last) + if(last != nil && f->id <= last->id) return; - last = r; + last = f; } } diff --git a/src/cmd/gc/popt.h b/src/cmd/gc/popt.h index 833f69a212..2bae0d770d 100644 --- a/src/cmd/gc/popt.h +++ b/src/cmd/gc/popt.h @@ -68,11 +68,8 @@ uint64 STORE(Reg*); // A Reg is a wrapper around a single Prog (one instruction) that holds // register optimization information while the optimizer runs. // r->prog is the instruction. -// r->prog->opt points back to r. struct Reg { - Flow f; - Bits set; // regopt variables written by this instruction. Bits use1; // regopt variables read by prog->from. Bits use2; // regopt variables read by prog->to. @@ -109,7 +106,7 @@ struct Reg // cost. struct Rgn { - Reg* enter; + Flow* enter; short cost; short varno; short regno; @@ -143,17 +140,7 @@ EXTERN struct /* * reg.c */ -int rcmp(const void*, const void*); void regopt(Prog*); -void addmove(Reg*, int, int, int); -Bits mkvar(Reg*, Adr*); -void prop(Reg*, Bits, Bits); -void synch(Reg*, Bits); -uint64 allreg(uint64, Rgn*); -void paint1(Reg*, int); -uint64 paint2(Reg*, int, int); -void paint3(Reg*, int, uint64, int); -void addreg(Adr*, int); void dumpone(Flow*, int); void dumpit(char*, Flow*, int); diff --git a/src/cmd/gc/reg.c b/src/cmd/gc/reg.c index d7ffa1799f..939cf34cb5 100644 --- a/src/cmd/gc/reg.c +++ b/src/cmd/gc/reg.c @@ -33,10 +33,20 @@ #include "go.h" #include "popt.h" -static Reg* firstr; +static Flow* firstf; static int first = 1; -int +static void addmove(Flow*, int, int, int); +static Bits mkvar(Flow*, Adr*); +static void prop(Flow*, Bits, Bits); +static void synch(Flow*, Bits); +static uint64 allreg(uint64, Rgn*); +static void paint1(Flow*, int); +static uint64 paint2(Flow*, int, int); +static void paint3(Flow*, int, uint64, int); +static void addreg(Adr*, int); + +static int rcmp(const void *a1, const void *a2) { Rgn *p1, *p2; @@ -76,12 +86,13 @@ setaddrs(Bits bit) static Node* regnodes[64]; -static void walkvardef(Node *n, Reg *r, int active); +static void walkvardef(Node *n, Flow *r, int active); void regopt(Prog *firstp) { - Reg *r, *r1; + Flow *f, *f1; + Reg *r; Prog *p; Graph *g; ProgInfo info; @@ -134,10 +145,10 @@ regopt(Prog *firstp) return; } - firstr = (Reg*)g->start; + firstf = g->start; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; + for(f = firstf; f != nil; f = f->link) { + p = f->prog; if(p->as == AVARDEF || p->as == AVARKILL) continue; arch.proginfo(&info, p); @@ -147,10 +158,11 @@ regopt(Prog *firstp) continue; // from vs to doesn't matter for registers. + r = (Reg*)f->data; r->use1.b[0] |= info.reguse | info.regindex; r->set.b[0] |= info.regset; - bit = mkvar(r, &p->from); + bit = mkvar(f, &p->from); if(bany(&bit)) { if(info.flags & LeftAddr) setaddrs(bit); @@ -171,7 +183,7 @@ regopt(Prog *firstp) if(p->from3.type != TYPE_NONE) fatal("regopt not implemented for from3"); - bit = mkvar(r, &p->to); + bit = mkvar(f, &p->to); if(bany(&bit)) { if(info.flags & RightAddr) setaddrs(bit); @@ -198,7 +210,7 @@ regopt(Prog *firstp) } if(debug['R'] && debug['v']) - dumpit("pass1", &firstr->f, 1); + dumpit("pass1", firstf, 1); /* * pass 2 @@ -207,7 +219,7 @@ regopt(Prog *firstp) flowrpo(g); if(debug['R'] && debug['v']) - dumpit("pass2", &firstr->f, 1); + dumpit("pass2", firstf, 1); /* * pass 2.5 @@ -217,15 +229,16 @@ regopt(Prog *firstp) * but we'll be done with it by then.) */ active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->f.active = 0; + for(f = firstf; f != nil; f = f->link) { + f->active = 0; + r = (Reg*)f->data; r->act = zbits; } - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; + for(f = firstf; f != nil; f = f->link) { + p = f->prog; if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) { active++; - walkvardef(p->to.node, r, active); + walkvardef(p->to.node, f, active); } } @@ -236,18 +249,18 @@ regopt(Prog *firstp) */ loop1: change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - if(r->f.prog->as == ARET) - prop(r, zbits, zbits); + for(f = firstf; f != nil; f = f->link) + f->active = 0; + for(f = firstf; f != nil; f = f->link) + if(f->prog->as == ARET) + prop(f, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; - for(r = firstr; r != R; r = r1) { - r1 = (Reg*)r->f.link; - if(r1 && r1->f.active && !r->f.active) { - prop(r, zbits, zbits); + for(f = firstf; f != nil; f = f1) { + f1 = f->link; + if(f1 && f1->active && !f->active) { + prop(f, zbits, zbits); i = 1; } } @@ -257,7 +270,7 @@ loop11: goto loop1; if(debug['R'] && debug['v']) - dumpit("pass3", &firstr->f, 1); + dumpit("pass3", firstf, 1); /* * pass 4 @@ -266,14 +279,14 @@ loop11: */ loop2: change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - synch(firstr, zbits); + for(f = firstf; f != nil; f = f->link) + f->active = 0; + synch(firstf, zbits); if(change) goto loop2; if(debug['R'] && debug['v']) - dumpit("pass4", &firstr->f, 1); + dumpit("pass4", firstf, 1); /* * pass 4.5 @@ -283,7 +296,8 @@ loop2: mask = ~0ULL; // can't rely on C to shift by 64 else mask = (1ULL<f.link) { + for(f = firstf; f != nil; f = f->link) { + r = (Reg*)f->data; r->regu = (r->refbehind.b[0] | r->set.b[0]) & mask; r->set.b[0] &= ~mask; r->use1.b[0] &= ~mask; @@ -297,47 +311,49 @@ loop2: } if(debug['R'] && debug['v']) - dumpit("pass4.5", &firstr->f, 1); + dumpit("pass4.5", firstf, 1); /* * pass 5 * isolate regions * calculate costs (paint1) */ - r = firstr; - if(r) { + f = firstf; + if(f) { + r = (Reg*)f->data; for(z=0; zrefahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); - if(bany(&bit) && !r->f.refset) { + if(bany(&bit) && !f->refset) { // should never happen - all variables are preset if(debug['w']) - print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; + print("%L: used and not set: %Q\n", f->prog->lineno, bit); + f->refset = 1; } } - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->act = zbits; + for(f = firstf; f != nil; f = f->link) + ((Reg*)f->data)->act = zbits; rgp = region; nregion = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { + for(f = firstf; f != nil; f = f->link) { + r = (Reg*)f->data; for(z=0; zset.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); - if(bany(&bit) && !r->f.refset) { + if(bany(&bit) && !f->refset) { if(debug['w']) - print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - arch.excise(&r->f); + print("%L: set and not used: %Q\n", f->prog->lineno, bit); + f->refset = 1; + arch.excise(f); } for(z=0; zact.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); - rgp->enter = r; + rgp->enter = f; rgp->varno = i; change = 0; - paint1(r, i); + paint1(f, i); biclr(&bit, i); if(change <= 0) continue; @@ -355,7 +371,7 @@ brk: qsort(region, nregion, sizeof(region[0]), rcmp); if(debug['R'] && debug['v']) - dumpit("pass5", &firstr->f, 1); + dumpit("pass5", firstf, 1); /* * pass 6 @@ -367,7 +383,7 @@ brk: print("\nregisterizing\n"); for(i=0; icost, rgp->varno, rgp->enter->f.prog->pc); + print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->prog->pc); bit = blsh(rgp->varno); usedreg = paint2(rgp->enter, rgp->varno, 0); vreg = allreg(usedreg, rgp); @@ -390,15 +406,15 @@ brk: for(i=0; iopt = nil; flowend(g); - firstr = R; + firstf = nil; if(debug['R'] && debug['v']) { // Rebuild flow graph, since we inserted instructions - g = flowstart(firstp, sizeof(Reg)); - firstr = (Reg*)g->start; - dumpit("pass6", &firstr->f, 1); + g = flowstart(firstp, 0); + firstf = g->start; + dumpit("pass6", firstf, 0); flowend(g); - firstr = R; + firstf = nil; } /* @@ -447,37 +463,37 @@ brk: } static void -walkvardef(Node *n, Reg *r, int active) +walkvardef(Node *n, Flow *f, int active) { - Reg *r1, *r2; + Flow *f1, *f2; int bn; Var *v; - for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { - if(r1->f.active == active) + for(f1=f; f1!=nil; f1=f1->s1) { + if(f1->active == active) break; - r1->f.active = active; - if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) + f1->active = active; + if(f1->prog->as == AVARKILL && f1->prog->to.node == n) break; for(v=n->opt; v!=nil; v=v->nextinnode) { bn = v - var; - biset(&r1->act, bn); + biset(&((Reg*)f1->data)->act, bn); } - if(r1->f.prog->as == ACALL) + if(f1->prog->as == ACALL) break; } - for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) - if(r2->f.s2 != nil) - walkvardef(n, (Reg*)r2->f.s2, active); + for(f2=f; f2!=f1; f2=f2->s1) + if(f2->s2 != nil) + walkvardef(n, f2->s2, active); } /* * add mov b,rn * just after r */ -void -addmove(Reg *r, int bn, int rn, int f) +static void +addmove(Flow *r, int bn, int rn, int f) { Prog *p, *p1; Adr *a; @@ -487,7 +503,7 @@ addmove(Reg *r, int bn, int rn, int f) clearp(p1); p1->pc = 9999; - p = r->f.prog; + p = r->prog; p1->link = p->link; p->link = p1; p1->lineno = p->lineno; @@ -540,8 +556,8 @@ overlap(int64 o1, int w1, int64 o2, int w2) return 1; } -Bits -mkvar(Reg *r, Adr *a) +static Bits +mkvar(Flow *f, Adr *a) { Var *v; int i, n, et, z, flag; @@ -550,6 +566,8 @@ mkvar(Reg *r, Adr *a) int64 o; Bits bit; Node *node; + Reg *r; + /* * mark registers used @@ -557,8 +575,8 @@ mkvar(Reg *r, Adr *a) if(a->type == TYPE_NONE) goto none; - if(r != R) - r->use1.b[0] |= arch.doregbits(a->index); // TODO: Use RtoB + r = (Reg*)f->data; + r->use1.b[0] |= arch.doregbits(a->index); // TODO: Use RtoB switch(a->type) { default: @@ -574,7 +592,7 @@ mkvar(Reg *r, Adr *a) if(arch.thechar == '9' || arch.thechar == '5') goto memcase; a->type = TYPE_MEM; - bit = mkvar(r, a); + bit = mkvar(f, a); setaddrs(bit); a->type = TYPE_ADDR; ostats.naddr++; @@ -729,14 +747,16 @@ none: return zbits; } -void -prop(Reg *r, Bits ref, Bits cal) +static void +prop(Flow *f, Bits ref, Bits cal) { - Reg *r1, *r2; + Flow *f1, *f2; + Reg *r, *r1; int z, i, j; Var *v, *v1; - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { + for(f1 = f; f1 != nil; f1 = f1->p1) { + r1 = (Reg*)f1->data; for(z=0; zrefahead.b[z]; if(ref.b[z] != r1->refahead.b[z]) { @@ -749,9 +769,9 @@ prop(Reg *r, Bits ref, Bits cal) change++; } } - switch(r1->f.prog->as) { + switch(f1->prog->as) { case ACALL: - if(noreturn(r1->f.prog)) + if(noreturn(f1->prog)) break; // Mark all input variables (ivar) as used, because that's what the @@ -831,22 +851,27 @@ prop(Reg *r, Bits ref, Bits cal) r1->refbehind.b[z] = ref.b[z]; r1->calbehind.b[z] = cal.b[z]; } - if(r1->f.active) + if(f1->active) break; - r1->f.active = 1; + f1->active = 1; + } + + for(; f != f1; f = f->p1) { + r = (Reg*)f->data; + for(f2 = f->p2; f2 != nil; f2 = f2->p2link) + prop(f2, r->refbehind, r->calbehind); } - for(; r != r1; r = (Reg*)r->f.p1) - for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) - prop(r2, r->refbehind, r->calbehind); } -void -synch(Reg *r, Bits dif) +static void +synch(Flow *f, Bits dif) { + Flow *f1; Reg *r1; int z; - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { + for(f1 = f; f1 != nil; f1 = f1->s1) { + r1 = (Reg*)f1->data; for(z=0; zrefbehind.b[z] & r1->refahead.b[z])) | @@ -856,17 +881,17 @@ synch(Reg *r, Bits dif) change++; } } - if(r1->f.active) + if(f1->active) break; - r1->f.active = 1; + f1->active = 1; for(z=0; zcalbehind.b[z] & r1->calahead.b[z]); - if(r1->f.s2 != nil) - synch((Reg*)r1->f.s2, dif); + if(f1->s2 != nil) + synch(f1->s2, dif); } } -uint64 +static uint64 allreg(uint64 b, Rgn *r) { Var *v; @@ -913,61 +938,66 @@ allreg(uint64 b, Rgn *r) return 0; } -void -paint1(Reg *r, int bn) +static void +paint1(Flow *f, int bn) { - Reg *r1; + Flow *f1; + Reg *r, *r1; int z; uint64 bb; z = bn/64; bb = 1LL<<(bn%64); + r = (Reg*)f->data; if(r->act.b[z] & bb) return; for(;;) { if(!(r->refbehind.b[z] & bb)) break; - r1 = (Reg*)r->f.p1; - if(r1 == R) + f1 = f->p1; + if(f1 == nil) break; + r1 = (Reg*)f1->data; if(!(r1->refahead.b[z] & bb)) break; if(r1->act.b[z] & bb) break; + f = f1; r = r1; } if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { - change -= CLOAD * r->f.loop; + change -= CLOAD * f->loop; } for(;;) { r->act.b[z] |= bb; - if(r->f.prog->as != ANOP) { // don't give credit for NOPs + if(f->prog->as != ANOP) { // don't give credit for NOPs if(r->use1.b[z] & bb) - change += CREF * r->f.loop; + change += CREF * f->loop; if((r->use2.b[z]|r->set.b[z]) & bb) - change += CREF * r->f.loop; + change += CREF * f->loop; } if(STORE(r) & r->regdiff.b[z] & bb) { - change -= CLOAD * r->f.loop; + change -= CLOAD * f->loop; } if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint1(r1, bn); + for(f1 = f->p2; f1 != nil; f1 = f1->p2link) + if(((Reg*)f1->data)->refahead.b[z] & bb) + paint1(f1, bn); if(!(r->refahead.b[z] & bb)) break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint1(r1, bn); - r = (Reg*)r->f.s1; - if(r == R) + f1 = f->s2; + if(f1 != nil) + if(((Reg*)f1->data)->refbehind.b[z] & bb) + paint1(f1, bn); + f = f->s1; + if(f == nil) break; + r = (Reg*)f->data; if(r->act.b[z] & bb) break; if(!(r->refbehind.b[z] & bb)) @@ -975,52 +1005,57 @@ paint1(Reg *r, int bn) } } -uint64 -paint2(Reg *r, int bn, int depth) +static uint64 +paint2(Flow *f, int bn, int depth) { - Reg *r1; + Flow *f1; + Reg *r, *r1; int z; uint64 bb, vreg; z = bn/64; bb = 1LL << (bn%64); vreg = regbits; + r = (Reg*)f->data; if(!(r->act.b[z] & bb)) return vreg; for(;;) { if(!(r->refbehind.b[z] & bb)) break; - r1 = (Reg*)r->f.p1; - if(r1 == R) + f1 = f->p1; + if(f1 == nil) break; + r1 = (Reg*)f1->data; if(!(r1->refahead.b[z] & bb)) break; if(!(r1->act.b[z] & bb)) break; + f = f1; r = r1; } for(;;) { if(debug['R'] && debug['v']) - print(" paint2 %d %P\n", depth, r->f.prog); + print(" paint2 %d %P\n", depth, f->prog); r->act.b[z] &= ~bb; vreg |= r->regu; if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); + for(f1 = f->p2; f1 != nil; f1 = f1->p2link) + if(((Reg*)f1->data)->refahead.b[z] & bb) + vreg |= paint2(f1, bn, depth+1); if(!(r->refahead.b[z] & bb)) break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - r = (Reg*)r->f.s1; - if(r == R) + f1 = f->s2; + if(f1 != nil) + if(((Reg*)f1->data)->refbehind.b[z] & bb) + vreg |= paint2(f1, bn, depth+1); + f = f->s1; + if(f == nil) break; + r = (Reg*)f->data; if(!(r->act.b[z] & bb)) break; if(!(r->refbehind.b[z] & bb)) @@ -1030,36 +1065,40 @@ paint2(Reg *r, int bn, int depth) return vreg; } -void -paint3(Reg *r, int bn, uint64 rb, int rn) +static void +paint3(Flow *f, int bn, uint64 rb, int rn) { - Reg *r1; + Flow *f1; + Reg *r, *r1; Prog *p; int z; uint64 bb; z = bn/64; bb = 1LL << (bn%64); + r = (Reg*)f->data; if(r->act.b[z] & bb) return; for(;;) { if(!(r->refbehind.b[z] & bb)) break; - r1 = (Reg*)r->f.p1; - if(r1 == R) + f1 = f->p1; + if(f1 == nil) break; + r1 = (Reg*)f1->data; if(!(r1->refahead.b[z] & bb)) break; if(r1->act.b[z] & bb) break; + f = f1; r = r1; } if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) - addmove(r, bn, rn, 0); + addmove(f, bn, rn, 0); for(;;) { r->act.b[z] |= bb; - p = r->f.prog; + p = f->prog; if(r->use1.b[z] & bb) { if(debug['R'] && debug['v']) @@ -1077,23 +1116,24 @@ paint3(Reg *r, int bn, uint64 rb, int rn) } if(STORE(r) & r->regdiff.b[z] & bb) - addmove(r, bn, rn, 1); + addmove(f, bn, rn, 1); r->regu |= rb; if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint3(r1, bn, rb, rn); + for(f1 = f->p2; f1 != nil; f1 = f1->p2link) + if(((Reg*)f1->data)->refahead.b[z] & bb) + paint3(f1, bn, rb, rn); if(!(r->refahead.b[z] & bb)) break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint3(r1, bn, rb, rn); - r = (Reg*)r->f.s1; - if(r == R) + f1 = f->s2; + if(f1 != nil) + if(((Reg*)f1->data)->refbehind.b[z] & bb) + paint3(f1, bn, rb, rn); + f = f->s1; + if(f == nil) break; + r = (Reg*)f->data; if(r->act.b[z] & bb) break; if(!(r->refbehind.b[z] & bb)) @@ -1101,7 +1141,7 @@ paint3(Reg *r, int bn, uint64 rb, int rn) } } -void +static void addreg(Adr *a, int rn) { a->sym = nil; @@ -1123,7 +1163,7 @@ dumpone(Flow *f, int isreg) print("%d:%P", f->loop, f->prog); if(isreg) { - r = (Reg*)f; + r = (Reg*)f->data; for(z=0; zset.b[z] |