From 0e97f4187e4326e6d323ce07610efdae13e7fcc8 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 20 Dec 2013 14:24:48 -0500 Subject: [PATCH] =?UTF-8?q?cmd/gc:=20address=201=C2=BD=20liveness=20bottle?= =?UTF-8?q?necks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As much as 7x speedup on some programs, cuts all.bash time by 20%. Change splicebefore function from O(n) to O(1). The approach was suggested by Carl during the code's review but apparently did not make it into the tree. It makes a huge difference on huge programs. Make twobitwalktype1 slightly faster by using & instead of %. Really it needs to be cached; left a note to that effect. (Not a complete fix, hence the ½.) big.go (output of test/chan/select5.go) 47.53u 0.50s 48.14r before this CL 7.09u 0.47s 7.59r with splicebefore change (6.7x speedup) 6.15u 0.42s 6.59r with twobitwalktype1 change (1.15x speedup; total 7.7x) slow.go (variant of program in go.text, by mpvl) 77.75u 2.11s 80.03r before this CL 24.40u 1.97s 26.44r with splicebefore change (3.2x speedup) 18.12u 2.19s 20.38r with twobitwalktype1 change (1.35x speedup; total 4.3x) test/run 150.63u 49.57s 81.08r before this CL 88.01u 45.60s 46.65r after this CL (1.7x speedup) all.bash 369.70u 115.64s 256.21r before this CL 298.52u 110.35s 214.67r after this CL (1.24x speedup) The test programs are at https://rsc.googlecode.com/hg/testdata/big.go (36k lines, 276kB) https://rsc.googlecode.com/hg/testdata/slow.go (7k lines, 352kB) R=golang-codereviews, gobot, r CC=cshapiro, golang-codereviews https://golang.org/cl/43210045 --- src/cmd/gc/plive.c | 67 +++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/src/cmd/gc/plive.c b/src/cmd/gc/plive.c index fd2888e4d2..23f39c5c51 100644 --- a/src/cmd/gc/plive.c +++ b/src/cmd/gc/plive.c @@ -149,29 +149,43 @@ addedge(BasicBlock *from, BasicBlock *to) arrayadd(to->pred, &from); } -// Inserts a new instruction ahead of an existing instruction in the instruction +// Inserts prev before curr in the instruction // stream. Any control flow, such as branches or fall throughs, that target the // existing instruction are adjusted to target the new instruction. static void splicebefore(Liveness *lv, BasicBlock *bb, Prog *prev, Prog *curr) { - Prog *p; + Prog *next, tmp; - prev->opt = curr->opt; - curr->opt = prev; - prev->link = curr; - if(prev->opt != nil) - ((Prog*)prev->opt)->link = prev; - else - bb->first = prev; - for(p = lv->ptxt; p != nil; p = p->link) { - if(p != prev) { - if(p->link == curr) - p->link = prev; - if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch == curr) - p->to.u.branch = prev; - } - } + USED(lv); + + // There may be other instructions pointing at curr, + // and we want them to now point at prev. Instead of + // trying to find all such instructions, swap the contents + // so that the problem becomes inserting next after curr. + // The "opt" field is the backward link in the linked list. + + // Overwrite curr's data with prev, but keep the list links. + tmp = *curr; + *curr = *prev; + curr->opt = tmp.opt; + curr->link = tmp.link; + + // Overwrite prev (now next) with curr's old data. + next = prev; + *next = tmp; + next->opt = nil; + next->link = nil; + + // Now insert next after curr. + next->link = curr->link; + next->opt = curr; + curr->link = next; + if(next->link && next->link->opt == curr) + next->link->opt = next; + + if(bb->last == curr) + bb->last = next; } // A pretty printer for basic blocks. @@ -949,6 +963,10 @@ checkptxt(Node *fn, Prog *firstp) } } +// NOTE: The bitmap for a specific type t should be cached in t after the first run +// and then simply copied into bv at the correct offset on future calls with +// the same type t. On https://rsc.googlecode.com/hg/testdata/slow.go, twobitwalktype1 +// accounts for 40% of the 6g execution time. static void twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) { @@ -957,7 +975,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) vlong o; Type *t1; - if(t->align > 0 && (*xoffset % t->align) != 0) + if(t->align > 0 && (*xoffset & (t->align - 1)) != 0) fatal("twobitwalktype1: invalid initial alignment, %T", t); switch(t->etype) { @@ -986,7 +1004,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) case TFUNC: case TCHAN: case TMAP: - if(*xoffset % widthptr != 0) + if((*xoffset & (widthptr-1)) != 0) fatal("twobitwalktype1: invalid alignment, %T", t); bvset(bv, (*xoffset / widthptr) * BitsPerPointer); *xoffset += t->width; @@ -994,7 +1012,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) case TSTRING: // struct { byte *str; intgo len; } - if(*xoffset % widthptr != 0) + if((*xoffset & (widthptr-1)) != 0) fatal("twobitwalktype1: invalid alignment, %T", t); bvset(bv, (*xoffset / widthptr) * BitsPerPointer); *xoffset += t->width; @@ -1004,7 +1022,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) // struct { Itab *tab; union { void *ptr, uintptr val } data; } // or, when isnilinter(t)==true: // struct { Type *type; union { void *ptr, uintptr val } data; } - if(*xoffset % widthptr != 0) + if((*xoffset & (widthptr-1)) != 0) fatal("twobitwalktype1: invalid alignment, %T", t); bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 1); if(isnilinter(t)) @@ -1019,7 +1037,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) fatal("twobitwalktype1: invalid bound, %T", t); if(isslice(t)) { // struct { byte *array; uintgo len; uintgo cap; } - if(*xoffset % widthptr != 0) + if((*xoffset & (widthptr-1)) != 0) fatal("twobitwalktype1: invalid TARRAY alignment, %T", t); bvset(bv, (*xoffset / widthptr) * BitsPerPointer); *xoffset += t->width; @@ -1285,7 +1303,7 @@ livenessepilogue(Liveness *lv) Bvec *varkill; Bvec *args; Bvec *locals; - Prog *p; + Prog *p, *next; int32 i; int32 nvars; int32 pos; @@ -1329,7 +1347,8 @@ livenessepilogue(Liveness *lv) fatal("livenessepilogue"); } - for(p = bb->last; p != nil; p = p->opt) { + for(p = bb->last; p != nil; p = next) { + next = p->opt; // splicebefore modifies p->opt // Propagate liveness information progeffects(p, lv->vars, uevar, varkill); bvcopy(liveout, livein);