1
0
mirror of https://github.com/golang/go synced 2024-11-12 07:00:21 -07:00

cmd/gc: address 1½ liveness bottlenecks

As much as 7x speedup on some programs, cuts all.bash time by 20%.

Change splicebefore function from O(n) to O(1).
The approach was suggested by Carl during the code's review
but apparently did not make it into the tree.
It makes a huge difference on huge programs.

Make twobitwalktype1 slightly faster by using & instead of %.
Really it needs to be cached; left a note to that effect.
(Not a complete fix, hence the ½.)

big.go (output of test/chan/select5.go)
 47.53u   0.50s  48.14r before this CL
  7.09u   0.47s   7.59r with splicebefore change (6.7x speedup)
  6.15u   0.42s   6.59r with twobitwalktype1 change (1.15x speedup; total 7.7x)

slow.go (variant of program in go.text, by mpvl)
 77.75u   2.11s  80.03r before this CL
 24.40u   1.97s  26.44r with splicebefore change (3.2x speedup)
 18.12u   2.19s  20.38r with twobitwalktype1 change (1.35x speedup; total 4.3x)

test/run
150.63u  49.57s  81.08r before this CL
 88.01u  45.60s  46.65r after this CL (1.7x speedup)

all.bash
369.70u 115.64s 256.21r before this CL
298.52u 110.35s 214.67r after this CL (1.24x speedup)

The test programs are at
https://rsc.googlecode.com/hg/testdata/big.go (36k lines, 276kB)
https://rsc.googlecode.com/hg/testdata/slow.go (7k lines, 352kB)

R=golang-codereviews, gobot, r
CC=cshapiro, golang-codereviews
https://golang.org/cl/43210045
This commit is contained in:
Russ Cox 2013-12-20 14:24:48 -05:00
parent 4acb70d377
commit 0e97f4187e

View File

@ -149,29 +149,43 @@ addedge(BasicBlock *from, BasicBlock *to)
arrayadd(to->pred, &from);
}
// Inserts a new instruction ahead of an existing instruction in the instruction
// Inserts prev before curr in the instruction
// stream. Any control flow, such as branches or fall throughs, that target the
// existing instruction are adjusted to target the new instruction.
static void
splicebefore(Liveness *lv, BasicBlock *bb, Prog *prev, Prog *curr)
{
Prog *p;
Prog *next, tmp;
prev->opt = curr->opt;
curr->opt = prev;
prev->link = curr;
if(prev->opt != nil)
((Prog*)prev->opt)->link = prev;
else
bb->first = prev;
for(p = lv->ptxt; p != nil; p = p->link) {
if(p != prev) {
if(p->link == curr)
p->link = prev;
if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch == curr)
p->to.u.branch = prev;
}
}
USED(lv);
// There may be other instructions pointing at curr,
// and we want them to now point at prev. Instead of
// trying to find all such instructions, swap the contents
// so that the problem becomes inserting next after curr.
// The "opt" field is the backward link in the linked list.
// Overwrite curr's data with prev, but keep the list links.
tmp = *curr;
*curr = *prev;
curr->opt = tmp.opt;
curr->link = tmp.link;
// Overwrite prev (now next) with curr's old data.
next = prev;
*next = tmp;
next->opt = nil;
next->link = nil;
// Now insert next after curr.
next->link = curr->link;
next->opt = curr;
curr->link = next;
if(next->link && next->link->opt == curr)
next->link->opt = next;
if(bb->last == curr)
bb->last = next;
}
// A pretty printer for basic blocks.
@ -949,6 +963,10 @@ checkptxt(Node *fn, Prog *firstp)
}
}
// NOTE: The bitmap for a specific type t should be cached in t after the first run
// and then simply copied into bv at the correct offset on future calls with
// the same type t. On https://rsc.googlecode.com/hg/testdata/slow.go, twobitwalktype1
// accounts for 40% of the 6g execution time.
static void
twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv)
{
@ -957,7 +975,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv)
vlong o;
Type *t1;
if(t->align > 0 && (*xoffset % t->align) != 0)
if(t->align > 0 && (*xoffset & (t->align - 1)) != 0)
fatal("twobitwalktype1: invalid initial alignment, %T", t);
switch(t->etype) {
@ -986,7 +1004,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv)
case TFUNC:
case TCHAN:
case TMAP:
if(*xoffset % widthptr != 0)
if((*xoffset & (widthptr-1)) != 0)
fatal("twobitwalktype1: invalid alignment, %T", t);
bvset(bv, (*xoffset / widthptr) * BitsPerPointer);
*xoffset += t->width;
@ -994,7 +1012,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv)
case TSTRING:
// struct { byte *str; intgo len; }
if(*xoffset % widthptr != 0)
if((*xoffset & (widthptr-1)) != 0)
fatal("twobitwalktype1: invalid alignment, %T", t);
bvset(bv, (*xoffset / widthptr) * BitsPerPointer);
*xoffset += t->width;
@ -1004,7 +1022,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv)
// struct { Itab *tab; union { void *ptr, uintptr val } data; }
// or, when isnilinter(t)==true:
// struct { Type *type; union { void *ptr, uintptr val } data; }
if(*xoffset % widthptr != 0)
if((*xoffset & (widthptr-1)) != 0)
fatal("twobitwalktype1: invalid alignment, %T", t);
bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 1);
if(isnilinter(t))
@ -1019,7 +1037,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv)
fatal("twobitwalktype1: invalid bound, %T", t);
if(isslice(t)) {
// struct { byte *array; uintgo len; uintgo cap; }
if(*xoffset % widthptr != 0)
if((*xoffset & (widthptr-1)) != 0)
fatal("twobitwalktype1: invalid TARRAY alignment, %T", t);
bvset(bv, (*xoffset / widthptr) * BitsPerPointer);
*xoffset += t->width;
@ -1285,7 +1303,7 @@ livenessepilogue(Liveness *lv)
Bvec *varkill;
Bvec *args;
Bvec *locals;
Prog *p;
Prog *p, *next;
int32 i;
int32 nvars;
int32 pos;
@ -1329,7 +1347,8 @@ livenessepilogue(Liveness *lv)
fatal("livenessepilogue");
}
for(p = bb->last; p != nil; p = p->opt) {
for(p = bb->last; p != nil; p = next) {
next = p->opt; // splicebefore modifies p->opt
// Propagate liveness information
progeffects(p, lv->vars, uevar, varkill);
bvcopy(liveout, livein);