From 7910cd68b55c445f2babae683aea97ba20daffbb Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 9 Aug 2013 23:10:58 -0400 Subject: [PATCH] cmd/gc: zero pointers on entry to function On entry to a function, zero the results and zero the pointer section of the local variables. This is an intermediate step on the way to precise collection of Go frames. This can incur a significant (up to 30%) slowdown, but it also ensures that the garbage collector never looks at a word in a Go frame and sees a stale pointer value that could cause a space leak. (C frames and assembly frames are still possibly problematic.) This CL is required to start making collection of interface values as precise as collection of pointer values are today. Since we have to dereference the interface type to understand whether the value is a pointer, it is critical that the type field be initialized. A future CL by Carl will make the garbage collection pointer bitmaps context-sensitive. At that point it will be possible to remove most of the zeroing. The only values that will still need zeroing are values whose addresses escape the block scoping of the function but do not escape to the heap. benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4420289180 4331060459 -2.02% BenchmarkFannkuch11 3442469663 3277706251 -4.79% BenchmarkFmtFprintfEmpty 100 142 +42.00% BenchmarkFmtFprintfString 262 310 +18.32% BenchmarkFmtFprintfInt 213 281 +31.92% BenchmarkFmtFprintfIntInt 355 431 +21.41% BenchmarkFmtFprintfPrefixedInt 321 383 +19.31% BenchmarkFmtFprintfFloat 444 533 +20.05% BenchmarkFmtManyArgs 1380 1559 +12.97% BenchmarkGobDecode 10240054 11794915 +15.18% BenchmarkGobEncode 17350274 19970478 +15.10% BenchmarkGzip 455179460 460699139 +1.21% BenchmarkGunzip 114271814 119291574 +4.39% BenchmarkHTTPClientServer 89051 89894 +0.95% BenchmarkJSONEncode 40486799 52691558 +30.15% BenchmarkJSONDecode 94193361 112428781 +19.36% BenchmarkMandelbrot200 4747060 4748043 +0.02% BenchmarkGoParse 6363798 6675098 +4.89% BenchmarkRegexpMatchEasy0_32 129 171 +32.56% BenchmarkRegexpMatchEasy0_1K 365 395 +8.22% BenchmarkRegexpMatchEasy1_32 106 152 +43.40% BenchmarkRegexpMatchEasy1_1K 952 1245 +30.78% BenchmarkRegexpMatchMedium_32 198 283 +42.93% BenchmarkRegexpMatchMedium_1K 79006 101097 +27.96% BenchmarkRegexpMatchHard_32 3478 5115 +47.07% BenchmarkRegexpMatchHard_1K 110245 163582 +48.38% BenchmarkRevcomp 777384355 793270857 +2.04% BenchmarkTemplate 136713089 157093609 +14.91% BenchmarkTimeParse 1511 1761 +16.55% BenchmarkTimeFormat 535 850 +58.88% benchmark old MB/s new MB/s speedup BenchmarkGobDecode 74.95 65.07 0.87x BenchmarkGobEncode 44.24 38.43 0.87x BenchmarkGzip 42.63 42.12 0.99x BenchmarkGunzip 169.81 162.67 0.96x BenchmarkJSONEncode 47.93 36.83 0.77x BenchmarkJSONDecode 20.60 17.26 0.84x BenchmarkGoParse 9.10 8.68 0.95x BenchmarkRegexpMatchEasy0_32 247.24 186.31 0.75x BenchmarkRegexpMatchEasy0_1K 2799.20 2591.93 0.93x BenchmarkRegexpMatchEasy1_32 299.31 210.44 0.70x BenchmarkRegexpMatchEasy1_1K 1074.71 822.45 0.77x BenchmarkRegexpMatchMedium_32 5.04 3.53 0.70x BenchmarkRegexpMatchMedium_1K 12.96 10.13 0.78x BenchmarkRegexpMatchHard_32 9.20 6.26 0.68x BenchmarkRegexpMatchHard_1K 9.29 6.26 0.67x BenchmarkRevcomp 326.95 320.40 0.98x BenchmarkTemplate 14.19 12.35 0.87x R=cshapiro CC=golang-dev https://golang.org/cl/12616045 --- src/cmd/5g/ggen.c | 62 +++++++++++++++++++++++++++++++++++++++++++++-- src/cmd/6g/ggen.c | 45 ++++++++++++++++++++++++++++++++-- src/cmd/8g/ggen.c | 45 ++++++++++++++++++++++++++++++++-- src/cmd/gc/go.h | 2 +- src/cmd/gc/pgen.c | 20 ++++++++------- src/cmd/gc/walk.c | 4 ++- 6 files changed, 161 insertions(+), 17 deletions(-) diff --git a/src/cmd/5g/ggen.c b/src/cmd/5g/ggen.c index 288c38588d..8946a9e51e 100644 --- a/src/cmd/5g/ggen.c +++ b/src/cmd/5g/ggen.c @@ -9,9 +9,15 @@ #include "gg.h" #include "opt.h" +static Prog* appendp(Prog*, int, int, int, int32, int, int, int32); + void -defframe(Prog *ptxt) +defframe(Prog *ptxt, Bvec *bv) { + int i, first; + uint32 frame; + Prog *p, *p1; + // fill in argument size ptxt->to.type = D_CONST2; ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); @@ -19,8 +25,60 @@ defframe(Prog *ptxt) // fill in final stack size if(stksize > maxstksize) maxstksize = stksize; - ptxt->to.offset = rnd(maxstksize+maxarg, widthptr); + frame = rnd(maxstksize+maxarg, widthptr); + ptxt->to.offset = frame; maxstksize = 0; + + // insert code to clear pointered part of the frame, + // so that garbage collector only sees initialized values + // when it looks for pointers. + p = ptxt; + while(p->link->as == AFUNCDATA || p->link->as == APCDATA || p->link->as == ATYPE) + p = p->link; + if(stkptrsize >= 8*widthptr) { + p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0); + p = appendp(p, AADD, D_CONST, NREG, 4+frame-stkptrsize, D_REG, 1, 0); + p->reg = REGSP; + p = appendp(p, AADD, D_CONST, NREG, stkptrsize, D_REG, 2, 0); + p->reg = 1; + p1 = p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, 1, 4); + p->scond |= C_PBIT; + p = appendp(p, ACMP, D_REG, 1, 0, D_NONE, 0, 0); + p->reg = 2; + p = appendp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0); + patch(p, p1); + } else { + first = 1; + for(i=0; ias = as; + q->lineno = p->lineno; + q->from.type = ftype; + q->from.reg = freg; + q->from.offset = foffset; + q->to.type = ttype; + q->to.reg = treg; + q->to.offset = toffset; + q->link = p->link; + p->link = q; + return q; } // Sweep the prog list to mark any used nodes. diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c index ec558f2e61..b0ef88cb95 100644 --- a/src/cmd/6g/ggen.c +++ b/src/cmd/6g/ggen.c @@ -9,15 +9,56 @@ #include "gg.h" #include "opt.h" +static Prog* appendp(Prog*, int, int, vlong, int, vlong); + void -defframe(Prog *ptxt) +defframe(Prog *ptxt, Bvec *bv) { + int i; + uint32 frame; + Prog *p; + // fill in argument size ptxt->to.offset = rnd(curfn->type->argwid, widthptr); // fill in final stack size ptxt->to.offset <<= 32; - ptxt->to.offset |= rnd(stksize+maxarg, widthptr); + frame = rnd(stksize+maxarg, widthptr); + ptxt->to.offset |= frame; + + // insert code to clear pointered part of the frame, + // so that garbage collector only sees initialized values + // when it looks for pointers. + p = ptxt; + if(stkptrsize >= 8*widthptr) { + p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0); + p = appendp(p, AMOVQ, D_CONST, stkptrsize/widthptr, D_CX, 0); + p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkptrsize, D_DI, 0); + p = appendp(p, AREP, D_NONE, 0, D_NONE, 0); + appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0); + } else { + for(i=0; ias = as; + q->lineno = p->lineno; + q->from.type = ftype; + q->from.offset = foffset; + q->to.type = ttype; + q->to.offset = toffset; + q->link = p->link; + p->link = q; + return q; } // Sweep the prog list to mark any used nodes. diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c index 1677d9529b..cbe7a5e55e 100644 --- a/src/cmd/8g/ggen.c +++ b/src/cmd/8g/ggen.c @@ -9,17 +9,58 @@ #include "gg.h" #include "opt.h" +static Prog* appendp(Prog*, int, int, int32, int, int32); + void -defframe(Prog *ptxt) +defframe(Prog *ptxt, Bvec *bv) { + uint32 frame; + Prog *p; + int i; + // fill in argument size ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); // fill in final stack size if(stksize > maxstksize) maxstksize = stksize; - ptxt->to.offset = rnd(maxstksize+maxarg, widthptr); + frame = rnd(maxstksize+maxarg, widthptr); + ptxt->to.offset = frame; maxstksize = 0; + + // insert code to clear pointered part of the frame, + // so that garbage collector only sees initialized values + // when it looks for pointers. + p = ptxt; + if(stkptrsize >= 8*widthptr) { + p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0); + p = appendp(p, AMOVL, D_CONST, stkptrsize/widthptr, D_CX, 0); + p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkptrsize, D_DI, 0); + p = appendp(p, AREP, D_NONE, 0, D_NONE, 0); + appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0); + } else { + for(i=0; ias = as; + q->lineno = p->lineno; + q->from.type = ftype; + q->from.offset = foffset; + q->to.type = ttype; + q->to.offset = toffset; + q->link = p->link; + p->link = q; + return q; } // Sweep the prog list to mark any used nodes. diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index 05c864a1c5..6679fa855d 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -1458,7 +1458,7 @@ void cgen_callinter(Node *n, Node *res, int proc); void cgen_ret(Node *n); void clearfat(Node *n); void compile(Node*); -void defframe(Prog*); +void defframe(Prog*, Bvec*); int dgostringptr(Sym*, int off, char *str); int dgostrlitptr(Sym*, int off, Strlit*); int dstringptr(Sym *s, int off, char *str); diff --git a/src/cmd/gc/pgen.c b/src/cmd/gc/pgen.c index 6595abce42..d465ab5c3c 100644 --- a/src/cmd/gc/pgen.c +++ b/src/cmd/gc/pgen.c @@ -12,11 +12,12 @@ enum { BitsPerPointer = 2 }; static void allocauto(Prog* p); static void dumpgcargs(Node*, Sym*); -static void dumpgclocals(Node*, Sym*); +static Bvec* dumpgclocals(Node*, Sym*); void compile(Node *fn) { + Bvec *bv; Plist *pl; Node nod1, *n, *gcargsnod, *gclocalsnod; Prog *ptxt, *p, *p1; @@ -179,15 +180,16 @@ compile(Node *fn) goto ret; } - defframe(ptxt); + // Emit garbage collection symbols. + dumpgcargs(fn, gcargssym); + bv = dumpgclocals(curfn, gclocalssym); + + defframe(ptxt, bv); + free(bv); if(0) frame(0); - // Emit garbage collection symbols. - dumpgcargs(fn, gcargssym); - dumpgclocals(curfn, gclocalssym); - ret: lineno = lno; } @@ -329,8 +331,8 @@ dumpgcargs(Node *fn, Sym *sym) // Compute a bit vector to describes the pointer containing locations // in local variables and dumps the bitvector length and data out to -// the provided symbol. -static void +// the provided symbol. Returns the vector for use and freeing by caller. +static Bvec* dumpgclocals(Node* fn, Sym *sym) { Bvec *bv; @@ -354,8 +356,8 @@ dumpgclocals(Node* fn, Sym *sym) for(i = 0; i < bv->n; i += 32) { off = duint32(sym, off, bv->b[i/32]); } - free(bv); ggloblsym(sym, off, 0, 1); + return bv; } // Sort the list of stack variables. Autos after anything else, diff --git a/src/cmd/gc/walk.c b/src/cmd/gc/walk.c index 033b041f3c..7e5f678028 100644 --- a/src/cmd/gc/walk.c +++ b/src/cmd/gc/walk.c @@ -2309,7 +2309,9 @@ paramstoheap(Type **argin, int out) v = t->nname; if(v && v->sym && v->sym->name[0] == '~') v = N; - if(v == N && out && hasdefer) { + // The garbage collector assumes results are always live, + // so zero them always (1 ||). + if(out && (1 || (v == N && hasdefer))) { // Defer might stop a panic and show the // return values as they exist at the time of panic. // Make sure to zero them on entry to the function.