mirror of
https://github.com/golang/go
synced 2024-11-12 03:10:22 -07:00
cmd/gc: zero pointers on entry to function
On entry to a function, zero the results and zero the pointer section of the local variables. This is an intermediate step on the way to precise collection of Go frames. This can incur a significant (up to 30%) slowdown, but it also ensures that the garbage collector never looks at a word in a Go frame and sees a stale pointer value that could cause a space leak. (C frames and assembly frames are still possibly problematic.) This CL is required to start making collection of interface values as precise as collection of pointer values are today. Since we have to dereference the interface type to understand whether the value is a pointer, it is critical that the type field be initialized. A future CL by Carl will make the garbage collection pointer bitmaps context-sensitive. At that point it will be possible to remove most of the zeroing. The only values that will still need zeroing are values whose addresses escape the block scoping of the function but do not escape to the heap. benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4420289180 4331060459 -2.02% BenchmarkFannkuch11 3442469663 3277706251 -4.79% BenchmarkFmtFprintfEmpty 100 142 +42.00% BenchmarkFmtFprintfString 262 310 +18.32% BenchmarkFmtFprintfInt 213 281 +31.92% BenchmarkFmtFprintfIntInt 355 431 +21.41% BenchmarkFmtFprintfPrefixedInt 321 383 +19.31% BenchmarkFmtFprintfFloat 444 533 +20.05% BenchmarkFmtManyArgs 1380 1559 +12.97% BenchmarkGobDecode 10240054 11794915 +15.18% BenchmarkGobEncode 17350274 19970478 +15.10% BenchmarkGzip 455179460 460699139 +1.21% BenchmarkGunzip 114271814 119291574 +4.39% BenchmarkHTTPClientServer 89051 89894 +0.95% BenchmarkJSONEncode 40486799 52691558 +30.15% BenchmarkJSONDecode 94193361 112428781 +19.36% BenchmarkMandelbrot200 4747060 4748043 +0.02% BenchmarkGoParse 6363798 6675098 +4.89% BenchmarkRegexpMatchEasy0_32 129 171 +32.56% BenchmarkRegexpMatchEasy0_1K 365 395 +8.22% BenchmarkRegexpMatchEasy1_32 106 152 +43.40% BenchmarkRegexpMatchEasy1_1K 952 1245 +30.78% BenchmarkRegexpMatchMedium_32 198 283 +42.93% BenchmarkRegexpMatchMedium_1K 79006 101097 +27.96% BenchmarkRegexpMatchHard_32 3478 5115 +47.07% BenchmarkRegexpMatchHard_1K 110245 163582 +48.38% BenchmarkRevcomp 777384355 793270857 +2.04% BenchmarkTemplate 136713089 157093609 +14.91% BenchmarkTimeParse 1511 1761 +16.55% BenchmarkTimeFormat 535 850 +58.88% benchmark old MB/s new MB/s speedup BenchmarkGobDecode 74.95 65.07 0.87x BenchmarkGobEncode 44.24 38.43 0.87x BenchmarkGzip 42.63 42.12 0.99x BenchmarkGunzip 169.81 162.67 0.96x BenchmarkJSONEncode 47.93 36.83 0.77x BenchmarkJSONDecode 20.60 17.26 0.84x BenchmarkGoParse 9.10 8.68 0.95x BenchmarkRegexpMatchEasy0_32 247.24 186.31 0.75x BenchmarkRegexpMatchEasy0_1K 2799.20 2591.93 0.93x BenchmarkRegexpMatchEasy1_32 299.31 210.44 0.70x BenchmarkRegexpMatchEasy1_1K 1074.71 822.45 0.77x BenchmarkRegexpMatchMedium_32 5.04 3.53 0.70x BenchmarkRegexpMatchMedium_1K 12.96 10.13 0.78x BenchmarkRegexpMatchHard_32 9.20 6.26 0.68x BenchmarkRegexpMatchHard_1K 9.29 6.26 0.67x BenchmarkRevcomp 326.95 320.40 0.98x BenchmarkTemplate 14.19 12.35 0.87x R=cshapiro CC=golang-dev https://golang.org/cl/12616045
This commit is contained in:
parent
207289660a
commit
7910cd68b5
@ -9,9 +9,15 @@
|
||||
#include "gg.h"
|
||||
#include "opt.h"
|
||||
|
||||
static Prog* appendp(Prog*, int, int, int, int32, int, int, int32);
|
||||
|
||||
void
|
||||
defframe(Prog *ptxt)
|
||||
defframe(Prog *ptxt, Bvec *bv)
|
||||
{
|
||||
int i, first;
|
||||
uint32 frame;
|
||||
Prog *p, *p1;
|
||||
|
||||
// fill in argument size
|
||||
ptxt->to.type = D_CONST2;
|
||||
ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
|
||||
@ -19,8 +25,60 @@ defframe(Prog *ptxt)
|
||||
// fill in final stack size
|
||||
if(stksize > maxstksize)
|
||||
maxstksize = stksize;
|
||||
ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
|
||||
frame = rnd(maxstksize+maxarg, widthptr);
|
||||
ptxt->to.offset = frame;
|
||||
maxstksize = 0;
|
||||
|
||||
// insert code to clear pointered part of the frame,
|
||||
// so that garbage collector only sees initialized values
|
||||
// when it looks for pointers.
|
||||
p = ptxt;
|
||||
while(p->link->as == AFUNCDATA || p->link->as == APCDATA || p->link->as == ATYPE)
|
||||
p = p->link;
|
||||
if(stkptrsize >= 8*widthptr) {
|
||||
p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0);
|
||||
p = appendp(p, AADD, D_CONST, NREG, 4+frame-stkptrsize, D_REG, 1, 0);
|
||||
p->reg = REGSP;
|
||||
p = appendp(p, AADD, D_CONST, NREG, stkptrsize, D_REG, 2, 0);
|
||||
p->reg = 1;
|
||||
p1 = p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, 1, 4);
|
||||
p->scond |= C_PBIT;
|
||||
p = appendp(p, ACMP, D_REG, 1, 0, D_NONE, 0, 0);
|
||||
p->reg = 2;
|
||||
p = appendp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0);
|
||||
patch(p, p1);
|
||||
} else {
|
||||
first = 1;
|
||||
for(i=0; i<stkptrsize; i+=widthptr) {
|
||||
if(bvget(bv, i/widthptr)) {
|
||||
if(first) {
|
||||
p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0);
|
||||
first = 0;
|
||||
}
|
||||
p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, REGSP, 4+frame-stkptrsize+i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static Prog*
|
||||
appendp(Prog *p, int as, int ftype, int freg, int32 foffset, int ttype, int treg, int32 toffset)
|
||||
{
|
||||
Prog *q;
|
||||
|
||||
q = mal(sizeof(*q));
|
||||
clearp(q);
|
||||
q->as = as;
|
||||
q->lineno = p->lineno;
|
||||
q->from.type = ftype;
|
||||
q->from.reg = freg;
|
||||
q->from.offset = foffset;
|
||||
q->to.type = ttype;
|
||||
q->to.reg = treg;
|
||||
q->to.offset = toffset;
|
||||
q->link = p->link;
|
||||
p->link = q;
|
||||
return q;
|
||||
}
|
||||
|
||||
// Sweep the prog list to mark any used nodes.
|
||||
|
@ -9,15 +9,56 @@
|
||||
#include "gg.h"
|
||||
#include "opt.h"
|
||||
|
||||
static Prog* appendp(Prog*, int, int, vlong, int, vlong);
|
||||
|
||||
void
|
||||
defframe(Prog *ptxt)
|
||||
defframe(Prog *ptxt, Bvec *bv)
|
||||
{
|
||||
int i;
|
||||
uint32 frame;
|
||||
Prog *p;
|
||||
|
||||
// fill in argument size
|
||||
ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
|
||||
|
||||
// fill in final stack size
|
||||
ptxt->to.offset <<= 32;
|
||||
ptxt->to.offset |= rnd(stksize+maxarg, widthptr);
|
||||
frame = rnd(stksize+maxarg, widthptr);
|
||||
ptxt->to.offset |= frame;
|
||||
|
||||
// insert code to clear pointered part of the frame,
|
||||
// so that garbage collector only sees initialized values
|
||||
// when it looks for pointers.
|
||||
p = ptxt;
|
||||
if(stkptrsize >= 8*widthptr) {
|
||||
p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0);
|
||||
p = appendp(p, AMOVQ, D_CONST, stkptrsize/widthptr, D_CX, 0);
|
||||
p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkptrsize, D_DI, 0);
|
||||
p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
|
||||
appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
|
||||
} else {
|
||||
for(i=0; i<stkptrsize; i+=widthptr)
|
||||
if(bvget(bv, i/widthptr))
|
||||
p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkptrsize+i);
|
||||
}
|
||||
}
|
||||
|
||||
static Prog*
|
||||
appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
|
||||
{
|
||||
Prog *q;
|
||||
|
||||
q = mal(sizeof(*q));
|
||||
clearp(q);
|
||||
q->as = as;
|
||||
q->lineno = p->lineno;
|
||||
q->from.type = ftype;
|
||||
q->from.offset = foffset;
|
||||
q->to.type = ttype;
|
||||
q->to.offset = toffset;
|
||||
q->link = p->link;
|
||||
p->link = q;
|
||||
return q;
|
||||
}
|
||||
|
||||
// Sweep the prog list to mark any used nodes.
|
||||
|
@ -9,17 +9,58 @@
|
||||
#include "gg.h"
|
||||
#include "opt.h"
|
||||
|
||||
static Prog* appendp(Prog*, int, int, int32, int, int32);
|
||||
|
||||
void
|
||||
defframe(Prog *ptxt)
|
||||
defframe(Prog *ptxt, Bvec *bv)
|
||||
{
|
||||
uint32 frame;
|
||||
Prog *p;
|
||||
int i;
|
||||
|
||||
// fill in argument size
|
||||
ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
|
||||
|
||||
// fill in final stack size
|
||||
if(stksize > maxstksize)
|
||||
maxstksize = stksize;
|
||||
ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
|
||||
frame = rnd(maxstksize+maxarg, widthptr);
|
||||
ptxt->to.offset = frame;
|
||||
maxstksize = 0;
|
||||
|
||||
// insert code to clear pointered part of the frame,
|
||||
// so that garbage collector only sees initialized values
|
||||
// when it looks for pointers.
|
||||
p = ptxt;
|
||||
if(stkptrsize >= 8*widthptr) {
|
||||
p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0);
|
||||
p = appendp(p, AMOVL, D_CONST, stkptrsize/widthptr, D_CX, 0);
|
||||
p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkptrsize, D_DI, 0);
|
||||
p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
|
||||
appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
|
||||
} else {
|
||||
for(i=0; i<stkptrsize; i+=widthptr)
|
||||
if(bvget(bv, i/widthptr))
|
||||
p = appendp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame-stkptrsize+i);
|
||||
}
|
||||
}
|
||||
|
||||
static Prog*
|
||||
appendp(Prog *p, int as, int ftype, int32 foffset, int ttype, int32 toffset)
|
||||
{
|
||||
Prog *q;
|
||||
|
||||
q = mal(sizeof(*q));
|
||||
clearp(q);
|
||||
q->as = as;
|
||||
q->lineno = p->lineno;
|
||||
q->from.type = ftype;
|
||||
q->from.offset = foffset;
|
||||
q->to.type = ttype;
|
||||
q->to.offset = toffset;
|
||||
q->link = p->link;
|
||||
p->link = q;
|
||||
return q;
|
||||
}
|
||||
|
||||
// Sweep the prog list to mark any used nodes.
|
||||
|
@ -1458,7 +1458,7 @@ void cgen_callinter(Node *n, Node *res, int proc);
|
||||
void cgen_ret(Node *n);
|
||||
void clearfat(Node *n);
|
||||
void compile(Node*);
|
||||
void defframe(Prog*);
|
||||
void defframe(Prog*, Bvec*);
|
||||
int dgostringptr(Sym*, int off, char *str);
|
||||
int dgostrlitptr(Sym*, int off, Strlit*);
|
||||
int dstringptr(Sym *s, int off, char *str);
|
||||
|
@ -12,11 +12,12 @@ enum { BitsPerPointer = 2 };
|
||||
|
||||
static void allocauto(Prog* p);
|
||||
static void dumpgcargs(Node*, Sym*);
|
||||
static void dumpgclocals(Node*, Sym*);
|
||||
static Bvec* dumpgclocals(Node*, Sym*);
|
||||
|
||||
void
|
||||
compile(Node *fn)
|
||||
{
|
||||
Bvec *bv;
|
||||
Plist *pl;
|
||||
Node nod1, *n, *gcargsnod, *gclocalsnod;
|
||||
Prog *ptxt, *p, *p1;
|
||||
@ -179,15 +180,16 @@ compile(Node *fn)
|
||||
goto ret;
|
||||
}
|
||||
|
||||
defframe(ptxt);
|
||||
// Emit garbage collection symbols.
|
||||
dumpgcargs(fn, gcargssym);
|
||||
bv = dumpgclocals(curfn, gclocalssym);
|
||||
|
||||
defframe(ptxt, bv);
|
||||
free(bv);
|
||||
|
||||
if(0)
|
||||
frame(0);
|
||||
|
||||
// Emit garbage collection symbols.
|
||||
dumpgcargs(fn, gcargssym);
|
||||
dumpgclocals(curfn, gclocalssym);
|
||||
|
||||
ret:
|
||||
lineno = lno;
|
||||
}
|
||||
@ -329,8 +331,8 @@ dumpgcargs(Node *fn, Sym *sym)
|
||||
|
||||
// Compute a bit vector to describes the pointer containing locations
|
||||
// in local variables and dumps the bitvector length and data out to
|
||||
// the provided symbol.
|
||||
static void
|
||||
// the provided symbol. Returns the vector for use and freeing by caller.
|
||||
static Bvec*
|
||||
dumpgclocals(Node* fn, Sym *sym)
|
||||
{
|
||||
Bvec *bv;
|
||||
@ -354,8 +356,8 @@ dumpgclocals(Node* fn, Sym *sym)
|
||||
for(i = 0; i < bv->n; i += 32) {
|
||||
off = duint32(sym, off, bv->b[i/32]);
|
||||
}
|
||||
free(bv);
|
||||
ggloblsym(sym, off, 0, 1);
|
||||
return bv;
|
||||
}
|
||||
|
||||
// Sort the list of stack variables. Autos after anything else,
|
||||
|
@ -2309,7 +2309,9 @@ paramstoheap(Type **argin, int out)
|
||||
v = t->nname;
|
||||
if(v && v->sym && v->sym->name[0] == '~')
|
||||
v = N;
|
||||
if(v == N && out && hasdefer) {
|
||||
// The garbage collector assumes results are always live,
|
||||
// so zero them always (1 ||).
|
||||
if(out && (1 || (v == N && hasdefer))) {
|
||||
// Defer might stop a panic and show the
|
||||
// return values as they exist at the time of panic.
|
||||
// Make sure to zero them on entry to the function.
|
||||
|
Loading…
Reference in New Issue
Block a user