1
0
mirror of https://github.com/golang/go synced 2024-11-25 02:57:57 -07:00

8g: compute register liveness during regopt

Input code like

0000 (x.go:2) TEXT    main+0(SB),$36-0
0001 (x.go:3) MOVL    $5,i+-8(SP)
0002 (x.go:3) MOVL    $0,i+-4(SP)
0003 (x.go:4) MOVL    $1,BX
0004 (x.go:4) MOVL    i+-8(SP),AX
0005 (x.go:4) MOVL    i+-4(SP),DX
0006 (x.go:4) MOVL    AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL    DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL    autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL    autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE     ,13
0011 (x.go:4) CMPL    CX,$32
0012 (x.go:4) JCS     ,14
0013 (x.go:4) MOVL    $0,BX
0014 (x.go:4) SHLL    CX,BX
0015 (x.go:4) MOVL    BX,x+-12(SP)
0016 (x.go:5) MOVL    x+-12(SP),AX
0017 (x.go:5) CDQ     ,
0018 (x.go:5) MOVL    AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL    DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL    autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL    autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL    AX,(SP)
0023 (x.go:5) MOVL    DX,4(SP)
0024 (x.go:5) CALL    ,runtime.printint+0(SB)
0025 (x.go:5) CALL    ,runtime.printnl+0(SB)
0026 (x.go:6) RET     ,

is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.

The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.

The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information.  With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.

This is not specific to the 386, but it only happens in
generated code of the form

        load R1
        ...
        load var into R2
        ...
        store R2 back into var
        ...
        use R1

and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores.  Even so, this may not be the case everywhere,
so the change is worth making in all three.

R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
This commit is contained in:
Russ Cox 2011-06-03 14:10:39 -04:00
parent 79b397b27e
commit 84f291b1bd
4 changed files with 398 additions and 49 deletions

View File

@ -32,6 +32,8 @@
#include "gg.h"
#include "opt.h"
#define NREGVAR 24
#define REGBITS ((uint32)0xffffff)
#define P2R(p) (Reg*)(p->reg)
void addsplits(void);
@ -128,6 +130,33 @@ setaddrs(Bits bit)
}
}
static char* regname[] = {
".R0",
".R1",
".R2",
".R3",
".R4",
".R5",
".R6",
".R7",
".R8",
".R9",
".R10",
".R11",
".R12",
".R13",
".R14",
".R15",
".F0",
".F1",
".F2",
".F3",
".F4",
".F5",
".F6",
".F7",
};
void
regopt(Prog *firstp)
{
@ -164,7 +193,17 @@ regopt(Prog *firstp)
r1 = R;
firstr = R;
lastr = R;
nvar = 0;
/*
* control flow is more complicated in generated go code
* than in generated c code. define pseudo-variables for
* registers, so we have complete register usage information.
*/
nvar = NREGVAR;
memset(var, 0, NREGVAR*sizeof var[0]);
for(i=0; i<NREGVAR; i++)
var[i].sym = lookup(regname[i]);
regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
for(z=0; z<BITS; z++) {
externs.b[z] = 0;
@ -224,6 +263,16 @@ regopt(Prog *firstp)
for(z=0; z<BITS; z++)
r->use1.b[z] |= bit.b[z];
/*
* middle always read when present
*/
if(p->reg != NREG) {
if(p->from.type != D_FREG)
r->use1.b[0] |= RtoB(p->reg);
else
r->use1.b[0] |= FtoB(p->reg);
}
/*
* right side depends on opcode
*/
@ -234,17 +283,89 @@ regopt(Prog *firstp)
yyerror("reg: unknown op: %A", p->as);
break;
/*
* right side read
*/
case ATST:
case ATEQ:
case ACMP:
case ACMN:
case ACMPD:
case ACMPF:
rightread:
for(z=0; z<BITS; z++)
r->use2.b[z] |= bit.b[z];
break;
/*
* right side read or read+write, depending on middle
* ADD x, z => z += x
* ADD x, y, z => z = x + y
*/
case AADD:
case AAND:
case AEOR:
case ASUB:
case ARSB:
case AADC:
case ASBC:
case ARSC:
case AORR:
case ABIC:
case ASLL:
case ASRL:
case ASRA:
case AMUL:
case AMULU:
case ADIV:
case AMOD:
case AMODU:
case ADIVU:
if(p->reg != NREG)
goto rightread;
// fall through
/*
* right side read+write
*/
case AADDF:
case AADDD:
case ASUBF:
case ASUBD:
case AMULF:
case AMULD:
case ADIVF:
case ADIVD:
case AMULAL:
case AMULALU:
for(z=0; z<BITS; z++) {
r->use2.b[z] |= bit.b[z];
r->set.b[z] |= bit.b[z];
}
break;
/*
* right side write
*/
case ANOP:
case AMOVB:
case AMOVBU:
case AMOVD:
case AMOVDF:
case AMOVDW:
case AMOVF:
case AMOVFW:
case AMOVH:
case AMOVHU:
case AMOVW:
case AMOVF:
case AMOVD:
case AMOVWD:
case AMOVWF:
case AMVN:
case AMULL:
case AMULLU:
if((p->scond & C_SCOND) != C_SCOND_NONE)
for(z=0; z<BITS; z++)
r->use2.b[z] |= bit.b[z];
for(z=0; z<BITS; z++)
r->set.b[z] |= bit.b[z];
break;
@ -396,6 +517,24 @@ loop2:
}
}
/*
* pass 4.5
* move register pseudo-variables into regu.
*/
for(r = firstr; r != R; r = r->link) {
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
r->set.b[0] &= ~REGBITS;
r->use1.b[0] &= ~REGBITS;
r->use2.b[0] &= ~REGBITS;
r->refbehind.b[0] &= ~REGBITS;
r->refahead.b[0] &= ~REGBITS;
r->calbehind.b[0] &= ~REGBITS;
r->calahead.b[0] &= ~REGBITS;
r->regdiff.b[0] &= ~REGBITS;
r->act.b[0] &= ~REGBITS;
}
/*
* pass 5
* isolate regions
@ -715,21 +854,40 @@ mkvar(Reg *r, Adr *a)
goto onereg;
case D_REGREG:
bit = zbits;
if(a->offset != NREG)
r->regu |= RtoB(a->offset);
goto onereg;
bit.b[0] |= RtoB(a->offset);
if(a->reg != NREG)
bit.b[0] |= RtoB(a->reg);
return bit;
case D_REG:
case D_SHIFT:
case D_OREG:
onereg:
if(a->reg != NREG)
r->regu |= RtoB(a->reg);
if(a->reg != NREG) {
bit = zbits;
bit.b[0] = RtoB(a->reg);
return bit;
}
break;
case D_OREG:
if(a->reg != NREG) {
if(a == &r->prog->from)
r->use1.b[0] |= RtoB(a->reg);
else
r->use2.b[0] |= RtoB(a->reg);
if(r->prog->scond & (C_PBIT|C_WBIT))
r->set.b[0] |= RtoB(a->reg);
}
break;
case D_FREG:
if(a->reg != NREG)
r->regu |= FtoB(a->reg);
if(a->reg != NREG) {
bit = zbits;
bit.b[0] = FtoB(a->reg);
return bit;
}
break;
}

View File

@ -33,6 +33,8 @@
#define EXTERN
#include "opt.h"
#define NREGVAR 32 /* 16 general + 16 floating */
#define REGBITS ((uint32)0xffffffff)
#define P2R(p) (Reg*)(p->reg)
static int first = 1;
@ -114,6 +116,41 @@ setaddrs(Bits bit)
}
}
static char* regname[] = {
".AX",
".CX",
".DX",
".BX",
".SP",
".BP",
".SI",
".DI",
".R8",
".R9",
".R10",
".R11",
".R12",
".R13",
".R14",
".R15",
".X0",
".X1",
".X2",
".X3",
".X4",
".X5",
".X6",
".X7",
".X8",
".X9",
".X10",
".X11",
".X12",
".X13",
".X14",
".X15",
};
void
regopt(Prog *firstp)
{
@ -143,6 +180,17 @@ regopt(Prog *firstp)
firstr = R;
lastr = R;
nvar = 0;
/*
* control flow is more complicated in generated go code
* than in generated c code. define pseudo-variables for
* registers, so we have complete register usage information.
*/
nvar = NREGVAR;
memset(var, 0, NREGVAR*sizeof var[0]);
for(i=0; i<NREGVAR; i++)
var[i].sym = lookup(regname[i]);
regbits = RtoB(D_SP);
for(z=0; z<BITS; z++) {
externs.b[z] = 0;
@ -247,6 +295,9 @@ regopt(Prog *firstp)
case ACOMISD:
case AUCOMISS:
case AUCOMISD:
case ATESTB:
case ATESTL:
case ATESTQ:
for(z=0; z<BITS; z++)
r->use2.b[z] |= bit.b[z];
break;
@ -254,6 +305,7 @@ regopt(Prog *firstp)
/*
* right side write
*/
case ALEAQ:
case ANOP:
case AMOVL:
case AMOVQ:
@ -261,6 +313,8 @@ regopt(Prog *firstp)
case AMOVW:
case AMOVBLSX:
case AMOVBLZX:
case AMOVBWSX:
case AMOVBWZX:
case AMOVBQSX:
case AMOVBQZX:
case AMOVLQSX:
@ -269,6 +323,7 @@ regopt(Prog *firstp)
case AMOVWLZX:
case AMOVWQSX:
case AMOVWQZX:
case APOPQ:
case AMOVSS:
case AMOVSD:
@ -357,6 +412,8 @@ regopt(Prog *firstp)
case AIMULL:
case AIMULQ:
case AIMULW:
case ANEGB:
case ANEGW:
case ANEGL:
case ANEGQ:
case ANOTL:
@ -366,6 +423,23 @@ regopt(Prog *firstp)
case ASBBL:
case ASBBQ:
case ASETCC:
case ASETCS:
case ASETEQ:
case ASETGE:
case ASETGT:
case ASETHI:
case ASETLE:
case ASETLS:
case ASETLT:
case ASETMI:
case ASETNE:
case ASETOC:
case ASETOS:
case ASETPC:
case ASETPL:
case ASETPS:
case AXCHGB:
case AXCHGW:
case AXCHGL:
@ -411,32 +485,44 @@ regopt(Prog *firstp)
if(p->to.type != D_NONE)
break;
case AIDIVB:
case AIDIVL:
case AIDIVQ:
case AIDIVW:
case AIDIVQ:
case ADIVL:
case ADIVW:
case ADIVQ:
case AMULL:
case AMULW:
case AMULQ:
r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
break;
case AIDIVB:
case AIMULB:
case ADIVB:
case ADIVL:
case ADIVQ:
case ADIVW:
case AMULB:
case AMULL:
case AMULQ:
case AMULW:
case AMULB:
r->set.b[0] |= RtoB(D_AX);
r->use1.b[0] |= RtoB(D_AX);
break;
case ACWD:
case ACDQ:
case ACQO:
r->regu |= RtoB(D_AX) | RtoB(D_DX);
r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
r->use1.b[0] |= RtoB(D_AX);
break;
case ACDQ:
r->set.b[0] |= RtoB(D_DX);
r->use1.b[0] |= RtoB(D_AX);
break;
case AREP:
case AREPN:
case ALOOP:
case ALOOPEQ:
case ALOOPNE:
r->regu |= RtoB(D_CX);
r->set.b[0] |= RtoB(D_CX);
r->use1.b[0] |= RtoB(D_CX);
break;
case AMOVSB:
@ -447,7 +533,8 @@ regopt(Prog *firstp)
case ACMPSL:
case ACMPSQ:
case ACMPSW:
r->regu |= RtoB(D_SI) | RtoB(D_DI);
r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
break;
case ASTOSB:
@ -458,16 +545,22 @@ regopt(Prog *firstp)
case ASCASL:
case ASCASQ:
case ASCASW:
r->regu |= RtoB(D_AX) | RtoB(D_DI);
r->set.b[0] |= RtoB(D_DI);
r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
break;
case AINSB:
case AINSL:
case AINSW:
r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
r->use1.b[0] |= RtoB(D_DI);
break;
case AOUTSB:
case AOUTSL:
case AOUTSW:
r->regu |= RtoB(D_DI) | RtoB(D_DX);
r->set.b[0] |= RtoB(D_DI);
r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
break;
}
}
@ -573,6 +666,24 @@ loop2:
if(debug['R'] && debug['v'])
dumpit("pass4", firstr);
/*
* pass 4.5
* move register pseudo-variables into regu.
*/
for(r = firstr; r != R; r = r->link) {
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
r->set.b[0] &= ~REGBITS;
r->use1.b[0] &= ~REGBITS;
r->use2.b[0] &= ~REGBITS;
r->refbehind.b[0] &= ~REGBITS;
r->refahead.b[0] &= ~REGBITS;
r->calbehind.b[0] &= ~REGBITS;
r->calahead.b[0] &= ~REGBITS;
r->regdiff.b[0] &= ~REGBITS;
r->act.b[0] &= ~REGBITS;
}
/*
* pass 5
* isolate regions
@ -818,6 +929,7 @@ mkvar(Reg *r, Adr *a)
{
Var *v;
int i, t, n, et, z, w, flag;
uint32 regu;
int32 o;
Bits bit;
Sym *s;
@ -829,14 +941,17 @@ mkvar(Reg *r, Adr *a)
if(t == D_NONE)
goto none;
if(r != R) {
r->regu |= doregbits(t);
r->regu |= doregbits(a->index);
}
if(r != R)
r->use1.b[0] |= doregbits(a->index);
switch(t) {
default:
goto none;
regu = doregbits(t);
if(regu == 0)
goto none;
bit = zbits;
bit.b[0] = regu;
return bit;
case D_ADDR:
a->type = a->index;

View File

@ -33,6 +33,8 @@
#define EXTERN
#include "opt.h"
#define NREGVAR 8
#define REGBITS ((uint32)0xff)
#define P2R(p) (Reg*)(p->reg)
static int first = 1;
@ -114,6 +116,8 @@ setaddrs(Bits bit)
}
}
static char* regname[] = { ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di" };
void
regopt(Prog *firstp)
{
@ -142,7 +146,17 @@ regopt(Prog *firstp)
r1 = R;
firstr = R;
lastr = R;
nvar = 0;
/*
* control flow is more complicated in generated go code
* than in generated c code. define pseudo-variables for
* registers, so we have complete register usage information.
*/
nvar = NREGVAR;
memset(var, 0, NREGVAR*sizeof var[0]);
for(i=0; i<NREGVAR; i++)
var[i].sym = lookup(regname[i]);
regbits = RtoB(D_SP);
for(z=0; z<BITS; z++) {
externs.b[z] = 0;
@ -249,14 +263,19 @@ regopt(Prog *firstp)
/*
* right side write
*/
case AFSTSW:
case ALEAL:
case ANOP:
case AMOVL:
case AMOVB:
case AMOVW:
case AMOVBLSX:
case AMOVBLZX:
case AMOVBWSX:
case AMOVBWZX:
case AMOVWLSX:
case AMOVWLZX:
case APOPL:
for(z=0; z<BITS; z++)
r->set.b[z] |= bit.b[z];
break;
@ -321,6 +340,23 @@ regopt(Prog *firstp)
case AADCL:
case ASBBL:
case ASETCC:
case ASETCS:
case ASETEQ:
case ASETGE:
case ASETGT:
case ASETHI:
case ASETLE:
case ASETLS:
case ASETLT:
case ASETMI:
case ASETNE:
case ASETOC:
case ASETOS:
case ASETPC:
case ASETPL:
case ASETPS:
case AXCHGB:
case AXCHGW:
case AXCHGL:
@ -349,20 +385,32 @@ regopt(Prog *firstp)
if(p->to.type != D_NONE)
break;
case AIDIVB:
case AIDIVL:
case AIDIVW:
case AIMULB:
case ADIVB:
case ADIVL:
case ADIVW:
case AMULB:
case AMULL:
case AMULW:
r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
break;
case AIDIVB:
case AIMULB:
case ADIVB:
case AMULB:
r->set.b[0] |= RtoB(D_AX);
r->use1.b[0] |= RtoB(D_AX);
break;
case ACWD:
r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
r->use1.b[0] |= RtoB(D_AX);
break;
case ACDQ:
r->regu |= RtoB(D_AX) | RtoB(D_DX);
r->set.b[0] |= RtoB(D_DX);
r->use1.b[0] |= RtoB(D_AX);
break;
case AREP:
@ -370,7 +418,8 @@ regopt(Prog *firstp)
case ALOOP:
case ALOOPEQ:
case ALOOPNE:
r->regu |= RtoB(D_CX);
r->set.b[0] |= RtoB(D_CX);
r->use1.b[0] |= RtoB(D_CX);
break;
case AMOVSB:
@ -379,7 +428,8 @@ regopt(Prog *firstp)
case ACMPSB:
case ACMPSL:
case ACMPSW:
r->regu |= RtoB(D_SI) | RtoB(D_DI);
r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
break;
case ASTOSB:
@ -388,16 +438,22 @@ regopt(Prog *firstp)
case ASCASB:
case ASCASL:
case ASCASW:
r->regu |= RtoB(D_AX) | RtoB(D_DI);
r->set.b[0] |= RtoB(D_DI);
r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
break;
case AINSB:
case AINSL:
case AINSW:
r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
r->use1.b[0] |= RtoB(D_DI);
break;
case AOUTSB:
case AOUTSL:
case AOUTSW:
r->regu |= RtoB(D_DI) | RtoB(D_DX);
r->set.b[0] |= RtoB(D_DI);
r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
break;
}
}
@ -503,6 +559,24 @@ loop2:
if(debug['R'] && debug['v'])
dumpit("pass4", firstr);
/*
* pass 4.5
* move register pseudo-variables into regu.
*/
for(r = firstr; r != R; r = r->link) {
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
r->set.b[0] &= ~REGBITS;
r->use1.b[0] &= ~REGBITS;
r->use2.b[0] &= ~REGBITS;
r->refbehind.b[0] &= ~REGBITS;
r->refahead.b[0] &= ~REGBITS;
r->calbehind.b[0] &= ~REGBITS;
r->calahead.b[0] &= ~REGBITS;
r->regdiff.b[0] &= ~REGBITS;
r->act.b[0] &= ~REGBITS;
}
/*
* pass 5
* isolate regions
@ -732,7 +806,7 @@ Bits
mkvar(Reg *r, Adr *a)
{
Var *v;
int i, t, n, et, z, w, flag;
int i, t, n, et, z, w, flag, regu;
int32 o;
Bits bit;
Sym *s;
@ -744,14 +818,17 @@ mkvar(Reg *r, Adr *a)
if(t == D_NONE)
goto none;
if(r != R) {
r->regu |= doregbits(t);
r->regu |= doregbits(a->index);
}
if(r != R)
r->use1.b[0] |= doregbits(a->index);
switch(t) {
default:
goto none;
regu = doregbits(t);
if(regu == 0)
goto none;
bit = zbits;
bit.b[0] = regu;
return bit;
case D_ADDR:
a->type = a->index;

View File

@ -703,5 +703,4 @@ tempname(Node *n, Type *t)
if(thechar == '5')
stksize = rnd(stksize, widthptr);
n->xoffset = -stksize;
n->pun = anyregalloc();
}