2008-11-18 20:24:37 -07:00
|
|
|
// Derived from Inferno utils/6c/reg.c
|
|
|
|
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
|
|
|
|
//
|
|
|
|
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
|
|
|
|
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
|
|
|
|
// Portions Copyright © 1997-1999 Vita Nuova Limited
|
|
|
|
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
|
|
|
|
// Portions Copyright © 2004,2006 Bruce Ellis
|
|
|
|
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
|
|
|
|
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
|
|
|
|
// Portions Copyright © 2009 The Go Authors. All rights reserved.
|
|
|
|
//
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
|
|
// in the Software without restriction, including without limitation the rights
|
|
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
|
|
// furnished to do so, subject to the following conditions:
|
|
|
|
//
|
|
|
|
// The above copyright notice and this permission notice shall be included in
|
|
|
|
// all copies or substantial portions of the Software.
|
|
|
|
//
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
// THE SOFTWARE.
|
|
|
|
|
|
|
|
#include "gg.h"
|
|
|
|
#undef EXTERN
|
|
|
|
#define EXTERN
|
|
|
|
#include "opt.h"
|
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
/*
 * NREGVAR is the number of register pseudo-variables that regopt
 * installs at the front of var[] before scanning the instructions.
 */
#define	NREGVAR	32	/* 16 general + 16 floating */
/* REGBITS is a 32-bit mask with every bit set. */
#define	REGBITS	((uint32)0xffffffff)

/*
 * P2R yields the Reg recorded in a Prog's reg field.
 * Both the argument and the whole expansion are parenthesized so the
 * macro can be handed any pointer expression (for example &prog or a
 * conditional) and can be used inside a larger expression safely.
 */
#define	P2R(p)	((Reg*)((p)->reg))

/* first is nonzero until regopt has performed its one-time setup. */
static	int	first	= 1;
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
Reg*
|
|
|
|
rega(void)
|
|
|
|
{
|
|
|
|
Reg *r;
|
|
|
|
|
|
|
|
r = freer;
|
|
|
|
if(r == R) {
|
|
|
|
r = mal(sizeof(*r));
|
|
|
|
} else
|
|
|
|
freer = r->link;
|
|
|
|
|
|
|
|
*r = zreg;
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
rcmp(const void *a1, const void *a2)
|
|
|
|
{
|
|
|
|
Rgn *p1, *p2;
|
|
|
|
int c1, c2;
|
|
|
|
|
|
|
|
p1 = (Rgn*)a1;
|
|
|
|
p2 = (Rgn*)a2;
|
|
|
|
c1 = p2->cost;
|
|
|
|
c2 = p1->cost;
|
|
|
|
if(c1 -= c2)
|
|
|
|
return c1;
|
|
|
|
return p2->varno - p1->varno;
|
|
|
|
}
|
|
|
|
|
2009-12-11 16:55:09 -07:00
|
|
|
static void
|
2008-11-22 18:58:53 -07:00
|
|
|
setoutvar(void)
|
|
|
|
{
|
|
|
|
Type *t;
|
|
|
|
Node *n;
|
|
|
|
Addr a;
|
|
|
|
Iter save;
|
|
|
|
Bits bit;
|
|
|
|
int z;
|
|
|
|
|
|
|
|
t = structfirst(&save, getoutarg(curfn->type));
|
|
|
|
while(t != T) {
|
|
|
|
n = nodarg(t, 1);
|
|
|
|
a = zprog.from;
|
2009-10-20 09:03:43 -06:00
|
|
|
naddr(n, &a, 0);
|
2008-11-22 18:58:53 -07:00
|
|
|
bit = mkvar(R, &a);
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
ovar.b[z] |= bit.b[z];
|
|
|
|
t = structnext(&save);
|
|
|
|
}
|
|
|
|
//if(bany(b))
|
|
|
|
//print("ovars = %Q\n", &ovar);
|
|
|
|
}
|
|
|
|
|
2009-12-11 16:55:09 -07:00
|
|
|
static void
|
|
|
|
setaddrs(Bits bit)
|
|
|
|
{
|
2009-12-12 15:36:52 -07:00
|
|
|
int i, n;
|
|
|
|
Var *v;
|
|
|
|
Sym *s;
|
|
|
|
|
|
|
|
while(bany(&bit)) {
|
|
|
|
// convert each bit to a variable
|
|
|
|
i = bnum(bit);
|
|
|
|
s = var[i].sym;
|
|
|
|
n = var[i].name;
|
|
|
|
bit.b[i/32] &= ~(1L<<(i%32));
|
|
|
|
|
|
|
|
// disable all pieces of that variable
|
|
|
|
for(i=0; i<nvar; i++) {
|
|
|
|
v = var+i;
|
|
|
|
if(v->sym == s && v->name == n)
|
|
|
|
v->addr = 2;
|
|
|
|
}
|
|
|
|
}
|
2009-12-11 16:55:09 -07:00
|
|
|
}
|
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
/*
 * regname lists the symbol names given to the register
 * pseudo-variables, in var[] index order: the sixteen general
 * registers first, then the sixteen X (floating-point) registers.
 */
static char* regname[] = {
	/* general registers */
	".AX", ".CX", ".DX", ".BX", ".SP", ".BP", ".SI", ".DI",
	".R8", ".R9", ".R10", ".R11", ".R12", ".R13", ".R14", ".R15",

	/* floating-point registers */
	".X0", ".X1", ".X2", ".X3", ".X4", ".X5", ".X6", ".X7",
	".X8", ".X9", ".X10", ".X11", ".X12", ".X13", ".X14", ".X15",
};
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
void
|
|
|
|
regopt(Prog *firstp)
|
|
|
|
{
|
2008-11-19 10:49:06 -07:00
|
|
|
Reg *r, *r1;
|
|
|
|
Prog *p;
|
2008-11-18 20:24:37 -07:00
|
|
|
int i, z, nr;
|
|
|
|
uint32 vreg;
|
|
|
|
Bits bit;
|
|
|
|
|
|
|
|
if(first) {
|
|
|
|
fmtinstall('Q', Qconv);
|
2008-12-14 18:06:06 -07:00
|
|
|
exregoffset = D_R13; // R14,R15 are external
|
2008-11-18 20:24:37 -07:00
|
|
|
first = 0;
|
|
|
|
}
|
|
|
|
|
2009-08-29 21:33:21 -06:00
|
|
|
// count instructions
|
|
|
|
nr = 0;
|
|
|
|
for(p=firstp; p!=P; p=p->link)
|
|
|
|
nr++;
|
|
|
|
// if too big dont bother
|
|
|
|
if(nr >= 10000) {
|
2009-08-30 12:36:42 -06:00
|
|
|
// print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
|
2009-08-29 21:33:21 -06:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-01-09 12:13:59 -07:00
|
|
|
r1 = R;
|
2008-11-18 20:24:37 -07:00
|
|
|
firstr = R;
|
|
|
|
lastr = R;
|
|
|
|
nvar = 0;
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
|
|
|
|
/*
|
|
|
|
* control flow is more complicated in generated go code
|
|
|
|
* than in generated c code. define pseudo-variables for
|
|
|
|
* registers, so we have complete register usage information.
|
|
|
|
*/
|
|
|
|
nvar = NREGVAR;
|
|
|
|
memset(var, 0, NREGVAR*sizeof var[0]);
|
|
|
|
for(i=0; i<NREGVAR; i++)
|
|
|
|
var[i].sym = lookup(regname[i]);
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
regbits = RtoB(D_SP);
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
externs.b[z] = 0;
|
|
|
|
params.b[z] = 0;
|
|
|
|
consts.b[z] = 0;
|
|
|
|
addrs.b[z] = 0;
|
2008-11-22 18:58:53 -07:00
|
|
|
ovar.b[z] = 0;
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
|
2008-11-22 18:58:53 -07:00
|
|
|
// build list of return variables
|
|
|
|
setoutvar();
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
/*
|
|
|
|
* pass 1
|
|
|
|
* build aux data structure
|
|
|
|
* allocate pcs
|
|
|
|
* find use and set of variables
|
|
|
|
*/
|
|
|
|
nr = 0;
|
|
|
|
for(p=firstp; p!=P; p=p->link) {
|
|
|
|
switch(p->as) {
|
|
|
|
case ADATA:
|
|
|
|
case AGLOBL:
|
|
|
|
case ANAME:
|
|
|
|
case ASIGNAME:
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
r = rega();
|
|
|
|
nr++;
|
|
|
|
if(firstr == R) {
|
|
|
|
firstr = r;
|
|
|
|
lastr = r;
|
|
|
|
} else {
|
|
|
|
lastr->link = r;
|
|
|
|
r->p1 = lastr;
|
|
|
|
lastr->s1 = r;
|
|
|
|
lastr = r;
|
|
|
|
}
|
|
|
|
r->prog = p;
|
|
|
|
p->reg = r;
|
|
|
|
|
|
|
|
r1 = r->p1;
|
|
|
|
if(r1 != R) {
|
|
|
|
switch(r1->prog->as) {
|
|
|
|
case ARET:
|
|
|
|
case AJMP:
|
|
|
|
case AIRETL:
|
|
|
|
case AIRETQ:
|
|
|
|
r->p1 = R;
|
|
|
|
r1->s1 = R;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bit = mkvar(r, &p->from);
|
|
|
|
if(bany(&bit))
|
|
|
|
switch(p->as) {
|
|
|
|
/*
|
|
|
|
* funny
|
|
|
|
*/
|
|
|
|
case ALEAL:
|
|
|
|
case ALEAQ:
|
2009-12-11 16:55:09 -07:00
|
|
|
setaddrs(bit);
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* left side read
|
|
|
|
*/
|
|
|
|
default:
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
r->use1.b[z] |= bit.b[z];
|
|
|
|
break;
|
2009-01-13 14:46:09 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* left side read+write
|
|
|
|
*/
|
|
|
|
case AXCHGB:
|
|
|
|
case AXCHGW:
|
|
|
|
case AXCHGL:
|
|
|
|
case AXCHGQ:
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
r->use1.b[z] |= bit.b[z];
|
|
|
|
r->set.b[z] |= bit.b[z];
|
|
|
|
}
|
|
|
|
break;
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
bit = mkvar(r, &p->to);
|
|
|
|
if(bany(&bit))
|
|
|
|
switch(p->as) {
|
|
|
|
default:
|
|
|
|
yyerror("reg: unknown op: %A", p->as);
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* right side read
|
|
|
|
*/
|
|
|
|
case ACMPB:
|
|
|
|
case ACMPL:
|
|
|
|
case ACMPQ:
|
|
|
|
case ACMPW:
|
|
|
|
case ACOMISS:
|
|
|
|
case ACOMISD:
|
|
|
|
case AUCOMISS:
|
|
|
|
case AUCOMISD:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case ATESTB:
|
|
|
|
case ATESTL:
|
|
|
|
case ATESTQ:
|
2008-11-18 20:24:37 -07:00
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
r->use2.b[z] |= bit.b[z];
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* right side write
|
|
|
|
*/
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case ALEAQ:
|
2008-11-18 20:24:37 -07:00
|
|
|
case ANOP:
|
|
|
|
case AMOVL:
|
|
|
|
case AMOVQ:
|
|
|
|
case AMOVB:
|
|
|
|
case AMOVW:
|
|
|
|
case AMOVBLSX:
|
|
|
|
case AMOVBLZX:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case AMOVBWSX:
|
|
|
|
case AMOVBWZX:
|
2008-11-18 20:24:37 -07:00
|
|
|
case AMOVBQSX:
|
|
|
|
case AMOVBQZX:
|
|
|
|
case AMOVLQSX:
|
|
|
|
case AMOVLQZX:
|
|
|
|
case AMOVWLSX:
|
|
|
|
case AMOVWLZX:
|
|
|
|
case AMOVWQSX:
|
|
|
|
case AMOVWQZX:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case APOPQ:
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
case AMOVSS:
|
|
|
|
case AMOVSD:
|
|
|
|
case ACVTSD2SL:
|
|
|
|
case ACVTSD2SQ:
|
|
|
|
case ACVTSD2SS:
|
|
|
|
case ACVTSL2SD:
|
|
|
|
case ACVTSL2SS:
|
|
|
|
case ACVTSQ2SD:
|
|
|
|
case ACVTSQ2SS:
|
|
|
|
case ACVTSS2SD:
|
|
|
|
case ACVTSS2SL:
|
|
|
|
case ACVTSS2SQ:
|
|
|
|
case ACVTTSD2SL:
|
|
|
|
case ACVTTSD2SQ:
|
|
|
|
case ACVTTSS2SL:
|
|
|
|
case ACVTTSS2SQ:
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
r->set.b[z] |= bit.b[z];
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* right side read+write
|
|
|
|
*/
|
2008-11-22 18:58:53 -07:00
|
|
|
case AINCB:
|
|
|
|
case AINCL:
|
|
|
|
case AINCQ:
|
|
|
|
case AINCW:
|
|
|
|
case ADECB:
|
|
|
|
case ADECL:
|
|
|
|
case ADECQ:
|
|
|
|
case ADECW:
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
case AADDB:
|
|
|
|
case AADDL:
|
|
|
|
case AADDQ:
|
|
|
|
case AADDW:
|
|
|
|
case AANDB:
|
|
|
|
case AANDL:
|
|
|
|
case AANDQ:
|
|
|
|
case AANDW:
|
|
|
|
case ASUBB:
|
|
|
|
case ASUBL:
|
|
|
|
case ASUBQ:
|
|
|
|
case ASUBW:
|
|
|
|
case AORB:
|
|
|
|
case AORL:
|
|
|
|
case AORQ:
|
|
|
|
case AORW:
|
|
|
|
case AXORB:
|
|
|
|
case AXORL:
|
|
|
|
case AXORQ:
|
|
|
|
case AXORW:
|
|
|
|
case ASALB:
|
|
|
|
case ASALL:
|
|
|
|
case ASALQ:
|
|
|
|
case ASALW:
|
|
|
|
case ASARB:
|
|
|
|
case ASARL:
|
|
|
|
case ASARQ:
|
|
|
|
case ASARW:
|
2009-08-09 16:16:06 -06:00
|
|
|
case ARCLB:
|
|
|
|
case ARCLL:
|
|
|
|
case ARCLQ:
|
|
|
|
case ARCLW:
|
|
|
|
case ARCRB:
|
|
|
|
case ARCRL:
|
|
|
|
case ARCRQ:
|
|
|
|
case ARCRW:
|
2008-11-18 20:24:37 -07:00
|
|
|
case AROLB:
|
|
|
|
case AROLL:
|
|
|
|
case AROLQ:
|
|
|
|
case AROLW:
|
|
|
|
case ARORB:
|
|
|
|
case ARORL:
|
|
|
|
case ARORQ:
|
|
|
|
case ARORW:
|
|
|
|
case ASHLB:
|
|
|
|
case ASHLL:
|
|
|
|
case ASHLQ:
|
|
|
|
case ASHLW:
|
|
|
|
case ASHRB:
|
|
|
|
case ASHRL:
|
|
|
|
case ASHRQ:
|
|
|
|
case ASHRW:
|
|
|
|
case AIMULL:
|
|
|
|
case AIMULQ:
|
|
|
|
case AIMULW:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case ANEGB:
|
|
|
|
case ANEGW:
|
2008-11-18 20:24:37 -07:00
|
|
|
case ANEGL:
|
|
|
|
case ANEGQ:
|
|
|
|
case ANOTL:
|
|
|
|
case ANOTQ:
|
|
|
|
case AADCL:
|
|
|
|
case AADCQ:
|
|
|
|
case ASBBL:
|
|
|
|
case ASBBQ:
|
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case ASETCC:
|
|
|
|
case ASETCS:
|
|
|
|
case ASETEQ:
|
|
|
|
case ASETGE:
|
|
|
|
case ASETGT:
|
|
|
|
case ASETHI:
|
|
|
|
case ASETLE:
|
|
|
|
case ASETLS:
|
|
|
|
case ASETLT:
|
|
|
|
case ASETMI:
|
|
|
|
case ASETNE:
|
|
|
|
case ASETOC:
|
|
|
|
case ASETOS:
|
|
|
|
case ASETPC:
|
|
|
|
case ASETPL:
|
|
|
|
case ASETPS:
|
|
|
|
|
2009-01-13 14:46:09 -07:00
|
|
|
case AXCHGB:
|
|
|
|
case AXCHGW:
|
|
|
|
case AXCHGL:
|
|
|
|
case AXCHGQ:
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
case AADDSD:
|
|
|
|
case AADDSS:
|
|
|
|
case ACMPSD:
|
|
|
|
case ACMPSS:
|
|
|
|
case ADIVSD:
|
|
|
|
case ADIVSS:
|
|
|
|
case AMAXSD:
|
|
|
|
case AMAXSS:
|
|
|
|
case AMINSD:
|
|
|
|
case AMINSS:
|
|
|
|
case AMULSD:
|
|
|
|
case AMULSS:
|
|
|
|
case ARCPSS:
|
|
|
|
case ARSQRTSS:
|
|
|
|
case ASQRTSD:
|
|
|
|
case ASQRTSS:
|
|
|
|
case ASUBSD:
|
|
|
|
case ASUBSS:
|
|
|
|
case AXORPD:
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
r->set.b[z] |= bit.b[z];
|
|
|
|
r->use2.b[z] |= bit.b[z];
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* funny
|
|
|
|
*/
|
|
|
|
case ACALL:
|
2009-12-11 16:55:09 -07:00
|
|
|
setaddrs(bit);
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch(p->as) {
|
|
|
|
case AIMULL:
|
|
|
|
case AIMULQ:
|
|
|
|
case AIMULW:
|
|
|
|
if(p->to.type != D_NONE)
|
|
|
|
break;
|
|
|
|
|
|
|
|
case AIDIVL:
|
|
|
|
case AIDIVW:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case AIDIVQ:
|
2008-11-18 20:24:37 -07:00
|
|
|
case ADIVL:
|
|
|
|
case ADIVW:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case ADIVQ:
|
2008-11-18 20:24:37 -07:00
|
|
|
case AMULL:
|
|
|
|
case AMULW:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case AMULQ:
|
|
|
|
r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
|
|
|
|
r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case AIDIVB:
|
|
|
|
case AIMULB:
|
|
|
|
case ADIVB:
|
|
|
|
case AMULB:
|
|
|
|
r->set.b[0] |= RtoB(D_AX);
|
|
|
|
r->use1.b[0] |= RtoB(D_AX);
|
|
|
|
break;
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
case ACWD:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
|
|
|
|
r->use1.b[0] |= RtoB(D_AX);
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
case ACDQ:
|
|
|
|
r->set.b[0] |= RtoB(D_DX);
|
|
|
|
r->use1.b[0] |= RtoB(D_AX);
|
|
|
|
break;
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
case AREP:
|
|
|
|
case AREPN:
|
|
|
|
case ALOOP:
|
|
|
|
case ALOOPEQ:
|
|
|
|
case ALOOPNE:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->set.b[0] |= RtoB(D_CX);
|
|
|
|
r->use1.b[0] |= RtoB(D_CX);
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
case AMOVSB:
|
|
|
|
case AMOVSL:
|
|
|
|
case AMOVSQ:
|
|
|
|
case AMOVSW:
|
|
|
|
case ACMPSB:
|
|
|
|
case ACMPSL:
|
|
|
|
case ACMPSQ:
|
|
|
|
case ACMPSW:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
|
|
|
|
r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
case ASTOSB:
|
|
|
|
case ASTOSL:
|
|
|
|
case ASTOSQ:
|
|
|
|
case ASTOSW:
|
|
|
|
case ASCASB:
|
|
|
|
case ASCASL:
|
|
|
|
case ASCASQ:
|
|
|
|
case ASCASW:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->set.b[0] |= RtoB(D_DI);
|
|
|
|
r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
case AINSB:
|
|
|
|
case AINSL:
|
|
|
|
case AINSW:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
|
|
|
|
r->use1.b[0] |= RtoB(D_DI);
|
|
|
|
break;
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
case AOUTSB:
|
|
|
|
case AOUTSL:
|
|
|
|
case AOUTSW:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->set.b[0] |= RtoB(D_DI);
|
|
|
|
r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(firstr == R)
|
|
|
|
return;
|
2008-11-22 18:58:53 -07:00
|
|
|
|
2009-12-11 16:55:09 -07:00
|
|
|
for(i=0; i<nvar; i++) {
|
|
|
|
Var *v = var+i;
|
|
|
|
if(v->addr) {
|
|
|
|
bit = blsh(i);
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
addrs.b[z] |= bit.b[z];
|
|
|
|
}
|
|
|
|
|
|
|
|
// print("bit=%2d addr=%d et=%-6E w=%-2d s=%S + %lld\n",
|
|
|
|
// i, v->addr, v->etype, v->width, v->sym, v->offset);
|
|
|
|
}
|
|
|
|
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
dumpit("pass1", firstr);
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 2
|
|
|
|
* turn branch references to pointers
|
|
|
|
* build back pointers
|
|
|
|
*/
|
|
|
|
for(r=firstr; r!=R; r=r->link) {
|
|
|
|
p = r->prog;
|
|
|
|
if(p->to.type == D_BRANCH) {
|
|
|
|
if(p->to.branch == P)
|
|
|
|
fatal("pnil %P", p);
|
|
|
|
r1 = p->to.branch->reg;
|
|
|
|
if(r1 == R)
|
|
|
|
fatal("rnil %P", p);
|
|
|
|
if(r1 == r) {
|
2008-11-22 18:58:53 -07:00
|
|
|
//fatal("ref to self %P", p);
|
2008-11-18 20:24:37 -07:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
r->s2 = r1;
|
|
|
|
r->p2link = r1->p2;
|
|
|
|
r1->p2 = r;
|
|
|
|
}
|
|
|
|
}
|
2008-11-22 18:58:53 -07:00
|
|
|
|
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
dumpit("pass2", firstr);
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 2.5
|
|
|
|
* find looping structure
|
|
|
|
*/
|
|
|
|
for(r = firstr; r != R; r = r->link)
|
|
|
|
r->active = 0;
|
|
|
|
change = 0;
|
|
|
|
loopit(firstr, nr);
|
2008-11-22 18:58:53 -07:00
|
|
|
|
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
dumpit("pass2.5", firstr);
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 3
|
|
|
|
* iterate propagating usage
|
|
|
|
* back until flow graph is complete
|
|
|
|
*/
|
|
|
|
loop1:
|
|
|
|
change = 0;
|
|
|
|
for(r = firstr; r != R; r = r->link)
|
|
|
|
r->active = 0;
|
|
|
|
for(r = firstr; r != R; r = r->link)
|
|
|
|
if(r->prog->as == ARET)
|
|
|
|
prop(r, zbits, zbits);
|
|
|
|
loop11:
|
|
|
|
/* pick up unreachable code */
|
|
|
|
i = 0;
|
|
|
|
for(r = firstr; r != R; r = r1) {
|
|
|
|
r1 = r->link;
|
|
|
|
if(r1 && r1->active && !r->active) {
|
|
|
|
prop(r, zbits, zbits);
|
|
|
|
i = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(i)
|
|
|
|
goto loop11;
|
|
|
|
if(change)
|
|
|
|
goto loop1;
|
|
|
|
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
dumpit("pass3", firstr);
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 4
|
|
|
|
* iterate propagating register/variable synchrony
|
|
|
|
* forward until graph is complete
|
|
|
|
*/
|
|
|
|
loop2:
|
|
|
|
change = 0;
|
|
|
|
for(r = firstr; r != R; r = r->link)
|
|
|
|
r->active = 0;
|
|
|
|
synch(firstr, zbits);
|
|
|
|
if(change)
|
|
|
|
goto loop2;
|
|
|
|
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
dumpit("pass4", firstr);
|
2008-11-18 20:24:37 -07:00
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
/*
|
|
|
|
* pass 4.5
|
|
|
|
* move register pseudo-variables into regu.
|
|
|
|
*/
|
|
|
|
for(r = firstr; r != R; r = r->link) {
|
|
|
|
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
|
|
|
|
|
|
|
|
r->set.b[0] &= ~REGBITS;
|
|
|
|
r->use1.b[0] &= ~REGBITS;
|
|
|
|
r->use2.b[0] &= ~REGBITS;
|
|
|
|
r->refbehind.b[0] &= ~REGBITS;
|
|
|
|
r->refahead.b[0] &= ~REGBITS;
|
|
|
|
r->calbehind.b[0] &= ~REGBITS;
|
|
|
|
r->calahead.b[0] &= ~REGBITS;
|
|
|
|
r->regdiff.b[0] &= ~REGBITS;
|
|
|
|
r->act.b[0] &= ~REGBITS;
|
|
|
|
}
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
/*
|
|
|
|
* pass 5
|
|
|
|
* isolate regions
|
|
|
|
* calculate costs (paint1)
|
|
|
|
*/
|
|
|
|
r = firstr;
|
|
|
|
if(r) {
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
|
|
|
|
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(bany(&bit) && !r->refset) {
|
|
|
|
// should never happen - all variables are preset
|
|
|
|
if(debug['w'])
|
|
|
|
print("%L: used and not set: %Q\n", r->prog->lineno, bit);
|
|
|
|
r->refset = 1;
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
for(r = firstr; r != R; r = r->link)
|
|
|
|
r->act = zbits;
|
|
|
|
rgp = region;
|
|
|
|
nregion = 0;
|
|
|
|
for(r = firstr; r != R; r = r->link) {
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] = r->set.b[z] &
|
|
|
|
~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(bany(&bit) && !r->refset) {
|
|
|
|
if(debug['w'])
|
|
|
|
print("%L: set and not used: %Q\n", r->prog->lineno, bit);
|
|
|
|
r->refset = 1;
|
2008-11-18 20:24:37 -07:00
|
|
|
excise(r);
|
|
|
|
}
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
|
|
|
|
while(bany(&bit)) {
|
|
|
|
i = bnum(bit);
|
|
|
|
rgp->enter = r;
|
|
|
|
rgp->varno = i;
|
|
|
|
change = 0;
|
|
|
|
paint1(r, i);
|
|
|
|
bit.b[i/32] &= ~(1L<<(i%32));
|
2008-11-22 18:58:53 -07:00
|
|
|
if(change <= 0)
|
2008-11-18 20:24:37 -07:00
|
|
|
continue;
|
|
|
|
rgp->cost = change;
|
|
|
|
nregion++;
|
|
|
|
if(nregion >= NRGN) {
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
print("too many regions\n");
|
2008-11-18 20:24:37 -07:00
|
|
|
goto brk;
|
|
|
|
}
|
|
|
|
rgp++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
brk:
|
|
|
|
qsort(region, nregion, sizeof(region[0]), rcmp);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 6
|
|
|
|
* determine used registers (paint2)
|
|
|
|
* replace code (paint3)
|
|
|
|
*/
|
|
|
|
rgp = region;
|
|
|
|
for(i=0; i<nregion; i++) {
|
|
|
|
bit = blsh(rgp->varno);
|
|
|
|
vreg = paint2(rgp->enter, rgp->varno);
|
|
|
|
vreg = allreg(vreg, rgp);
|
|
|
|
if(rgp->regno != 0)
|
|
|
|
paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
|
|
|
|
rgp++;
|
|
|
|
}
|
|
|
|
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
dumpit("pass6", firstr);
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
/*
|
|
|
|
* pass 7
|
|
|
|
* peep-hole on basic block
|
|
|
|
*/
|
2008-11-22 18:58:53 -07:00
|
|
|
if(!debug['R'] || debug['P']) {
|
2008-11-18 20:24:37 -07:00
|
|
|
peep();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* eliminate nops
|
|
|
|
* free aux structures
|
|
|
|
*/
|
|
|
|
for(p=firstp; p!=P; p=p->link) {
|
2008-11-22 18:58:53 -07:00
|
|
|
while(p->link != P && p->link->as == ANOP)
|
2008-11-18 20:24:37 -07:00
|
|
|
p->link = p->link->link;
|
2008-11-22 18:58:53 -07:00
|
|
|
if(p->to.type == D_BRANCH)
|
|
|
|
while(p->to.branch != P && p->to.branch->as == ANOP)
|
|
|
|
p->to.branch = p->to.branch->link;
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
if(r1 != R) {
|
|
|
|
r1->link = freer;
|
|
|
|
freer = firstr;
|
|
|
|
}
|
2008-11-22 18:58:53 -07:00
|
|
|
|
|
|
|
if(debug['R']) {
|
|
|
|
if(ostats.ncvtreg ||
|
|
|
|
ostats.nspill ||
|
|
|
|
ostats.nreload ||
|
|
|
|
ostats.ndelmov ||
|
|
|
|
ostats.nvar ||
|
|
|
|
ostats.naddr ||
|
|
|
|
0)
|
|
|
|
print("\nstats\n");
|
|
|
|
|
|
|
|
if(ostats.ncvtreg)
|
2010-10-13 14:20:22 -06:00
|
|
|
print(" %4d cvtreg\n", ostats.ncvtreg);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(ostats.nspill)
|
2010-10-13 14:20:22 -06:00
|
|
|
print(" %4d spill\n", ostats.nspill);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(ostats.nreload)
|
2010-10-13 14:20:22 -06:00
|
|
|
print(" %4d reload\n", ostats.nreload);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(ostats.ndelmov)
|
2010-10-13 14:20:22 -06:00
|
|
|
print(" %4d delmov\n", ostats.ndelmov);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(ostats.nvar)
|
2010-10-13 14:20:22 -06:00
|
|
|
print(" %4d delmov\n", ostats.nvar);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(ostats.naddr)
|
2010-10-13 14:20:22 -06:00
|
|
|
print(" %4d delmov\n", ostats.naddr);
|
2008-11-22 18:58:53 -07:00
|
|
|
|
|
|
|
memset(&ostats, 0, sizeof(ostats));
|
|
|
|
}
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* add mov b,rn
|
|
|
|
* just after r
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
addmove(Reg *r, int bn, int rn, int f)
|
|
|
|
{
|
|
|
|
Prog *p, *p1;
|
|
|
|
Adr *a;
|
|
|
|
Var *v;
|
|
|
|
|
|
|
|
p1 = mal(sizeof(*p1));
|
|
|
|
clearp(p1);
|
|
|
|
p1->loc = 9999;
|
|
|
|
|
|
|
|
p = r->prog;
|
|
|
|
p1->link = p->link;
|
|
|
|
p->link = p1;
|
|
|
|
p1->lineno = p->lineno;
|
|
|
|
|
|
|
|
v = var + bn;
|
|
|
|
|
|
|
|
a = &p1->to;
|
|
|
|
a->sym = v->sym;
|
|
|
|
a->offset = v->offset;
|
|
|
|
a->etype = v->etype;
|
|
|
|
a->type = v->name;
|
2009-08-20 18:33:28 -06:00
|
|
|
a->gotype = v->gotype;
|
2008-11-18 20:24:37 -07:00
|
|
|
|
2008-11-22 18:58:53 -07:00
|
|
|
// need to clean this up with wptr and
|
2008-11-18 20:24:37 -07:00
|
|
|
// some of the defaults
|
|
|
|
p1->as = AMOVL;
|
|
|
|
switch(v->etype) {
|
|
|
|
default:
|
|
|
|
fatal("unknown type\n");
|
|
|
|
case TINT8:
|
|
|
|
case TUINT8:
|
|
|
|
case TBOOL:
|
|
|
|
p1->as = AMOVB;
|
|
|
|
break;
|
|
|
|
case TINT16:
|
|
|
|
case TUINT16:
|
|
|
|
p1->as = AMOVW;
|
|
|
|
break;
|
|
|
|
case TINT64:
|
|
|
|
case TUINT64:
|
|
|
|
case TUINTPTR:
|
|
|
|
case TPTR64:
|
|
|
|
p1->as = AMOVQ;
|
|
|
|
break;
|
|
|
|
case TFLOAT32:
|
|
|
|
p1->as = AMOVSS;
|
|
|
|
break;
|
|
|
|
case TFLOAT64:
|
2008-11-22 18:58:53 -07:00
|
|
|
p1->as = AMOVSD;
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
case TINT:
|
|
|
|
case TUINT:
|
|
|
|
case TINT32:
|
|
|
|
case TUINT32:
|
|
|
|
case TPTR32:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
p1->from.type = rn;
|
|
|
|
if(!f) {
|
|
|
|
p1->from = *a;
|
|
|
|
*a = zprog.from;
|
|
|
|
a->type = rn;
|
|
|
|
if(v->etype == TUINT8)
|
|
|
|
p1->as = AMOVB;
|
|
|
|
if(v->etype == TUINT16)
|
|
|
|
p1->as = AMOVW;
|
|
|
|
}
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
print("%P ===add=== %P\n", p, p1);
|
|
|
|
ostats.nspill++;
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
uint32
|
|
|
|
doregbits(int r)
|
|
|
|
{
|
|
|
|
uint32 b;
|
|
|
|
|
|
|
|
b = 0;
|
|
|
|
if(r >= D_INDIR)
|
|
|
|
r -= D_INDIR;
|
|
|
|
if(r >= D_AX && r <= D_R15)
|
|
|
|
b |= RtoB(r);
|
|
|
|
else
|
|
|
|
if(r >= D_AL && r <= D_R15B)
|
|
|
|
b |= RtoB(r-D_AL+D_AX);
|
|
|
|
else
|
|
|
|
if(r >= D_AH && r <= D_BH)
|
|
|
|
b |= RtoB(r-D_AH+D_AX);
|
|
|
|
else
|
|
|
|
if(r >= D_X0 && r <= D_X0+15)
|
|
|
|
b |= FtoB(r);
|
|
|
|
return b;
|
|
|
|
}
|
|
|
|
|
2009-05-23 17:36:43 -06:00
|
|
|
static int
|
2009-12-11 16:55:09 -07:00
|
|
|
overlap(int32 o1, int w1, int32 o2, int w2)
|
2009-05-23 17:36:43 -06:00
|
|
|
{
|
2009-12-11 16:55:09 -07:00
|
|
|
int32 t1, t2;
|
2009-05-23 17:36:43 -06:00
|
|
|
|
|
|
|
t1 = o1+w1;
|
|
|
|
t2 = o2+w2;
|
2009-12-11 16:55:09 -07:00
|
|
|
|
2009-05-23 17:36:43 -06:00
|
|
|
if(!(t1 > o2 && t2 > o1))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
Bits
|
|
|
|
mkvar(Reg *r, Adr *a)
|
|
|
|
{
|
|
|
|
Var *v;
|
2009-05-23 17:36:43 -06:00
|
|
|
int i, t, n, et, z, w, flag;
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
uint32 regu;
|
2008-11-18 20:24:37 -07:00
|
|
|
int32 o;
|
|
|
|
Bits bit;
|
|
|
|
Sym *s;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mark registers used
|
|
|
|
*/
|
|
|
|
t = a->type;
|
2009-12-11 16:55:09 -07:00
|
|
|
if(t == D_NONE)
|
|
|
|
goto none;
|
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
if(r != R)
|
|
|
|
r->use1.b[0] |= doregbits(a->index);
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
switch(t) {
|
|
|
|
default:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
regu = doregbits(t);
|
|
|
|
if(regu == 0)
|
|
|
|
goto none;
|
|
|
|
bit = zbits;
|
|
|
|
bit.b[0] = regu;
|
|
|
|
return bit;
|
2009-12-11 16:55:09 -07:00
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
case D_ADDR:
|
|
|
|
a->type = a->index;
|
|
|
|
bit = mkvar(r, a);
|
2009-12-11 16:55:09 -07:00
|
|
|
setaddrs(bit);
|
2008-11-18 20:24:37 -07:00
|
|
|
a->type = t;
|
2008-11-22 18:58:53 -07:00
|
|
|
ostats.naddr++;
|
2008-11-18 20:24:37 -07:00
|
|
|
goto none;
|
2009-12-11 16:55:09 -07:00
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
case D_EXTERN:
|
|
|
|
case D_STATIC:
|
|
|
|
case D_PARAM:
|
|
|
|
case D_AUTO:
|
|
|
|
n = t;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
s = a->sym;
|
|
|
|
if(s == S)
|
|
|
|
goto none;
|
2009-03-10 17:49:34 -06:00
|
|
|
if(s->name[0] == '.')
|
2008-11-24 15:01:12 -07:00
|
|
|
goto none;
|
2008-11-18 20:24:37 -07:00
|
|
|
et = a->etype;
|
|
|
|
o = a->offset;
|
2009-05-23 16:34:29 -06:00
|
|
|
w = a->width;
|
2009-05-23 17:36:43 -06:00
|
|
|
|
|
|
|
flag = 0;
|
2008-11-18 20:24:37 -07:00
|
|
|
for(i=0; i<nvar; i++) {
|
2009-12-11 16:55:09 -07:00
|
|
|
v = var+i;
|
|
|
|
if(v->sym == s && v->name == n) {
|
2009-12-12 15:36:52 -07:00
|
|
|
if(v->offset == o)
|
|
|
|
if(v->etype == et)
|
|
|
|
if(v->width == w)
|
2009-12-11 16:55:09 -07:00
|
|
|
return blsh(i);
|
|
|
|
|
2009-12-12 15:36:52 -07:00
|
|
|
// if they overlaps, disable both
|
2009-12-11 16:55:09 -07:00
|
|
|
if(overlap(v->offset, v->width, o, w)) {
|
2011-05-11 08:35:11 -06:00
|
|
|
// print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
|
2009-12-11 16:55:09 -07:00
|
|
|
v->addr = 1;
|
2009-05-23 17:36:43 -06:00
|
|
|
flag = 1;
|
2009-12-11 16:55:09 -07:00
|
|
|
}
|
2009-05-23 17:36:43 -06:00
|
|
|
}
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
2011-05-11 08:35:11 -06:00
|
|
|
if(a->pun) {
|
|
|
|
// print("disable pun %s\n", s->name);
|
2010-05-20 18:31:28 -06:00
|
|
|
flag = 1;
|
2008-11-18 20:24:37 -07:00
|
|
|
|
2011-05-11 08:35:11 -06:00
|
|
|
}
|
2008-11-18 20:24:37 -07:00
|
|
|
switch(et) {
|
2009-05-23 16:34:29 -06:00
|
|
|
case 0:
|
2008-11-18 20:24:37 -07:00
|
|
|
case TFUNC:
|
|
|
|
goto none;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(nvar >= NVAR) {
|
|
|
|
if(debug['w'] > 1 && s)
|
2009-05-23 16:34:29 -06:00
|
|
|
fatal("variable not optimized: %D", a);
|
2008-11-18 20:24:37 -07:00
|
|
|
goto none;
|
|
|
|
}
|
2009-12-11 16:55:09 -07:00
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
i = nvar;
|
|
|
|
nvar++;
|
2009-12-11 16:55:09 -07:00
|
|
|
v = var+i;
|
2008-11-18 20:24:37 -07:00
|
|
|
v->sym = s;
|
|
|
|
v->offset = o;
|
|
|
|
v->name = n;
|
2009-08-20 18:33:28 -06:00
|
|
|
v->gotype = a->gotype;
|
2008-11-18 20:24:37 -07:00
|
|
|
v->etype = et;
|
2009-05-23 16:34:29 -06:00
|
|
|
v->width = w;
|
2009-12-12 15:36:52 -07:00
|
|
|
v->addr = flag; // funny punning
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
if(debug['R'])
|
2009-12-11 16:55:09 -07:00
|
|
|
print("bit=%2d et=%2d w=%d %S %D\n", i, et, w, s, a);
|
2008-11-22 18:58:53 -07:00
|
|
|
ostats.nvar++;
|
2008-11-18 20:24:37 -07:00
|
|
|
|
|
|
|
bit = blsh(i);
|
2009-05-23 17:36:43 -06:00
|
|
|
if(n == D_EXTERN || n == D_STATIC)
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
externs.b[z] |= bit.b[z];
|
|
|
|
if(n == D_PARAM)
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
params.b[z] |= bit.b[z];
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
return bit;
|
|
|
|
|
|
|
|
none:
|
|
|
|
return zbits;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
prop(Reg *r, Bits ref, Bits cal)
|
|
|
|
{
|
|
|
|
Reg *r1, *r2;
|
|
|
|
int z;
|
|
|
|
|
|
|
|
for(r1 = r; r1 != R; r1 = r1->p1) {
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
ref.b[z] |= r1->refahead.b[z];
|
|
|
|
if(ref.b[z] != r1->refahead.b[z]) {
|
|
|
|
r1->refahead.b[z] = ref.b[z];
|
|
|
|
change++;
|
|
|
|
}
|
|
|
|
cal.b[z] |= r1->calahead.b[z];
|
|
|
|
if(cal.b[z] != r1->calahead.b[z]) {
|
|
|
|
r1->calahead.b[z] = cal.b[z];
|
|
|
|
change++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
switch(r1->prog->as) {
|
|
|
|
case ACALL:
|
|
|
|
if(noreturn(r1->prog))
|
|
|
|
break;
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
cal.b[z] |= ref.b[z] | externs.b[z];
|
|
|
|
ref.b[z] = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ATEXT:
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
cal.b[z] = 0;
|
|
|
|
ref.b[z] = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ARET:
|
|
|
|
for(z=0; z<BITS; z++) {
|
2008-11-22 18:58:53 -07:00
|
|
|
cal.b[z] = externs.b[z] | ovar.b[z];
|
2008-11-18 20:24:37 -07:00
|
|
|
ref.b[z] = 0;
|
|
|
|
}
|
2008-11-22 18:58:53 -07:00
|
|
|
break;
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
|
|
|
|
r1->use1.b[z] | r1->use2.b[z];
|
|
|
|
cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
|
|
|
|
r1->refbehind.b[z] = ref.b[z];
|
|
|
|
r1->calbehind.b[z] = cal.b[z];
|
|
|
|
}
|
|
|
|
if(r1->active)
|
|
|
|
break;
|
|
|
|
r1->active = 1;
|
|
|
|
}
|
|
|
|
for(; r != r1; r = r->p1)
|
|
|
|
for(r2 = r->p2; r2 != R; r2 = r2->p2link)
|
|
|
|
prop(r2, r->refbehind, r->calbehind);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* find looping structure
|
|
|
|
*
|
|
|
|
* 1) find reverse postordering
|
|
|
|
* 2) find approximate dominators,
|
|
|
|
* the actual dominators if the flow graph is reducible
|
|
|
|
* otherwise, dominators plus some other non-dominators.
|
|
|
|
* See Matthew S. Hecht and Jeffrey D. Ullman,
|
|
|
|
* "Analysis of a Simple Algorithm for Global Data Flow Problems",
|
|
|
|
* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
|
|
|
|
* Oct. 1-3, 1973, pp. 207-217.
|
|
|
|
* 3) find all nodes with a predecessor dominated by the current node.
|
|
|
|
* such a node is a loop head.
|
|
|
|
* recursively, all preds with a greater rpo number are in the loop
|
|
|
|
*/
|
|
|
|
int32
|
|
|
|
postorder(Reg *r, Reg **rpo2r, int32 n)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
|
|
|
|
r->rpo = 1;
|
|
|
|
r1 = r->s1;
|
|
|
|
if(r1 && !r1->rpo)
|
|
|
|
n = postorder(r1, rpo2r, n);
|
|
|
|
r1 = r->s2;
|
|
|
|
if(r1 && !r1->rpo)
|
|
|
|
n = postorder(r1, rpo2r, n);
|
|
|
|
rpo2r[n] = r;
|
|
|
|
n++;
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
int32
|
|
|
|
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
|
|
|
|
{
|
|
|
|
int32 t;
|
|
|
|
|
|
|
|
if(rpo1 == -1)
|
|
|
|
return rpo2;
|
|
|
|
while(rpo1 != rpo2){
|
|
|
|
if(rpo1 > rpo2){
|
|
|
|
t = rpo2;
|
|
|
|
rpo2 = rpo1;
|
|
|
|
rpo1 = t;
|
|
|
|
}
|
|
|
|
while(rpo1 < rpo2){
|
|
|
|
t = idom[rpo2];
|
|
|
|
if(t >= rpo2)
|
|
|
|
fatal("bad idom");
|
|
|
|
rpo2 = t;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return rpo1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
doms(int32 *idom, int32 r, int32 s)
|
|
|
|
{
|
|
|
|
while(s > r)
|
|
|
|
s = idom[s];
|
|
|
|
return s == r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
loophead(int32 *idom, Reg *r)
|
|
|
|
{
|
|
|
|
int32 src;
|
|
|
|
|
|
|
|
src = r->rpo;
|
|
|
|
if(r->p1 != R && doms(idom, src, r->p1->rpo))
|
|
|
|
return 1;
|
|
|
|
for(r = r->p2; r != R; r = r->p2link)
|
|
|
|
if(doms(idom, src, r->rpo))
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
loopmark(Reg **rpo2r, int32 head, Reg *r)
|
|
|
|
{
|
|
|
|
if(r->rpo < head || r->active == head)
|
|
|
|
return;
|
|
|
|
r->active = head;
|
|
|
|
r->loop += LOOP;
|
|
|
|
if(r->p1 != R)
|
|
|
|
loopmark(rpo2r, head, r->p1);
|
|
|
|
for(r = r->p2; r != R; r = r->p2link)
|
|
|
|
loopmark(rpo2r, head, r);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
loopit(Reg *r, int32 nr)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
int32 i, d, me;
|
|
|
|
|
|
|
|
if(nr > maxnr) {
|
|
|
|
rpo2r = mal(nr * sizeof(Reg*));
|
|
|
|
idom = mal(nr * sizeof(int32));
|
|
|
|
maxnr = nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
d = postorder(r, rpo2r, 0);
|
|
|
|
if(d > nr)
|
|
|
|
fatal("too many reg nodes %d %d", d, nr);
|
|
|
|
nr = d;
|
|
|
|
for(i = 0; i < nr / 2; i++) {
|
|
|
|
r1 = rpo2r[i];
|
|
|
|
rpo2r[i] = rpo2r[nr - 1 - i];
|
|
|
|
rpo2r[nr - 1 - i] = r1;
|
|
|
|
}
|
|
|
|
for(i = 0; i < nr; i++)
|
|
|
|
rpo2r[i]->rpo = i;
|
|
|
|
|
|
|
|
idom[0] = 0;
|
|
|
|
for(i = 0; i < nr; i++) {
|
|
|
|
r1 = rpo2r[i];
|
|
|
|
me = r1->rpo;
|
|
|
|
d = -1;
|
|
|
|
if(r1->p1 != R && r1->p1->rpo < me)
|
|
|
|
d = r1->p1->rpo;
|
|
|
|
for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
|
|
|
|
if(r1->rpo < me)
|
|
|
|
d = rpolca(idom, d, r1->rpo);
|
|
|
|
idom[i] = d;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(i = 0; i < nr; i++) {
|
|
|
|
r1 = rpo2r[i];
|
|
|
|
r1->loop++;
|
|
|
|
if(r1->p2 != R && loophead(idom, r1))
|
|
|
|
loopmark(rpo2r, i, r1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
synch(Reg *r, Bits dif)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
int z;
|
|
|
|
|
|
|
|
for(r1 = r; r1 != R; r1 = r1->s1) {
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
dif.b[z] = (dif.b[z] &
|
|
|
|
~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
|
|
|
|
r1->set.b[z] | r1->regdiff.b[z];
|
|
|
|
if(dif.b[z] != r1->regdiff.b[z]) {
|
|
|
|
r1->regdiff.b[z] = dif.b[z];
|
|
|
|
change++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(r1->active)
|
|
|
|
break;
|
|
|
|
r1->active = 1;
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
|
|
|
|
if(r1->s2 != R)
|
|
|
|
synch(r1->s2, dif);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32
|
|
|
|
allreg(uint32 b, Rgn *r)
|
|
|
|
{
|
|
|
|
Var *v;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
v = var + r->varno;
|
|
|
|
r->regno = 0;
|
|
|
|
switch(v->etype) {
|
|
|
|
|
|
|
|
default:
|
2009-11-06 17:51:49 -07:00
|
|
|
fatal("unknown etype %d/%E", bitno(b), v->etype);
|
2008-11-18 20:24:37 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
case TINT8:
|
|
|
|
case TUINT8:
|
|
|
|
case TINT16:
|
|
|
|
case TUINT16:
|
|
|
|
case TINT32:
|
|
|
|
case TUINT32:
|
|
|
|
case TINT64:
|
|
|
|
case TUINT64:
|
|
|
|
case TINT:
|
|
|
|
case TUINT:
|
|
|
|
case TUINTPTR:
|
|
|
|
case TBOOL:
|
|
|
|
case TPTR32:
|
|
|
|
case TPTR64:
|
|
|
|
i = BtoR(~b);
|
|
|
|
if(i && r->cost > 0) {
|
|
|
|
r->regno = i;
|
|
|
|
return RtoB(i);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TFLOAT32:
|
|
|
|
case TFLOAT64:
|
|
|
|
i = BtoF(~b);
|
|
|
|
if(i && r->cost > 0) {
|
|
|
|
r->regno = i;
|
|
|
|
return FtoB(i);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
paint1(Reg *r, int bn)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
int z;
|
|
|
|
uint32 bb;
|
|
|
|
|
|
|
|
z = bn/32;
|
|
|
|
bb = 1L<<(bn%32);
|
|
|
|
if(r->act.b[z] & bb)
|
|
|
|
return;
|
|
|
|
for(;;) {
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
r1 = r->p1;
|
|
|
|
if(r1 == R)
|
|
|
|
break;
|
|
|
|
if(!(r1->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
if(r1->act.b[z] & bb)
|
|
|
|
break;
|
|
|
|
r = r1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
|
|
|
|
change -= CLOAD * r->loop;
|
|
|
|
}
|
|
|
|
for(;;) {
|
|
|
|
r->act.b[z] |= bb;
|
|
|
|
|
|
|
|
if(r->use1.b[z] & bb) {
|
|
|
|
change += CREF * r->loop;
|
|
|
|
}
|
|
|
|
|
|
|
|
if((r->use2.b[z]|r->set.b[z]) & bb) {
|
|
|
|
change += CREF * r->loop;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(STORE(r) & r->regdiff.b[z] & bb) {
|
|
|
|
change -= CLOAD * r->loop;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(r->refbehind.b[z] & bb)
|
|
|
|
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
|
|
|
|
if(r1->refahead.b[z] & bb)
|
|
|
|
paint1(r1, bn);
|
|
|
|
|
|
|
|
if(!(r->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
r1 = r->s2;
|
|
|
|
if(r1 != R)
|
|
|
|
if(r1->refbehind.b[z] & bb)
|
|
|
|
paint1(r1, bn);
|
|
|
|
r = r->s1;
|
|
|
|
if(r == R)
|
|
|
|
break;
|
|
|
|
if(r->act.b[z] & bb)
|
|
|
|
break;
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32
|
|
|
|
regset(Reg *r, uint32 bb)
|
|
|
|
{
|
|
|
|
uint32 b, set;
|
|
|
|
Adr v;
|
|
|
|
int c;
|
|
|
|
|
|
|
|
set = 0;
|
|
|
|
v = zprog.from;
|
|
|
|
while(b = bb & ~(bb-1)) {
|
|
|
|
v.type = b & 0xFFFF? BtoR(b): BtoF(b);
|
|
|
|
if(v.type == 0)
|
2010-10-13 14:20:22 -06:00
|
|
|
fatal("zero v.type for %#ux", b);
|
2008-11-18 20:24:37 -07:00
|
|
|
c = copyu(r->prog, &v, A);
|
|
|
|
if(c == 3)
|
|
|
|
set |= b;
|
|
|
|
bb &= ~b;
|
|
|
|
}
|
|
|
|
return set;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32
|
|
|
|
reguse(Reg *r, uint32 bb)
|
|
|
|
{
|
|
|
|
uint32 b, set;
|
|
|
|
Adr v;
|
|
|
|
int c;
|
|
|
|
|
|
|
|
set = 0;
|
|
|
|
v = zprog.from;
|
|
|
|
while(b = bb & ~(bb-1)) {
|
|
|
|
v.type = b & 0xFFFF? BtoR(b): BtoF(b);
|
|
|
|
c = copyu(r->prog, &v, A);
|
|
|
|
if(c == 1 || c == 2 || c == 4)
|
|
|
|
set |= b;
|
|
|
|
bb &= ~b;
|
|
|
|
}
|
|
|
|
return set;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32
|
|
|
|
paint2(Reg *r, int bn)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
int z;
|
|
|
|
uint32 bb, vreg, x;
|
|
|
|
|
|
|
|
z = bn/32;
|
|
|
|
bb = 1L << (bn%32);
|
|
|
|
vreg = regbits;
|
|
|
|
if(!(r->act.b[z] & bb))
|
|
|
|
return vreg;
|
|
|
|
for(;;) {
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
r1 = r->p1;
|
|
|
|
if(r1 == R)
|
|
|
|
break;
|
|
|
|
if(!(r1->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
if(!(r1->act.b[z] & bb))
|
|
|
|
break;
|
|
|
|
r = r1;
|
|
|
|
}
|
|
|
|
for(;;) {
|
|
|
|
r->act.b[z] &= ~bb;
|
|
|
|
|
|
|
|
vreg |= r->regu;
|
|
|
|
|
|
|
|
if(r->refbehind.b[z] & bb)
|
|
|
|
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
|
|
|
|
if(r1->refahead.b[z] & bb)
|
|
|
|
vreg |= paint2(r1, bn);
|
|
|
|
|
|
|
|
if(!(r->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
r1 = r->s2;
|
|
|
|
if(r1 != R)
|
|
|
|
if(r1->refbehind.b[z] & bb)
|
|
|
|
vreg |= paint2(r1, bn);
|
|
|
|
r = r->s1;
|
|
|
|
if(r == R)
|
|
|
|
break;
|
|
|
|
if(!(r->act.b[z] & bb))
|
|
|
|
break;
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
bb = vreg;
|
|
|
|
for(; r; r=r->s1) {
|
|
|
|
x = r->regu & ~bb;
|
|
|
|
if(x) {
|
|
|
|
vreg |= reguse(r, x);
|
|
|
|
bb |= regset(r, x);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return vreg;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
paint3(Reg *r, int bn, int32 rb, int rn)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
Prog *p;
|
|
|
|
int z;
|
|
|
|
uint32 bb;
|
|
|
|
|
|
|
|
z = bn/32;
|
|
|
|
bb = 1L << (bn%32);
|
|
|
|
if(r->act.b[z] & bb)
|
|
|
|
return;
|
|
|
|
for(;;) {
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
r1 = r->p1;
|
|
|
|
if(r1 == R)
|
|
|
|
break;
|
|
|
|
if(!(r1->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
if(r1->act.b[z] & bb)
|
|
|
|
break;
|
|
|
|
r = r1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
|
|
|
|
addmove(r, bn, rn, 0);
|
|
|
|
for(;;) {
|
|
|
|
r->act.b[z] |= bb;
|
|
|
|
p = r->prog;
|
|
|
|
|
|
|
|
if(r->use1.b[z] & bb) {
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
2008-11-18 20:24:37 -07:00
|
|
|
print("%P", p);
|
|
|
|
addreg(&p->from, rn);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
print(" ===change== %P\n", p);
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
if((r->use2.b[z]|r->set.b[z]) & bb) {
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
2008-11-18 20:24:37 -07:00
|
|
|
print("%P", p);
|
|
|
|
addreg(&p->to, rn);
|
2008-11-22 18:58:53 -07:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
print(" ===change== %P\n", p);
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
if(STORE(r) & r->regdiff.b[z] & bb)
|
|
|
|
addmove(r, bn, rn, 1);
|
|
|
|
r->regu |= rb;
|
|
|
|
|
|
|
|
if(r->refbehind.b[z] & bb)
|
|
|
|
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
|
|
|
|
if(r1->refahead.b[z] & bb)
|
|
|
|
paint3(r1, bn, rb, rn);
|
|
|
|
|
|
|
|
if(!(r->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
r1 = r->s2;
|
|
|
|
if(r1 != R)
|
|
|
|
if(r1->refbehind.b[z] & bb)
|
|
|
|
paint3(r1, bn, rb, rn);
|
|
|
|
r = r->s1;
|
|
|
|
if(r == R)
|
|
|
|
break;
|
|
|
|
if(r->act.b[z] & bb)
|
|
|
|
break;
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
addreg(Adr *a, int rn)
|
|
|
|
{
|
|
|
|
|
|
|
|
a->sym = 0;
|
|
|
|
a->offset = 0;
|
|
|
|
a->type = rn;
|
2008-11-22 18:58:53 -07:00
|
|
|
|
|
|
|
ostats.ncvtreg++;
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
int32
|
|
|
|
RtoB(int r)
|
|
|
|
{
|
|
|
|
|
|
|
|
if(r < D_AX || r > D_R15)
|
|
|
|
return 0;
|
|
|
|
return 1L << (r-D_AX);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
BtoR(int32 b)
|
|
|
|
{
|
2008-11-22 18:58:53 -07:00
|
|
|
b &= 0x3fffL; // no R14 or R15
|
2008-11-18 20:24:37 -07:00
|
|
|
if(b == 0)
|
|
|
|
return 0;
|
|
|
|
return bitno(b) + D_AX;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bit reg
|
2009-08-09 17:42:21 -06:00
|
|
|
* 16 X5 (FREGMIN)
|
|
|
|
* ...
|
|
|
|
* 26 X15 (FREGEXT)
|
2008-11-18 20:24:37 -07:00
|
|
|
*/
|
|
|
|
int32
|
|
|
|
FtoB(int f)
|
|
|
|
{
|
|
|
|
if(f < FREGMIN || f > FREGEXT)
|
|
|
|
return 0;
|
|
|
|
return 1L << (f - FREGMIN + 16);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
BtoF(int32 b)
|
|
|
|
{
|
|
|
|
|
2009-08-09 17:42:21 -06:00
|
|
|
b &= 0xFF0000L;
|
2008-11-18 20:24:37 -07:00
|
|
|
if(b == 0)
|
|
|
|
return 0;
|
|
|
|
return bitno(b) - 16 + FREGMIN;
|
|
|
|
}
|
|
|
|
|
2008-11-22 18:58:53 -07:00
|
|
|
void
|
2009-08-29 21:33:21 -06:00
|
|
|
dumpone(Reg *r)
|
2008-11-18 20:24:37 -07:00
|
|
|
{
|
|
|
|
int z;
|
|
|
|
Bits bit;
|
|
|
|
|
2010-10-13 14:20:22 -06:00
|
|
|
print("%d:%P", r->loop, r->prog);
|
2009-08-29 21:33:21 -06:00
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] =
|
|
|
|
r->set.b[z] |
|
|
|
|
r->use1.b[z] |
|
|
|
|
r->use2.b[z] |
|
|
|
|
r->refbehind.b[z] |
|
|
|
|
r->refahead.b[z] |
|
|
|
|
r->calbehind.b[z] |
|
|
|
|
r->calahead.b[z] |
|
|
|
|
r->regdiff.b[z] |
|
|
|
|
r->act.b[z] |
|
|
|
|
0;
|
|
|
|
if(bany(&bit)) {
|
|
|
|
print("\t");
|
|
|
|
if(bany(&r->set))
|
|
|
|
print(" s:%Q", r->set);
|
|
|
|
if(bany(&r->use1))
|
|
|
|
print(" u1:%Q", r->use1);
|
|
|
|
if(bany(&r->use2))
|
|
|
|
print(" u2:%Q", r->use2);
|
|
|
|
if(bany(&r->refbehind))
|
|
|
|
print(" rb:%Q ", r->refbehind);
|
|
|
|
if(bany(&r->refahead))
|
|
|
|
print(" ra:%Q ", r->refahead);
|
|
|
|
if(bany(&r->calbehind))
|
|
|
|
print("cb:%Q ", r->calbehind);
|
|
|
|
if(bany(&r->calahead))
|
|
|
|
print(" ca:%Q ", r->calahead);
|
|
|
|
if(bany(&r->regdiff))
|
|
|
|
print(" d:%Q ", r->regdiff);
|
|
|
|
if(bany(&r->act))
|
|
|
|
print(" a:%Q ", r->act);
|
|
|
|
}
|
|
|
|
print("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dumpit(char *str, Reg *r0)
|
|
|
|
{
|
|
|
|
Reg *r, *r1;
|
|
|
|
|
2008-11-18 20:24:37 -07:00
|
|
|
print("\n%s\n", str);
|
|
|
|
for(r = r0; r != R; r = r->link) {
|
2009-08-29 21:33:21 -06:00
|
|
|
dumpone(r);
|
2008-11-18 20:24:37 -07:00
|
|
|
r1 = r->p2;
|
|
|
|
if(r1 != R) {
|
|
|
|
print(" pred:");
|
|
|
|
for(; r1 != R; r1 = r1->p2link)
|
2010-10-13 14:20:22 -06:00
|
|
|
print(" %.4ud", r1->prog->loc);
|
2008-11-18 20:24:37 -07:00
|
|
|
print("\n");
|
|
|
|
}
|
|
|
|
// r1 = r->s1;
|
|
|
|
// if(r1 != R) {
|
|
|
|
// print(" succ:");
|
|
|
|
// for(; r1 != R; r1 = r1->s1)
|
2010-10-13 14:20:22 -06:00
|
|
|
// print(" %.4ud", r1->prog->loc);
|
2008-11-18 20:24:37 -07:00
|
|
|
// print("\n");
|
|
|
|
// }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static Sym* symlist[10];
|
|
|
|
|
2008-11-22 18:58:53 -07:00
|
|
|
int
|
2008-11-18 20:24:37 -07:00
|
|
|
noreturn(Prog *p)
|
|
|
|
{
|
|
|
|
Sym *s;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if(symlist[0] == S) {
|
2010-04-01 23:31:27 -06:00
|
|
|
symlist[0] = pkglookup("panicindex", runtimepkg);
|
|
|
|
symlist[1] = pkglookup("panicslice", runtimepkg);
|
2010-01-22 18:06:20 -07:00
|
|
|
symlist[2] = pkglookup("throwinit", runtimepkg);
|
2010-04-01 23:31:27 -06:00
|
|
|
symlist[3] = pkglookup("panic", runtimepkg);
|
2008-11-18 20:24:37 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
s = p->to.sym;
|
|
|
|
if(s == S)
|
|
|
|
return 0;
|
|
|
|
for(i=0; symlist[i]!=S; i++)
|
|
|
|
if(s == symlist[i])
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|