2010-11-03 18:31:07 -06:00
|
|
|
// Inferno utils/5c/reg.c
|
2012-04-24 09:17:16 -06:00
|
|
|
// http://code.google.com/p/inferno-os/source/browse/utils/5c/reg.c
|
2010-11-03 18:31:07 -06:00
|
|
|
//
|
|
|
|
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
|
|
|
|
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
|
|
|
|
// Portions Copyright © 1997-1999 Vita Nuova Limited
|
|
|
|
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
|
|
|
|
// Portions Copyright © 2004,2006 Bruce Ellis
|
|
|
|
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
|
|
|
|
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
|
|
|
|
// Portions Copyright © 2009 The Go Authors. All rights reserved.
|
|
|
|
//
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
|
|
// in the Software without restriction, including without limitation the rights
|
|
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
|
|
// furnished to do so, subject to the following conditions:
|
|
|
|
//
|
|
|
|
// The above copyright notice and this permission notice shall be included in
|
|
|
|
// all copies or substantial portions of the Software.
|
|
|
|
//
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
// THE SOFTWARE.
|
|
|
|
|
|
|
|
|
2011-08-25 14:25:10 -06:00
|
|
|
#include <u.h>
|
|
|
|
#include <libc.h>
|
2010-11-03 18:31:07 -06:00
|
|
|
#include "gg.h"
|
|
|
|
#include "opt.h"
|
|
|
|
|
2012-06-02 10:54:28 -06:00
|
|
|
// number of register pseudo-variables; regopt reserves var[0..NREGVAR-1] for them
#define NREGVAR 32
|
|
|
|
#define REGBITS ((uint32)0xffffffff)
|
2010-11-03 18:31:07 -06:00
|
|
|
|
2011-01-07 19:04:48 -07:00
|
|
|
void addsplits(void);
|
cmd/gc: add temporary-merging optimization pass
The compilers assume they can generate temporary variables
as needed to preserve the right semantics or simplify code
generation and the back end will still generate good code.
This turns out not to be true. The back ends will only
track the first 128 variables per function and give up
on the remainder. That needs to be fixed too, in a later CL.
This CL merges temporary variables with equal types and
non-overlapping lifetimes using the greedy algorithm in
Poletto and Sarkar, "Linear Scan Register Allocation",
ACM TOPLAS 1999.
The result can be striking in the right functions.
Top 20 frame size changes in a 6g godoc binary by bytes saved:
5464 1984 (-3480, -63.7%) go/build.(*Context).Import
4456 1824 (-2632, -59.1%) go/printer.(*printer).expr1
2560 80 (-2480, -96.9%) time.nextStdChunk
3496 1608 (-1888, -54.0%) go/printer.(*printer).stmt
1896 272 (-1624, -85.7%) net/http.init
2688 1400 (-1288, -47.9%) fmt.(*pp).printReflectValue
2800 1512 (-1288, -46.0%) main.main
3296 2016 (-1280, -38.8%) crypto/tls.(*Conn).clientHandshake
1664 488 (-1176, -70.7%) time.loadZoneZip
1760 608 (-1152, -65.5%) time.parse
4104 3072 (-1032, -25.1%) runtime/pprof.writeHeap
1680 712 ( -968, -57.6%) go/ast.Walk
2488 1560 ( -928, -37.3%) crypto/x509.parseCertificate
1128 392 ( -736, -65.2%) math/big.nat.divLarge
1528 864 ( -664, -43.5%) go/printer.(*printer).fieldList
1360 712 ( -648, -47.6%) regexp/syntax.(*parser).factor
2104 1528 ( -576, -27.4%) encoding/asn1.parseField
1064 504 ( -560, -52.6%) encoding/xml.(*Decoder).text
584 48 ( -536, -91.8%) html.init
1400 864 ( -536, -38.3%) go/doc.playExample
In the same godoc build, cuts the number of functions with
too many vars from 83 to 32.
R=ken2
CC=golang-dev
https://golang.org/cl/12829043
2013-08-12 22:09:31 -06:00
|
|
|
// first Reg of the flow graph being optimized; regopt sets it from g->start
static Reg* firstr;
|
2013-08-12 20:02:10 -06:00
|
|
|
// nonzero until regopt's first call, which installs the 'Q' print format and clears it
static int first = 1;
|
2010-11-03 18:31:07 -06:00
|
|
|
|
|
|
|
int
|
|
|
|
rcmp(const void *a1, const void *a2)
|
|
|
|
{
|
|
|
|
Rgn *p1, *p2;
|
|
|
|
int c1, c2;
|
|
|
|
|
|
|
|
p1 = (Rgn*)a1;
|
|
|
|
p2 = (Rgn*)a2;
|
|
|
|
c1 = p2->cost;
|
|
|
|
c2 = p1->cost;
|
|
|
|
if(c1 -= c2)
|
|
|
|
return c1;
|
|
|
|
return p2->varno - p1->varno;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
setoutvar(void)
|
|
|
|
{
|
|
|
|
Type *t;
|
|
|
|
Node *n;
|
|
|
|
Addr a;
|
|
|
|
Iter save;
|
|
|
|
Bits bit;
|
|
|
|
int z;
|
|
|
|
|
|
|
|
t = structfirst(&save, getoutarg(curfn->type));
|
|
|
|
while(t != T) {
|
|
|
|
n = nodarg(t, 1);
|
|
|
|
a = zprog.from;
|
|
|
|
naddr(n, &a, 0);
|
2011-01-17 21:39:26 -07:00
|
|
|
bit = mkvar(R, &a);
|
2010-11-03 18:31:07 -06:00
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
ovar.b[z] |= bit.b[z];
|
|
|
|
t = structnext(&save);
|
|
|
|
}
|
2012-10-28 13:11:21 -06:00
|
|
|
//if(bany(&ovar))
|
2011-10-03 15:46:36 -06:00
|
|
|
//print("ovar = %Q\n", ovar);
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2013-08-12 20:02:10 -06:00
|
|
|
excise(Flow *r)
|
2010-11-03 18:31:07 -06:00
|
|
|
{
|
|
|
|
Prog *p;
|
|
|
|
|
|
|
|
p = r->prog;
|
|
|
|
p->as = ANOP;
|
|
|
|
p->scond = zprog.scond;
|
|
|
|
p->from = zprog.from;
|
|
|
|
p->to = zprog.to;
|
2011-01-10 14:15:52 -07:00
|
|
|
p->reg = zprog.reg;
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
setaddrs(Bits bit)
|
|
|
|
{
|
|
|
|
int i, n;
|
|
|
|
Var *v;
|
2011-09-06 08:24:21 -06:00
|
|
|
Node *node;
|
2010-11-03 18:31:07 -06:00
|
|
|
|
|
|
|
while(bany(&bit)) {
|
|
|
|
// convert each bit to a variable
|
|
|
|
i = bnum(bit);
|
2011-09-06 08:24:21 -06:00
|
|
|
node = var[i].node;
|
2010-11-03 18:31:07 -06:00
|
|
|
n = var[i].name;
|
|
|
|
bit.b[i/32] &= ~(1L<<(i%32));
|
|
|
|
|
|
|
|
// disable all pieces of that variable
|
|
|
|
for(i=0; i<nvar; i++) {
|
|
|
|
v = var+i;
|
2011-09-06 08:24:21 -06:00
|
|
|
if(v->node == node && v->name == n)
|
2010-11-03 18:31:07 -06:00
|
|
|
v->addr = 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
/*
 * Names of the register pseudo-variables, indexed by pseudo-variable
 * number: sixteen ".R" names followed by sixteen ".F" names.
 */
static char* regname[] = {
	".R0",  ".R1",  ".R2",  ".R3",
	".R4",  ".R5",  ".R6",  ".R7",
	".R8",  ".R9",  ".R10", ".R11",
	".R12", ".R13", ".R14", ".R15",

	".F0",  ".F1",  ".F2",  ".F3",
	".F4",  ".F5",  ".F6",  ".F7",
	".F8",  ".F9",  ".F10", ".F11",
	".F12", ".F13", ".F14", ".F15",
};
|
|
|
|
|
2012-12-09 11:10:52 -07:00
|
|
|
// name nodes for the register pseudo-variables; regopt creates each entry
// from regname[i] the first time it is found to be N and reuses it afterwards
static Node* regnodes[NREGVAR];
|
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
void
|
|
|
|
regopt(Prog *firstp)
|
|
|
|
{
|
|
|
|
Reg *r, *r1;
|
|
|
|
Prog *p;
|
2013-08-12 20:02:10 -06:00
|
|
|
Graph *g;
|
|
|
|
int i, z;
|
2010-11-03 18:31:07 -06:00
|
|
|
uint32 vreg;
|
|
|
|
Bits bit;
|
2013-08-12 20:02:10 -06:00
|
|
|
ProgInfo info;
|
2013-08-12 11:42:23 -06:00
|
|
|
|
2013-08-12 20:02:10 -06:00
|
|
|
if(first) {
|
2010-11-03 18:31:07 -06:00
|
|
|
fmtinstall('Q', Qconv);
|
2013-08-12 20:02:10 -06:00
|
|
|
first = 0;
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
5g, 6g, 8g: fix loop finding bug, squash jmps
The loop recognizer uses the standard dominance
frontiers but gets confused by dead code, which
has a (not explicitly set) rpo number of 0, meaning it
looks like the head of the function, so it dominates
everything. If the loop recognizer encounters dead
code while tracking backward through the graph
it fails to recognize where it started as a loop, and
then the optimizer does not registerize values loaded
inside that loop. Fix by checking rpo against rpo2r.
Separately, run a quick pass over the generated
code to squash JMPs to JMP instructions, which
are convenient to emit during code generation but
difficult to read when debugging the -S output.
A side effect of this pass is to eliminate dead code,
so the output files may be slightly smaller and the
optimizer may have less work to do.
There is no semantic effect, because the linkers
flatten JMP chains and delete dead instructions
when laying out the final code. Doing it here too
just makes the -S output easier to read and more
like what the final binary will contain.
The "dead code breaks loop finding" bug is thus
fixed twice over. It seemed prudent to fix loopit
separately just in case dead code ever sneaks back
in for one reason or another.
R=ken2
CC=golang-dev
https://golang.org/cl/5190043
2011-10-04 13:06:16 -06:00
|
|
|
|
|
|
|
fixjmp(firstp);
|
cmd/gc: add temporary-merging optimization pass
The compilers assume they can generate temporary variables
as needed to preserve the right semantics or simplify code
generation and the back end will still generate good code.
This turns out not to be true. The back ends will only
track the first 128 variables per function and give up
on the remainder. That needs to be fixed too, in a later CL.
This CL merges temporary variables with equal types and
non-overlapping lifetimes using the greedy algorithm in
Poletto and Sarkar, "Linear Scan Register Allocation",
ACM TOPLAS 1999.
The result can be striking in the right functions.
Top 20 frame size changes in a 6g godoc binary by bytes saved:
5464 1984 (-3480, -63.7%) go/build.(*Context).Import
4456 1824 (-2632, -59.1%) go/printer.(*printer).expr1
2560 80 (-2480, -96.9%) time.nextStdChunk
3496 1608 (-1888, -54.0%) go/printer.(*printer).stmt
1896 272 (-1624, -85.7%) net/http.init
2688 1400 (-1288, -47.9%) fmt.(*pp).printReflectValue
2800 1512 (-1288, -46.0%) main.main
3296 2016 (-1280, -38.8%) crypto/tls.(*Conn).clientHandshake
1664 488 (-1176, -70.7%) time.loadZoneZip
1760 608 (-1152, -65.5%) time.parse
4104 3072 (-1032, -25.1%) runtime/pprof.writeHeap
1680 712 ( -968, -57.6%) go/ast.Walk
2488 1560 ( -928, -37.3%) crypto/x509.parseCertificate
1128 392 ( -736, -65.2%) math/big.nat.divLarge
1528 864 ( -664, -43.5%) go/printer.(*printer).fieldList
1360 712 ( -648, -47.6%) regexp/syntax.(*parser).factor
2104 1528 ( -576, -27.4%) encoding/asn1.parseField
1064 504 ( -560, -52.6%) encoding/xml.(*Decoder).text
584 48 ( -536, -91.8%) html.init
1400 864 ( -536, -38.3%) go/doc.playExample
In the same godoc build, cuts the number of functions with
too many vars from 83 to 32.
R=ken2
CC=golang-dev
https://golang.org/cl/12829043
2013-08-12 22:09:31 -06:00
|
|
|
mergetemp(firstp);
|
2011-01-07 19:04:48 -07:00
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
/*
|
|
|
|
* control flow is more complicated in generated go code
|
|
|
|
* than in generated c code. define pseudo-variables for
|
|
|
|
* registers, so we have complete register usage information.
|
|
|
|
*/
|
|
|
|
nvar = NREGVAR;
|
|
|
|
memset(var, 0, NREGVAR*sizeof var[0]);
|
2012-12-09 11:10:52 -07:00
|
|
|
for(i=0; i<NREGVAR; i++) {
|
|
|
|
if(regnodes[i] == N)
|
|
|
|
regnodes[i] = newname(lookup(regname[i]));
|
|
|
|
var[i].node = regnodes[i];
|
|
|
|
}
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
|
2011-01-17 21:39:26 -07:00
|
|
|
regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
|
2010-11-03 18:31:07 -06:00
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
externs.b[z] = 0;
|
|
|
|
params.b[z] = 0;
|
|
|
|
consts.b[z] = 0;
|
|
|
|
addrs.b[z] = 0;
|
|
|
|
ovar.b[z] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// build list of return variables
|
|
|
|
setoutvar();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 1
|
|
|
|
* build aux data structure
|
|
|
|
* allocate pcs
|
|
|
|
* find use and set of variables
|
|
|
|
*/
|
2013-08-12 20:02:10 -06:00
|
|
|
g = flowstart(firstp, sizeof(Reg));
|
|
|
|
if(g == nil)
|
|
|
|
return;
|
|
|
|
firstr = (Reg*)g->start;
|
2013-08-12 11:42:23 -06:00
|
|
|
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link) {
|
|
|
|
p = r->f.prog;
|
|
|
|
proginfo(&info, p);
|
2010-11-03 18:31:07 -06:00
|
|
|
|
2013-02-03 12:51:21 -07:00
|
|
|
// Avoid making variables for direct-called functions.
|
|
|
|
if(p->as == ABL && p->to.type == D_EXTERN)
|
|
|
|
continue;
|
|
|
|
|
2013-08-12 11:42:23 -06:00
|
|
|
if(info.flags & LeftRead) {
|
|
|
|
bit = mkvar(r, &p->from);
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
r->use1.b[z] |= bit.b[z];
|
|
|
|
}
|
|
|
|
|
|
|
|
if(info.flags & RegRead) {
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
if(p->from.type != D_FREG)
|
|
|
|
r->use1.b[0] |= RtoB(p->reg);
|
|
|
|
else
|
|
|
|
r->use1.b[0] |= FtoB(p->reg);
|
|
|
|
}
|
2010-11-03 18:31:07 -06:00
|
|
|
|
2013-08-12 11:42:23 -06:00
|
|
|
if(info.flags & (RightAddr | RightRead | RightWrite)) {
|
|
|
|
bit = mkvar(r, &p->to);
|
|
|
|
if(info.flags & RightAddr)
|
|
|
|
setaddrs(bit);
|
|
|
|
if(info.flags & RightRead)
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
r->use2.b[z] |= bit.b[z];
|
2013-08-12 11:42:23 -06:00
|
|
|
if(info.flags & RightWrite)
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
r->set.b[z] |= bit.b[z];
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
}
|
2011-01-07 19:04:48 -07:00
|
|
|
if(firstr == R)
|
2010-11-03 18:31:07 -06:00
|
|
|
return;
|
|
|
|
|
2011-01-17 14:27:05 -07:00
|
|
|
for(i=0; i<nvar; i++) {
|
|
|
|
Var *v = var+i;
|
|
|
|
if(v->addr) {
|
|
|
|
bit = blsh(i);
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
addrs.b[z] |= bit.b[z];
|
|
|
|
}
|
|
|
|
|
2012-09-22 08:01:35 -06:00
|
|
|
if(debug['R'] && debug['v'])
|
|
|
|
print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
|
|
|
|
i, v->addr, v->etype, v->width, v->node, v->offset);
|
2011-01-17 14:27:05 -07:00
|
|
|
}
|
|
|
|
|
2012-09-22 08:01:35 -06:00
|
|
|
if(debug['R'] && debug['v'])
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit("pass1", &firstr->f, 1);
|
2012-09-22 08:01:35 -06:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
/*
|
|
|
|
* pass 2
|
|
|
|
* find looping structure
|
|
|
|
*/
|
2013-08-12 20:02:10 -06:00
|
|
|
flowrpo(g);
|
2010-11-03 18:31:07 -06:00
|
|
|
|
2012-09-22 08:01:35 -06:00
|
|
|
if(debug['R'] && debug['v'])
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit("pass2", &firstr->f, 1);
|
2012-09-22 08:01:35 -06:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
/*
|
|
|
|
* pass 3
|
|
|
|
* iterate propagating usage
|
|
|
|
* back until flow graph is complete
|
|
|
|
*/
|
|
|
|
loop1:
|
|
|
|
change = 0;
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link)
|
|
|
|
r->f.active = 0;
|
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link)
|
|
|
|
if(r->f.prog->as == ARET)
|
2010-11-03 18:31:07 -06:00
|
|
|
prop(r, zbits, zbits);
|
|
|
|
loop11:
|
|
|
|
/* pick up unreachable code */
|
|
|
|
i = 0;
|
|
|
|
for(r = firstr; r != R; r = r1) {
|
2013-08-12 20:02:10 -06:00
|
|
|
r1 = (Reg*)r->f.link;
|
|
|
|
if(r1 && r1->f.active && !r->f.active) {
|
2010-11-03 18:31:07 -06:00
|
|
|
prop(r, zbits, zbits);
|
|
|
|
i = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(i)
|
|
|
|
goto loop11;
|
|
|
|
if(change)
|
|
|
|
goto loop1;
|
|
|
|
|
2012-09-22 08:01:35 -06:00
|
|
|
if(debug['R'] && debug['v'])
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit("pass3", &firstr->f, 1);
|
2012-09-22 08:01:35 -06:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 4
|
|
|
|
* iterate propagating register/variable synchrony
|
|
|
|
* forward until graph is complete
|
|
|
|
*/
|
|
|
|
loop2:
|
|
|
|
change = 0;
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link)
|
|
|
|
r->f.active = 0;
|
2010-11-03 18:31:07 -06:00
|
|
|
synch(firstr, zbits);
|
|
|
|
if(change)
|
|
|
|
goto loop2;
|
|
|
|
|
|
|
|
addsplits();
|
|
|
|
|
2012-09-22 08:01:35 -06:00
|
|
|
if(debug['R'] && debug['v'])
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit("pass4", &firstr->f, 1);
|
2012-09-22 08:01:35 -06:00
|
|
|
|
2011-01-07 19:04:48 -07:00
|
|
|
if(debug['R'] > 1) {
|
2010-11-03 18:31:07 -06:00
|
|
|
print("\nprop structure:\n");
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link) {
|
|
|
|
print("%d:%P", r->f.loop, r->f.prog);
|
2010-11-03 18:31:07 -06:00
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
bit.b[z] = r->set.b[z] |
|
|
|
|
r->refahead.b[z] | r->calahead.b[z] |
|
|
|
|
r->refbehind.b[z] | r->calbehind.b[z] |
|
|
|
|
r->use1.b[z] | r->use2.b[z];
|
2011-01-19 17:30:13 -07:00
|
|
|
bit.b[z] &= ~addrs.b[z];
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
if(bany(&bit)) {
|
|
|
|
print("\t");
|
|
|
|
if(bany(&r->use1))
|
|
|
|
print(" u1=%Q", r->use1);
|
|
|
|
if(bany(&r->use2))
|
|
|
|
print(" u2=%Q", r->use2);
|
|
|
|
if(bany(&r->set))
|
|
|
|
print(" st=%Q", r->set);
|
|
|
|
if(bany(&r->refahead))
|
|
|
|
print(" ra=%Q", r->refahead);
|
|
|
|
if(bany(&r->calahead))
|
|
|
|
print(" ca=%Q", r->calahead);
|
|
|
|
if(bany(&r->refbehind))
|
|
|
|
print(" rb=%Q", r->refbehind);
|
|
|
|
if(bany(&r->calbehind))
|
|
|
|
print(" cb=%Q", r->calbehind);
|
|
|
|
}
|
|
|
|
print("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
/*
|
|
|
|
* pass 4.5
|
|
|
|
* move register pseudo-variables into regu.
|
|
|
|
*/
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link) {
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
|
|
|
|
|
|
|
|
r->set.b[0] &= ~REGBITS;
|
|
|
|
r->use1.b[0] &= ~REGBITS;
|
|
|
|
r->use2.b[0] &= ~REGBITS;
|
|
|
|
r->refbehind.b[0] &= ~REGBITS;
|
|
|
|
r->refahead.b[0] &= ~REGBITS;
|
|
|
|
r->calbehind.b[0] &= ~REGBITS;
|
|
|
|
r->calahead.b[0] &= ~REGBITS;
|
|
|
|
r->regdiff.b[0] &= ~REGBITS;
|
|
|
|
r->act.b[0] &= ~REGBITS;
|
|
|
|
}
|
|
|
|
|
2012-09-22 08:01:35 -06:00
|
|
|
if(debug['R'] && debug['v'])
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit("pass4.5", &firstr->f, 1);
|
2012-09-22 08:01:35 -06:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
/*
|
|
|
|
* pass 5
|
|
|
|
* isolate regions
|
|
|
|
* calculate costs (paint1)
|
|
|
|
*/
|
|
|
|
r = firstr;
|
|
|
|
if(r) {
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
|
|
|
|
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
|
2013-08-12 20:02:10 -06:00
|
|
|
if(bany(&bit) & !r->f.refset) {
|
2010-11-03 18:31:07 -06:00
|
|
|
// should never happen - all variables are preset
|
|
|
|
if(debug['w'])
|
2013-08-12 20:02:10 -06:00
|
|
|
print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
|
|
|
|
r->f.refset = 1;
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link)
|
2010-11-03 18:31:07 -06:00
|
|
|
r->act = zbits;
|
|
|
|
rgp = region;
|
|
|
|
nregion = 0;
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link) {
|
2010-11-03 18:31:07 -06:00
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] = r->set.b[z] &
|
|
|
|
~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
|
2013-08-12 20:02:10 -06:00
|
|
|
if(bany(&bit) && !r->f.refset) {
|
2010-11-03 18:31:07 -06:00
|
|
|
if(debug['w'])
|
2013-08-12 20:02:10 -06:00
|
|
|
print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
|
|
|
|
r->f.refset = 1;
|
|
|
|
excise(&r->f);
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
|
|
|
|
while(bany(&bit)) {
|
|
|
|
i = bnum(bit);
|
|
|
|
rgp->enter = r;
|
|
|
|
rgp->varno = i;
|
|
|
|
change = 0;
|
2011-01-07 19:04:48 -07:00
|
|
|
if(debug['R'] > 1)
|
2010-11-03 18:31:07 -06:00
|
|
|
print("\n");
|
|
|
|
paint1(r, i);
|
|
|
|
bit.b[i/32] &= ~(1L<<(i%32));
|
|
|
|
if(change <= 0) {
|
|
|
|
if(debug['R'])
|
|
|
|
print("%L $%d: %Q\n",
|
2013-08-12 20:02:10 -06:00
|
|
|
r->f.prog->lineno, change, blsh(i));
|
2010-11-03 18:31:07 -06:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
rgp->cost = change;
|
|
|
|
nregion++;
|
|
|
|
if(nregion >= NRGN) {
|
2011-01-07 19:04:48 -07:00
|
|
|
if(debug['R'] > 1)
|
2010-11-03 18:31:07 -06:00
|
|
|
print("too many regions\n");
|
|
|
|
goto brk;
|
|
|
|
}
|
|
|
|
rgp++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
brk:
|
|
|
|
qsort(region, nregion, sizeof(region[0]), rcmp);
|
|
|
|
|
2012-09-22 08:01:35 -06:00
|
|
|
if(debug['R'] && debug['v'])
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit("pass5", &firstr->f, 1);
|
2012-09-22 08:01:35 -06:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
/*
|
|
|
|
* pass 6
|
|
|
|
* determine used registers (paint2)
|
|
|
|
* replace code (paint3)
|
|
|
|
*/
|
|
|
|
rgp = region;
|
|
|
|
for(i=0; i<nregion; i++) {
|
|
|
|
bit = blsh(rgp->varno);
|
|
|
|
vreg = paint2(rgp->enter, rgp->varno);
|
|
|
|
vreg = allreg(vreg, rgp);
|
|
|
|
if(debug['R']) {
|
|
|
|
if(rgp->regno >= NREG)
|
|
|
|
print("%L $%d F%d: %Q\n",
|
2013-08-12 20:02:10 -06:00
|
|
|
rgp->enter->f.prog->lineno,
|
2010-11-03 18:31:07 -06:00
|
|
|
rgp->cost,
|
|
|
|
rgp->regno-NREG,
|
|
|
|
bit);
|
|
|
|
else
|
|
|
|
print("%L $%d R%d: %Q\n",
|
2013-08-12 20:02:10 -06:00
|
|
|
rgp->enter->f.prog->lineno,
|
2010-11-03 18:31:07 -06:00
|
|
|
rgp->cost,
|
|
|
|
rgp->regno,
|
|
|
|
bit);
|
|
|
|
}
|
|
|
|
if(rgp->regno != 0)
|
|
|
|
paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
|
|
|
|
rgp++;
|
|
|
|
}
|
2012-09-22 08:01:35 -06:00
|
|
|
|
|
|
|
if(debug['R'] && debug['v'])
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit("pass6", &firstr->f, 1);
|
2012-09-22 08:01:35 -06:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
/*
|
|
|
|
* pass 7
|
|
|
|
* peep-hole on basic block
|
|
|
|
*/
|
|
|
|
if(!debug['R'] || debug['P']) {
|
2013-08-12 20:02:10 -06:00
|
|
|
peep(firstp);
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
|
2012-09-22 08:01:35 -06:00
|
|
|
if(debug['R'] && debug['v'])
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit("pass7", &firstr->f, 1);
|
2012-09-22 08:01:35 -06:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
/*
|
|
|
|
* last pass
|
|
|
|
* eliminate nops
|
|
|
|
* free aux structures
|
2011-01-19 17:30:13 -07:00
|
|
|
* adjust the stack pointer
|
|
|
|
* MOVW.W R1,-12(R13) <<- start
|
|
|
|
* MOVW R0,R1
|
|
|
|
* MOVW R1,8(R13)
|
|
|
|
* MOVW $0,R1
|
|
|
|
* MOVW R1,4(R13)
|
|
|
|
* BL ,runtime.newproc+0(SB)
|
|
|
|
* MOVW &ft+-32(SP),R7 <<- adjust
|
|
|
|
* MOVW &j+-40(SP),R6 <<- adjust
|
|
|
|
* MOVW autotmp_0003+-24(SP),R5 <<- adjust
|
|
|
|
* MOVW $12(R13),R13 <<- finish
|
2010-11-03 18:31:07 -06:00
|
|
|
*/
|
2011-01-19 17:30:13 -07:00
|
|
|
vreg = 0;
|
2011-01-07 19:04:48 -07:00
|
|
|
for(p = firstp; p != P; p = p->link) {
|
|
|
|
while(p->link != P && p->link->as == ANOP)
|
2010-11-03 18:31:07 -06:00
|
|
|
p->link = p->link->link;
|
2011-01-07 19:04:48 -07:00
|
|
|
if(p->to.type == D_BRANCH)
|
2012-12-13 12:20:24 -07:00
|
|
|
while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
|
|
|
|
p->to.u.branch = p->to.u.branch->link;
|
2011-01-19 17:30:13 -07:00
|
|
|
if(p->as == AMOVW && p->to.reg == 13) {
|
|
|
|
if(p->scond & C_WBIT) {
|
|
|
|
vreg = -p->to.offset; // in adjust region
|
|
|
|
// print("%P adjusting %d\n", p, vreg);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if(p->from.type == D_CONST && p->to.type == D_REG) {
|
|
|
|
if(p->from.offset != vreg)
|
|
|
|
print("in and out different\n");
|
|
|
|
// print("%P finish %d\n", p, vreg);
|
|
|
|
vreg = 0; // done adjust region
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// print("%P %d %d from type\n", p, p->from.type, D_CONST);
|
|
|
|
// print("%P %d %d to type\n\n", p, p->to.type, D_REG);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(p->as == AMOVW && vreg != 0) {
|
|
|
|
if(p->from.sym != S)
|
|
|
|
if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
|
|
|
|
p->from.offset += vreg;
|
|
|
|
// print("%P adjusting from %d %d\n", p, vreg, p->from.type);
|
|
|
|
}
|
|
|
|
if(p->to.sym != S)
|
|
|
|
if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
|
|
|
|
p->to.offset += vreg;
|
|
|
|
// print("%P adjusting to %d %d\n", p, vreg, p->from.type);
|
|
|
|
}
|
|
|
|
}
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
2011-01-19 17:30:13 -07:00
|
|
|
|
2013-08-12 20:02:10 -06:00
|
|
|
flowend(g);
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
addsplits(void)
|
|
|
|
{
|
|
|
|
Reg *r, *r1;
|
|
|
|
int z, i;
|
|
|
|
Bits bit;
|
|
|
|
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = firstr; r != R; r = (Reg*)r->f.link) {
|
|
|
|
if(r->f.loop > 1)
|
2010-11-03 18:31:07 -06:00
|
|
|
continue;
|
2013-08-12 20:02:10 -06:00
|
|
|
if(r->f.prog->as == ABL)
|
2010-11-03 18:31:07 -06:00
|
|
|
continue;
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
|
|
|
|
if(r1->f.loop <= 1)
|
2010-11-03 18:31:07 -06:00
|
|
|
continue;
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] = r1->calbehind.b[z] &
|
|
|
|
(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
|
|
|
|
~(r->calahead.b[z] & addrs.b[z]);
|
|
|
|
while(bany(&bit)) {
|
|
|
|
i = bnum(bit);
|
|
|
|
bit.b[i/32] &= ~(1L << (i%32));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* add mov b,rn
|
|
|
|
* just after r
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
addmove(Reg *r, int bn, int rn, int f)
|
|
|
|
{
|
2011-07-28 14:28:23 -06:00
|
|
|
Prog *p, *p1, *p2;
|
2010-11-03 18:31:07 -06:00
|
|
|
Adr *a;
|
|
|
|
Var *v;
|
|
|
|
|
|
|
|
p1 = mal(sizeof(*p1));
|
|
|
|
*p1 = zprog;
|
2013-08-12 20:02:10 -06:00
|
|
|
p = r->f.prog;
|
2011-07-28 14:28:23 -06:00
|
|
|
|
|
|
|
// If there's a stack fixup coming (after BL newproc or BL deferproc),
|
|
|
|
// delay the load until after the fixup.
|
|
|
|
p2 = p->link;
|
|
|
|
if(p2 && p2->as == AMOVW && p2->from.type == D_CONST && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == D_REG)
|
|
|
|
p = p2;
|
2010-11-03 18:31:07 -06:00
|
|
|
|
|
|
|
p1->link = p->link;
|
|
|
|
p->link = p1;
|
|
|
|
p1->lineno = p->lineno;
|
|
|
|
|
|
|
|
v = var + bn;
|
|
|
|
|
|
|
|
a = &p1->to;
|
|
|
|
a->name = v->name;
|
2011-06-09 16:02:34 -06:00
|
|
|
a->node = v->node;
|
2011-09-06 08:24:21 -06:00
|
|
|
a->sym = v->node->sym;
|
2010-11-03 18:31:07 -06:00
|
|
|
a->offset = v->offset;
|
|
|
|
a->etype = v->etype;
|
|
|
|
a->type = D_OREG;
|
|
|
|
if(a->etype == TARRAY || a->sym == S)
|
|
|
|
a->type = D_CONST;
|
|
|
|
|
2011-01-17 21:39:26 -07:00
|
|
|
if(v->addr)
|
2013-06-09 07:50:24 -06:00
|
|
|
fatal("addmove: shouldn't be doing this %A\n", a);
|
2011-01-17 21:39:26 -07:00
|
|
|
|
2011-01-07 19:04:48 -07:00
|
|
|
switch(v->etype) {
|
|
|
|
default:
|
|
|
|
print("What is this %E\n", v->etype);
|
|
|
|
|
|
|
|
case TINT8:
|
2013-08-08 22:43:17 -06:00
|
|
|
p1->as = AMOVBS;
|
2011-01-07 19:04:48 -07:00
|
|
|
break;
|
2011-01-15 17:55:47 -07:00
|
|
|
case TBOOL:
|
|
|
|
case TUINT8:
|
2011-07-28 16:22:12 -06:00
|
|
|
//print("movbu %E %d %S\n", v->etype, bn, v->sym);
|
2011-01-15 17:55:47 -07:00
|
|
|
p1->as = AMOVBU;
|
|
|
|
break;
|
2011-01-07 19:04:48 -07:00
|
|
|
case TINT16:
|
2013-08-08 22:43:17 -06:00
|
|
|
p1->as = AMOVHS;
|
2011-01-07 19:04:48 -07:00
|
|
|
break;
|
2011-01-15 17:55:47 -07:00
|
|
|
case TUINT16:
|
|
|
|
p1->as = AMOVHU;
|
|
|
|
break;
|
|
|
|
case TINT32:
|
|
|
|
case TUINT32:
|
|
|
|
case TPTR32:
|
|
|
|
p1->as = AMOVW;
|
|
|
|
break;
|
2011-01-07 19:04:48 -07:00
|
|
|
case TFLOAT32:
|
2010-11-03 18:31:07 -06:00
|
|
|
p1->as = AMOVF;
|
2011-01-07 19:04:48 -07:00
|
|
|
break;
|
|
|
|
case TFLOAT64:
|
2010-11-03 18:31:07 -06:00
|
|
|
p1->as = AMOVD;
|
2011-01-07 19:04:48 -07:00
|
|
|
break;
|
|
|
|
}
|
2010-11-03 18:31:07 -06:00
|
|
|
|
|
|
|
p1->from.type = D_REG;
|
|
|
|
p1->from.reg = rn;
|
|
|
|
if(rn >= NREG) {
|
|
|
|
p1->from.type = D_FREG;
|
|
|
|
p1->from.reg = rn-NREG;
|
|
|
|
}
|
|
|
|
if(!f) {
|
|
|
|
p1->from = *a;
|
|
|
|
*a = zprog.from;
|
|
|
|
a->type = D_REG;
|
|
|
|
a->reg = rn;
|
|
|
|
if(rn >= NREG) {
|
|
|
|
a->type = D_FREG;
|
|
|
|
a->reg = rn-NREG;
|
|
|
|
}
|
2011-01-15 17:55:47 -07:00
|
|
|
if(v->etype == TUINT8 || v->etype == TBOOL)
|
2010-11-03 18:31:07 -06:00
|
|
|
p1->as = AMOVBU;
|
|
|
|
if(v->etype == TUINT16)
|
|
|
|
p1->as = AMOVHU;
|
|
|
|
}
|
|
|
|
if(debug['R'])
|
|
|
|
print("%P\t.a%P\n", p, p1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * overlap returns 1 exactly when o1+w1 > o2 and o2+w2 > o1,
 * i.e. when each (offset, width) span ends after the other begins;
 * otherwise it returns 0.
 */
static int
overlap(int32 o1, int w1, int32 o2, int w2)
{
	int32 end1, end2;

	end1 = o1 + w1;
	end2 = o2 + w2;

	if(end1 <= o2 || end2 <= o1)
		return 0;
	return 1;
}
|
|
|
|
|
|
|
|
Bits
|
2011-01-17 21:39:26 -07:00
|
|
|
mkvar(Reg *r, Adr *a)
|
2010-11-03 18:31:07 -06:00
|
|
|
{
|
|
|
|
Var *v;
|
|
|
|
int i, t, n, et, z, w, flag;
|
|
|
|
int32 o;
|
|
|
|
Bits bit;
|
2011-09-06 08:24:21 -06:00
|
|
|
Node *node;
|
2010-11-03 18:31:07 -06:00
|
|
|
|
2011-01-07 19:04:48 -07:00
|
|
|
// mark registers used
|
2010-11-03 18:31:07 -06:00
|
|
|
t = a->type;
|
2011-01-07 19:04:48 -07:00
|
|
|
|
2011-01-17 14:27:05 -07:00
|
|
|
flag = 0;
|
2011-01-07 19:04:48 -07:00
|
|
|
switch(t) {
|
|
|
|
default:
|
|
|
|
print("type %d %d %D\n", t, a->name, a);
|
|
|
|
goto none;
|
|
|
|
|
2011-01-10 14:15:52 -07:00
|
|
|
case D_NONE:
|
2011-01-07 19:04:48 -07:00
|
|
|
case D_FCONST:
|
|
|
|
case D_BRANCH:
|
2011-01-16 16:25:13 -07:00
|
|
|
break;
|
2011-01-07 19:04:48 -07:00
|
|
|
|
2011-01-17 14:27:05 -07:00
|
|
|
case D_CONST:
|
|
|
|
flag = 1;
|
|
|
|
goto onereg;
|
|
|
|
|
2011-01-07 19:04:48 -07:00
|
|
|
case D_REGREG:
|
2012-06-07 12:42:28 -06:00
|
|
|
case D_REGREG2:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
bit = zbits;
|
2011-01-07 19:04:48 -07:00
|
|
|
if(a->offset != NREG)
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
bit.b[0] |= RtoB(a->offset);
|
|
|
|
if(a->reg != NREG)
|
|
|
|
bit.b[0] |= RtoB(a->reg);
|
|
|
|
return bit;
|
2011-01-07 19:04:48 -07:00
|
|
|
|
|
|
|
case D_REG:
|
|
|
|
case D_SHIFT:
|
2011-01-17 14:27:05 -07:00
|
|
|
onereg:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
if(a->reg != NREG) {
|
|
|
|
bit = zbits;
|
|
|
|
bit.b[0] = RtoB(a->reg);
|
|
|
|
return bit;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case D_OREG:
|
|
|
|
if(a->reg != NREG) {
|
2013-08-12 20:02:10 -06:00
|
|
|
if(a == &r->f.prog->from)
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->use1.b[0] |= RtoB(a->reg);
|
|
|
|
else
|
|
|
|
r->use2.b[0] |= RtoB(a->reg);
|
2013-08-12 20:02:10 -06:00
|
|
|
if(r->f.prog->scond & (C_PBIT|C_WBIT))
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
r->set.b[0] |= RtoB(a->reg);
|
|
|
|
}
|
2011-01-07 19:04:48 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
case D_FREG:
|
8g: compute register liveness during regopt
Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/4529106
2011-06-03 12:10:39 -06:00
|
|
|
if(a->reg != NREG) {
|
|
|
|
bit = zbits;
|
|
|
|
bit.b[0] = FtoB(a->reg);
|
|
|
|
return bit;
|
|
|
|
}
|
2011-01-07 19:04:48 -07:00
|
|
|
break;
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
2011-01-07 19:04:48 -07:00
|
|
|
|
|
|
|
switch(a->name) {
|
|
|
|
default:
|
|
|
|
goto none;
|
|
|
|
|
|
|
|
case D_EXTERN:
|
|
|
|
case D_STATIC:
|
|
|
|
case D_AUTO:
|
|
|
|
case D_PARAM:
|
|
|
|
n = a->name;
|
|
|
|
break;
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
|
2011-09-06 08:24:21 -06:00
|
|
|
node = a->node;
|
2011-10-03 15:46:36 -06:00
|
|
|
if(node == N || node->op != ONAME || node->orig == N)
|
2011-01-07 19:04:48 -07:00
|
|
|
goto none;
|
2011-09-06 08:24:21 -06:00
|
|
|
node = node->orig;
|
2011-10-03 15:46:36 -06:00
|
|
|
if(node->orig != node)
|
|
|
|
fatal("%D: bad node", a);
|
|
|
|
if(node->sym == S || node->sym->name[0] == '.')
|
2011-01-07 19:04:48 -07:00
|
|
|
goto none;
|
|
|
|
et = a->etype;
|
|
|
|
o = a->offset;
|
|
|
|
w = a->width;
|
2012-09-22 08:01:35 -06:00
|
|
|
if(w < 0)
|
|
|
|
fatal("bad width %d for %D", w, a);
|
2011-01-07 19:04:48 -07:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
for(i=0; i<nvar; i++) {
|
|
|
|
v = var+i;
|
2011-09-06 08:24:21 -06:00
|
|
|
if(v->node == node && v->name == n) {
|
2010-11-03 18:31:07 -06:00
|
|
|
if(v->offset == o)
|
|
|
|
if(v->etype == et)
|
|
|
|
if(v->width == w)
|
2011-01-07 19:04:48 -07:00
|
|
|
if(!flag)
|
|
|
|
return blsh(i);
|
2010-11-03 18:31:07 -06:00
|
|
|
|
2011-07-28 16:22:12 -06:00
|
|
|
// if they overlap, disable both
|
2010-11-03 18:31:07 -06:00
|
|
|
if(overlap(v->offset, v->width, o, w)) {
|
|
|
|
v->addr = 1;
|
|
|
|
flag = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
switch(et) {
|
|
|
|
case 0:
|
|
|
|
case TFUNC:
|
|
|
|
goto none;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(nvar >= NVAR) {
|
2011-09-06 08:24:21 -06:00
|
|
|
if(debug['w'] > 1 && node)
|
2010-11-03 18:31:07 -06:00
|
|
|
fatal("variable not optimized: %D", a);
|
|
|
|
goto none;
|
|
|
|
}
|
|
|
|
|
|
|
|
i = nvar;
|
|
|
|
nvar++;
|
2011-07-28 16:22:12 -06:00
|
|
|
//print("var %d %E %D %S\n", i, et, a, s);
|
2010-11-03 18:31:07 -06:00
|
|
|
v = var+i;
|
|
|
|
v->offset = o;
|
|
|
|
v->name = n;
|
|
|
|
v->etype = et;
|
|
|
|
v->width = w;
|
|
|
|
v->addr = flag; // funny punning
|
2011-09-06 08:24:21 -06:00
|
|
|
v->node = node;
|
2011-06-09 16:02:34 -06:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
if(debug['R'])
|
2012-09-24 15:44:00 -06:00
|
|
|
print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
|
2010-11-03 18:31:07 -06:00
|
|
|
|
|
|
|
bit = blsh(i);
|
|
|
|
if(n == D_EXTERN || n == D_STATIC)
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
externs.b[z] |= bit.b[z];
|
|
|
|
if(n == D_PARAM)
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
params.b[z] |= bit.b[z];
|
|
|
|
|
|
|
|
return bit;
|
|
|
|
|
|
|
|
none:
|
|
|
|
return zbits;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
prop(Reg *r, Bits ref, Bits cal)
|
|
|
|
{
|
|
|
|
Reg *r1, *r2;
|
|
|
|
int z;
|
|
|
|
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
|
2010-11-03 18:31:07 -06:00
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
ref.b[z] |= r1->refahead.b[z];
|
|
|
|
if(ref.b[z] != r1->refahead.b[z]) {
|
|
|
|
r1->refahead.b[z] = ref.b[z];
|
|
|
|
change++;
|
|
|
|
}
|
|
|
|
cal.b[z] |= r1->calahead.b[z];
|
|
|
|
if(cal.b[z] != r1->calahead.b[z]) {
|
|
|
|
r1->calahead.b[z] = cal.b[z];
|
|
|
|
change++;
|
|
|
|
}
|
|
|
|
}
|
2013-08-12 20:02:10 -06:00
|
|
|
switch(r1->f.prog->as) {
|
2010-11-03 18:31:07 -06:00
|
|
|
case ABL:
|
2013-08-12 20:02:10 -06:00
|
|
|
if(noreturn(r1->f.prog))
|
2011-01-07 19:04:48 -07:00
|
|
|
break;
|
2010-11-03 18:31:07 -06:00
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
cal.b[z] |= ref.b[z] | externs.b[z];
|
|
|
|
ref.b[z] = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ATEXT:
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
cal.b[z] = 0;
|
|
|
|
ref.b[z] = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ARET:
|
|
|
|
for(z=0; z<BITS; z++) {
|
2011-01-07 19:04:48 -07:00
|
|
|
cal.b[z] = externs.b[z] | ovar.b[z];
|
2010-11-03 18:31:07 -06:00
|
|
|
ref.b[z] = 0;
|
|
|
|
}
|
2011-01-07 19:04:48 -07:00
|
|
|
break;
|
2012-02-20 11:41:44 -07:00
|
|
|
|
|
|
|
default:
|
|
|
|
// Work around for issue 1304:
|
|
|
|
// flush modified globals before each instruction.
|
2013-01-04 09:07:21 -07:00
|
|
|
for(z=0; z<BITS; z++) {
|
2012-02-20 11:41:44 -07:00
|
|
|
cal.b[z] |= externs.b[z];
|
2013-01-04 09:07:21 -07:00
|
|
|
// issue 4066: flush modified return variables in case of panic
|
|
|
|
if(hasdefer)
|
|
|
|
cal.b[z] |= ovar.b[z];
|
|
|
|
}
|
2012-02-20 11:41:44 -07:00
|
|
|
break;
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
|
|
|
|
r1->use1.b[z] | r1->use2.b[z];
|
|
|
|
cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
|
|
|
|
r1->refbehind.b[z] = ref.b[z];
|
|
|
|
r1->calbehind.b[z] = cal.b[z];
|
|
|
|
}
|
2013-08-12 20:02:10 -06:00
|
|
|
if(r1->f.active)
|
2010-11-03 18:31:07 -06:00
|
|
|
break;
|
2013-08-12 20:02:10 -06:00
|
|
|
r1->f.active = 1;
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
2013-08-12 20:02:10 -06:00
|
|
|
for(; r != r1; r = (Reg*)r->f.p1)
|
|
|
|
for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
|
2010-11-03 18:31:07 -06:00
|
|
|
prop(r2, r->refbehind, r->calbehind);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
synch(Reg *r, Bits dif)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
int z;
|
|
|
|
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
|
2010-11-03 18:31:07 -06:00
|
|
|
for(z=0; z<BITS; z++) {
|
|
|
|
dif.b[z] = (dif.b[z] &
|
|
|
|
~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
|
|
|
|
r1->set.b[z] | r1->regdiff.b[z];
|
|
|
|
if(dif.b[z] != r1->regdiff.b[z]) {
|
|
|
|
r1->regdiff.b[z] = dif.b[z];
|
|
|
|
change++;
|
|
|
|
}
|
|
|
|
}
|
2013-08-12 20:02:10 -06:00
|
|
|
if(r1->f.active)
|
2010-11-03 18:31:07 -06:00
|
|
|
break;
|
2013-08-12 20:02:10 -06:00
|
|
|
r1->f.active = 1;
|
2010-11-03 18:31:07 -06:00
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
|
2013-08-12 20:02:10 -06:00
|
|
|
if(r1->f.s2 != nil)
|
|
|
|
synch((Reg*)r1->f.s2, dif);
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32
|
|
|
|
allreg(uint32 b, Rgn *r)
|
|
|
|
{
|
|
|
|
Var *v;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
v = var + r->varno;
|
|
|
|
r->regno = 0;
|
|
|
|
switch(v->etype) {
|
|
|
|
|
|
|
|
default:
|
|
|
|
fatal("unknown etype %d/%E", bitno(b), v->etype);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TINT8:
|
|
|
|
case TUINT8:
|
|
|
|
case TINT16:
|
|
|
|
case TUINT16:
|
|
|
|
case TINT32:
|
|
|
|
case TUINT32:
|
|
|
|
case TINT:
|
|
|
|
case TUINT:
|
|
|
|
case TUINTPTR:
|
|
|
|
case TBOOL:
|
|
|
|
case TPTR32:
|
|
|
|
i = BtoR(~b);
|
|
|
|
if(i && r->cost >= 0) {
|
|
|
|
r->regno = i;
|
|
|
|
return RtoB(i);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TFLOAT32:
|
|
|
|
case TFLOAT64:
|
|
|
|
i = BtoF(~b);
|
|
|
|
if(i && r->cost >= 0) {
|
|
|
|
r->regno = i+NREG;
|
|
|
|
return FtoB(i);
|
|
|
|
}
|
|
|
|
break;
|
2011-01-10 14:15:52 -07:00
|
|
|
|
|
|
|
case TINT64:
|
|
|
|
case TUINT64:
|
|
|
|
case TPTR64:
|
|
|
|
case TINTER:
|
|
|
|
case TSTRUCT:
|
|
|
|
case TARRAY:
|
|
|
|
break;
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
paint1(Reg *r, int bn)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
Prog *p;
|
|
|
|
int z;
|
|
|
|
uint32 bb;
|
|
|
|
|
|
|
|
z = bn/32;
|
|
|
|
bb = 1L<<(bn%32);
|
|
|
|
if(r->act.b[z] & bb)
|
|
|
|
return;
|
|
|
|
for(;;) {
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
2013-08-12 20:02:10 -06:00
|
|
|
r1 = (Reg*)r->f.p1;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1 == R)
|
|
|
|
break;
|
|
|
|
if(!(r1->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
if(r1->act.b[z] & bb)
|
|
|
|
break;
|
|
|
|
r = r1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
|
2013-08-12 20:02:10 -06:00
|
|
|
change -= CLOAD * r->f.loop;
|
2011-01-07 19:04:48 -07:00
|
|
|
if(debug['R'] > 1)
|
2013-08-12 20:02:10 -06:00
|
|
|
print("%d%P\td %Q $%d\n", r->f.loop,
|
|
|
|
r->f.prog, blsh(bn), change);
|
2010-11-03 18:31:07 -06:00
|
|
|
}
|
|
|
|
for(;;) {
|
|
|
|
r->act.b[z] |= bb;
|
2013-08-12 20:02:10 -06:00
|
|
|
p = r->f.prog;
|
2010-11-03 18:31:07 -06:00
|
|
|
|
|
|
|
if(r->use1.b[z] & bb) {
|
2013-08-12 20:02:10 -06:00
|
|
|
change += CREF * r->f.loop;
|
2011-01-07 19:04:48 -07:00
|
|
|
if(debug['R'] > 1)
|
2013-08-12 20:02:10 -06:00
|
|
|
print("%d%P\tu1 %Q $%d\n", r->f.loop,
|
2010-11-03 18:31:07 -06:00
|
|
|
p, blsh(bn), change);
|
|
|
|
}
|
|
|
|
|
|
|
|
if((r->use2.b[z]|r->set.b[z]) & bb) {
|
2013-08-12 20:02:10 -06:00
|
|
|
change += CREF * r->f.loop;
|
2011-01-07 19:04:48 -07:00
|
|
|
if(debug['R'] > 1)
|
2013-08-12 20:02:10 -06:00
|
|
|
print("%d%P\tu2 %Q $%d\n", r->f.loop,
|
2010-11-03 18:31:07 -06:00
|
|
|
p, blsh(bn), change);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(STORE(r) & r->regdiff.b[z] & bb) {
|
2013-08-12 20:02:10 -06:00
|
|
|
change -= CLOAD * r->f.loop;
|
2011-01-07 19:04:48 -07:00
|
|
|
if(debug['R'] > 1)
|
2013-08-12 20:02:10 -06:00
|
|
|
print("%d%P\tst %Q $%d\n", r->f.loop,
|
2010-11-03 18:31:07 -06:00
|
|
|
p, blsh(bn), change);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(r->refbehind.b[z] & bb)
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1->refahead.b[z] & bb)
|
|
|
|
paint1(r1, bn);
|
|
|
|
|
|
|
|
if(!(r->refahead.b[z] & bb))
|
|
|
|
break;
|
2013-08-12 20:02:10 -06:00
|
|
|
r1 = (Reg*)r->f.s2;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1 != R)
|
|
|
|
if(r1->refbehind.b[z] & bb)
|
|
|
|
paint1(r1, bn);
|
2013-08-12 20:02:10 -06:00
|
|
|
r = (Reg*)r->f.s1;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r == R)
|
|
|
|
break;
|
|
|
|
if(r->act.b[z] & bb)
|
|
|
|
break;
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32
|
|
|
|
paint2(Reg *r, int bn)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
int z;
|
|
|
|
uint32 bb, vreg;
|
|
|
|
|
|
|
|
z = bn/32;
|
|
|
|
bb = 1L << (bn%32);
|
|
|
|
vreg = regbits;
|
|
|
|
if(!(r->act.b[z] & bb))
|
|
|
|
return vreg;
|
|
|
|
for(;;) {
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
2013-08-12 20:02:10 -06:00
|
|
|
r1 = (Reg*)r->f.p1;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1 == R)
|
|
|
|
break;
|
|
|
|
if(!(r1->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
if(!(r1->act.b[z] & bb))
|
|
|
|
break;
|
|
|
|
r = r1;
|
|
|
|
}
|
|
|
|
for(;;) {
|
|
|
|
r->act.b[z] &= ~bb;
|
|
|
|
|
|
|
|
vreg |= r->regu;
|
|
|
|
|
|
|
|
if(r->refbehind.b[z] & bb)
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1->refahead.b[z] & bb)
|
|
|
|
vreg |= paint2(r1, bn);
|
|
|
|
|
|
|
|
if(!(r->refahead.b[z] & bb))
|
|
|
|
break;
|
2013-08-12 20:02:10 -06:00
|
|
|
r1 = (Reg*)r->f.s2;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1 != R)
|
|
|
|
if(r1->refbehind.b[z] & bb)
|
|
|
|
vreg |= paint2(r1, bn);
|
2013-08-12 20:02:10 -06:00
|
|
|
r = (Reg*)r->f.s1;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r == R)
|
|
|
|
break;
|
|
|
|
if(!(r->act.b[z] & bb))
|
|
|
|
break;
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return vreg;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
paint3(Reg *r, int bn, int32 rb, int rn)
|
|
|
|
{
|
|
|
|
Reg *r1;
|
|
|
|
Prog *p;
|
|
|
|
int z;
|
|
|
|
uint32 bb;
|
|
|
|
|
|
|
|
z = bn/32;
|
|
|
|
bb = 1L << (bn%32);
|
|
|
|
if(r->act.b[z] & bb)
|
|
|
|
return;
|
|
|
|
for(;;) {
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
2013-08-12 20:02:10 -06:00
|
|
|
r1 = (Reg*)r->f.p1;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1 == R)
|
|
|
|
break;
|
|
|
|
if(!(r1->refahead.b[z] & bb))
|
|
|
|
break;
|
|
|
|
if(r1->act.b[z] & bb)
|
|
|
|
break;
|
|
|
|
r = r1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
|
|
|
|
addmove(r, bn, rn, 0);
|
2011-01-17 21:39:26 -07:00
|
|
|
|
2010-11-03 18:31:07 -06:00
|
|
|
for(;;) {
|
|
|
|
r->act.b[z] |= bb;
|
2013-08-12 20:02:10 -06:00
|
|
|
p = r->f.prog;
|
2010-11-03 18:31:07 -06:00
|
|
|
|
|
|
|
if(r->use1.b[z] & bb) {
|
|
|
|
if(debug['R'])
|
|
|
|
print("%P", p);
|
|
|
|
addreg(&p->from, rn);
|
|
|
|
if(debug['R'])
|
|
|
|
print("\t.c%P\n", p);
|
|
|
|
}
|
|
|
|
if((r->use2.b[z]|r->set.b[z]) & bb) {
|
|
|
|
if(debug['R'])
|
|
|
|
print("%P", p);
|
|
|
|
addreg(&p->to, rn);
|
|
|
|
if(debug['R'])
|
|
|
|
print("\t.c%P\n", p);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(STORE(r) & r->regdiff.b[z] & bb)
|
|
|
|
addmove(r, bn, rn, 1);
|
|
|
|
r->regu |= rb;
|
|
|
|
|
|
|
|
if(r->refbehind.b[z] & bb)
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1->refahead.b[z] & bb)
|
|
|
|
paint3(r1, bn, rb, rn);
|
|
|
|
|
|
|
|
if(!(r->refahead.b[z] & bb))
|
|
|
|
break;
|
2013-08-12 20:02:10 -06:00
|
|
|
r1 = (Reg*)r->f.s2;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r1 != R)
|
|
|
|
if(r1->refbehind.b[z] & bb)
|
|
|
|
paint3(r1, bn, rb, rn);
|
2013-08-12 20:02:10 -06:00
|
|
|
r = (Reg*)r->f.s1;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(r == R)
|
|
|
|
break;
|
|
|
|
if(r->act.b[z] & bb)
|
|
|
|
break;
|
|
|
|
if(!(r->refbehind.b[z] & bb))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
addreg(Adr *a, int rn)
|
|
|
|
{
|
|
|
|
a->sym = 0;
|
|
|
|
a->name = D_NONE;
|
|
|
|
a->type = D_REG;
|
|
|
|
a->reg = rn;
|
|
|
|
if(rn >= NREG) {
|
|
|
|
a->type = D_FREG;
|
|
|
|
a->reg = rn-NREG;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bit reg
|
|
|
|
* 0 R0
|
|
|
|
* 1 R1
|
|
|
|
* ... ...
|
|
|
|
* 10 R10
|
2012-06-02 10:54:28 -06:00
|
|
|
* 12 R12
|
2010-11-03 18:31:07 -06:00
|
|
|
*/
|
|
|
|
int32
|
|
|
|
RtoB(int r)
|
|
|
|
{
|
2012-06-02 10:54:28 -06:00
|
|
|
if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12
|
2010-11-03 18:31:07 -06:00
|
|
|
return 0;
|
|
|
|
return 1L << r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
BtoR(int32 b)
|
|
|
|
{
|
2012-06-02 10:54:28 -06:00
|
|
|
b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12
|
2010-11-03 18:31:07 -06:00
|
|
|
if(b == 0)
|
|
|
|
return 0;
|
|
|
|
return bitno(b);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bit reg
|
|
|
|
* 18 F2
|
|
|
|
* 19 F3
|
|
|
|
* ... ...
|
2012-06-02 10:54:28 -06:00
|
|
|
* 31 F15
|
2010-11-03 18:31:07 -06:00
|
|
|
*/
|
|
|
|
int32
|
|
|
|
FtoB(int f)
|
|
|
|
{
|
|
|
|
|
|
|
|
if(f < 2 || f > NFREG-1)
|
|
|
|
return 0;
|
|
|
|
return 1L << (f + 16);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
BtoF(int32 b)
|
|
|
|
{
|
|
|
|
|
2012-06-02 10:54:28 -06:00
|
|
|
b &= 0xfffc0000L;
|
2010-11-03 18:31:07 -06:00
|
|
|
if(b == 0)
|
|
|
|
return 0;
|
|
|
|
return bitno(b) - 16;
|
|
|
|
}
|
2011-01-07 19:04:48 -07:00
|
|
|
|
2011-02-09 14:13:17 -07:00
|
|
|
void
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpone(Flow *f, int isreg)
|
2011-02-09 14:13:17 -07:00
|
|
|
{
|
|
|
|
int z;
|
|
|
|
Bits bit;
|
2013-08-12 20:02:10 -06:00
|
|
|
Reg *r;
|
2011-02-09 14:13:17 -07:00
|
|
|
|
2013-08-12 20:02:10 -06:00
|
|
|
print("%d:%P", f->loop, f->prog);
|
|
|
|
if(isreg) {
|
|
|
|
r = (Reg*)f;
|
|
|
|
for(z=0; z<BITS; z++)
|
|
|
|
bit.b[z] =
|
|
|
|
r->set.b[z] |
|
|
|
|
r->use1.b[z] |
|
|
|
|
r->use2.b[z] |
|
|
|
|
r->refbehind.b[z] |
|
|
|
|
r->refahead.b[z] |
|
|
|
|
r->calbehind.b[z] |
|
|
|
|
r->calahead.b[z] |
|
|
|
|
r->regdiff.b[z] |
|
|
|
|
r->act.b[z] |
|
|
|
|
0;
|
|
|
|
if(bany(&bit)) {
|
|
|
|
print("\t");
|
|
|
|
if(bany(&r->set))
|
|
|
|
print(" s:%Q", r->set);
|
|
|
|
if(bany(&r->use1))
|
|
|
|
print(" u1:%Q", r->use1);
|
|
|
|
if(bany(&r->use2))
|
|
|
|
print(" u2:%Q", r->use2);
|
|
|
|
if(bany(&r->refbehind))
|
|
|
|
print(" rb:%Q ", r->refbehind);
|
|
|
|
if(bany(&r->refahead))
|
|
|
|
print(" ra:%Q ", r->refahead);
|
|
|
|
if(bany(&r->calbehind))
|
|
|
|
print(" cb:%Q ", r->calbehind);
|
|
|
|
if(bany(&r->calahead))
|
|
|
|
print(" ca:%Q ", r->calahead);
|
|
|
|
if(bany(&r->regdiff))
|
|
|
|
print(" d:%Q ", r->regdiff);
|
|
|
|
if(bany(&r->act))
|
|
|
|
print(" a:%Q ", r->act);
|
|
|
|
}
|
2011-06-20 12:18:04 -06:00
|
|
|
}
|
2011-02-09 14:13:17 -07:00
|
|
|
print("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2013-08-12 20:02:10 -06:00
|
|
|
dumpit(char *str, Flow *r0, int isreg)
|
2011-02-09 14:13:17 -07:00
|
|
|
{
|
2013-08-12 20:02:10 -06:00
|
|
|
Flow *r, *r1;
|
2011-02-09 14:13:17 -07:00
|
|
|
|
|
|
|
print("\n%s\n", str);
|
2013-08-12 20:02:10 -06:00
|
|
|
for(r = r0; r != nil; r = r->link) {
|
|
|
|
dumpone(r, isreg);
|
2011-02-09 14:13:17 -07:00
|
|
|
r1 = r->p2;
|
2013-08-12 20:02:10 -06:00
|
|
|
if(r1 != nil) {
|
2011-02-09 14:13:17 -07:00
|
|
|
print(" pred:");
|
2013-08-12 20:02:10 -06:00
|
|
|
for(; r1 != nil; r1 = r1->p2link)
|
2011-02-09 14:13:17 -07:00
|
|
|
print(" %.4ud", r1->prog->loc);
|
|
|
|
print("\n");
|
|
|
|
}
|
|
|
|
// r1 = r->s1;
|
2013-08-12 20:02:10 -06:00
|
|
|
// if(r1 != nil) {
|
2011-02-09 14:13:17 -07:00
|
|
|
// print(" succ:");
|
|
|
|
// for(; r1 != R; r1 = r1->s1)
|
|
|
|
// print(" %.4ud", r1->prog->loc);
|
|
|
|
// print("\n");
|
|
|
|
// }
|
|
|
|
}
|
|
|
|
}
|