go/src/cmd/gc/popt.c

// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// "Portable" optimizations.
// Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h.
// Must code to the intersection of the three back ends.

#include	<u.h>
#include	<libc.h>
#include	"gg.h"
#include	"opt.h"

// p is a call instruction. Does the call fail to return?
int
noreturn(Prog *p)
{
	Sym *s;
	int i;
	static Sym*	symlist[10];

	if(symlist[0] == S) {
		symlist[0] = pkglookup("panicindex", runtimepkg);
		symlist[1] = pkglookup("panicslice", runtimepkg);
		symlist[2] = pkglookup("throwinit", runtimepkg);
		symlist[3] = pkglookup("panic", runtimepkg);
		symlist[4] = pkglookup("panicwrap", runtimepkg);
	}

	s = p->to.sym;
	if(s == S)
		return 0;
	for(i=0; symlist[i]!=S; i++)
		if(s == symlist[i])
			return 1;
	return 0;
}

// JMP chasing and removal.
//
// The code generator depends on being able to write out jump
// instructions that it can jump to now but fill in later.
// the linker will resolve them nicely, but they make the code
// longer and more difficult to follow during debugging.
// Remove them.

/* what instruction does a JMP to p eventually land on? */
static Prog*
chasejmp(Prog *p, int *jmploop)
{
	int n;

	n = 0;
	while(p != P && p->as == AJMP && p->to.type == D_BRANCH) {
		if(++n > 10) {
			*jmploop = 1;
			break;
		}
		p = p->to.u.branch;
	}
	return p;
}

/*
 * reuse reg pointer for mark/sweep state.
 * leave reg==nil at end because alive==nil.
 */
#define alive ((void*)0)
#define dead ((void*)1)

/* mark all code reachable from firstp as alive */
static void
mark(Prog *firstp)
{
	Prog *p;
	
	for(p=firstp; p; p=p->link) {
		if(p->opt != dead)
			break;
		p->opt = alive;
		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch)
			mark(p->to.u.branch);
		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
			break;
	}
}

void
fixjmp(Prog *firstp)
{
	int jmploop;
	Prog *p, *last;
	
	if(debug['R'] && debug['v'])
		print("\nfixjmp\n");

	// pass 1: resolve jump to jump, mark all code as dead.
	jmploop = 0;
	for(p=firstp; p; p=p->link) {
		if(debug['R'] && debug['v'])
			print("%P\n", p);
		if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) {
			p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);
			if(debug['R'] && debug['v'])
				print("->%P\n", p);
		}
		p->opt = dead;
	}
	if(debug['R'] && debug['v'])
		print("\n");

	// pass 2: mark all reachable code alive
	mark(firstp);
	
	// pass 3: delete dead code (mostly JMPs).
	last = nil;
	for(p=firstp; p; p=p->link) {
		if(p->opt == dead) {
			if(p->link == P && p->as == ARET && last && last->as != ARET) {
				// This is the final ARET, and the code so far doesn't have one.
				// Let it stay.
			} else {
				if(debug['R'] && debug['v'])
					print("del %P\n", p);
				continue;
			}
		}
		if(last)
			last->link = p;
		last = p;
	}
	last->link = P;
	
	// pass 4: elide JMP to next instruction.
	// only safe if there are no jumps to JMPs anymore.
	if(!jmploop) {
		last = nil;
		for(p=firstp; p; p=p->link) {
			if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) {
				if(debug['R'] && debug['v'])
					print("del %P\n", p);
				continue;
			}
			if(last)
				last->link = p;
			last = p;
		}
		last->link = P;
	}
	
	if(debug['R'] && debug['v']) {
		print("\n");
		for(p=firstp; p; p=p->link)
			print("%P\n", p);
		print("\n");
	}
}

// Control flow analysis. The Flow structures hold predecessor and successor
// information as well as basic loop analysis.
//
//	graph = flowstart(firstp, sizeof(Flow));
//	... use flow graph ...
//	flowend(graph); // free graph
//
// Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
//
//	for(f = graph->start; f != nil; f = f->link)
//
// or, given an instruction f, to iterate over all the predecessors, which is
// f->p1 and this list:
//
//	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
//	
// Often the Flow struct is embedded as the first field inside a larger struct S.
// In that case casts are needed to convert Flow* to S* in many places but the
// idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart.

Graph*
flowstart(Prog *firstp, int size)
{
	int nf;
	Flow *f, *f1, *start, *last;
	Graph *graph;
	Prog *p;
	ProgInfo info;

	// Count and mark instructions to annotate.
	nf = 0;
	for(p = firstp; p != P; p = p->link) {
		p->opt = nil; // should be already, but just in case
		proginfo(&info, p);
		if(info.flags & Skip)
			continue;
		p->opt = (void*)1;
		nf++;
	}
	
	if(nf == 0)
		return nil;

	if(nf >= 20000) {
		// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);
		return nil;
	}

	// Allocate annotations and assign to instructions.
	graph = calloc(sizeof *graph + size*nf, 1);
	if(graph == nil)
		fatal("out of memory");
	start = (Flow*)(graph+1);
	last = nil;
	f = start;
	for(p = firstp; p != P; p = p->link) {
		if(p->opt == nil)
			continue;
		p->opt = f;
		f->prog = p;
		if(last)
			last->link = f;
		last = f;
		
		f = (Flow*)((uchar*)f + size);
	}

	// Fill in pred/succ information.
	for(f = start; f != nil; f = f->link) {
		p = f->prog;
		proginfo(&info, p);
		if(!(info.flags & Break)) {
			f1 = f->link;
			f->s1 = f1;
			f1->p1 = f;
		}
		if(p->to.type == D_BRANCH) {
			if(p->to.u.branch == P)
				fatal("pnil %P", p);
			f1 = p->to.u.branch->opt;
			if(f1 == nil)
				fatal("fnil %P / %P", p, p->to.u.branch);
			if(f1 == f) {
				//fatal("self loop %P", p);
				continue;
			}
			f->s2 = f1;
			f->p2link = f1->p2;
			f1->p2 = f;
		}
	}
	
	graph->start = start;
	graph->num = nf;
	return graph;
}

void
flowend(Graph *graph)
{
	Flow *f;
	
	for(f = graph->start; f != nil; f = f->link)
		f->prog->opt = nil;
	free(graph);
}

/*
 * find looping structure
 *
 * 1) find reverse postordering
 * 2) find approximate dominators,
 *	the actual dominators if the flow graph is reducible
 *	otherwise, dominators plus some other non-dominators.
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
 *	Oct. 1-3, 1973, pp.  207-217.
 * 3) find all nodes with a predecessor dominated by the current node.
 *	such a node is a loop head.
 *	recursively, all preds with a greater rpo number are in the loop
 */
static int32
postorder(Flow *r, Flow **rpo2r, int32 n)
{
	Flow *r1;

	r->rpo = 1;
	r1 = r->s1;
	if(r1 && !r1->rpo)
		n = postorder(r1, rpo2r, n);
	r1 = r->s2;
	if(r1 && !r1->rpo)
		n = postorder(r1, rpo2r, n);
	rpo2r[n] = r;
	n++;
	return n;
}

static int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 t;

	if(rpo1 == -1)
		return rpo2;
	while(rpo1 != rpo2){
		if(rpo1 > rpo2){
			t = rpo2;
			rpo2 = rpo1;
			rpo1 = t;
		}
		while(rpo1 < rpo2){
			t = idom[rpo2];
			if(t >= rpo2)
				fatal("bad idom");
			rpo2 = t;
		}
	}
	return rpo1;
}

static int
doms(int32 *idom, int32 r, int32 s)
{
	while(s > r)
		s = idom[s];
	return s == r;
}

static int
loophead(int32 *idom, Flow *r)
{
	int32 src;

	src = r->rpo;
	if(r->p1 != nil && doms(idom, src, r->p1->rpo))
		return 1;
	for(r = r->p2; r != nil; r = r->p2link)
		if(doms(idom, src, r->rpo))
			return 1;
	return 0;
}

static void
loopmark(Flow **rpo2r, int32 head, Flow *r)
{
	if(r->rpo < head || r->active == head)
		return;
	r->active = head;
	r->loop += LOOP;
	if(r->p1 != nil)
		loopmark(rpo2r, head, r->p1);
	for(r = r->p2; r != nil; r = r->p2link)
		loopmark(rpo2r, head, r);
}

void
flowrpo(Graph *g)
{
	Flow *r1;
	int32 i, d, me, nr, *idom;
	Flow **rpo2r;

	free(g->rpo);
	g->rpo = calloc(g->num*sizeof g->rpo[0], 1);
	idom = calloc(g->num*sizeof idom[0], 1);
	if(g->rpo == nil || idom == nil)
		fatal("out of memory");

	rpo2r = g->rpo;
	d = postorder(g->start, rpo2r, 0);
	nr = g->num;
	if(d > nr)
		fatal("too many reg nodes %d %d", d, nr);
	nr = d;
	for(i = 0; i < nr / 2; i++) {
		r1 = rpo2r[i];
		rpo2r[i] = rpo2r[nr - 1 - i];
		rpo2r[nr - 1 - i] = r1;
	}
	for(i = 0; i < nr; i++)
		rpo2r[i]->rpo = i;

	idom[0] = 0;
	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		me = r1->rpo;
		d = -1;
		// rpo2r[r->rpo] == r protects against considering dead code,
		// which has r->rpo == 0.
		if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
			d = r1->p1->rpo;
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
				d = rpolca(idom, d, r1->rpo);
		idom[i] = d;
	}

	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		r1->loop++;
		if(r1->p2 != nil && loophead(idom, r1))
			loopmark(rpo2r, i, r1);
	}
	free(idom);
}

Flow*
uniqp(Flow *r)
{
	Flow *r1;

	r1 = r->p1;
	if(r1 == nil) {
		r1 = r->p2;
		if(r1 == nil || r1->p2link != nil)
			return nil;
	} else
		if(r->p2 != nil)
			return nil;
	return r1;
}

Flow*
uniqs(Flow *r)
{
	Flow *r1;

	r1 = r->s1;
	if(r1 == nil) {
		r1 = r->s2;
		if(r1 == nil)
			return nil;
	} else
		if(r->s2 != nil)
			return nil;
	return r1;
}
cmd/gc: support for "portable" optimization logic Code in gc/popt.c is compiled as part of 5g, 6g, and 8g, meaning it can use arch-specific headers but there's just one copy of the code. This is the same arrangement we use for the portable code generation logic in gc/pgen.c. Move fixjmp and noreturn there to get the ball rolling. R=ken2 CC=golang-dev https://golang.org/cl/12789043 2013-08-12 17:14:02 -06:00			`// Derived from Inferno utils/6c/reg.c`
			`// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c`
			`//`
			`// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.`
			`// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)`
			`// Portions Copyright © 1997-1999 Vita Nuova Limited`
			`// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)`
			`// Portions Copyright © 2004,2006 Bruce Ellis`
			`// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)`
			`// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others`
			`// Portions Copyright © 2009 The Go Authors. All rights reserved.`
			`//`
			`// Permission is hereby granted, free of charge, to any person obtaining a copy`
			`// of this software and associated documentation files (the "Software"), to deal`
			`// in the Software without restriction, including without limitation the rights`
			`// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`// copies of the Software, and to permit persons to whom the Software is`
			`// furnished to do so, subject to the following conditions:`
			`//`
			`// The above copyright notice and this permission notice shall be included in`
			`// all copies or substantial portions of the Software.`
			`//`
			`// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN`
			`// THE SOFTWARE.`

			`// "Portable" optimizations.`
			`// Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h.`
			`// Must code to the intersection of the three back ends.`

			`#include <u.h>`
			`#include <libc.h>`
			`#include "gg.h"`
			`#include "opt.h"`

			`// p is a call instruction. Does the call fail to return?`
			`int`
			`noreturn(Prog *p)`
			`{`
			`Sym *s;`
			`int i;`
			`static Sym* symlist[10];`

			`if(symlist[0] == S) {`
			`symlist[0] = pkglookup("panicindex", runtimepkg);`
			`symlist[1] = pkglookup("panicslice", runtimepkg);`
			`symlist[2] = pkglookup("throwinit", runtimepkg);`
			`symlist[3] = pkglookup("panic", runtimepkg);`
			`symlist[4] = pkglookup("panicwrap", runtimepkg);`
			`}`

			`s = p->to.sym;`
			`if(s == S)`
			`return 0;`
			`for(i=0; symlist[i]!=S; i++)`
			`if(s == symlist[i])`
			`return 1;`
			`return 0;`
			`}`

			`// JMP chasing and removal.`
			`//`
			`// The code generator depends on being able to write out jump`
			`// instructions that it can jump to now but fill in later.`
			`// the linker will resolve them nicely, but they make the code`
			`// longer and more difficult to follow during debugging.`
			`// Remove them.`

			`/* what instruction does a JMP to p eventually land on? */`
			`static Prog*`
			`chasejmp(Prog p, int jmploop)`
			`{`
			`int n;`

			`n = 0;`
			`while(p != P && p->as == AJMP && p->to.type == D_BRANCH) {`
			`if(++n > 10) {`
			`*jmploop = 1;`
			`break;`
			`}`
			`p = p->to.u.branch;`
			`}`
			`return p;`
			`}`

			`/*`
			`* reuse reg pointer for mark/sweep state.`
			`* leave reg==nil at end because alive==nil.`
			`*/`
			`#define alive ((void*)0)`
			`#define dead ((void*)1)`

			`/* mark all code reachable from firstp as alive */`
			`static void`
			`mark(Prog *firstp)`
			`{`
			`Prog *p;`

			`for(p=firstp; p; p=p->link) {`
			`if(p->opt != dead)`
			`break;`
			`p->opt = alive;`
			`if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch)`
			`mark(p->to.u.branch);`
			`if(p->as == AJMP \|\| p->as == ARET \|\| p->as == AUNDEF)`
			`break;`
			`}`
			`}`

			`void`
			`fixjmp(Prog *firstp)`
			`{`
			`int jmploop;`
			`Prog p, last;`

			`if(debug['R'] && debug['v'])`
			`print("\nfixjmp\n");`

			`// pass 1: resolve jump to jump, mark all code as dead.`
			`jmploop = 0;`
			`for(p=firstp; p; p=p->link) {`
			`if(debug['R'] && debug['v'])`
			`print("%P\n", p);`
			`if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) {`
			`p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);`
			`if(debug['R'] && debug['v'])`
			`print("->%P\n", p);`
			`}`
			`p->opt = dead;`
			`}`
			`if(debug['R'] && debug['v'])`
			`print("\n");`

			`// pass 2: mark all reachable code alive`
			`mark(firstp);`

			`// pass 3: delete dead code (mostly JMPs).`
			`last = nil;`
			`for(p=firstp; p; p=p->link) {`
			`if(p->opt == dead) {`
			`if(p->link == P && p->as == ARET && last && last->as != ARET) {`
			`// This is the final ARET, and the code so far doesn't have one.`
			`// Let it stay.`
			`} else {`
			`if(debug['R'] && debug['v'])`
			`print("del %P\n", p);`
			`continue;`
			`}`
			`}`
			`if(last)`
			`last->link = p;`
			`last = p;`
			`}`
			`last->link = P;`

			`// pass 4: elide JMP to next instruction.`
			`// only safe if there are no jumps to JMPs anymore.`
			`if(!jmploop) {`
			`last = nil;`
			`for(p=firstp; p; p=p->link) {`
			`if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) {`
			`if(debug['R'] && debug['v'])`
			`print("del %P\n", p);`
			`continue;`
			`}`
			`if(last)`
			`last->link = p;`
			`last = p;`
			`}`
			`last->link = P;`
			`}`

			`if(debug['R'] && debug['v']) {`
			`print("\n");`
			`for(p=firstp; p; p=p->link)`
			`print("%P\n", p);`
			`print("\n");`
			`}`
			`}`
cmd/gc: move flow graph into portable opt Now there's only one copy of the flow graph construction and dominator computation, and different optimizations can attach different annotations to the instructions. R=ken2 CC=golang-dev https://golang.org/cl/12797045 2013-08-12 20:02:10 -06:00
			`// Control flow analysis. The Flow structures hold predecessor and successor`
			`// information as well as basic loop analysis.`
			`//`
			`// graph = flowstart(firstp, sizeof(Flow));`
			`// ... use flow graph ...`
			`// flowend(graph); // free graph`
			`//`
			`// Typical uses of the flow graph are to iterate over all the flow-relevant instructions:`
			`//`
			`// for(f = graph->start; f != nil; f = f->link)`
			`//`
			`// or, given an instruction f, to iterate over all the predecessors, which is`
			`// f->p1 and this list:`
			`//`
			`// for(f2 = f->p2; f2 != nil; f2 = f2->p2link)`
			`//`
			`// Often the Flow struct is embedded as the first field inside a larger struct S.`
			`// In that case casts are needed to convert Flow* to S* in many places but the`
			`// idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart.`

			`Graph*`
			`flowstart(Prog *firstp, int size)`
			`{`
			`int nf;`
			`Flow f, f1, start, last;`
			`Graph *graph;`
			`Prog *p;`
			`ProgInfo info;`

			`// Count and mark instructions to annotate.`
			`nf = 0;`
			`for(p = firstp; p != P; p = p->link) {`
			`p->opt = nil; // should be already, but just in case`
			`proginfo(&info, p);`
			`if(info.flags & Skip)`
			`continue;`
			`p->opt = (void*)1;`
			`nf++;`
			`}`

			`if(nf == 0)`
			`return nil;`

			`if(nf >= 20000) {`
			`// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);`
			`return nil;`
			`}`

			`// Allocate annotations and assign to instructions.`
			`graph = calloc(sizeof graph + sizenf, 1);`
			`if(graph == nil)`
			`fatal("out of memory");`
			`start = (Flow*)(graph+1);`
			`last = nil;`
			`f = start;`
			`for(p = firstp; p != P; p = p->link) {`
			`if(p->opt == nil)`
			`continue;`
			`p->opt = f;`
			`f->prog = p;`
			`if(last)`
			`last->link = f;`
			`last = f;`

			`f = (Flow)((uchar)f + size);`
			`}`

			`// Fill in pred/succ information.`
			`for(f = start; f != nil; f = f->link) {`
			`p = f->prog;`
			`proginfo(&info, p);`
			`if(!(info.flags & Break)) {`
			`f1 = f->link;`
			`f->s1 = f1;`
			`f1->p1 = f;`
			`}`
			`if(p->to.type == D_BRANCH) {`
			`if(p->to.u.branch == P)`
			`fatal("pnil %P", p);`
			`f1 = p->to.u.branch->opt;`
			`if(f1 == nil)`
			`fatal("fnil %P / %P", p, p->to.u.branch);`
			`if(f1 == f) {`
			`//fatal("self loop %P", p);`
			`continue;`
			`}`
			`f->s2 = f1;`
			`f->p2link = f1->p2;`
			`f1->p2 = f;`
			`}`
			`}`

			`graph->start = start;`
			`graph->num = nf;`
			`return graph;`
			`}`

			`void`
			`flowend(Graph *graph)`
			`{`
			`Flow *f;`

			`for(f = graph->start; f != nil; f = f->link)`
			`f->prog->opt = nil;`
			`free(graph);`
			`}`

			`/*`
			`* find looping structure`
			`*`
			`* 1) find reverse postordering`
			`* 2) find approximate dominators,`
			`* the actual dominators if the flow graph is reducible`
			`* otherwise, dominators plus some other non-dominators.`
			`* See Matthew S. Hecht and Jeffrey D. Ullman,`
			`* "Analysis of a Simple Algorithm for Global Data Flow Problems",`
			`* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,`
			`* Oct. 1-3, 1973, pp. 207-217.`
			`* 3) find all nodes with a predecessor dominated by the current node.`
			`* such a node is a loop head.`
			`* recursively, all preds with a greater rpo number are in the loop`
			`*/`
			`static int32`
			`postorder(Flow r, Flow *rpo2r, int32 n)`
			`{`
			`Flow *r1;`

			`r->rpo = 1;`
			`r1 = r->s1;`
			`if(r1 && !r1->rpo)`
			`n = postorder(r1, rpo2r, n);`
			`r1 = r->s2;`
			`if(r1 && !r1->rpo)`
			`n = postorder(r1, rpo2r, n);`
			`rpo2r[n] = r;`
			`n++;`
			`return n;`
			`}`

			`static int32`
			`rpolca(int32 *idom, int32 rpo1, int32 rpo2)`
			`{`
			`int32 t;`

			`if(rpo1 == -1)`
			`return rpo2;`
			`while(rpo1 != rpo2){`
			`if(rpo1 > rpo2){`
			`t = rpo2;`
			`rpo2 = rpo1;`
			`rpo1 = t;`
			`}`
			`while(rpo1 < rpo2){`
			`t = idom[rpo2];`
			`if(t >= rpo2)`
			`fatal("bad idom");`
			`rpo2 = t;`
			`}`
			`}`
			`return rpo1;`
			`}`

			`static int`
			`doms(int32 *idom, int32 r, int32 s)`
			`{`
			`while(s > r)`
			`s = idom[s];`
			`return s == r;`
			`}`

			`static int`
			`loophead(int32 idom, Flow r)`
			`{`
			`int32 src;`

			`src = r->rpo;`
			`if(r->p1 != nil && doms(idom, src, r->p1->rpo))`
			`return 1;`
			`for(r = r->p2; r != nil; r = r->p2link)`
			`if(doms(idom, src, r->rpo))`
			`return 1;`
			`return 0;`
			`}`

			`static void`
			`loopmark(Flow *rpo2r, int32 head, Flow r)`
			`{`
			`if(r->rpo < head \|\| r->active == head)`
			`return;`
			`r->active = head;`
			`r->loop += LOOP;`
			`if(r->p1 != nil)`
			`loopmark(rpo2r, head, r->p1);`
			`for(r = r->p2; r != nil; r = r->p2link)`
			`loopmark(rpo2r, head, r);`
			`}`

			`void`
			`flowrpo(Graph *g)`
			`{`
			`Flow *r1;`
			`int32 i, d, me, nr, *idom;`
			`Flow **rpo2r;`

			`free(g->rpo);`
			`g->rpo = calloc(g->num*sizeof g->rpo[0], 1);`
			`idom = calloc(g->num*sizeof idom[0], 1);`
			`if(g->rpo == nil \|\| idom == nil)`
			`fatal("out of memory");`

			`rpo2r = g->rpo;`
			`d = postorder(g->start, rpo2r, 0);`
			`nr = g->num;`
			`if(d > nr)`
			`fatal("too many reg nodes %d %d", d, nr);`
			`nr = d;`
			`for(i = 0; i < nr / 2; i++) {`
			`r1 = rpo2r[i];`
			`rpo2r[i] = rpo2r[nr - 1 - i];`
			`rpo2r[nr - 1 - i] = r1;`
			`}`
			`for(i = 0; i < nr; i++)`
			`rpo2r[i]->rpo = i;`

			`idom[0] = 0;`
			`for(i = 0; i < nr; i++) {`
			`r1 = rpo2r[i];`
			`me = r1->rpo;`
			`d = -1;`
			`// rpo2r[r->rpo] == r protects against considering dead code,`
			`// which has r->rpo == 0.`
			`if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)`
			`d = r1->p1->rpo;`
			`for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)`
			`if(rpo2r[r1->rpo] == r1 && r1->rpo < me)`
			`d = rpolca(idom, d, r1->rpo);`
			`idom[i] = d;`
			`}`

			`for(i = 0; i < nr; i++) {`
			`r1 = rpo2r[i];`
			`r1->loop++;`
			`if(r1->p2 != nil && loophead(idom, r1))`
			`loopmark(rpo2r, i, r1);`
			`}`
			`free(idom);`
			`}`

			`Flow*`
			`uniqp(Flow *r)`
			`{`
			`Flow *r1;`

			`r1 = r->p1;`
			`if(r1 == nil) {`
			`r1 = r->p2;`
			`if(r1 == nil \|\| r1->p2link != nil)`
			`return nil;`
			`} else`
			`if(r->p2 != nil)`
			`return nil;`
			`return r1;`
			`}`

			`Flow*`
			`uniqs(Flow *r)`
			`{`
			`Flow *r1;`

			`r1 = r->s1;`
			`if(r1 == nil) {`
			`r1 = r->s2;`
			`if(r1 == nil)`
			`return nil;`
			`} else`
			`if(r->s2 != nil)`
			`return nil;`
			`return r1;`
			`}`