
[dev.ssa] cmd/internal/ssa: implement more compiler passes

opt:      machine-independent optimization
fuse:     join basic blocks
lower:    convert to machine-dependent opcodes
critical: remove critical edges for register alloc
layout:   order basic blocks
schedule: order values in basic blocks
cgen:     generate assembly output

opt and lower use machine-generated matching code produced by
the rule generator in rulegen/

cgen will probably change in the real compiler, as we want to
generate binary directly instead of ASCII assembly.

Change-Id: Iedd7ca70f6f55a4cde30e27cfad6a7fa05691b83
Reviewed-on: https://go-review.googlesource.com/7981
Reviewed-by: Alan Donovan <adonovan@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Keith Randall 2015-03-23 17:02:11 -07:00
parent 7c2c0b4e53
commit 7b96284295
18 changed files with 1372 additions and 23 deletions


@ -0,0 +1,117 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "fmt"
// cgen selects machine instructions for the function.
// This pass generates assembly output for now.
// TODO(khr): generate binary output (via liblink?) instead of text.
func cgen(f *Func) {
fmt.Printf("TEXT %s(SB),0,$0\n", f.Name) // TODO: frame size / arg size
// TODO: prolog, allocate stack frame
// hack for now, until regalloc is done
f.RegAlloc = make([]Location, f.NumValues())
for idx, b := range f.Blocks {
fmt.Printf("%d:\n", b.ID)
for _, v := range b.Values {
asm := opcodeTable[v.Op].asm
fmt.Print("\t")
if asm == "" {
fmt.Print("\t")
}
for i := 0; i < len(asm); i++ {
switch asm[i] {
default:
fmt.Printf("%c", asm[i])
case '%':
i++
switch asm[i] {
case '%':
fmt.Print("%")
case 'I':
i++
n := asm[i] - '0'
if f.RegAlloc[v.Args[n].ID] != nil {
fmt.Print(f.RegAlloc[v.Args[n].ID].Name())
} else {
fmt.Printf("v%d", v.Args[n].ID)
}
case 'O':
i++
n := asm[i] - '0'
if n != 0 {
panic("can only handle 1 output for now")
}
if f.RegAlloc[v.ID] != nil {
// TODO: output tuple
fmt.Print(f.RegAlloc[v.ID].Name())
} else {
fmt.Printf("v%d", v.ID)
}
case 'A':
fmt.Print(v.Aux)
}
}
}
fmt.Println("\t; " + v.LongString())
}
// find next block in layout sequence
var next *Block
if idx < len(f.Blocks)-1 {
next = f.Blocks[idx+1]
}
// emit end of block code
// TODO: this is machine specific
switch b.Kind {
case BlockPlain:
if b.Succs[0] != next {
fmt.Printf("\tJMP\t%d\n", b.Succs[0].ID)
}
case BlockExit:
// TODO: run defers (if any)
// TODO: deallocate frame
fmt.Println("\tRET")
case BlockCall:
// nothing to emit - call instruction already happened
case BlockEQ:
if b.Succs[0] == next {
fmt.Printf("\tJNE\t%d\n", b.Succs[1].ID)
} else if b.Succs[1] == next {
fmt.Printf("\tJEQ\t%d\n", b.Succs[0].ID)
} else {
fmt.Printf("\tJEQ\t%d\n", b.Succs[0].ID)
fmt.Printf("\tJMP\t%d\n", b.Succs[1].ID)
}
case BlockNE:
if b.Succs[0] == next {
fmt.Printf("\tJEQ\t%d\n", b.Succs[1].ID)
} else if b.Succs[1] == next {
fmt.Printf("\tJNE\t%d\n", b.Succs[0].ID)
} else {
fmt.Printf("\tJNE\t%d\n", b.Succs[0].ID)
fmt.Printf("\tJMP\t%d\n", b.Succs[1].ID)
}
case BlockLT:
if b.Succs[0] == next {
fmt.Printf("\tJGE\t%d\n", b.Succs[1].ID)
} else if b.Succs[1] == next {
fmt.Printf("\tJLT\t%d\n", b.Succs[0].ID)
} else {
fmt.Printf("\tJLT\t%d\n", b.Succs[0].ID)
fmt.Printf("\tJMP\t%d\n", b.Succs[1].ID)
}
default:
fmt.Printf("\t%s ->", b.Kind.String())
for _, s := range b.Succs {
fmt.Printf(" %d", s.ID)
}
fmt.Printf("\n")
}
}
}
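For reference, a minimal standalone sketch of the %-escape expansion cgen performs. The template string and register name below are illustrative, not entries from the real opcodeTable.

package main

import "fmt"

// expand interprets the %I<n> (input n), %O0 (output), and %A (aux)
// escapes the same way the loop in cgen does.
func expand(asm string, inputs []string, output string, aux interface{}) string {
	var out []byte
	for i := 0; i < len(asm); i++ {
		switch asm[i] {
		default:
			out = append(out, asm[i])
		case '%':
			i++
			switch asm[i] {
			case '%':
				out = append(out, '%')
			case 'I':
				i++
				out = append(out, inputs[asm[i]-'0']...)
			case 'O':
				i++ // cgen only supports output 0 for now
				out = append(out, output...)
			case 'A':
				out = append(out, fmt.Sprint(aux)...)
			}
		}
	}
	return string(out)
}

func main() {
	fmt.Println(expand("ADDQ\t%I0, %O0", []string{"AX"}, "AX", nil))
	// prints: ADDQ	AX, AX
}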


@ -4,7 +4,10 @@
 package ssa

-import "fmt"
+import (
+	"fmt"
+	"log"
+)

 // Compile is the main entry point for this package.
 // Compile modifies f so that on return:
@ -50,16 +53,55 @@ type pass struct {
 var passes = [...]pass{
 	{"phielim", phielim},
 	{"copyelim", copyelim},
-	//{"opt", opt},
+	{"opt", opt},
 	// cse
 	{"deadcode", deadcode},
-	//{"fuse", fuse},
-	//{"lower", lower},
+	{"fuse", fuse},
+	{"lower", lower},
 	// cse
-	//{"critical", critical}, // remove critical edges
-	//{"layout", layout}, // schedule blocks
-	//{"schedule", schedule}, // schedule values
+	{"critical", critical}, // remove critical edges
+	{"layout", layout},     // schedule blocks
+	{"schedule", schedule}, // schedule values
 	// regalloc
 	// stack slot alloc (+size stack frame)
-	//{"cgen", cgen},
+	{"cgen", cgen},
 }
+
+// Double-check phase ordering constraints.
+// This code is intended to document the ordering requirements
+// between different phases. It does not override the passes
+// list above.
+var passOrder = map[string]string{
+	// don't layout blocks until critical edges have been removed
+	"critical": "layout",
+	// regalloc requires the removal of all critical edges
+	//"critical": "regalloc",
+	// regalloc requires all the values in a block to be scheduled
+	//"schedule": "regalloc",
+	// code generation requires register allocation
+	//"cgen": "regalloc",
+}
+
+func init() {
+	for a, b := range passOrder {
+		i := -1
+		j := -1
+		for k, p := range passes {
+			if p.name == a {
+				i = k
+			}
+			if p.name == b {
+				j = k
+			}
+		}
+		if i < 0 {
+			log.Panicf("pass %s not found", a)
+		}
+		if j < 0 {
+			log.Panicf("pass %s not found", b)
+		}
+		if i >= j {
+			log.Panicf("passes %s and %s out of order", a, b)
+		}
+	}
+}
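The map reads as "key must run before value". A tiny standalone demonstration of the same check, using plain strings for a hypothetical pass list; swapping the two passes makes it panic at startup:

package main

import "log"

// checkOrder panics unless pass a appears strictly before pass b,
// mirroring the init-time validation above.
func checkOrder(passes []string, passOrder map[string]string) {
	for a, b := range passOrder {
		i, j := -1, -1
		for k, p := range passes {
			if p == a {
				i = k
			}
			if p == b {
				j = k
			}
		}
		if i < 0 || j < 0 {
			log.Panicf("pass %s or %s not found", a, b)
		}
		if i >= j {
			log.Panicf("passes %s and %s out of order", a, b)
		}
	}
}

func main() {
	order := map[string]string{"critical": "layout"}
	checkOrder([]string{"critical", "layout"}, order) // ok
	checkOrder([]string{"layout", "critical"}, order) // panics
}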


@ -0,0 +1,51 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// critical splits critical edges (those that go from a block with
// more than one outedge to a block with more than one inedge).
// Regalloc wants a critical-edge-free CFG so it can implement phi values.
func critical(f *Func) {
for _, b := range f.Blocks {
if len(b.Preds) <= 1 {
continue
}
// decide if we need to split edges coming into b.
hasphi := false
for _, v := range b.Values {
if v.Op == OpPhi && v.Type != TypeMem {
hasphi = true
break
}
}
if !hasphi {
// no splitting needed
continue
}
// split input edges coming from multi-output blocks.
for i, c := range b.Preds {
if c.Kind == BlockPlain {
continue
}
// allocate a new block to place on the edge
d := f.NewBlock(BlockPlain)
// splice it in
d.Preds = append(d.Preds, c)
d.Succs = append(d.Succs, b)
b.Preds[i] = d
// replace b with d in c's successor list.
for j, b2 := range c.Succs {
if b2 == b {
c.Succs[j] = d
break
}
}
}
}
}
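A toy illustration of the splice, using plain structs rather than the ssa types: a critical edge c→b (c has two out-edges, b has two in-edges) is replaced by c→d→b, where d is an empty plain block that gives regalloc somewhere to implement phi values.

package main

import "fmt"

type block struct {
	name  string
	preds []*block
	succs []*block
}

// splitEdge splices a new empty block d onto the edge c->b.
func splitEdge(c, b *block) *block {
	d := &block{name: "d", preds: []*block{c}, succs: []*block{b}}
	for i, p := range b.preds {
		if p == c {
			b.preds[i] = d
		}
	}
	for j, s := range c.succs {
		if s == b {
			c.succs[j] = d
		}
	}
	return d
}

func main() {
	c := &block{name: "c"}
	b := &block{name: "b"}
	x := &block{name: "x"}
	y := &block{name: "y"}
	c.succs = []*block{b, x} // c has more than one out-edge
	b.preds = []*block{c, y} // b has more than one in-edge: c->b is critical
	splitEdge(c, b)
	fmt.Println(c.succs[0].name, b.preds[0].name) // d d
}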


@ -0,0 +1,40 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// fuse simplifies control flow by joining basic blocks.
func fuse(f *Func) {
for _, b := range f.Blocks {
if b.Kind != BlockPlain {
continue
}
c := b.Succs[0]
if len(c.Preds) != 1 {
continue
}
// move all of b's values to c.
for _, v := range b.Values {
v.Block = c
c.Values = append(c.Values, v)
}
// replace b->c edge with preds(b) -> c
c.Preds = b.Preds
for _, p := range c.Preds {
for i, q := range p.Succs {
if q == b {
p.Succs[i] = c
}
}
}
// trash b, just in case
b.Kind = BlockUnknown
b.Values = nil
b.Preds = nil
b.Succs = nil
}
}
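A toy sketch of the same merge on plain structs. The len(c.Preds) != 1 guard matters because b's values move into c; if c had another predecessor, those values would execute on paths that never passed through b.

package main

import "fmt"

type block struct {
	values []string
	preds  []*block
	succs  []*block
}

// fuseInto merges plain block b into its sole successor c, mirroring fuse.
func fuseInto(b *block) bool {
	if len(b.succs) != 1 {
		return false
	}
	c := b.succs[0]
	if len(c.preds) != 1 {
		return false // another path reaches c; b's values can't move there
	}
	c.values = append(c.values, b.values...)
	c.preds = b.preds
	return true
}

func main() {
	b := &block{values: []string{"v1"}}
	c := &block{values: []string{"v2"}}
	b.succs = []*block{c}
	c.preds = []*block{b}
	fmt.Println(fuseInto(b), c.values) // true [v2 v1]
}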


@ -0,0 +1,111 @@
// autogenerated from rulegen/generic.rules: do not edit!
// generated with: go run rulegen/rulegen.go rulegen/generic.rules genericRules generic.go
package ssa
func genericRules(v *Value) bool {
switch v.Op {
case OpAdd:
// match: (Add <t> (ConstInt [c]) (ConstInt [d]))
// cond: is64BitInt(t)
// result: (ConstInt [{c.(int64)+d.(int64)}])
{
t := v.Type
if v.Args[0].Op != OpConstInt {
goto end0
}
c := v.Args[0].Aux
if v.Args[1].Op != OpConstInt {
goto end0
}
d := v.Args[1].Aux
if !(is64BitInt(t)) {
goto end0
}
v.Op = OpConstInt
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = c.(int64) + d.(int64)
return true
}
end0:
;
case OpLoad:
// match: (Load (FPAddr [offset]) mem)
// cond:
// result: (LoadFP [offset] mem)
{
if v.Args[0].Op != OpFPAddr {
goto end1
}
offset := v.Args[0].Aux
mem := v.Args[1]
v.Op = OpLoadFP
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = offset
v.AddArg(mem)
return true
}
end1:
;
// match: (Load (SPAddr [offset]) mem)
// cond:
// result: (LoadSP [offset] mem)
{
if v.Args[0].Op != OpSPAddr {
goto end2
}
offset := v.Args[0].Aux
mem := v.Args[1]
v.Op = OpLoadSP
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = offset
v.AddArg(mem)
return true
}
end2:
;
case OpStore:
// match: (Store (FPAddr [offset]) val mem)
// cond:
// result: (StoreFP [offset] val mem)
{
if v.Args[0].Op != OpFPAddr {
goto end3
}
offset := v.Args[0].Aux
val := v.Args[1]
mem := v.Args[2]
v.Op = OpStoreFP
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = offset
v.AddArg(val)
v.AddArg(mem)
return true
}
end3:
;
// match: (Store (SPAddr [offset]) val mem)
// cond:
// result: (StoreSP [offset] val mem)
{
if v.Args[0].Op != OpSPAddr {
goto end4
}
offset := v.Args[0].Aux
val := v.Args[1]
mem := v.Args[2]
v.Op = OpStoreSP
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = offset
v.AddArg(val)
v.AddArg(mem)
return true
}
end4:
}
return false
}
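A hand-traced example of the constant-folding case (operands invented for illustration):

// before: v = (Add <int64> (ConstInt [2]) (ConstInt [3]))
// after:  v = (ConstInt <int64> [5]), and genericRules returns true

The match binds c = 2 and d = 3, overwrites v's Op, Aux, and Args in place, and returns true, so applyRewrite sees a change and runs another sweep.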


@ -0,0 +1,88 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "log"
// layout orders basic blocks in f with the goal of minimizing control flow instructions.
// After this phase returns, the order of f.Blocks matters and is the order
// in which those blocks will appear in the assembly output.
func layout(f *Func) {
order := make([]*Block, 0, f.NumBlocks())
scheduled := make([]bool, f.NumBlocks())
idToBlock := make([]*Block, f.NumBlocks())
indegree := make([]int, f.NumBlocks())
posdegree := newSparseSet(f.NumBlocks()) // blocks with positive remaining degree
zerodegree := newSparseSet(f.NumBlocks()) // blocks with zero remaining degree
// Initialize indegree of each block
for _, b := range f.Blocks {
idToBlock[b.ID] = b
indegree[b.ID] = len(b.Preds)
if len(b.Preds) == 0 {
zerodegree.add(b.ID)
} else {
posdegree.add(b.ID)
}
}
bid := f.Entry.ID
blockloop:
for {
// add block to schedule
b := idToBlock[bid]
order = append(order, b)
scheduled[bid] = true
if len(order) == len(f.Blocks) {
break
}
for _, c := range b.Succs {
indegree[c.ID]--
if indegree[c.ID] == 0 {
posdegree.remove(c.ID)
zerodegree.add(c.ID)
}
}
// Pick the next block to schedule
// Pick among the successor blocks that have not been scheduled yet.
// Just use degree for now. TODO(khr): use likely direction hints.
bid = 0
mindegree := f.NumBlocks()
for _, c := range order[len(order)-1].Succs {
if scheduled[c.ID] {
continue
}
if indegree[c.ID] < mindegree {
mindegree = indegree[c.ID]
bid = c.ID
}
}
if bid != 0 {
continue
}
// TODO: improve this part
// No successor of the previously scheduled block works.
// Pick a zero-degree block if we can.
for zerodegree.size() > 0 {
cid := zerodegree.pop()
if !scheduled[cid] {
bid = cid
continue blockloop
}
}
// Still nothing, pick any block.
for {
cid := posdegree.pop()
if !scheduled[cid] {
bid = cid
continue blockloop
}
}
log.Panicf("no block available for layout")
}
f.Blocks = order
}
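A hand-traced example on a hypothetical diamond CFG:

// entry -> a, b
// a -> c
// b -> c

Scheduling entry drops a and b to indegree 0. Among entry's successors, a is picked first; a's successor c (now indegree 1) is the only candidate after it; c has no unscheduled successors, so the zero-degree pool supplies b. Final order: entry, a, c, b.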


@ -0,0 +1,43 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
var (
// TODO(khr): put arch configuration constants together somewhere
intSize = 8
ptrSize = 8
)
//go:generate go run rulegen/rulegen.go rulegen/lower_amd64.rules lowerAmd64 lowerAmd64.go
// convert to machine-dependent ops
func lower(f *Func) {
// repeat rewrites until we find no more rewrites
// TODO: pick the target arch from config
applyRewrite(f, lowerAmd64)
// TODO: check for unlowered opcodes, fail if we find one
// additional pass for 386/amd64, link condition codes directly to blocks
// TODO: do generically somehow? Special "block" rewrite rules?
for _, b := range f.Blocks {
switch b.Kind {
case BlockIf:
switch b.Control.Op {
case OpSETL:
b.Kind = BlockLT
b.Control = b.Control.Args[0]
// TODO: others
}
case BlockLT:
if b.Control.Op == OpInvertFlags {
b.Kind = BlockGE
b.Control = b.Control.Args[0]
}
// TODO: others
}
}
deadcode(f) // TODO: separate pass?
}
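A hand-traced example of the condition-code linking: an if block whose control value is v = (SETL flags) becomes a BlockLT whose control is flags directly, so the boolean in v is never materialized; v is then unused and removed by the trailing deadcode call. Similarly, a BlockLT controlled by (InvertFlags flags) flips to a BlockGE on flags.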


@ -0,0 +1,307 @@
// autogenerated from rulegen/lower_amd64.rules: do not edit!
// generated with: go run rulegen/rulegen.go rulegen/lower_amd64.rules lowerAmd64 lowerAmd64.go
package ssa
func lowerAmd64(v *Value) bool {
switch v.Op {
case OpADDQ:
// match: (ADDQ x (ConstInt [c]))
// cond:
// result: (ADDCQ [c] x)
{
x := v.Args[0]
if v.Args[1].Op != OpConstInt {
goto end0
}
c := v.Args[1].Aux
v.Op = OpADDCQ
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = c
v.AddArg(x)
return true
}
end0:
;
// match: (ADDQ (ConstInt [c]) x)
// cond:
// result: (ADDCQ [c] x)
{
if v.Args[0].Op != OpConstInt {
goto end1
}
c := v.Args[0].Aux
x := v.Args[1]
v.Op = OpADDCQ
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = c
v.AddArg(x)
return true
}
end1:
;
case OpAdd:
// match: (Add <t> x y)
// cond: is64BitInt(t)
// result: (ADDQ x y)
{
t := v.Type
x := v.Args[0]
y := v.Args[1]
if !(is64BitInt(t)) {
goto end2
}
v.Op = OpADDQ
v.Aux = nil
v.Args = v.argstorage[:0]
v.AddArg(x)
v.AddArg(y)
return true
}
end2:
;
// match: (Add <t> x y)
// cond: is32BitInt(t)
// result: (ADDL x y)
{
t := v.Type
x := v.Args[0]
y := v.Args[1]
if !(is32BitInt(t)) {
goto end3
}
v.Op = OpADDL
v.Aux = nil
v.Args = v.argstorage[:0]
v.AddArg(x)
v.AddArg(y)
return true
}
end3:
;
case OpCMPQ:
// match: (CMPQ x (ConstInt [c]))
// cond:
// result: (CMPCQ x [c])
{
x := v.Args[0]
if v.Args[1].Op != OpConstInt {
goto end4
}
c := v.Args[1].Aux
v.Op = OpCMPCQ
v.Aux = nil
v.Args = v.argstorage[:0]
v.AddArg(x)
v.Aux = c
return true
}
end4:
;
// match: (CMPQ (ConstInt [c]) x)
// cond:
// result: (InvertFlags (CMPCQ x [c]))
{
if v.Args[0].Op != OpConstInt {
goto end5
}
c := v.Args[0].Aux
x := v.Args[1]
v.Op = OpInvertFlags
v.Aux = nil
v.Args = v.argstorage[:0]
v0 := v.Block.NewValue(OpCMPCQ, TypeInvalid, nil)
v0.AddArg(x)
v0.Aux = c
v0.SetType()
v.AddArg(v0)
return true
}
end5:
;
case OpLess:
// match: (Less x y)
// cond: is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type)
// result: (SETL (CMPQ x y))
{
x := v.Args[0]
y := v.Args[1]
if !(is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type)) {
goto end6
}
v.Op = OpSETL
v.Aux = nil
v.Args = v.argstorage[:0]
v0 := v.Block.NewValue(OpCMPQ, TypeInvalid, nil)
v0.AddArg(x)
v0.AddArg(y)
v0.SetType()
v.AddArg(v0)
return true
}
end6:
;
case OpLoadFP:
// match: (LoadFP <t> [offset] mem)
// cond: typeSize(t) == 8
// result: (LoadFP8 <t> [offset] mem)
{
t := v.Type
offset := v.Aux
mem := v.Args[0]
if !(typeSize(t) == 8) {
goto end7
}
v.Op = OpLoadFP8
v.Aux = nil
v.Args = v.argstorage[:0]
v.Type = t
v.Aux = offset
v.AddArg(mem)
return true
}
end7:
;
case OpLoadSP:
// match: (LoadSP <t> [offset] mem)
// cond: typeSize(t) == 8
// result: (LoadSP8 <t> [offset] mem)
{
t := v.Type
offset := v.Aux
mem := v.Args[0]
if !(typeSize(t) == 8) {
goto end8
}
v.Op = OpLoadSP8
v.Aux = nil
v.Args = v.argstorage[:0]
v.Type = t
v.Aux = offset
v.AddArg(mem)
return true
}
end8:
;
case OpSETL:
// match: (SETL (InvertFlags x))
// cond:
// result: (SETGE x)
{
if v.Args[0].Op != OpInvertFlags {
goto end9
}
x := v.Args[0].Args[0]
v.Op = OpSETGE
v.Aux = nil
v.Args = v.argstorage[:0]
v.AddArg(x)
return true
}
end9:
;
case OpSUBQ:
// match: (SUBQ x (ConstInt [c]))
// cond:
// result: (SUBCQ x [c])
{
x := v.Args[0]
if v.Args[1].Op != OpConstInt {
goto end10
}
c := v.Args[1].Aux
v.Op = OpSUBCQ
v.Aux = nil
v.Args = v.argstorage[:0]
v.AddArg(x)
v.Aux = c
return true
}
end10:
;
// match: (SUBQ (ConstInt [c]) x)
// cond:
// result: (NEGQ (SUBCQ x [c]))
{
if v.Args[0].Op != OpConstInt {
goto end11
}
c := v.Args[0].Aux
x := v.Args[1]
v.Op = OpNEGQ
v.Aux = nil
v.Args = v.argstorage[:0]
v0 := v.Block.NewValue(OpSUBCQ, TypeInvalid, nil)
v0.AddArg(x)
v0.Aux = c
v0.SetType()
v.AddArg(v0)
return true
}
end11:
;
case OpStoreFP:
// match: (StoreFP [offset] val mem)
// cond: typeSize(val.Type) == 8
// result: (StoreFP8 [offset] val mem)
{
offset := v.Aux
val := v.Args[0]
mem := v.Args[1]
if !(typeSize(val.Type) == 8) {
goto end12
}
v.Op = OpStoreFP8
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = offset
v.AddArg(val)
v.AddArg(mem)
return true
}
end12:
;
case OpStoreSP:
// match: (StoreSP [offset] val mem)
// cond: typeSize(val.Type) == 8
// result: (StoreSP8 [offset] val mem)
{
offset := v.Aux
val := v.Args[0]
mem := v.Args[1]
if !(typeSize(val.Type) == 8) {
goto end13
}
v.Op = OpStoreSP8
v.Aux = nil
v.Args = v.argstorage[:0]
v.Aux = offset
v.AddArg(val)
v.AddArg(mem)
return true
}
end13:
;
case OpSub:
// match: (Sub <t> x y)
// cond: is64BitInt(t)
// result: (SUBQ x y)
{
t := v.Type
x := v.Args[0]
y := v.Args[1]
if !(is64BitInt(t)) {
goto end14
}
v.Op = OpSUBQ
v.Aux = nil
v.Args = v.argstorage[:0]
v.AddArg(x)
v.AddArg(y)
return true
}
end14:
}
return false
}


@ -62,7 +62,9 @@ const (
 	OpCheckBound // 0 <= arg[0] < arg[1]

 	// function calls. Arguments to the call have already been written to the stack.
-	// Return values appear on the stack.
+	// Return values appear on the stack. The method receiver, if any, is treated
+	// as a phantom first argument.
+	// TODO: closure pointer must be in a register.
 	OpCall       // args are function ptr, memory
 	OpStaticCall // aux is function, arg is memory
@ -82,33 +84,38 @@ const (
 	OpStoreFP
 	OpStoreSP

-	// spill and restore ops for the register allocator. These are
-	// semantically identical to OpCopy - they do not take/return
-	// stores like regular memory ops do. We can get away with that because
-	// we know there is no aliasing to spill slots on the stack.
+	// spill&restore ops for the register allocator. These are
+	// semantically identical to OpCopy; they do not take/return
+	// stores like regular memory ops do. We can get away without memory
+	// args because we know there is no aliasing of spill slots on the stack.
 	OpStoreReg8
 	OpLoadReg8

 	// machine-dependent opcodes go here

-	// x86
+	// amd64
 	OpADDQ
 	OpSUBQ
-	OpADDCQ // 1 input arg, add aux which is an int64 constant
+	OpADDCQ // 1 input arg. output = input + aux.(int64)
 	OpSUBCQ // 1 input arg. output = input - aux.(int64)
 	OpNEGQ
 	OpCMPQ
 	OpCMPCQ // 1 input arg. Compares input with aux.(int64)
 	OpADDL
-	OpInvertFlags // inverts interpretation of the flags register (< to >=, etc.)
 	OpSETL // generate bool = "flags encode less than"
 	OpSETGE
+
+	// InvertFlags reverses direction of flags register interpretation:
+	// (InvertFlags (OpCMPQ a b)) == (OpCMPQ b a)
+	// This is a pseudo-op which can't appear in assembly output.
+	OpInvertFlags

 	OpLEAQ  // x+y
 	OpLEAQ2 // x+2*y
 	OpLEAQ4 // x+4*y
 	OpLEAQ8 // x+8*y

+	// load/store 8-byte integer register from stack slot.
 	OpLoadFP8
 	OpLoadSP8
 	OpStoreFP8


@ -4,9 +4,9 @@ package ssa
 import "fmt"

-const _Op_name = "OpUnknownOpNopOpThunkOpAddOpSubOpMulOpLessOpConstNilOpConstBoolOpConstStringOpConstIntOpConstFloatOpConstComplexOpArgOpGlobalOpFuncOpCopyOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpSliceOpIndexOpIndexAddrOpLoadOpStoreOpCheckNilOpCheckBoundOpCallOpStaticCallOpConvertOpConvNopOpFPAddrOpSPAddrOpLoadFPOpLoadSPOpStoreFPOpStoreSPOpStoreReg8OpLoadReg8OpADDQOpSUBQOpADDCQOpSUBCQOpNEGQOpCMPQOpCMPCQOpADDLOpInvertFlagsOpSETLOpSETGEOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpLoadFP8OpLoadSP8OpStoreFP8OpStoreSP8OpMax"
+const _Op_name = "OpUnknownOpNopOpThunkOpAddOpSubOpMulOpLessOpConstNilOpConstBoolOpConstStringOpConstIntOpConstFloatOpConstComplexOpArgOpGlobalOpFuncOpCopyOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpSliceOpIndexOpIndexAddrOpLoadOpStoreOpCheckNilOpCheckBoundOpCallOpStaticCallOpConvertOpConvNopOpFPAddrOpSPAddrOpLoadFPOpLoadSPOpStoreFPOpStoreSPOpStoreReg8OpLoadReg8OpADDQOpSUBQOpADDCQOpSUBCQOpNEGQOpCMPQOpCMPCQOpADDLOpSETLOpSETGEOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpLoadFP8OpLoadSP8OpStoreFP8OpStoreSP8OpMax"

-var _Op_index = [...]uint16{0, 9, 14, 21, 26, 31, 36, 42, 52, 63, 76, 86, 98, 112, 117, 125, 131, 137, 142, 153, 163, 173, 183, 195, 206, 217, 224, 231, 242, 248, 255, 265, 277, 283, 295, 304, 313, 321, 329, 337, 345, 354, 363, 374, 384, 390, 396, 403, 410, 416, 422, 429, 435, 448, 454, 461, 467, 474, 481, 488, 497, 506, 516, 526, 531}
+var _Op_index = [...]uint16{0, 9, 14, 21, 26, 31, 36, 42, 52, 63, 76, 86, 98, 112, 117, 125, 131, 137, 142, 153, 163, 173, 183, 195, 206, 217, 224, 231, 242, 248, 255, 265, 277, 283, 295, 304, 313, 321, 329, 337, 345, 354, 363, 374, 384, 390, 396, 403, 410, 416, 422, 429, 435, 441, 448, 461, 467, 474, 481, 488, 497, 506, 516, 526, 531}

 func (i Op) String() string {
 	if i < 0 || i+1 >= Op(len(_Op_index)) {


@ -0,0 +1,13 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// machine-independent optimization
//go:generate go run rulegen/rulegen.go rulegen/generic.rules genericRules generic.go
func opt(f *Func) {
applyRewrite(f, genericRules)
}


@ -0,0 +1,70 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/internal/ssa/types" // TODO: use golang.org/x/tools/go/types instead
)
func applyRewrite(f *Func, r func(*Value) bool) {
// repeat rewrites until we find no more rewrites
for {
change := false
for _, b := range f.Blocks {
for _, v := range b.Values {
if r(v) {
change = true
}
}
}
if !change {
return
}
}
}
// Common functions called from rewriting rules
func is64BitInt(t Type) bool {
return typeIdentical(t, TypeInt64) ||
typeIdentical(t, TypeUint64) ||
(typeIdentical(t, TypeInt) && intSize == 8) ||
(typeIdentical(t, TypeUint) && intSize == 8) ||
(typeIdentical(t, TypeUintptr) && ptrSize == 8)
}
func is32BitInt(t Type) bool {
return typeIdentical(t, TypeInt32) ||
typeIdentical(t, TypeUint32) ||
(typeIdentical(t, TypeInt) && intSize == 4) ||
(typeIdentical(t, TypeUint) && intSize == 4) ||
(typeIdentical(t, TypeUintptr) && ptrSize == 4)
}
func isSigned(t Type) bool {
return typeIdentical(t, TypeInt) ||
typeIdentical(t, TypeInt8) ||
typeIdentical(t, TypeInt16) ||
typeIdentical(t, TypeInt32) ||
typeIdentical(t, TypeInt64)
}
func typeSize(t Type) int {
switch t {
case TypeInt32, TypeUint32:
return 4
case TypeInt64, TypeUint64:
return 8
case TypeUintptr:
return ptrSize
case TypeInt, TypeUint:
return intSize
default:
if _, ok := t.(*types.Pointer); ok {
return ptrSize
}
panic("TODO: width of " + t.String())
}
}
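The fixed-point loop in applyRewrite matters because one rewrite can enable another (for example, lowering Add to ADDQ exposes the ADDQ-with-constant rule). A minimal standalone sketch of the same driver shape, rewriting strings instead of Values:

package main

import "fmt"

// rewriteAll applies r to every element until a full sweep changes nothing,
// mirroring the loop in applyRewrite.
func rewriteAll(xs []string, r func(string) (string, bool)) {
	for {
		change := false
		for i, x := range xs {
			if y, ok := r(x); ok {
				xs[i] = y
				change = true
			}
		}
		if !change {
			return
		}
	}
}

func main() {
	xs := []string{"Add", "Mul"}
	r := func(x string) (string, bool) {
		switch x {
		case "Add":
			return "ADDQ", true // a "lowering" rule
		case "ADDQ":
			return "ADDCQ", true // a rule exposed by the first rewrite
		}
		return x, false
	}
	rewriteAll(xs, r)
	fmt.Println(xs) // [ADDCQ Mul]
}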


@ -0,0 +1,16 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// constant folding
(Add <t> (ConstInt [c]) (ConstInt [d])) && is64BitInt(t) -> (ConstInt [{c.(int64)+d.(int64)}])
// load/store to stack
(Load (FPAddr [offset]) mem) -> (LoadFP [offset] mem)
(Store (FPAddr [offset]) val mem) -> (StoreFP [offset] val mem)
(Load (SPAddr [offset]) mem) -> (LoadSP [offset] mem)
(Store (SPAddr [offset]) val mem) -> (StoreSP [offset] val mem)
// expand array indexing
// others? Depends on what is already done by frontend


@ -0,0 +1,46 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// values are specified using the following format:
// (op <type> [aux] arg0 arg1 ...)
// the type and aux fields are optional
// on the matching side
// - the types and aux fields must match if they are specified.
// on the generated side
// - types will be computed by opcode typers if not specified explicitly.
// - aux will be nil if not specified.
// x86 register conventions:
// - Integer types live in the low portion of registers. Upper portions are junk.
// - Boolean types use the low-order byte of a register. Upper bytes are junk.
// - We do not use AH,BH,CH,DH registers.
// - Floating-point types will live in the low natural slot of an sse2 register.
// Unused portions are junk.
// These are the lowerings themselves
(Add <t> x y) && is64BitInt(t) -> (ADDQ x y)
(Add <t> x y) && is32BitInt(t) -> (ADDL x y)
(Sub <t> x y) && is64BitInt(t) -> (SUBQ x y)
(Less x y) && is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type) -> (SETL (CMPQ x y))
// stack loads/stores
(LoadFP <t> [offset] mem) && typeSize(t) == 8 -> (LoadFP8 <t> [offset] mem)
(StoreFP [offset] val mem) && typeSize(val.Type) == 8 -> (StoreFP8 [offset] val mem)
(LoadSP <t> [offset] mem) && typeSize(t) == 8 -> (LoadSP8 <t> [offset] mem)
(StoreSP [offset] val mem) && typeSize(val.Type) == 8 -> (StoreSP8 [offset] val mem)
// Rules below here apply some simple optimizations after lowering.
// TODO: Should this be a separate pass?
(ADDQ x (ConstInt [c])) -> (ADDCQ [c] x) // TODO: restrict c to int32 range?
(ADDQ (ConstInt [c]) x) -> (ADDCQ [c] x)
(SUBQ x (ConstInt [c])) -> (SUBCQ x [c])
(SUBQ (ConstInt [c]) x) -> (NEGQ (SUBCQ x [c]))
(CMPQ x (ConstInt [c])) -> (CMPCQ x [c])
(CMPQ (ConstInt [c]) x) -> (InvertFlags (CMPCQ x [c]))
// reverse ordering of compare instruction
(SETL (InvertFlags x)) -> (SETGE x)


@ -0,0 +1,328 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This program generates Go code that applies rewrite rules to a Value.
// The generated code implements a function of type func (v *Value) bool
// which returns true iff it did something.
// Ideas stolen from Swift: http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-2000-2.html
// Run with something like "go run rulegen.go lower_amd64.rules lowerAmd64 lowerAmd64.go"
package main
import (
"bufio"
"bytes"
"fmt"
"go/format"
"io"
"log"
"os"
"sort"
"strings"
)
// rule syntax:
// sexpr [&& extra conditions] -> sexpr
//
// sexpr are s-expressions (lisp-like parenthesized groupings)
// sexpr ::= (opcode sexpr*)
// | variable
// | [aux]
// | <type>
// | {code}
//
// aux ::= variable | {code}
// type ::= variable | {code}
// variable ::= some token
// opcode ::= one of the opcodes from ../op.go (without the Op prefix)
// extra conditions is just a chunk of Go that evaluates to a boolean. It may use
// variables declared in the matching sexpr. The variable "v" is predefined to be
// the value matched by the entire rule.
// If multiple rules match, the first one in file order is selected.
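// For example (hand-written here to mirror the rules files in this CL), the rule
//   (ADDQ x (ConstInt [c])) -> (ADDCQ [c] x)
// compiles to a case under OpADDQ that binds x to v.Args[0], checks that
// v.Args[1].Op == OpConstInt, binds c to v.Args[1].Aux, and on success
// rewrites v in place to OpADDCQ with aux c and single argument x.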
func main() {
if len(os.Args) < 3 || len(os.Args) > 4 {
fmt.Printf("usage: go run rulegen.go <rule file> <function name> [<output file>]")
os.Exit(1)
}
rulefile := os.Args[1]
rulefn := os.Args[2]
// Open input file.
text, err := os.Open(rulefile)
if err != nil {
log.Fatalf("can't read rule file: %v", err)
}
// oprules contains a list of rules for each opcode
oprules := map[string][]string{}
// read rule file
scanner := bufio.NewScanner(text)
for scanner.Scan() {
line := scanner.Text()
if i := strings.Index(line, "//"); i >= 0 {
// Remove comments. Note that this isn't string safe, so
// it will truncate lines with // inside strings. Oh well.
line = line[:i]
}
line = strings.TrimSpace(line)
if line == "" {
continue
}
op := strings.Split(line, " ")[0][1:]
oprules[op] = append(oprules[op], line)
}
if err := scanner.Err(); err != nil {
log.Fatalf("scanner failed: %v\n", err)
}
// Start output buffer, write header.
w := new(bytes.Buffer)
fmt.Fprintf(w, "// autogenerated from %s: do not edit!\n", rulefile)
fmt.Fprintf(w, "// generated with: go run rulegen/rulegen.go %s\n", strings.Join(os.Args[1:], " "))
fmt.Fprintln(w, "package ssa")
fmt.Fprintf(w, "func %s(v *Value) bool {\n", rulefn)
// generate code for each rule
fmt.Fprintf(w, "switch v.Op {\n")
var ops []string
for op := range oprules {
ops = append(ops, op)
}
sort.Strings(ops)
rulenum := 0
for _, op := range ops {
fmt.Fprintf(w, "case Op%s:\n", op)
for _, rule := range oprules[op] {
// split at ->
s := strings.Split(rule, "->")
if len(s) != 2 {
log.Fatalf("no arrow in rule %s", rule)
}
lhs := strings.Trim(s[0], " \t")
result := strings.Trim(s[1], " \t\n")
// split match into matching part and additional condition
match := lhs
cond := ""
if i := strings.Index(match, "&&"); i >= 0 {
cond = strings.Trim(match[i+2:], " \t")
match = strings.Trim(match[:i], " \t")
}
fmt.Fprintf(w, "// match: %s\n", match)
fmt.Fprintf(w, "// cond: %s\n", cond)
fmt.Fprintf(w, "// result: %s\n", result)
fail := fmt.Sprintf("{\ngoto end%d\n}\n", rulenum)
fmt.Fprintf(w, "{\n")
genMatch(w, match, fail)
if cond != "" {
fmt.Fprintf(w, "if !(%s) %s", cond, fail)
}
genResult(w, result)
fmt.Fprintf(w, "return true\n")
fmt.Fprintf(w, "}\n")
fmt.Fprintf(w, "end%d:;\n", rulenum)
rulenum++
}
}
fmt.Fprintf(w, "}\n")
fmt.Fprintf(w, "return false\n")
fmt.Fprintf(w, "}\n")
// gofmt result
b := w.Bytes()
b, err = format.Source(b)
if err != nil {
panic(err)
}
// Write to a file if given, otherwise stdout.
var out io.WriteCloser
if len(os.Args) >= 4 {
outfile := os.Args[3]
out, err = os.Create(outfile)
if err != nil {
log.Fatalf("can't open output file %s: %v\n", outfile, err)
}
} else {
out = os.Stdout
}
if _, err = out.Write(b); err != nil {
log.Fatalf("can't write output: %v\n", err)
}
if err = out.Close(); err != nil {
log.Fatalf("can't close output: %v\n", err)
}
}
func genMatch(w io.Writer, match, fail string) {
genMatch0(w, match, "v", fail, map[string]string{}, true)
}
func genMatch0(w io.Writer, match, v, fail string, m map[string]string, top bool) {
if match[0] != '(' {
if x, ok := m[match]; ok {
// variable already has a definition. Check whether
// the old definition and the new definition match.
// For example, (add x x). Equality is just pointer equality
// on Values (so cse is important to do before lowering).
fmt.Fprintf(w, "if %s != %s %s", v, x, fail)
return
}
// remember that this variable references the given value
m[match] = v
fmt.Fprintf(w, "%s := %s\n", match, v)
return
}
// split body up into regions. Split by spaces/tabs, except those
// contained in () or {}.
s := split(match[1 : len(match)-1])
// check op
if !top {
fmt.Fprintf(w, "if %s.Op != Op%s %s", v, s[0], fail)
}
// check type/aux/args
argnum := 0
for _, a := range s[1:] {
if a[0] == '<' {
// type restriction
t := a[1 : len(a)-1]
if t[0] == '{' {
// code. We must match the results of this code.
fmt.Fprintf(w, "if %s.Type != %s %s", v, t[1:len(t)-1], fail)
} else {
// variable
if u, ok := m[t]; ok {
// must match previous variable
fmt.Fprintf(w, "if %s.Type != %s %s", v, u, fail)
} else {
m[t] = v + ".Type"
fmt.Fprintf(w, "%s := %s.Type\n", t, v)
}
}
} else if a[0] == '[' {
// aux restriction
x := a[1 : len(a)-1]
if x[0] == '{' {
// code
fmt.Fprintf(w, "if %s.Aux != %s %s", v, x[1:len(x)-1], fail)
} else {
// variable
if y, ok := m[x]; ok {
fmt.Fprintf(w, "if %s.Aux != %s %s", v, y, fail)
} else {
m[x] = v + ".Aux"
fmt.Fprintf(w, "%s := %s.Aux\n", x, v)
}
}
} else if a[0] == '{' {
fmt.Fprintf(w, "if %s.Args[%d] != %s %s", v, argnum, a[1:len(a)-1], fail)
argnum++
} else {
// variable or sexpr
genMatch0(w, a, fmt.Sprintf("%s.Args[%d]", v, argnum), fail, m, false)
argnum++
}
}
}
func genResult(w io.Writer, result string) {
genResult0(w, result, new(int), true)
}
func genResult0(w io.Writer, result string, alloc *int, top bool) string {
if result[0] != '(' {
// variable
return result
}
s := split(result[1 : len(result)-1])
var v string
var needsType bool
if top {
v = "v"
fmt.Fprintf(w, "v.Op = Op%s\n", s[0])
fmt.Fprintf(w, "v.Aux = nil\n")
fmt.Fprintf(w, "v.Args = v.argstorage[:0]\n")
} else {
v = fmt.Sprintf("v%d", *alloc)
*alloc++
fmt.Fprintf(w, "%s := v.Block.NewValue(Op%s, TypeInvalid, nil)\n", v, s[0])
needsType = true
}
for _, a := range s[1:] {
if a[0] == '<' {
// type restriction
t := a[1 : len(a)-1]
if t[0] == '{' {
t = t[1 : len(t)-1]
}
fmt.Fprintf(w, "%s.Type = %s\n", v, t)
needsType = false
} else if a[0] == '[' {
// aux restriction
x := a[1 : len(a)-1]
if x[0] == '{' {
x = x[1 : len(x)-1]
}
fmt.Fprintf(w, "%s.Aux = %s\n", v, x)
} else if a[0] == '{' {
fmt.Fprintf(w, "%s.AddArg(%s)\n", v, a[1:len(a)-1])
} else {
// regular argument (sexpr or variable)
x := genResult0(w, a, alloc, false)
fmt.Fprintf(w, "%s.AddArg(%s)\n", v, x)
}
}
if needsType {
fmt.Fprintf(w, "%s.SetType()\n", v)
}
return v
}
func split(s string) []string {
var r []string
outer:
for s != "" {
d := 0 // depth of ({[<
nonsp := false // found a non-space char so far
for i := 0; i < len(s); i++ {
switch s[i] {
case '(', '{', '[', '<':
d++
case ')', '}', ']', '>':
d--
case ' ', '\t':
if d == 0 && nonsp {
r = append(r, strings.TrimSpace(s[:i]))
s = s[i:]
continue outer
}
default:
nonsp = true
}
}
if d != 0 {
panic("imbalanced expression: " + s)
}
if nonsp {
r = append(r, strings.TrimSpace(s))
}
break
}
return r
}
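Hand-traced examples of split's tokenization (not from a test in this CL):

// split("x y")              -> ["x", "y"]
// split("x (ConstInt [c])") -> ["x", "(ConstInt [c])"]
// split("<t> [offset] mem") -> ["<t>", "[offset]", "mem"]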


@ -0,0 +1,69 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// Schedule the Values in each Block. After this phase returns, the
// order of b.Values matters and is the order in which those values
// will appear in the assembly output. For now it generates an
// arbitrary valid schedule using a topological sort. TODO(khr):
// schedule smarter.
func schedule(f *Func) {
const (
unmarked = 0
found = 1
expanded = 2
done = 3
)
state := make([]byte, f.NumValues())
var queue []*Value // stack-like worklist; contains found and expanded nodes.
var order []*Value
for _, b := range f.Blocks {
// Topologically sort the values in b.
order = order[:0]
for _, v := range b.Values {
if v.Op == OpPhi {
// Phis all go first. We handle phis specially
// because they may have self edges "a = phi(a, b, c)"
order = append(order, v)
continue
}
if state[v.ID] != unmarked {
if state[v.ID] != done {
panic("bad state")
}
continue
}
state[v.ID] = found
queue = append(queue, v)
for len(queue) > 0 {
v = queue[len(queue)-1]
switch state[v.ID] {
case found:
state[v.ID] = expanded
// Note that v is not popped. We leave it in place
// until all its children have been explored.
for _, w := range v.Args {
if w.Block == b && w.Op != OpPhi && state[w.ID] == unmarked {
state[w.ID] = found
queue = append(queue, w)
}
}
case expanded:
queue = queue[:len(queue)-1]
state[v.ID] = done
order = append(order, v)
default:
panic("bad state")
}
}
}
copy(b.Values, order)
}
// TODO: only allow one live mem type and one live flags type (x86)
// This restriction will force any loads (and any flag uses) to appear
// before the next store (flag update). This "anti-dependence" is not
// recorded explicitly in ssa form.
}
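A hand-traced example (value names invented): for a block containing v3 = Add v1 v2 followed by v1 = Const [1] and v2 = Const [2], the sort starts at v3, leaves it on the worklist while pushing its in-block arguments v1 and v2, emits those as they reach the expanded state, and emits v3 last, yielding the order v2, v1, v3, with every value after its arguments.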


@ -13,6 +13,7 @@
 (TYPE T127bd68 int)
 (TYPE T127bd68 int)
 (DCL n T127bd68)
+(AS n (LOAD (FP T127bd68 0)))
 (DCL ~r1 T127bd68)
 (DCL n T127bd68)
 (DCL autotmp_0000 T127bd68)
@ -29,7 +30,7 @@
 (IF (LT n (CINT 2)) .then0 .else0)
 (LABEL .then0)
 (AS ~r1 n)
-(AS (SP T127bd68 8) ~r1)
+(AS (FP T127bd68 8) ~r1)
 (RETURN)
 (GOTO .end0)
 (LABEL .else0)
@ -42,5 +43,5 @@
 (CALL fib)
 (AS autotmp_0001 (LOAD (SP T127bd68 8)))
 (AS ~r1 (ADD autotmp_0000 autotmp_0001))
-(AS (SP T127bd68 8) ~r1)
+(AS (FP T127bd68 8) ~r1)
 (RETURN)


@ -43,7 +43,7 @@
 (DCL autotmp_0003 Tf5dd68)
 (DCL ~r1 Tf5dd68)
 (DCL a Tf5dd68)
-(AS n (LOAD (SP Tf5dd68 0)))
+(AS n (LOAD (FP Tf5dd68 0)))
 (AS a (CINT 0))
 (AS b (CINT 1))
 (AS i (CINT 0))
@ -58,5 +58,5 @@
 (AS i (ADD autotmp_0002 (CINT 1)))
 (GOTO .top0)
 (LABEL .end0)
-(AS (SP Tf5dd68 8) a)
+(AS (FP Tf5dd68 8) a)
 (RETURN)