From f91ff1a509c41ba0d14c3018f486fb64b3b54425 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Tue, 4 Aug 2015 14:55:35 -0700 Subject: [PATCH] [dev.ssa] cmd/compile: add SSA pass to move values closer to uses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even this very simple, restricted initial implementation helps. While running make.bash, it moves 84437 values to new, closer homes. As a concrete example: func f_ssa(i, j int, b bool) int { if !b { return 0 } return i + j } It cuts off one stack slot and two instructions: Before: "".f_ssa t=1 size=96 value=0 args=0x20 locals=0x18 0x0000 00000 (x.go:3) TEXT "".f_ssa(SB), $24-32 0x0000 00000 (x.go:3) SUBQ $24, SP 0x0004 00004 (x.go:3) FUNCDATA $0, "".gcargs·0(SB) 0x0004 00004 (x.go:3) FUNCDATA $1, "".gclocals·1(SB) 0x0004 00004 (x.go:5) MOVQ $0, AX 0x0006 00006 (x.go:3) MOVQ 32(SP), CX 0x000b 00011 (x.go:3) MOVQ 40(SP), DX 0x0010 00016 (x.go:3) LEAQ 48(SP), BX 0x0015 00021 (x.go:3) MOVB (BX), BPB 0x0018 00024 (x.go:3) MOVQ $0, SI 0x001a 00026 (x.go:3) MOVQ SI, 56(SP) 0x001f 00031 (x.go:3) TESTB BPB, BPB 0x0022 00034 (x.go:5) MOVQ AX, (SP) 0x0026 00038 (x.go:3) MOVQ CX, 8(SP) 0x002b 00043 (x.go:3) MOVQ DX, 16(SP) 0x0030 00048 (x.go:4) JEQ 74 0x0032 00050 (x.go:3) MOVQ 8(SP), AX 0x0037 00055 (x.go:3) MOVQ 16(SP), CX 0x003c 00060 (x.go:7) LEAQ (AX)(CX*1), DX 0x0040 00064 (x.go:7) MOVQ DX, 56(SP) 0x0045 00069 (x.go:3) ADDQ $24, SP 0x0049 00073 (x.go:3) RET 0x004a 00074 (x.go:5) MOVQ (SP), AX 0x004e 00078 (x.go:5) MOVQ AX, 56(SP) 0x0053 00083 (x.go:3) JMP 69 After: "".f_ssa t=1 size=80 value=0 args=0x20 locals=0x10 0x0000 00000 (x.go:3) TEXT "".f_ssa(SB), $16-32 0x0000 00000 (x.go:3) SUBQ $16, SP 0x0004 00004 (x.go:3) FUNCDATA $0, "".gcargs·0(SB) 0x0004 00004 (x.go:3) FUNCDATA $1, "".gclocals·1(SB) 0x0004 00004 (x.go:3) MOVQ 32(SP), AX 0x0009 00009 (x.go:3) MOVQ 24(SP), CX 0x000e 00014 (x.go:3) LEAQ 40(SP), DX 0x0013 00019 (x.go:3) MOVB (DX), BL 0x0015 00021 (x.go:3) MOVQ $0, BP 0x0017 00023 (x.go:3) MOVQ BP, 48(SP) 0x001c 00028 (x.go:3) TESTB BL, BL 0x001e 00030 (x.go:3) MOVQ AX, (SP) 0x0022 00034 (x.go:3) MOVQ CX, 8(SP) 0x0027 00039 (x.go:4) JEQ 64 0x0029 00041 (x.go:3) MOVQ 8(SP), AX 0x002e 00046 (x.go:3) MOVQ (SP), CX 0x0032 00050 (x.go:7) LEAQ (AX)(CX*1), DX 0x0036 00054 (x.go:7) MOVQ DX, 48(SP) 0x003b 00059 (x.go:3) ADDQ $16, SP 0x003f 00063 (x.go:3) RET 0x0040 00064 (x.go:5) MOVQ $0, AX 0x0042 00066 (x.go:5) MOVQ AX, 48(SP) 0x0047 00071 (x.go:3) JMP 59 Of course, the old backend is still well ahead: "".f_ssa t=1 size=48 value=0 args=0x20 locals=0x0 0x0000 00000 (x.go:3) TEXT "".f_ssa(SB), $0-32 0x0000 00000 (x.go:3) NOP 0x0000 00000 (x.go:3) NOP 0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·a8eabfc4a4514ed6b3b0c61e9680e440(SB) 0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (x.go:4) CMPB "".b+24(FP), $0 0x0005 00005 (x.go:4) JNE 17 0x0007 00007 (x.go:5) MOVQ $0, "".~r3+32(FP) 0x0010 00016 (x.go:5) RET 0x0011 00017 (x.go:7) MOVQ "".i+8(FP), BX 0x0016 00022 (x.go:7) MOVQ "".j+16(FP), BP 0x001b 00027 (x.go:7) ADDQ BP, BX 0x001e 00030 (x.go:7) MOVQ BX, "".~r3+32(FP) 0x0023 00035 (x.go:7) RET Some regalloc improvements should help considerably. Change-Id: I95bb5dd83e56afd70ae4e983f1d32dffd0c3d46a Reviewed-on: https://go-review.googlesource.com/13142 Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/compile.go | 6 +++ src/cmd/compile/internal/ssa/tighten.go | 70 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 src/cmd/compile/internal/ssa/tighten.go diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 9111254a32..7ab8ddf3dc 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -65,6 +65,7 @@ var passes = [...]pass{ {"generic deadcode", deadcode}, {"dse", dse}, {"fuse", fuse}, + {"tighten", tighten}, // move values closer to their uses {"lower", lower}, {"lowered cse", cse}, {"lowered deadcode", deadcode}, @@ -94,6 +95,11 @@ var passOrder = [...]constraint{ {"nilcheckelim", "generic deadcode"}, // nilcheckelim generates sequences of plain basic blocks {"nilcheckelim", "fuse"}, + // tighten should happen before lowering to avoid splitting naturally paired instructions such as CMP/SET + {"tighten", "lower"}, + // tighten will be most effective when as many values have been removed as possible + {"generic deadcode", "tighten"}, + {"generic cse", "tighten"}, // don't layout blocks until critical edges have been removed {"critical", "layout"}, // regalloc requires the removal of all critical edges diff --git a/src/cmd/compile/internal/ssa/tighten.go b/src/cmd/compile/internal/ssa/tighten.go new file mode 100644 index 0000000000..9cf9a44590 --- /dev/null +++ b/src/cmd/compile/internal/ssa/tighten.go @@ -0,0 +1,70 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +// tighten moves Values closer to the Blocks in which they are used. +// This can reduce the amount of register spilling required, +// if it doesn't also create more live values. +// For now, it handles only the trivial case in which a +// Value with one or fewer args is only used in a single Block. +// TODO: Do something smarter. +// A Value can be moved to any block that +// dominates all blocks in which it is used. +// Figure out when that will be an improvement. +func tighten(f *Func) { + // For each value, the number of blocks in which it is used. + uses := make([]int, f.NumValues()) + + // For each value, one block in which that value is used. + home := make([]*Block, f.NumValues()) + + changed := true + for changed { + changed = false + + // Reset uses + for i := range uses { + uses[i] = 0 + } + // No need to reset home; any relevant values will be written anew anyway + + for _, b := range f.Blocks { + for _, v := range b.Values { + for _, w := range v.Args { + uses[w.ID]++ + home[w.ID] = b + } + } + if b.Control != nil { + uses[b.Control.ID]++ + home[b.Control.ID] = b + } + } + + for _, b := range f.Blocks { + for i := 0; i < len(b.Values); i++ { + v := b.Values[i] + if v.Op == OpPhi { + continue + } + if uses[v.ID] == 1 && home[v.ID] != b && len(v.Args) < 2 { + // v is used in exactly one block, and it is not b. + // Furthermore, it takes at most one input, + // so moving it will not increase the + // number of live values anywhere. + // Move v to that block. + c := home[v.ID] + c.Values = append(c.Values, v) + v.Block = c + last := len(b.Values) - 1 + b.Values[i] = b.Values[last] + b.Values[last] = nil + b.Values = b.Values[:last] + changed = true + } + } + } + } +}