mirror of
https://github.com/golang/go
synced 2024-09-30 00:24:29 -06:00
cmd/compile: enable carry chain scheduling for arm64
This is a follow up of CL 393656 on arm64. This CL puts ScoreCarryChainTail before ScoreMemory and after ScoreReadFlags, so that the scheduling of the carry chain will not break the scheduling of ScoreVarDef. Benchmarks: name old time/op new time/op delta ScalarMult/P256-8 42.0µs ± 0% 42.0µs ± 0% -0.13% (p=0.032 n=5+5) ScalarMult/P224-8 135µs ± 0% 96µs ± 0% -29.04% (p=0.008 n=5+5) ScalarMult/P384-8 573µs ± 1% 355µs ± 0% -38.05% (p=0.008 n=5+5) ScalarMult/P521-8 1.50ms ± 4% 0.77ms ± 0% -48.78% (p=0.008 n=5+5) MarshalUnmarshal/P256/Uncompressed-8 505ns ± 1% 506ns ± 0% ~ (p=0.460 n=5+5) MarshalUnmarshal/P256/Compressed-8 6.75µs ± 0% 6.73µs ± 0% -0.27% (p=0.016 n=5+5) MarshalUnmarshal/P224/Uncompressed-8 927ns ± 0% 818ns ± 0% -11.76% (p=0.008 n=5+5) MarshalUnmarshal/P224/Compressed-8 136µs ± 0% 96µs ± 0% -29.58% (p=0.008 n=5+5) MarshalUnmarshal/P384/Uncompressed-8 1.77µs ± 0% 1.36µs ± 1% -23.14% (p=0.008 n=5+5) MarshalUnmarshal/P384/Compressed-8 56.5µs ± 0% 31.9µs ± 0% -43.59% (p=0.016 n=5+4) MarshalUnmarshal/P521/Uncompressed-8 2.91µs ± 0% 2.03µs ± 1% -30.32% (p=0.008 n=5+5) MarshalUnmarshal/P521/Compressed-8 148µs ± 0% 68µs ± 1% -54.28% (p=0.008 n=5+5) Change-Id: I4bf4e3265d7e1ee85765ff2bf006ca5a794d4979 Reviewed-on: https://go-review.googlesource.com/c/go/+/432275 Reviewed-by: Carlos Amedee <carlos@golang.org> Reviewed-by: Keith Randall <khr@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Eric Fang <eric.fang@arm.com>
This commit is contained in:
parent
40c7e94cc5
commit
669ec549b5
@ -14,10 +14,10 @@ const (
|
||||
ScorePhi = iota // towards top of block
|
||||
ScoreArg
|
||||
ScoreNilCheck
|
||||
ScoreCarryChainTail
|
||||
ScoreReadTuple
|
||||
ScoreVarDef
|
||||
ScoreMemory
|
||||
ScoreCarryChainTail
|
||||
ScoreReadFlags
|
||||
ScoreDefault
|
||||
ScoreFlags
|
||||
@ -155,7 +155,7 @@ func schedule(f *Func) {
|
||||
// VARDEF ops are scheduled before the corresponding LEA.
|
||||
score[v.ID] = ScoreMemory
|
||||
case v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN:
|
||||
if (v.Op == OpSelect1 || v.Op == OpSelect0) && (v.Args[0].Op.isCarry() || v.Type.IsFlags()) {
|
||||
if (v.Op == OpSelect1 || v.Op == OpSelect0) && (v.Args[0].isCarry() || v.Type.IsFlags()) {
|
||||
// When the Select pseudo op is being used for a carry or flag from
|
||||
// a tuple then score it as ScoreFlags so it happens later. This
|
||||
// prevents the bit from being clobbered before it is used.
|
||||
@ -163,8 +163,8 @@ func schedule(f *Func) {
|
||||
} else {
|
||||
score[v.ID] = ScoreReadTuple
|
||||
}
|
||||
case v.Op.isCarry():
|
||||
if w := v.getCarryProducer(); w != nil {
|
||||
case v.isCarry():
|
||||
if w := v.getCarryInput(); w != nil && w.Block == b {
|
||||
// The producing op is not the final user of the carry bit. Its
|
||||
// current score is one of unscored, Flags, or CarryChainTail.
|
||||
// These occur if the producer has not been scored, another user
|
||||
@ -183,7 +183,7 @@ func schedule(f *Func) {
|
||||
// one chain to be scheduled, if possible.
|
||||
score[v.ID] = ScoreCarryChainTail
|
||||
}
|
||||
case v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags():
|
||||
case v.isFlagOp():
|
||||
// Schedule flag register generation as late as possible.
|
||||
// This makes sure that we only have one live flags
|
||||
// value at a time.
|
||||
@ -192,7 +192,7 @@ func schedule(f *Func) {
|
||||
score[v.ID] = ScoreDefault
|
||||
// If we're reading flags, schedule earlier to keep flag lifetime short.
|
||||
for _, a := range v.Args {
|
||||
if a.Type.IsFlags() {
|
||||
if a.isFlagOp() {
|
||||
score[v.ID] = ScoreReadFlags
|
||||
}
|
||||
}
|
||||
@ -263,7 +263,6 @@ func schedule(f *Func) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// To put things into a priority queue
|
||||
@ -287,7 +286,7 @@ func schedule(f *Func) {
|
||||
|
||||
v := heap.Pop(priq).(*Value)
|
||||
|
||||
if f.pass.debug > 1 && score[v.ID] == ScoreCarryChainTail && v.Op.isCarry() {
|
||||
if f.pass.debug > 1 && score[v.ID] == ScoreCarryChainTail && v.isCarry() {
|
||||
// Add some debugging noise if the chain of carrying ops will not
|
||||
// likely be scheduled without potential carry flag clobbers.
|
||||
if !isCarryChainReady(v, uses) {
|
||||
@ -551,45 +550,77 @@ func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value
|
||||
return order
|
||||
}
|
||||
|
||||
// Return whether all dependent carry ops can be scheduled after this.
|
||||
// isFlagOp reports whether v's type is flags, or a tuple whose second field is flags.
|
||||
func (v *Value) isFlagOp() bool {
|
||||
return v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags()
|
||||
}
|
||||
|
||||
// isCarryChainReady reports whether all dependent carry ops can be scheduled after this.
|
||||
func isCarryChainReady(v *Value, uses []int32) bool {
|
||||
// A chain can be scheduled in its entirety if
|
||||
// the use count of each dependent op is 1. If none,
|
||||
// schedule the first.
|
||||
j := 1 // The first op uses[k.ID] == 0. Dependent ops are always >= 1.
|
||||
for k := v; k != nil; k = k.getCarryProducer() {
|
||||
for k := v; k != nil; k = k.getCarryInput() {
|
||||
j += int(uses[k.ID]) - 1
|
||||
}
|
||||
return j == 0
|
||||
}
|
||||
|
||||
// Return whether op is an operation which produces a carry bit value, but does not consume it.
|
||||
func (op Op) isCarryCreator() bool {
|
||||
switch op {
|
||||
// isCarryInput reports whether v accepts a carry value as input.
|
||||
func (v *Value) isCarryInput() bool {
|
||||
return v.getCarryInput() != nil
|
||||
}
|
||||
|
||||
// isCarryOutput reports whether v generates a carry as output.
|
||||
func (v *Value) isCarryOutput() bool {
|
||||
// special cases for PPC64 which put their carry values in XER instead of flags
|
||||
switch v.Block.Func.Config.arch {
|
||||
case "ppc64", "ppc64le":
|
||||
switch v.Op {
|
||||
case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
return v.isFlagOp() && v.Op != OpSelect1
|
||||
}
|
||||
|
||||
// Return whether op consumes or creates a carry a bit value.
|
||||
func (op Op) isCarry() bool {
|
||||
switch op {
|
||||
// isCarryCreator reports whether v is an operation which produces a carry bit value,
|
||||
// but does not consume it.
|
||||
func (v *Value) isCarryCreator() bool {
|
||||
return v.isCarryOutput() && !v.isCarryInput()
|
||||
}
|
||||
|
||||
// isCarry reports whether v consumes or creates a carry bit value.
|
||||
func (v *Value) isCarry() bool {
|
||||
return v.isCarryOutput() || v.isCarryInput()
|
||||
}
|
||||
|
||||
// getCarryInput returns the producing *Value of the carry bit of this op, or nil if none.
|
||||
func (v *Value) getCarryInput() *Value {
|
||||
// special cases for PPC64 which put their carry values in XER instead of flags
|
||||
switch v.Block.Func.Config.arch {
|
||||
case "ppc64", "ppc64le":
|
||||
switch v.Op {
|
||||
case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
|
||||
return true
|
||||
}
|
||||
return op.isCarryCreator()
|
||||
}
|
||||
|
||||
// Return the producing *Value of the carry bit of this op, or nil if none.
|
||||
func (v *Value) getCarryProducer() *Value {
|
||||
if v.Op.isCarry() && !v.Op.isCarryCreator() {
|
||||
// PPC64 carry dependencies are conveyed through their final argument.
|
||||
// Likewise, there is always an OpSelect1 between them.
|
||||
return v.Args[len(v.Args)-1].Args[0]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
for _, a := range v.Args {
|
||||
if !a.isFlagOp() {
|
||||
continue
|
||||
}
|
||||
if a.Op == OpSelect1 {
|
||||
a = a.Args[0]
|
||||
}
|
||||
return a
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type bySourcePos []*Value
|
||||
|
||||
|
@ -99,3 +99,62 @@ func TestStoreOrder(t *testing.T) {
|
||||
t.Errorf("store order is wrong: got %v, want v2 v3 v4 after v5", order)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCarryChainOrder(t *testing.T) {
|
||||
// In the function below, there are two carry chains that have no dependencies on each other,
|
||||
// one is A1 -> A1carry -> A1Carryvalue, the other is A2 -> A2carry -> A2Carryvalue. If they
|
||||
// are not scheduled properly, the carry will be clobbered, causing the carry to be regenerated.
|
||||
c := testConfigARM64(t)
|
||||
fun := c.Fun("entry",
|
||||
Bloc("entry",
|
||||
Valu("mem0", OpInitMem, types.TypeMem, 0, nil),
|
||||
Valu("x", OpARM64MOVDconst, c.config.Types.UInt64, 5, nil),
|
||||
Valu("y", OpARM64MOVDconst, c.config.Types.UInt64, 6, nil),
|
||||
Valu("z", OpARM64MOVDconst, c.config.Types.UInt64, 7, nil),
|
||||
Valu("A1", OpARM64ADDSflags, types.NewTuple(c.config.Types.UInt64, types.TypeFlags), 0, nil, "x", "z"), // x+z, set flags
|
||||
Valu("A1carry", OpSelect1, types.TypeFlags, 0, nil, "A1"),
|
||||
Valu("A2", OpARM64ADDSflags, types.NewTuple(c.config.Types.UInt64, types.TypeFlags), 0, nil, "y", "z"), // y+z, set flags
|
||||
Valu("A2carry", OpSelect1, types.TypeFlags, 0, nil, "A2"),
|
||||
Valu("A1value", OpSelect0, c.config.Types.UInt64, 0, nil, "A1"),
|
||||
Valu("A1Carryvalue", OpARM64ADCzerocarry, c.config.Types.UInt64, 0, nil, "A1carry"), // 0+0+A1carry
|
||||
Valu("A2value", OpSelect0, c.config.Types.UInt64, 0, nil, "A2"),
|
||||
Valu("A2Carryvalue", OpARM64ADCzerocarry, c.config.Types.UInt64, 0, nil, "A2carry"), // 0+0+A2carry
|
||||
Valu("ValueSum", OpARM64ADD, c.config.Types.UInt64, 0, nil, "A1value", "A2value"),
|
||||
Valu("CarrySum", OpARM64ADD, c.config.Types.UInt64, 0, nil, "A1Carryvalue", "A2Carryvalue"),
|
||||
Valu("Sum", OpARM64AND, c.config.Types.UInt64, 0, nil, "ValueSum", "CarrySum"),
|
||||
Goto("exit")),
|
||||
Bloc("exit",
|
||||
Exit("mem0")),
|
||||
)
|
||||
|
||||
CheckFunc(fun.f)
|
||||
schedule(fun.f)
|
||||
|
||||
// The expected order is A1 < A1carry < A1Carryvalue < A2 < A2carry < A2Carryvalue.
|
||||
// There is no dependency between the two carry chains, so it doesn't matter which
|
||||
// comes first and which comes after, but the unsorted position of A1 is before A2,
|
||||
// so A1Carryvalue < A2.
|
||||
var ai, bi, ci, di, ei, fi int
|
||||
for i, v := range fun.f.Blocks[0].Values {
|
||||
switch {
|
||||
case fun.values["A1"] == v:
|
||||
ai = i
|
||||
case fun.values["A1carry"] == v:
|
||||
bi = i
|
||||
case fun.values["A1Carryvalue"] == v:
|
||||
ci = i
|
||||
case fun.values["A2"] == v:
|
||||
di = i
|
||||
case fun.values["A2carry"] == v:
|
||||
ei = i
|
||||
case fun.values["A2Carryvalue"] == v:
|
||||
fi = i
|
||||
}
|
||||
}
|
||||
if !(ai < bi && bi < ci && ci < di && di < ei && ei < fi) {
|
||||
t.Logf("Func: %s", fun.f)
|
||||
t.Errorf("carry chain order is wrong: got %v, want V%d after V%d after V%d after V%d after V%d after V%d,",
|
||||
fun.f.Blocks[0], fun.values["A1"].ID, fun.values["A1carry"].ID, fun.values["A1Carryvalue"].ID,
|
||||
fun.values["A2"].ID, fun.values["A2carry"].ID, fun.values["A2Carryvalue"].ID)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user