mirror of
https://github.com/golang/go
synced 2024-11-17 08:24:43 -07:00
cmd/compile: enable carry chain scheduling for arm64
This is a follow-up of CL 393656 on arm64.

Benchmarks:
name                                  old time/op  new time/op  delta
ScalarMult/P256-8                     42.0µs ± 0%  42.0µs ± 0%   -0.13%  (p=0.032 n=5+5)
ScalarMult/P224-8                      135µs ± 0%    96µs ± 0%  -29.04%  (p=0.008 n=5+5)
ScalarMult/P384-8                      573µs ± 1%   355µs ± 0%  -38.05%  (p=0.008 n=5+5)
ScalarMult/P521-8                     1.50ms ± 4%  0.77ms ± 0%  -48.78%  (p=0.008 n=5+5)
MarshalUnmarshal/P256/Uncompressed-8   505ns ± 1%   506ns ± 0%     ~     (p=0.460 n=5+5)
MarshalUnmarshal/P256/Compressed-8    6.75µs ± 0%  6.73µs ± 0%   -0.27%  (p=0.016 n=5+5)
MarshalUnmarshal/P224/Uncompressed-8   927ns ± 0%   818ns ± 0%  -11.76%  (p=0.008 n=5+5)
MarshalUnmarshal/P224/Compressed-8     136µs ± 0%    96µs ± 0%  -29.58%  (p=0.008 n=5+5)
MarshalUnmarshal/P384/Uncompressed-8  1.77µs ± 0%  1.36µs ± 1%  -23.14%  (p=0.008 n=5+5)
MarshalUnmarshal/P384/Compressed-8    56.5µs ± 0%  31.9µs ± 0%  -43.59%  (p=0.016 n=5+4)
MarshalUnmarshal/P521/Uncompressed-8  2.91µs ± 0%  2.03µs ± 1%  -30.32%  (p=0.008 n=5+5)
MarshalUnmarshal/P521/Compressed-8     148µs ± 0%    68µs ± 1%  -54.28%  (p=0.008 n=5+5)

Change-Id: I33170360eb8279b998e3c559f7136717fe32e07d
Reviewed-on: https://go-review.googlesource.com/c/go/+/424907
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Eric Fang <eric.fang@arm.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
cf53990b18
commit
4c414c7673
@ -155,7 +155,7 @@ func schedule(f *Func) {
|
|||||||
// VARDEF ops are scheduled before the corresponding LEA.
|
// VARDEF ops are scheduled before the corresponding LEA.
|
||||||
score[v.ID] = ScoreMemory
|
score[v.ID] = ScoreMemory
|
||||||
case v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN:
|
case v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN:
|
||||||
if (v.Op == OpSelect1 || v.Op == OpSelect0) && (v.Args[0].Op.isCarry() || v.Type.IsFlags()) {
|
if (v.Op == OpSelect1 || v.Op == OpSelect0) && (v.Args[0].isCarry() || v.Type.IsFlags()) {
|
||||||
// When the Select pseudo op is being used for a carry or flag from
|
// When the Select pseudo op is being used for a carry or flag from
|
||||||
// a tuple then score it as ScoreFlags so it happens later. This
|
// a tuple then score it as ScoreFlags so it happens later. This
|
||||||
// prevents the bit from being clobbered before it is used.
|
// prevents the bit from being clobbered before it is used.
|
||||||
@ -163,8 +163,8 @@ func schedule(f *Func) {
|
|||||||
} else {
|
} else {
|
||||||
score[v.ID] = ScoreReadTuple
|
score[v.ID] = ScoreReadTuple
|
||||||
}
|
}
|
||||||
case v.Op.isCarry():
|
case v.isCarry():
|
||||||
if w := v.getCarryProducer(); w != nil {
|
if w := v.getCarryInput(); w != nil && w.Block == b {
|
||||||
// The producing op is not the final user of the carry bit. Its
|
// The producing op is not the final user of the carry bit. Its
|
||||||
// current score is one of unscored, Flags, or CarryChainTail.
|
// current score is one of unscored, Flags, or CarryChainTail.
|
||||||
// These occur if the producer has not been scored, another user
|
// These occur if the producer has not been scored, another user
|
||||||
@ -183,7 +183,7 @@ func schedule(f *Func) {
|
|||||||
// one chain to be scheduled, if possible.
|
// one chain to be scheduled, if possible.
|
||||||
score[v.ID] = ScoreCarryChainTail
|
score[v.ID] = ScoreCarryChainTail
|
||||||
}
|
}
|
||||||
case v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags():
|
case v.isFlagOp():
|
||||||
// Schedule flag register generation as late as possible.
|
// Schedule flag register generation as late as possible.
|
||||||
// This makes sure that we only have one live flags
|
// This makes sure that we only have one live flags
|
||||||
// value at a time.
|
// value at a time.
|
||||||
@ -192,7 +192,7 @@ func schedule(f *Func) {
|
|||||||
score[v.ID] = ScoreDefault
|
score[v.ID] = ScoreDefault
|
||||||
// If we're reading flags, schedule earlier to keep flag lifetime short.
|
// If we're reading flags, schedule earlier to keep flag lifetime short.
|
||||||
for _, a := range v.Args {
|
for _, a := range v.Args {
|
||||||
if a.Type.IsFlags() {
|
if a.isFlagOp() {
|
||||||
score[v.ID] = ScoreReadFlags
|
score[v.ID] = ScoreReadFlags
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -263,7 +263,6 @@ func schedule(f *Func) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// To put things into a priority queue
|
// To put things into a priority queue
|
||||||
@ -287,7 +286,7 @@ func schedule(f *Func) {
|
|||||||
|
|
||||||
v := heap.Pop(priq).(*Value)
|
v := heap.Pop(priq).(*Value)
|
||||||
|
|
||||||
if f.pass.debug > 1 && score[v.ID] == ScoreCarryChainTail && v.Op.isCarry() {
|
if f.pass.debug > 1 && score[v.ID] == ScoreCarryChainTail && v.isCarry() {
|
||||||
// Add some debugging noise if the chain of carrying ops will not
|
// Add some debugging noise if the chain of carrying ops will not
|
||||||
// likely be scheduled without potential carry flag clobbers.
|
// likely be scheduled without potential carry flag clobbers.
|
||||||
if !isCarryChainReady(v, uses) {
|
if !isCarryChainReady(v, uses) {
|
||||||
@ -551,39 +550,66 @@ func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value
|
|||||||
return order
|
return order
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return whether all dependent carry ops can be scheduled after this.
|
// isFlagOp reports whether v produces a flags value, either directly or as the second field of a tuple.
|
||||||
|
func (v *Value) isFlagOp() bool {
|
||||||
|
return v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags()
|
||||||
|
}
|
||||||
|
|
||||||
|
// isCarryChainReady reports whether all dependent carry ops can be scheduled after this.
|
||||||
func isCarryChainReady(v *Value, uses []int32) bool {
|
func isCarryChainReady(v *Value, uses []int32) bool {
|
||||||
// A chain can be scheduled in its entirety if
|
// A chain can be scheduled in its entirety if
|
||||||
// the use count of each dependent op is 1. If none,
|
// the use count of each dependent op is 1. If none,
|
||||||
// schedule the first.
|
// schedule the first.
|
||||||
j := 1 // The first op uses[k.ID] == 0. Dependent ops are always >= 1.
|
j := 1 // The first op uses[k.ID] == 0. Dependent ops are always >= 1.
|
||||||
for k := v; k != nil; k = k.getCarryProducer() {
|
for k := v; k != nil; k = k.getCarryInput() {
|
||||||
j += int(uses[k.ID]) - 1
|
j += int(uses[k.ID]) - 1
|
||||||
}
|
}
|
||||||
return j == 0
|
return j == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return whether op is an operation which produces a carry bit value, but does not consume it.
|
// isCarryInput reports whether v accepts a carry value as input.
|
||||||
func (op Op) isCarryCreator() bool {
|
func (v *Value) isCarryInput() bool {
|
||||||
switch op {
|
return v.getCarryInput() != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isCarryOutput reports whether v generates a carry as output.
|
||||||
|
func (v *Value) isCarryOutput() bool {
|
||||||
|
if v.isFlagOp() && v.Op != OpSelect1 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// special cases for PPC64 which put their carry values in XER instead of flags
|
||||||
|
switch v.Op {
|
||||||
case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
|
case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return whether op consumes or creates a carry a bit value.
|
// isCarryCreator reports whether v is an operation which produces a carry bit value,
|
||||||
func (op Op) isCarry() bool {
|
// but does not consume it.
|
||||||
switch op {
|
func (v *Value) isCarryCreator() bool {
|
||||||
case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
|
return v.isCarryOutput() && !v.isCarryInput()
|
||||||
return true
|
|
||||||
}
|
|
||||||
return op.isCarryCreator()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the producing *Value of the carry bit of this op, or nil if none.
|
// isCarry reports whether v consumes or creates a carry bit value.
|
||||||
func (v *Value) getCarryProducer() *Value {
|
func (v *Value) isCarry() bool {
|
||||||
if v.Op.isCarry() && !v.Op.isCarryCreator() {
|
return v.isCarryOutput() || v.isCarryInput()
|
||||||
|
}
|
||||||
|
|
||||||
|
// getCarryInput returns the producing *Value of the carry bit of this op, or nil if none.
|
||||||
|
func (v *Value) getCarryInput() *Value {
|
||||||
|
for _, a := range v.Args {
|
||||||
|
if !a.isFlagOp() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if a.Op == OpSelect1 {
|
||||||
|
a = a.Args[0]
|
||||||
|
}
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
// special cases for PPC64 which put their carry values in XER instead of flags
|
||||||
|
switch v.Op {
|
||||||
|
case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
|
||||||
// PPC64 carry dependencies are conveyed through their final argument.
|
// PPC64 carry dependencies are conveyed through their final argument.
|
||||||
// Likewise, there is always an OpSelect1 between them.
|
// Likewise, there is always an OpSelect1 between them.
|
||||||
return v.Args[len(v.Args)-1].Args[0]
|
return v.Args[len(v.Args)-1].Args[0]
|
||||||
|
@ -99,3 +99,62 @@ func TestStoreOrder(t *testing.T) {
|
|||||||
t.Errorf("store order is wrong: got %v, want v2 v3 v4 after v5", order)
|
t.Errorf("store order is wrong: got %v, want v2 v3 v4 after v5", order)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCarryChainOrder(t *testing.T) {
|
||||||
|
// In the function below, there are two carry chains that have no dependencies on each other,
|
||||||
|
// one is A1 -> A1carry -> A1Carryvalue, the other is A2 -> A2carry -> A2Carryvalue. If they
|
||||||
|
// are not scheduled properly, the carry will be clobbered, causing the carry to be regenerated.
|
||||||
|
c := testConfigARM64(t)
|
||||||
|
fun := c.Fun("entry",
|
||||||
|
Bloc("entry",
|
||||||
|
Valu("mem0", OpInitMem, types.TypeMem, 0, nil),
|
||||||
|
Valu("x", OpARM64MOVDconst, c.config.Types.UInt64, 5, nil),
|
||||||
|
Valu("y", OpARM64MOVDconst, c.config.Types.UInt64, 6, nil),
|
||||||
|
Valu("z", OpARM64MOVDconst, c.config.Types.UInt64, 7, nil),
|
||||||
|
Valu("A1", OpARM64ADDSflags, types.NewTuple(c.config.Types.UInt64, types.TypeFlags), 0, nil, "x", "z"), // x+z, set flags
|
||||||
|
Valu("A1carry", OpSelect1, types.TypeFlags, 0, nil, "A1"),
|
||||||
|
Valu("A2", OpARM64ADDSflags, types.NewTuple(c.config.Types.UInt64, types.TypeFlags), 0, nil, "y", "z"), // y+z, set flags
|
||||||
|
Valu("A2carry", OpSelect1, types.TypeFlags, 0, nil, "A2"),
|
||||||
|
Valu("A1value", OpSelect0, c.config.Types.UInt64, 0, nil, "A1"),
|
||||||
|
Valu("A1Carryvalue", OpARM64ADCzerocarry, c.config.Types.UInt64, 0, nil, "A1carry"), // 0+0+A1carry
|
||||||
|
Valu("A2value", OpSelect0, c.config.Types.UInt64, 0, nil, "A2"),
|
||||||
|
Valu("A2Carryvalue", OpARM64ADCzerocarry, c.config.Types.UInt64, 0, nil, "A2carry"), // 0+0+A2carry
|
||||||
|
Valu("ValueSum", OpARM64ADD, c.config.Types.UInt64, 0, nil, "A1value", "A2value"),
|
||||||
|
Valu("CarrySum", OpARM64ADD, c.config.Types.UInt64, 0, nil, "A1Carryvalue", "A2Carryvalue"),
|
||||||
|
Valu("Sum", OpARM64AND, c.config.Types.UInt64, 0, nil, "ValueSum", "CarrySum"),
|
||||||
|
Goto("exit")),
|
||||||
|
Bloc("exit",
|
||||||
|
Exit("mem0")),
|
||||||
|
)
|
||||||
|
|
||||||
|
CheckFunc(fun.f)
|
||||||
|
schedule(fun.f)
|
||||||
|
|
||||||
|
// The expected order is A1 < A1carry < A1Carryvalue < A2 < A2carry < A2Carryvalue.
|
||||||
|
// There is no dependency between the two carry chains, so it doesn't matter which
|
||||||
|
// comes first and which comes after, but the unsorted position of A1 is before A2,
|
||||||
|
// so A1Carryvalue < A2.
|
||||||
|
var ai, bi, ci, di, ei, fi int
|
||||||
|
for i, v := range fun.f.Blocks[0].Values {
|
||||||
|
switch {
|
||||||
|
case fun.values["A1"] == v:
|
||||||
|
ai = i
|
||||||
|
case fun.values["A1carry"] == v:
|
||||||
|
bi = i
|
||||||
|
case fun.values["A1Carryvalue"] == v:
|
||||||
|
ci = i
|
||||||
|
case fun.values["A2"] == v:
|
||||||
|
di = i
|
||||||
|
case fun.values["A2carry"] == v:
|
||||||
|
ei = i
|
||||||
|
case fun.values["A2Carryvalue"] == v:
|
||||||
|
fi = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !(ai < bi && bi < ci && ci < di && di < ei && ei < fi) {
|
||||||
|
t.Logf("Func: %s", fun.f)
|
||||||
|
t.Errorf("carry chain order is wrong: got %v, want V%d after V%d after V%d after V%d after V%d after V%d,",
|
||||||
|
fun.f.Blocks[0], fun.values["A1"].ID, fun.values["A1carry"].ID, fun.values["A1Carryvalue"].ID,
|
||||||
|
fun.values["A2"].ID, fun.values["A2carry"].ID, fun.values["A2Carryvalue"].ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user