mirror of
https://github.com/golang/go
synced 2024-11-24 04:50:07 -07:00
cmd/compile/internal/gc: refactor cgen_div
This commit adds two new functions to cgen.go: hasHMUL64 and hasRROTC64. These are used to determine whether or not an architecture supports the instructions needed to perform an optimization in cgen_div.

This commit should not affect existing architectures (although it does add s390x to the new functions). However, since most architectures support HMUL, the hasHMUL64 function could be modified to enable most of the optimizations in cgen_div on those platforms.

Change-Id: I33bf329ddeb6cf2954bd17b7c161012de352fb62
Reviewed-on: https://go-review.googlesource.com/21775
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
a00ad5f47e
commit
2a4158207e
@ -2622,24 +2622,48 @@ func cgen_ret(n *Node) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hasHMUL64 reports whether the architecture supports 64-bit
|
||||||
|
// signed and unsigned high multiplication (OHMUL).
|
||||||
|
func hasHMUL64() bool {
|
||||||
|
switch Ctxt.Arch.Family {
|
||||||
|
case sys.AMD64, sys.S390X:
|
||||||
|
return true
|
||||||
|
case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
Fatalf("unknown architecture")
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// hasRROTC64 reports whether the architecture supports 64-bit
|
||||||
|
// rotate through carry instructions (ORROTC).
|
||||||
|
func hasRROTC64() bool {
|
||||||
|
switch Ctxt.Arch.Family {
|
||||||
|
case sys.AMD64:
|
||||||
|
return true
|
||||||
|
case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
Fatalf("unknown architecture")
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// generate division according to op, one of:
|
// generate division according to op, one of:
|
||||||
// res = nl / nr
|
// res = nl / nr
|
||||||
// res = nl % nr
|
// res = nl % nr
|
||||||
func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
|
func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
|
||||||
var w int
|
var w int
|
||||||
|
|
||||||
// TODO(rsc): arm64 needs to support the relevant instructions
|
// Architectures need to support 64-bit high multiplications
|
||||||
// in peep and optoas in order to enable this.
|
// (OHMUL) in order to perform divide by constant optimizations.
|
||||||
// TODO(rsc): ppc64 needs to support the relevant instructions
|
if nr.Op != OLITERAL || !hasHMUL64() {
|
||||||
// in peep and optoas in order to enable this.
|
|
||||||
if nr.Op != OLITERAL || Ctxt.Arch.Family == sys.MIPS64 || Ctxt.Arch.Family == sys.ARM64 || Ctxt.Arch.Family == sys.PPC64 {
|
|
||||||
goto longdiv
|
goto longdiv
|
||||||
}
|
}
|
||||||
w = int(nl.Type.Width * 8)
|
w = int(nl.Type.Width * 8)
|
||||||
|
|
||||||
// Front end handled 32-bit division. We only need to handle 64-bit.
|
// Front end handled 32-bit division. We only need to handle 64-bit.
|
||||||
// try to do division by multiply by (2^w)/d
|
// Try to do division using multiplication: (2^w)/d.
|
||||||
// see hacker's delight chapter 10
|
// See Hacker's Delight, chapter 10.
|
||||||
switch Simtype[nl.Type.Etype] {
|
switch Simtype[nl.Type.Etype] {
|
||||||
default:
|
default:
|
||||||
goto longdiv
|
goto longdiv
|
||||||
@ -2652,6 +2676,17 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
|
|||||||
if m.Bad != 0 {
|
if m.Bad != 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// In order to add the numerator we need to be able to
|
||||||
|
// avoid overflow. This is done by shifting the result of the
|
||||||
|
// addition right by 1 and inserting the carry bit into
|
||||||
|
// the MSB. For now this needs the RROTC instruction.
|
||||||
|
// TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes
|
||||||
|
// an alternative sequence of instructions for architectures
|
||||||
|
// that do not have a shift right with carry instruction.
|
||||||
|
if m.Ua != 0 && !hasRROTC64() {
|
||||||
|
goto longdiv
|
||||||
|
}
|
||||||
if op == OMOD {
|
if op == OMOD {
|
||||||
goto longmod
|
goto longmod
|
||||||
}
|
}
|
||||||
@ -2665,7 +2700,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
|
|||||||
Thearch.Cgen_hmul(&n1, &n2, &n3)
|
Thearch.Cgen_hmul(&n1, &n2, &n3)
|
||||||
|
|
||||||
if m.Ua != 0 {
|
if m.Ua != 0 {
|
||||||
// need to add numerator accounting for overflow
|
// Need to add numerator accounting for overflow.
|
||||||
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
|
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
|
||||||
|
|
||||||
Nodconst(&n2, nl.Type, 1)
|
Nodconst(&n2, nl.Type, 1)
|
||||||
@ -2703,7 +2738,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
|
|||||||
Thearch.Cgen_hmul(&n1, &n2, &n3)
|
Thearch.Cgen_hmul(&n1, &n2, &n3)
|
||||||
|
|
||||||
if m.Sm < 0 {
|
if m.Sm < 0 {
|
||||||
// need to add numerator
|
// Need to add numerator (cannot overflow).
|
||||||
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
|
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2716,8 +2751,8 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
|
|||||||
Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added
|
Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added
|
||||||
|
|
||||||
if m.Sd < 0 {
|
if m.Sd < 0 {
|
||||||
// this could probably be removed
|
// This could probably be removed by factoring it into
|
||||||
// by factoring it into the multiplier
|
// the multiplier.
|
||||||
Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3)
|
Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2729,14 +2764,14 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
|
|||||||
|
|
||||||
goto longdiv
|
goto longdiv
|
||||||
|
|
||||||
// division and mod using (slow) hardware instruction
|
// Division and mod using (slow) hardware instruction.
|
||||||
longdiv:
|
longdiv:
|
||||||
Thearch.Dodiv(op, nl, nr, res)
|
Thearch.Dodiv(op, nl, nr, res)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
// mod using formula A%B = A-(A/B*B) but
|
// Mod using formula A%B = A-(A/B*B) but
|
||||||
// we know that there is a fast algorithm for A/B
|
// we know that there is a fast algorithm for A/B.
|
||||||
longmod:
|
longmod:
|
||||||
var n1 Node
|
var n1 Node
|
||||||
Regalloc(&n1, nl.Type, res)
|
Regalloc(&n1, nl.Type, res)
|
||||||
@ -2746,11 +2781,6 @@ longmod:
|
|||||||
Regalloc(&n2, nl.Type, nil)
|
Regalloc(&n2, nl.Type, nil)
|
||||||
cgen_div(ODIV, &n1, nr, &n2)
|
cgen_div(ODIV, &n1, nr, &n2)
|
||||||
a := Thearch.Optoas(OMUL, nl.Type)
|
a := Thearch.Optoas(OMUL, nl.Type)
|
||||||
if w == 8 {
|
|
||||||
// use 2-operand 16-bit multiply
|
|
||||||
// because there is no 2-operand 8-bit multiply
|
|
||||||
a = Thearch.Optoas(OMUL, Types[TINT16]) // XXX was IMULW
|
|
||||||
}
|
|
||||||
|
|
||||||
if !Smallintconst(nr) {
|
if !Smallintconst(nr) {
|
||||||
var n3 Node
|
var n3 Node
|
||||||
|
Loading…
Reference in New Issue
Block a user