cmd/compile/internal/gc: refactor cgen_div

This commit adds two new functions to cgen.go: hasHMUL64 and hasRROTC64. These are used to determine whether or not an architecture supports the instructions needed to perform an optimization in cgen_div. This commit should not affect existing architectures (although it does add s390x to the new functions). However, since most architectures support HMUL the hasHMUL64 function could be modified to enable most of the optimizations in cgen_div on those platforms. Change-Id: I33bf329ddeb6cf2954bd17b7c161012de352fb62 Reviewed-on: https://go-review.googlesource.com/21775 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2024-11-18 23:14:43 -07:00 · 2016-04-10 21:58:37 -04:00 · 2016-04-10 21:58:37 -04:00 · 2a4158207e
commit 2a4158207e
parent a00ad5f47e
1 changed files with 49 additions and 19 deletions
--- a/src/cmd/compile/internal/gc/cgen.go
+++ b/src/cmd/compile/internal/gc/cgen.go
@ -2622,24 +2622,48 @@ func cgen_ret(n *Node) {
 	}
 }

+// hasHMUL64 reports whether the architecture supports 64-bit
+// signed and unsigned high multiplication (OHMUL).
+func hasHMUL64() bool {
+	switch Ctxt.Arch.Family {
+	case sys.AMD64, sys.S390X:
+		return true
+	case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64:
+		return false
+	}
+	Fatalf("unknown architecture")
+	return false
+}
+
+// hasRROTC64 reports whether the architecture supports 64-bit
+// rotate through carry instructions (ORROTC).
+func hasRROTC64() bool {
+	switch Ctxt.Arch.Family {
+	case sys.AMD64:
+		return true
+	case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
+		return false
+	}
+	Fatalf("unknown architecture")
+	return false
+}
+
 // generate division according to op, one of:
 //	res = nl / nr
 //	res = nl % nr
 func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 	var w int

-	// TODO(rsc): arm64 needs to support the relevant instructions
-	// in peep and optoas in order to enable this.
-	// TODO(rsc): ppc64 needs to support the relevant instructions
-	// in peep and optoas in order to enable this.
-	if nr.Op != OLITERAL || Ctxt.Arch.Family == sys.MIPS64 || Ctxt.Arch.Family == sys.ARM64 || Ctxt.Arch.Family == sys.PPC64 {
+	// Architectures need to support 64-bit high multiplications
+	// (OHMUL) in order to perform divide by constant optimizations.
+	if nr.Op != OLITERAL || !hasHMUL64() {
 		goto longdiv
 	}
 	w = int(nl.Type.Width * 8)

 	// Front end handled 32-bit division. We only need to handle 64-bit.
-	// try to do division by multiply by (2^w)/d
-	// see hacker's delight chapter 10
+	// Try to do division using multiplication: (2^w)/d.
+	// See Hacker's Delight, chapter 10.
 	switch Simtype[nl.Type.Etype] {
 	default:
 		goto longdiv
@ -2652,6 +2676,17 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 		if m.Bad != 0 {
 			break
 		}
+
+		// In order to add the numerator we need to be able to
+		// avoid overflow. This is done by shifting the result of the
+		// addition right by 1 and inserting the carry bit into
+		// the MSB. For now this needs the RROTC instruction.
+		// TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes
+		// an alternative sequence of instructions for architectures
+		// that do not have a shift right with carry instruction.
+		if m.Ua != 0 && !hasRROTC64() {
+			goto longdiv
+		}
 		if op == OMOD {
 			goto longmod
 		}
@ -2665,7 +2700,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 		Thearch.Cgen_hmul(&n1, &n2, &n3)

 		if m.Ua != 0 {
-			// need to add numerator accounting for overflow
+			// Need to add numerator accounting for overflow.
 			Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)

 			Nodconst(&n2, nl.Type, 1)
@ -2703,7 +2738,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 		Thearch.Cgen_hmul(&n1, &n2, &n3)

 		if m.Sm < 0 {
-			// need to add numerator
+			// Need to add numerator (cannot overflow).
 			Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
 		}

@ -2716,8 +2751,8 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 		Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added

 		if m.Sd < 0 {
-			// this could probably be removed
-			// by factoring it into the multiplier
+			// This could probably be removed by factoring it into
+			// the multiplier.
 			Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3)
 		}

@ -2729,14 +2764,14 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {

 	goto longdiv

-	// division and mod using (slow) hardware instruction
+	// Division and mod using (slow) hardware instruction.
 longdiv:
 	Thearch.Dodiv(op, nl, nr, res)

 	return

-	// mod using formula A%B = A-(A/B*B) but
-	// we know that there is a fast algorithm for A/B
+	// Mod using formula A%B = A-(A/B*B) but
+	// we know that there is a fast algorithm for A/B.
 longmod:
 	var n1 Node
 	Regalloc(&n1, nl.Type, res)
@ -2746,11 +2781,6 @@ longmod:
 	Regalloc(&n2, nl.Type, nil)
 	cgen_div(ODIV, &n1, nr, &n2)
 	a := Thearch.Optoas(OMUL, nl.Type)
-	if w == 8 {
-		// use 2-operand 16-bit multiply
-		// because there is no 2-operand 8-bit multiply
-		a = Thearch.Optoas(OMUL, Types[TINT16]) // XXX was IMULW
-	}

 	if !Smallintconst(nr) {
 		var n3 Node