diff --git a/src/cmd/compile/internal/gc/cgen.go b/src/cmd/compile/internal/gc/cgen.go index a9cedf7cfc2..eacbc30f87c 100644 --- a/src/cmd/compile/internal/gc/cgen.go +++ b/src/cmd/compile/internal/gc/cgen.go @@ -2622,24 +2622,48 @@ func cgen_ret(n *Node) { } } +// hasHMUL64 reports whether the architecture supports 64-bit +// signed and unsigned high multiplication (OHMUL). +func hasHMUL64() bool { + switch Ctxt.Arch.Family { + case sys.AMD64, sys.S390X: + return true + case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64: + return false + } + Fatalf("unknown architecture") + return false +} + +// hasRROTC64 reports whether the architecture supports 64-bit +// rotate through carry instructions (ORROTC). +func hasRROTC64() bool { + switch Ctxt.Arch.Family { + case sys.AMD64: + return true + case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X: + return false + } + Fatalf("unknown architecture") + return false +} + // generate division according to op, one of: // res = nl / nr // res = nl % nr func cgen_div(op Op, nl *Node, nr *Node, res *Node) { var w int - // TODO(rsc): arm64 needs to support the relevant instructions - // in peep and optoas in order to enable this. - // TODO(rsc): ppc64 needs to support the relevant instructions - // in peep and optoas in order to enable this. - if nr.Op != OLITERAL || Ctxt.Arch.Family == sys.MIPS64 || Ctxt.Arch.Family == sys.ARM64 || Ctxt.Arch.Family == sys.PPC64 { + // Architectures need to support 64-bit high multiplications + // (OHMUL) in order to perform divide by constant optimizations. + if nr.Op != OLITERAL || !hasHMUL64() { goto longdiv } w = int(nl.Type.Width * 8) // Front end handled 32-bit division. We only need to handle 64-bit. - // try to do division by multiply by (2^w)/d - // see hacker's delight chapter 10 + // Try to do division by multiplying by (2^w)/d. + // See Hacker's Delight, chapter 10. 
switch Simtype[nl.Type.Etype] { default: goto longdiv @@ -2652,6 +2676,17 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { if m.Bad != 0 { break } + + // In order to add the numerator we need to be able to + // avoid overflow. This is done by shifting the result of the + // addition right by 1 and inserting the carry bit into + // the MSB. For now this needs the RROTC instruction. + // TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes + // an alternative sequence of instructions for architectures + // that do not have a shift right with carry instruction. + if m.Ua != 0 && !hasRROTC64() { + goto longdiv + } if op == OMOD { goto longmod } @@ -2665,7 +2700,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { Thearch.Cgen_hmul(&n1, &n2, &n3) if m.Ua != 0 { - // need to add numerator accounting for overflow + // Need to add numerator accounting for overflow. Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3) Nodconst(&n2, nl.Type, 1) @@ -2703,7 +2738,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { Thearch.Cgen_hmul(&n1, &n2, &n3) if m.Sm < 0 { - // need to add numerator + // Need to add numerator (cannot overflow). Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3) } @@ -2716,8 +2751,8 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added if m.Sd < 0 { - // this could probably be removed - // by factoring it into the multiplier + // This could probably be removed by factoring it into + // the multiplier. Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3) } @@ -2729,14 +2764,14 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { goto longdiv - // division and mod using (slow) hardware instruction + // Division and mod using (slow) hardware instruction. 
longdiv: Thearch.Dodiv(op, nl, nr, res) return - // mod using formula A%B = A-(A/B*B) but - // we know that there is a fast algorithm for A/B + // Mod using formula A%B = A-(A/B*B) but + // we know that there is a fast algorithm for A/B. longmod: var n1 Node Regalloc(&n1, nl.Type, res) @@ -2746,11 +2781,6 @@ longmod: Regalloc(&n2, nl.Type, nil) cgen_div(ODIV, &n1, nr, &n2) a := Thearch.Optoas(OMUL, nl.Type) - if w == 8 { - // use 2-operand 16-bit multiply - // because there is no 2-operand 8-bit multiply - a = Thearch.Optoas(OMUL, Types[TINT16]) // XXX was IMULW - } if !Smallintconst(nr) { var n3 Node