From a45f2d8f2898d23804de473841d42670fcdda5dc Mon Sep 17 00:00:00 2001
From: Todd Neal <todd@tneal.org>
Date: Mon, 17 Aug 2015 17:46:06 -0500
Subject: [PATCH] [dev.ssa] cmd/compile/internal/ssa: implement ODIV

Implement integer division for non-constant operands.

Change-Id: If40cbde20e5f0ebb9993064def7be468e4eca076
Reviewed-on: https://go-review.googlesource.com/13644
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/cmd/compile/internal/gc/ssa.go            |  80 +++
 src/cmd/compile/internal/gc/ssa_test.go       |   3 +
 .../internal/gc/testdata/arithBoundary_ssa.go | 640 ++++++++++++++++++
 src/cmd/compile/internal/ssa/TODO             |   3 +-
 src/cmd/compile/internal/ssa/gen/AMD64.rules  |   9 +
 src/cmd/compile/internal/ssa/gen/AMD64Ops.go  |  12 +
 .../compile/internal/ssa/gen/genericOps.go    |   9 +
 src/cmd/compile/internal/ssa/opGen.go         | 130 ++++
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 156 +++++
 9 files changed, 1041 insertions(+), 1 deletion(-)
 create mode 100644 src/cmd/compile/internal/gc/testdata/arithBoundary_ssa.go

diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index ef90ed40e7..90b29b9b09 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -779,6 +779,15 @@ var opToSSA = map[opAndType]ssa.Op{
 	opAndType{ODIV, TFLOAT32}: ssa.OpDiv32F,
 	opAndType{ODIV, TFLOAT64}: ssa.OpDiv64F,
 
+	opAndType{ODIV, TINT8}:   ssa.OpDiv8,
+	opAndType{ODIV, TUINT8}:  ssa.OpDiv8u,
+	opAndType{ODIV, TINT16}:  ssa.OpDiv16,
+	opAndType{ODIV, TUINT16}: ssa.OpDiv16u,
+	opAndType{ODIV, TINT32}:  ssa.OpDiv32,
+	opAndType{ODIV, TUINT32}: ssa.OpDiv32u,
+	opAndType{ODIV, TINT64}:  ssa.OpDiv64,
+	opAndType{ODIV, TUINT64}: ssa.OpDiv64u,
+
 	opAndType{OAND, TINT8}:   ssa.OpAnd8,
 	opAndType{OAND, TUINT8}:  ssa.OpAnd8,
 	opAndType{OAND, TINT16}:  ssa.OpAnd16,
@@ -2019,6 +2028,77 @@ func genValue(v *ssa.Value) {
 		}
 		opregreg(v.Op.Asm(), r, y)
 
+	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW,
+		ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
+
+		// Args[0] (the dividend) is already in AX: it's the only input
+		// register we allow for it, and AX is also the only output.
+		x := regnum(v.Args[1])
+
+		// CPU faults upon signed overflow, which occurs when the most
+		// negative int is divided by -1.  So we check for a divisor of
+		// -1 and negate the dividend instead of executing the divide.
+		var j *obj.Prog
+		if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
+			v.Op == ssa.OpAMD64DIVW {
+
+			var c *obj.Prog
+			switch v.Op {
+			case ssa.OpAMD64DIVQ:
+				c = Prog(x86.ACMPQ)
+			case ssa.OpAMD64DIVL:
+				c = Prog(x86.ACMPL)
+			case ssa.OpAMD64DIVW:
+				c = Prog(x86.ACMPW)
+			}
+			c.From.Type = obj.TYPE_REG
+			c.From.Reg = x
+			c.To.Type = obj.TYPE_CONST
+			c.To.Offset = -1
+
+			j = Prog(x86.AJEQ)
+			j.To.Type = obj.TYPE_BRANCH
+
+		}
+
+		// dividend is in ax, so we extend it into dx:ax for the DIV
+		// input: zero dx for unsigned ops, sign extend for signed ops
+		switch v.Op {
+		case ssa.OpAMD64DIVQU:
+			fallthrough
+		case ssa.OpAMD64DIVLU:
+			fallthrough
+		case ssa.OpAMD64DIVWU:
+			c := Prog(x86.AXORQ)
+			c.From.Type = obj.TYPE_REG
+			c.From.Reg = x86.REG_DX
+			c.To.Type = obj.TYPE_REG
+			c.To.Reg = x86.REG_DX
+		case ssa.OpAMD64DIVQ:
+			Prog(x86.ACQO)
+		case ssa.OpAMD64DIVL:
+			Prog(x86.ACDQ)
+		case ssa.OpAMD64DIVW:
+			Prog(x86.ACWD)
+		}
+
+		p := Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = x
+
+		// signed division, rest of the -1 check: the divide path skips the negate of AX
+		if j != nil {
+			j2 := Prog(obj.AJMP)
+			j2.To.Type = obj.TYPE_BRANCH
+
+			n := Prog(x86.ANEGQ)
+			n.To.Type = obj.TYPE_REG
+			n.To.Reg = x86.REG_AX
+
+			j.To.Val = n
+			j2.To.Val = Pc
+		}
+
 	case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB,
 		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
 		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB:
diff --git a/src/cmd/compile/internal/gc/ssa_test.go b/src/cmd/compile/internal/gc/ssa_test.go
index f0060cb12d..d4dfa5d5bf 100644
--- a/src/cmd/compile/internal/gc/ssa_test.go
+++ b/src/cmd/compile/internal/gc/ssa_test.go
@@ -48,3 +48,6 @@ func TestArithmetic(t *testing.T) { runTest(t, "arith_ssa.go") }
 
 // TestFP tests that both backends have the same result for floating point expressions.
 func TestFP(t *testing.T) { runTest(t, "fp_ssa.go") }
+
+// TestArithmeticBoundary tests boundary results for arithmetic operations.
+func TestArithmeticBoundary(t *testing.T) { runTest(t, "arithBoundary_ssa.go") }
diff --git a/src/cmd/compile/internal/gc/testdata/arithBoundary_ssa.go b/src/cmd/compile/internal/gc/testdata/arithBoundary_ssa.go
new file mode 100644
index 0000000000..8f84026a5d
--- /dev/null
+++ b/src/cmd/compile/internal/gc/testdata/arithBoundary_ssa.go
@@ -0,0 +1,640 @@
+package main
+
+import "fmt"
+
+type utd64 struct {
+	a, b               uint64
+	add, sub, mul, div uint64
+}
+type itd64 struct {
+	a, b               int64
+	add, sub, mul, div int64
+}
+type utd32 struct {
+	a, b               uint32
+	add, sub, mul, div uint32
+}
+type itd32 struct {
+	a, b               int32
+	add, sub, mul, div int32
+}
+type utd16 struct {
+	a, b               uint16
+	add, sub, mul, div uint16
+}
+type itd16 struct {
+	a, b               int16
+	add, sub, mul, div int16
+}
+type utd8 struct {
+	a, b               uint8
+	add, sub, mul, div uint8
+}
+type itd8 struct {
+	a, b               int8
+	add, sub, mul, div int8
+}
+
+func add_uint64_ssa(a, b uint64) uint64 {
+	switch {
+	}
+	return a + b
+}
+func sub_uint64_ssa(a, b uint64) uint64 {
+	switch {
+	}
+	return a - b
+}
+func div_uint64_ssa(a, b uint64) uint64 {
+	switch {
+	}
+	return a / b
+}
+func mul_uint64_ssa(a, b uint64) uint64 {
+	switch {
+	}
+	return a * b
+}
+func add_int64_ssa(a, b int64) int64 {
+	switch {
+	}
+	return a + b
+}
+func sub_int64_ssa(a, b int64) int64 {
+	switch {
+	}
+	return a - b
+}
+func div_int64_ssa(a, b int64) int64 {
+	switch {
+	}
+	return a / b
+}
+func mul_int64_ssa(a, b int64) int64 {
+	switch {
+	}
+	return a * b
+}
+func add_uint32_ssa(a, b uint32) uint32 {
+	switch {
+	}
+	return a + b
+}
+func sub_uint32_ssa(a, b uint32) uint32 {
+	switch {
+	}
+	return a - b
+}
+func div_uint32_ssa(a, b uint32) uint32 {
+	switch {
+	}
+	return a / b
+}
+func mul_uint32_ssa(a, b uint32) uint32 {
+	switch {
+	}
+	return a * b
+}
+func add_int32_ssa(a, b int32) int32 {
+	switch {
+	}
+	return a + b
+}
+func sub_int32_ssa(a, b int32) int32 {
+	switch {
+	}
+	return a - b
+}
+func div_int32_ssa(a, b int32) int32 {
+	switch {
+	}
+	return a / b
+}
+func mul_int32_ssa(a, b int32) int32 {
+	switch {
+	}
+	return a * b
+}
+func add_uint16_ssa(a, b uint16) uint16 {
+	switch {
+	}
+	return a + b
+}
+func sub_uint16_ssa(a, b uint16) uint16 {
+	switch {
+	}
+	return a - b
+}
+func div_uint16_ssa(a, b uint16) uint16 {
+	switch {
+	}
+	return a / b
+}
+func mul_uint16_ssa(a, b uint16) uint16 {
+	switch {
+	}
+	return a * b
+}
+func add_int16_ssa(a, b int16) int16 {
+	switch {
+	}
+	return a + b
+}
+func sub_int16_ssa(a, b int16) int16 {
+	switch {
+	}
+	return a - b
+}
+func div_int16_ssa(a, b int16) int16 {
+	switch {
+	}
+	return a / b
+}
+func mul_int16_ssa(a, b int16) int16 {
+	switch {
+	}
+	return a * b
+}
+func add_uint8_ssa(a, b uint8) uint8 {
+	switch {
+	}
+	return a + b
+}
+func sub_uint8_ssa(a, b uint8) uint8 {
+	switch {
+	}
+	return a - b
+}
+func div_uint8_ssa(a, b uint8) uint8 {
+	switch {
+	}
+	return a / b
+}
+func mul_uint8_ssa(a, b uint8) uint8 {
+	switch {
+	}
+	return a * b
+}
+func add_int8_ssa(a, b int8) int8 {
+	switch {
+	}
+	return a + b
+}
+func sub_int8_ssa(a, b int8) int8 {
+	switch {
+	}
+	return a - b
+}
+func div_int8_ssa(a, b int8) int8 {
+	switch {
+	}
+	return a / b
+}
+func mul_int8_ssa(a, b int8) int8 {
+	switch {
+	}
+	return a * b
+}
+
+var uint64_data []utd64 = []utd64{utd64{a: 0, b: 0, add: 0, sub: 0, mul: 0},
+	utd64{a: 0, b: 1, add: 1, sub: 18446744073709551615, mul: 0, div: 0},
+	utd64{a: 0, b: 4294967296, add: 4294967296, sub: 18446744069414584320, mul: 0, div: 0},
+	utd64{a: 0, b: 18446744073709551615, add: 18446744073709551615, sub: 1, mul: 0, div: 0},
+	utd64{a: 1, b: 0, add: 1, sub: 1, mul: 0},
+	utd64{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1},
+	utd64{a: 1, b: 4294967296, add: 4294967297, sub: 18446744069414584321, mul: 4294967296, div: 0},
+	utd64{a: 1, b: 18446744073709551615, add: 0, sub: 2, mul: 18446744073709551615, div: 0},
+	utd64{a: 4294967296, b: 0, add: 4294967296, sub: 4294967296, mul: 0},
+	utd64{a: 4294967296, b: 1, add: 4294967297, sub: 4294967295, mul: 4294967296, div: 4294967296},
+	utd64{a: 4294967296, b: 4294967296, add: 8589934592, sub: 0, mul: 0, div: 1},
+	utd64{a: 4294967296, b: 18446744073709551615, add: 4294967295, sub: 4294967297, mul: 18446744069414584320, div: 0},
+	utd64{a: 18446744073709551615, b: 0, add: 18446744073709551615, sub: 18446744073709551615, mul: 0},
+	utd64{a: 18446744073709551615, b: 1, add: 0, sub: 18446744073709551614, mul: 18446744073709551615, div: 18446744073709551615},
+	utd64{a: 18446744073709551615, b: 4294967296, add: 4294967295, sub: 18446744069414584319, mul: 18446744069414584320, div: 4294967295},
+	utd64{a: 18446744073709551615, b: 18446744073709551615, add: 18446744073709551614, sub: 0, mul: 1, div: 1},
+}
+var int64_data []itd64 = []itd64{itd64{a: -9223372036854775808, b: -9223372036854775808, add: 0, sub: 0, mul: 0, div: 1},
+	itd64{a: -9223372036854775808, b: -9223372036854775807, add: 1, sub: -1, mul: -9223372036854775808, div: 1},
+	itd64{a: -9223372036854775808, b: -4294967296, add: 9223372032559808512, sub: -9223372032559808512, mul: 0, div: 2147483648},
+	itd64{a: -9223372036854775808, b: -1, add: 9223372036854775807, sub: -9223372036854775807, mul: -9223372036854775808, div: -9223372036854775808},
+	itd64{a: -9223372036854775808, b: 0, add: -9223372036854775808, sub: -9223372036854775808, mul: 0},
+	itd64{a: -9223372036854775808, b: 1, add: -9223372036854775807, sub: 9223372036854775807, mul: -9223372036854775808, div: -9223372036854775808},
+	itd64{a: -9223372036854775808, b: 4294967296, add: -9223372032559808512, sub: 9223372032559808512, mul: 0, div: -2147483648},
+	itd64{a: -9223372036854775808, b: 9223372036854775806, add: -2, sub: 2, mul: 0, div: -1},
+	itd64{a: -9223372036854775808, b: 9223372036854775807, add: -1, sub: 1, mul: -9223372036854775808, div: -1},
+	itd64{a: -9223372036854775807, b: -9223372036854775808, add: 1, sub: 1, mul: -9223372036854775808, div: 0},
+	itd64{a: -9223372036854775807, b: -9223372036854775807, add: 2, sub: 0, mul: 1, div: 1},
+	itd64{a: -9223372036854775807, b: -4294967296, add: 9223372032559808513, sub: -9223372032559808511, mul: -4294967296, div: 2147483647},
+	itd64{a: -9223372036854775807, b: -1, add: -9223372036854775808, sub: -9223372036854775806, mul: 9223372036854775807, div: 9223372036854775807},
+	itd64{a: -9223372036854775807, b: 0, add: -9223372036854775807, sub: -9223372036854775807, mul: 0},
+	itd64{a: -9223372036854775807, b: 1, add: -9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: -9223372036854775807},
+	itd64{a: -9223372036854775807, b: 4294967296, add: -9223372032559808511, sub: 9223372032559808513, mul: 4294967296, div: -2147483647},
+	itd64{a: -9223372036854775807, b: 9223372036854775806, add: -1, sub: 3, mul: 9223372036854775806, div: -1},
+	itd64{a: -9223372036854775807, b: 9223372036854775807, add: 0, sub: 2, mul: -1, div: -1},
+	itd64{a: -4294967296, b: -9223372036854775808, add: 9223372032559808512, sub: 9223372032559808512, mul: 0, div: 0},
+	itd64{a: -4294967296, b: -9223372036854775807, add: 9223372032559808513, sub: 9223372032559808511, mul: -4294967296, div: 0},
+	itd64{a: -4294967296, b: -4294967296, add: -8589934592, sub: 0, mul: 0, div: 1},
+	itd64{a: -4294967296, b: -1, add: -4294967297, sub: -4294967295, mul: 4294967296, div: 4294967296},
+	itd64{a: -4294967296, b: 0, add: -4294967296, sub: -4294967296, mul: 0},
+	itd64{a: -4294967296, b: 1, add: -4294967295, sub: -4294967297, mul: -4294967296, div: -4294967296},
+	itd64{a: -4294967296, b: 4294967296, add: 0, sub: -8589934592, mul: 0, div: -1},
+	itd64{a: -4294967296, b: 9223372036854775806, add: 9223372032559808510, sub: 9223372032559808514, mul: 8589934592, div: 0},
+	itd64{a: -4294967296, b: 9223372036854775807, add: 9223372032559808511, sub: 9223372032559808513, mul: 4294967296, div: 0},
+	itd64{a: -1, b: -9223372036854775808, add: 9223372036854775807, sub: 9223372036854775807, mul: -9223372036854775808, div: 0},
+	itd64{a: -1, b: -9223372036854775807, add: -9223372036854775808, sub: 9223372036854775806, mul: 9223372036854775807, div: 0},
+	itd64{a: -1, b: -4294967296, add: -4294967297, sub: 4294967295, mul: 4294967296, div: 0},
+	itd64{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1},
+	itd64{a: -1, b: 0, add: -1, sub: -1, mul: 0},
+	itd64{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1},
+	itd64{a: -1, b: 4294967296, add: 4294967295, sub: -4294967297, mul: -4294967296, div: 0},
+	itd64{a: -1, b: 9223372036854775806, add: 9223372036854775805, sub: -9223372036854775807, mul: -9223372036854775806, div: 0},
+	itd64{a: -1, b: 9223372036854775807, add: 9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: 0},
+	itd64{a: 0, b: -9223372036854775808, add: -9223372036854775808, sub: -9223372036854775808, mul: 0, div: 0},
+	itd64{a: 0, b: -9223372036854775807, add: -9223372036854775807, sub: 9223372036854775807, mul: 0, div: 0},
+	itd64{a: 0, b: -4294967296, add: -4294967296, sub: 4294967296, mul: 0, div: 0},
+	itd64{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0},
+	itd64{a: 0, b: 0, add: 0, sub: 0, mul: 0},
+	itd64{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0},
+	itd64{a: 0, b: 4294967296, add: 4294967296, sub: -4294967296, mul: 0, div: 0},
+	itd64{a: 0, b: 9223372036854775806, add: 9223372036854775806, sub: -9223372036854775806, mul: 0, div: 0},
+	itd64{a: 0, b: 9223372036854775807, add: 9223372036854775807, sub: -9223372036854775807, mul: 0, div: 0},
+	itd64{a: 1, b: -9223372036854775808, add: -9223372036854775807, sub: -9223372036854775807, mul: -9223372036854775808, div: 0},
+	itd64{a: 1, b: -9223372036854775807, add: -9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: 0},
+	itd64{a: 1, b: -4294967296, add: -4294967295, sub: 4294967297, mul: -4294967296, div: 0},
+	itd64{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1},
+	itd64{a: 1, b: 0, add: 1, sub: 1, mul: 0},
+	itd64{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1},
+	itd64{a: 1, b: 4294967296, add: 4294967297, sub: -4294967295, mul: 4294967296, div: 0},
+	itd64{a: 1, b: 9223372036854775806, add: 9223372036854775807, sub: -9223372036854775805, mul: 9223372036854775806, div: 0},
+	itd64{a: 1, b: 9223372036854775807, add: -9223372036854775808, sub: -9223372036854775806, mul: 9223372036854775807, div: 0},
+	itd64{a: 4294967296, b: -9223372036854775808, add: -9223372032559808512, sub: -9223372032559808512, mul: 0, div: 0},
+	itd64{a: 4294967296, b: -9223372036854775807, add: -9223372032559808511, sub: -9223372032559808513, mul: 4294967296, div: 0},
+	itd64{a: 4294967296, b: -4294967296, add: 0, sub: 8589934592, mul: 0, div: -1},
+	itd64{a: 4294967296, b: -1, add: 4294967295, sub: 4294967297, mul: -4294967296, div: -4294967296},
+	itd64{a: 4294967296, b: 0, add: 4294967296, sub: 4294967296, mul: 0},
+	itd64{a: 4294967296, b: 1, add: 4294967297, sub: 4294967295, mul: 4294967296, div: 4294967296},
+	itd64{a: 4294967296, b: 4294967296, add: 8589934592, sub: 0, mul: 0, div: 1},
+	itd64{a: 4294967296, b: 9223372036854775806, add: -9223372032559808514, sub: -9223372032559808510, mul: -8589934592, div: 0},
+	itd64{a: 4294967296, b: 9223372036854775807, add: -9223372032559808513, sub: -9223372032559808511, mul: -4294967296, div: 0},
+	itd64{a: 9223372036854775806, b: -9223372036854775808, add: -2, sub: -2, mul: 0, div: 0},
+	itd64{a: 9223372036854775806, b: -9223372036854775807, add: -1, sub: -3, mul: 9223372036854775806, div: 0},
+	itd64{a: 9223372036854775806, b: -4294967296, add: 9223372032559808510, sub: -9223372032559808514, mul: 8589934592, div: -2147483647},
+	itd64{a: 9223372036854775806, b: -1, add: 9223372036854775805, sub: 9223372036854775807, mul: -9223372036854775806, div: -9223372036854775806},
+	itd64{a: 9223372036854775806, b: 0, add: 9223372036854775806, sub: 9223372036854775806, mul: 0},
+	itd64{a: 9223372036854775806, b: 1, add: 9223372036854775807, sub: 9223372036854775805, mul: 9223372036854775806, div: 9223372036854775806},
+	itd64{a: 9223372036854775806, b: 4294967296, add: -9223372032559808514, sub: 9223372032559808510, mul: -8589934592, div: 2147483647},
+	itd64{a: 9223372036854775806, b: 9223372036854775806, add: -4, sub: 0, mul: 4, div: 1},
+	itd64{a: 9223372036854775806, b: 9223372036854775807, add: -3, sub: -1, mul: -9223372036854775806, div: 0},
+	itd64{a: 9223372036854775807, b: -9223372036854775808, add: -1, sub: -1, mul: -9223372036854775808, div: 0},
+	itd64{a: 9223372036854775807, b: -9223372036854775807, add: 0, sub: -2, mul: -1, div: -1},
+	itd64{a: 9223372036854775807, b: -4294967296, add: 9223372032559808511, sub: -9223372032559808513, mul: 4294967296, div: -2147483647},
+	itd64{a: 9223372036854775807, b: -1, add: 9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: -9223372036854775807},
+	itd64{a: 9223372036854775807, b: 0, add: 9223372036854775807, sub: 9223372036854775807, mul: 0},
+	itd64{a: 9223372036854775807, b: 1, add: -9223372036854775808, sub: 9223372036854775806, mul: 9223372036854775807, div: 9223372036854775807},
+	itd64{a: 9223372036854775807, b: 4294967296, add: -9223372032559808513, sub: 9223372032559808511, mul: -4294967296, div: 2147483647},
+	itd64{a: 9223372036854775807, b: 9223372036854775806, add: -3, sub: 1, mul: -9223372036854775806, div: 1},
+	itd64{a: 9223372036854775807, b: 9223372036854775807, add: -2, sub: 0, mul: 1, div: 1},
+}
+var uint32_data []utd32 = []utd32{utd32{a: 0, b: 0, add: 0, sub: 0, mul: 0},
+	utd32{a: 0, b: 1, add: 1, sub: 4294967295, mul: 0, div: 0},
+	utd32{a: 0, b: 4294967295, add: 4294967295, sub: 1, mul: 0, div: 0},
+	utd32{a: 1, b: 0, add: 1, sub: 1, mul: 0},
+	utd32{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1},
+	utd32{a: 1, b: 4294967295, add: 0, sub: 2, mul: 4294967295, div: 0},
+	utd32{a: 4294967295, b: 0, add: 4294967295, sub: 4294967295, mul: 0},
+	utd32{a: 4294967295, b: 1, add: 0, sub: 4294967294, mul: 4294967295, div: 4294967295},
+	utd32{a: 4294967295, b: 4294967295, add: 4294967294, sub: 0, mul: 1, div: 1},
+}
+var int32_data []itd32 = []itd32{itd32{a: -2147483648, b: -2147483648, add: 0, sub: 0, mul: 0, div: 1},
+	itd32{a: -2147483648, b: -2147483647, add: 1, sub: -1, mul: -2147483648, div: 1},
+	itd32{a: -2147483648, b: -1, add: 2147483647, sub: -2147483647, mul: -2147483648, div: -2147483648},
+	itd32{a: -2147483648, b: 0, add: -2147483648, sub: -2147483648, mul: 0},
+	itd32{a: -2147483648, b: 1, add: -2147483647, sub: 2147483647, mul: -2147483648, div: -2147483648},
+	itd32{a: -2147483648, b: 2147483647, add: -1, sub: 1, mul: -2147483648, div: -1},
+	itd32{a: -2147483647, b: -2147483648, add: 1, sub: 1, mul: -2147483648, div: 0},
+	itd32{a: -2147483647, b: -2147483647, add: 2, sub: 0, mul: 1, div: 1},
+	itd32{a: -2147483647, b: -1, add: -2147483648, sub: -2147483646, mul: 2147483647, div: 2147483647},
+	itd32{a: -2147483647, b: 0, add: -2147483647, sub: -2147483647, mul: 0},
+	itd32{a: -2147483647, b: 1, add: -2147483646, sub: -2147483648, mul: -2147483647, div: -2147483647},
+	itd32{a: -2147483647, b: 2147483647, add: 0, sub: 2, mul: -1, div: -1},
+	itd32{a: -1, b: -2147483648, add: 2147483647, sub: 2147483647, mul: -2147483648, div: 0},
+	itd32{a: -1, b: -2147483647, add: -2147483648, sub: 2147483646, mul: 2147483647, div: 0},
+	itd32{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1},
+	itd32{a: -1, b: 0, add: -1, sub: -1, mul: 0},
+	itd32{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1},
+	itd32{a: -1, b: 2147483647, add: 2147483646, sub: -2147483648, mul: -2147483647, div: 0},
+	itd32{a: 0, b: -2147483648, add: -2147483648, sub: -2147483648, mul: 0, div: 0},
+	itd32{a: 0, b: -2147483647, add: -2147483647, sub: 2147483647, mul: 0, div: 0},
+	itd32{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0},
+	itd32{a: 0, b: 0, add: 0, sub: 0, mul: 0},
+	itd32{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0},
+	itd32{a: 0, b: 2147483647, add: 2147483647, sub: -2147483647, mul: 0, div: 0},
+	itd32{a: 1, b: -2147483648, add: -2147483647, sub: -2147483647, mul: -2147483648, div: 0},
+	itd32{a: 1, b: -2147483647, add: -2147483646, sub: -2147483648, mul: -2147483647, div: 0},
+	itd32{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1},
+	itd32{a: 1, b: 0, add: 1, sub: 1, mul: 0},
+	itd32{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1},
+	itd32{a: 1, b: 2147483647, add: -2147483648, sub: -2147483646, mul: 2147483647, div: 0},
+	itd32{a: 2147483647, b: -2147483648, add: -1, sub: -1, mul: -2147483648, div: 0},
+	itd32{a: 2147483647, b: -2147483647, add: 0, sub: -2, mul: -1, div: -1},
+	itd32{a: 2147483647, b: -1, add: 2147483646, sub: -2147483648, mul: -2147483647, div: -2147483647},
+	itd32{a: 2147483647, b: 0, add: 2147483647, sub: 2147483647, mul: 0},
+	itd32{a: 2147483647, b: 1, add: -2147483648, sub: 2147483646, mul: 2147483647, div: 2147483647},
+	itd32{a: 2147483647, b: 2147483647, add: -2, sub: 0, mul: 1, div: 1},
+}
+var uint16_data []utd16 = []utd16{utd16{a: 0, b: 0, add: 0, sub: 0, mul: 0},
+	utd16{a: 0, b: 1, add: 1, sub: 65535, mul: 0, div: 0},
+	utd16{a: 0, b: 65535, add: 65535, sub: 1, mul: 0, div: 0},
+	utd16{a: 1, b: 0, add: 1, sub: 1, mul: 0},
+	utd16{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1},
+	utd16{a: 1, b: 65535, add: 0, sub: 2, mul: 65535, div: 0},
+	utd16{a: 65535, b: 0, add: 65535, sub: 65535, mul: 0},
+	utd16{a: 65535, b: 1, add: 0, sub: 65534, mul: 65535, div: 65535},
+	utd16{a: 65535, b: 65535, add: 65534, sub: 0, mul: 1, div: 1},
+}
+var int16_data []itd16 = []itd16{itd16{a: -32768, b: -32768, add: 0, sub: 0, mul: 0, div: 1},
+	itd16{a: -32768, b: -32767, add: 1, sub: -1, mul: -32768, div: 1},
+	itd16{a: -32768, b: -1, add: 32767, sub: -32767, mul: -32768, div: -32768},
+	itd16{a: -32768, b: 0, add: -32768, sub: -32768, mul: 0},
+	itd16{a: -32768, b: 1, add: -32767, sub: 32767, mul: -32768, div: -32768},
+	itd16{a: -32768, b: 32766, add: -2, sub: 2, mul: 0, div: -1},
+	itd16{a: -32768, b: 32767, add: -1, sub: 1, mul: -32768, div: -1},
+	itd16{a: -32767, b: -32768, add: 1, sub: 1, mul: -32768, div: 0},
+	itd16{a: -32767, b: -32767, add: 2, sub: 0, mul: 1, div: 1},
+	itd16{a: -32767, b: -1, add: -32768, sub: -32766, mul: 32767, div: 32767},
+	itd16{a: -32767, b: 0, add: -32767, sub: -32767, mul: 0},
+	itd16{a: -32767, b: 1, add: -32766, sub: -32768, mul: -32767, div: -32767},
+	itd16{a: -32767, b: 32766, add: -1, sub: 3, mul: 32766, div: -1},
+	itd16{a: -32767, b: 32767, add: 0, sub: 2, mul: -1, div: -1},
+	itd16{a: -1, b: -32768, add: 32767, sub: 32767, mul: -32768, div: 0},
+	itd16{a: -1, b: -32767, add: -32768, sub: 32766, mul: 32767, div: 0},
+	itd16{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1},
+	itd16{a: -1, b: 0, add: -1, sub: -1, mul: 0},
+	itd16{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1},
+	itd16{a: -1, b: 32766, add: 32765, sub: -32767, mul: -32766, div: 0},
+	itd16{a: -1, b: 32767, add: 32766, sub: -32768, mul: -32767, div: 0},
+	itd16{a: 0, b: -32768, add: -32768, sub: -32768, mul: 0, div: 0},
+	itd16{a: 0, b: -32767, add: -32767, sub: 32767, mul: 0, div: 0},
+	itd16{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0},
+	itd16{a: 0, b: 0, add: 0, sub: 0, mul: 0},
+	itd16{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0},
+	itd16{a: 0, b: 32766, add: 32766, sub: -32766, mul: 0, div: 0},
+	itd16{a: 0, b: 32767, add: 32767, sub: -32767, mul: 0, div: 0},
+	itd16{a: 1, b: -32768, add: -32767, sub: -32767, mul: -32768, div: 0},
+	itd16{a: 1, b: -32767, add: -32766, sub: -32768, mul: -32767, div: 0},
+	itd16{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1},
+	itd16{a: 1, b: 0, add: 1, sub: 1, mul: 0},
+	itd16{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1},
+	itd16{a: 1, b: 32766, add: 32767, sub: -32765, mul: 32766, div: 0},
+	itd16{a: 1, b: 32767, add: -32768, sub: -32766, mul: 32767, div: 0},
+	itd16{a: 32766, b: -32768, add: -2, sub: -2, mul: 0, div: 0},
+	itd16{a: 32766, b: -32767, add: -1, sub: -3, mul: 32766, div: 0},
+	itd16{a: 32766, b: -1, add: 32765, sub: 32767, mul: -32766, div: -32766},
+	itd16{a: 32766, b: 0, add: 32766, sub: 32766, mul: 0},
+	itd16{a: 32766, b: 1, add: 32767, sub: 32765, mul: 32766, div: 32766},
+	itd16{a: 32766, b: 32766, add: -4, sub: 0, mul: 4, div: 1},
+	itd16{a: 32766, b: 32767, add: -3, sub: -1, mul: -32766, div: 0},
+	itd16{a: 32767, b: -32768, add: -1, sub: -1, mul: -32768, div: 0},
+	itd16{a: 32767, b: -32767, add: 0, sub: -2, mul: -1, div: -1},
+	itd16{a: 32767, b: -1, add: 32766, sub: -32768, mul: -32767, div: -32767},
+	itd16{a: 32767, b: 0, add: 32767, sub: 32767, mul: 0},
+	itd16{a: 32767, b: 1, add: -32768, sub: 32766, mul: 32767, div: 32767},
+	itd16{a: 32767, b: 32766, add: -3, sub: 1, mul: -32766, div: 1},
+	itd16{a: 32767, b: 32767, add: -2, sub: 0, mul: 1, div: 1},
+}
+var uint8_data []utd8 = []utd8{utd8{a: 0, b: 0, add: 0, sub: 0, mul: 0},
+	utd8{a: 0, b: 1, add: 1, sub: 255, mul: 0, div: 0},
+	utd8{a: 0, b: 255, add: 255, sub: 1, mul: 0, div: 0},
+	utd8{a: 1, b: 0, add: 1, sub: 1, mul: 0},
+	utd8{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1},
+	utd8{a: 1, b: 255, add: 0, sub: 2, mul: 255, div: 0},
+	utd8{a: 255, b: 0, add: 255, sub: 255, mul: 0},
+	utd8{a: 255, b: 1, add: 0, sub: 254, mul: 255, div: 255},
+	utd8{a: 255, b: 255, add: 254, sub: 0, mul: 1, div: 1},
+}
+var int8_data []itd8 = []itd8{itd8{a: -128, b: -128, add: 0, sub: 0, mul: 0, div: 1},
+	itd8{a: -128, b: -127, add: 1, sub: -1, mul: -128, div: 1},
+	itd8{a: -128, b: -1, add: 127, sub: -127, mul: -128, div: -128},
+	itd8{a: -128, b: 0, add: -128, sub: -128, mul: 0},
+	itd8{a: -128, b: 1, add: -127, sub: 127, mul: -128, div: -128},
+	itd8{a: -128, b: 126, add: -2, sub: 2, mul: 0, div: -1},
+	itd8{a: -128, b: 127, add: -1, sub: 1, mul: -128, div: -1},
+	itd8{a: -127, b: -128, add: 1, sub: 1, mul: -128, div: 0},
+	itd8{a: -127, b: -127, add: 2, sub: 0, mul: 1, div: 1},
+	itd8{a: -127, b: -1, add: -128, sub: -126, mul: 127, div: 127},
+	itd8{a: -127, b: 0, add: -127, sub: -127, mul: 0},
+	itd8{a: -127, b: 1, add: -126, sub: -128, mul: -127, div: -127},
+	itd8{a: -127, b: 126, add: -1, sub: 3, mul: 126, div: -1},
+	itd8{a: -127, b: 127, add: 0, sub: 2, mul: -1, div: -1},
+	itd8{a: -1, b: -128, add: 127, sub: 127, mul: -128, div: 0},
+	itd8{a: -1, b: -127, add: -128, sub: 126, mul: 127, div: 0},
+	itd8{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1},
+	itd8{a: -1, b: 0, add: -1, sub: -1, mul: 0},
+	itd8{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1},
+	itd8{a: -1, b: 126, add: 125, sub: -127, mul: -126, div: 0},
+	itd8{a: -1, b: 127, add: 126, sub: -128, mul: -127, div: 0},
+	itd8{a: 0, b: -128, add: -128, sub: -128, mul: 0, div: 0},
+	itd8{a: 0, b: -127, add: -127, sub: 127, mul: 0, div: 0},
+	itd8{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0},
+	itd8{a: 0, b: 0, add: 0, sub: 0, mul: 0},
+	itd8{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0},
+	itd8{a: 0, b: 126, add: 126, sub: -126, mul: 0, div: 0},
+	itd8{a: 0, b: 127, add: 127, sub: -127, mul: 0, div: 0},
+	itd8{a: 1, b: -128, add: -127, sub: -127, mul: -128, div: 0},
+	itd8{a: 1, b: -127, add: -126, sub: -128, mul: -127, div: 0},
+	itd8{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1},
+	itd8{a: 1, b: 0, add: 1, sub: 1, mul: 0},
+	itd8{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1},
+	itd8{a: 1, b: 126, add: 127, sub: -125, mul: 126, div: 0},
+	itd8{a: 1, b: 127, add: -128, sub: -126, mul: 127, div: 0},
+	itd8{a: 126, b: -128, add: -2, sub: -2, mul: 0, div: 0},
+	itd8{a: 126, b: -127, add: -1, sub: -3, mul: 126, div: 0},
+	itd8{a: 126, b: -1, add: 125, sub: 127, mul: -126, div: -126},
+	itd8{a: 126, b: 0, add: 126, sub: 126, mul: 0},
+	itd8{a: 126, b: 1, add: 127, sub: 125, mul: 126, div: 126},
+	itd8{a: 126, b: 126, add: -4, sub: 0, mul: 4, div: 1},
+	itd8{a: 126, b: 127, add: -3, sub: -1, mul: -126, div: 0},
+	itd8{a: 127, b: -128, add: -1, sub: -1, mul: -128, div: 0},
+	itd8{a: 127, b: -127, add: 0, sub: -2, mul: -1, div: -1},
+	itd8{a: 127, b: -1, add: 126, sub: -128, mul: -127, div: -127},
+	itd8{a: 127, b: 0, add: 127, sub: 127, mul: 0},
+	itd8{a: 127, b: 1, add: -128, sub: 126, mul: 127, div: 127},
+	itd8{a: 127, b: 126, add: -3, sub: 1, mul: -126, div: 1},
+	itd8{a: 127, b: 127, add: -2, sub: 0, mul: 1, div: 1},
+}
+var failed bool
+
+func main() {
+
+	for _, v := range uint64_data {
+		if got := add_uint64_ssa(v.a, v.b); got != v.add {
+			fmt.Printf("add_uint64 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
+			failed = true
+		}
+		if got := sub_uint64_ssa(v.a, v.b); got != v.sub {
+			fmt.Printf("sub_uint64 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
+			failed = true
+		}
+		if v.b != 0 {
+			if got := div_uint64_ssa(v.a, v.b); got != v.div {
+				fmt.Printf("div_uint64 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
+				failed = true
+			}
+
+		}
+		if got := mul_uint64_ssa(v.a, v.b); got != v.mul {
+			fmt.Printf("mul_uint64 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
+			failed = true
+		}
+	}
+	for _, v := range int64_data {
+		if got := add_int64_ssa(v.a, v.b); got != v.add {
+			fmt.Printf("add_int64 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
+			failed = true
+		}
+		if got := sub_int64_ssa(v.a, v.b); got != v.sub {
+			fmt.Printf("sub_int64 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
+			failed = true
+		}
+		if v.b != 0 {
+			if got := div_int64_ssa(v.a, v.b); got != v.div {
+				fmt.Printf("div_int64 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
+				failed = true
+			}
+
+		}
+		if got := mul_int64_ssa(v.a, v.b); got != v.mul {
+			fmt.Printf("mul_int64 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
+			failed = true
+		}
+	}
+	for _, v := range uint32_data {
+		if got := add_uint32_ssa(v.a, v.b); got != v.add {
+			fmt.Printf("add_uint32 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
+			failed = true
+		}
+		if got := sub_uint32_ssa(v.a, v.b); got != v.sub {
+			fmt.Printf("sub_uint32 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
+			failed = true
+		}
+		if v.b != 0 {
+			if got := div_uint32_ssa(v.a, v.b); got != v.div {
+				fmt.Printf("div_uint32 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
+				failed = true
+			}
+
+		}
+		if got := mul_uint32_ssa(v.a, v.b); got != v.mul {
+			fmt.Printf("mul_uint32 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
+			failed = true
+		}
+	}
+	for _, v := range int32_data {
+		if got := add_int32_ssa(v.a, v.b); got != v.add {
+			fmt.Printf("add_int32 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
+			failed = true
+		}
+		if got := sub_int32_ssa(v.a, v.b); got != v.sub {
+			fmt.Printf("sub_int32 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
+			failed = true
+		}
+		if v.b != 0 {
+			if got := div_int32_ssa(v.a, v.b); got != v.div {
+				fmt.Printf("div_int32 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
+				failed = true
+			}
+
+		}
+		if got := mul_int32_ssa(v.a, v.b); got != v.mul {
+			fmt.Printf("mul_int32 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
+			failed = true
+		}
+	}
+	for _, v := range uint16_data {
+		if got := add_uint16_ssa(v.a, v.b); got != v.add {
+			fmt.Printf("add_uint16 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
+			failed = true
+		}
+		if got := sub_uint16_ssa(v.a, v.b); got != v.sub {
+			fmt.Printf("sub_uint16 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
+			failed = true
+		}
+		if v.b != 0 {
+			if got := div_uint16_ssa(v.a, v.b); got != v.div {
+				fmt.Printf("div_uint16 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
+				failed = true
+			}
+
+		}
+		if got := mul_uint16_ssa(v.a, v.b); got != v.mul {
+			fmt.Printf("mul_uint16 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
+			failed = true
+		}
+	}
+	for _, v := range int16_data {
+		if got := add_int16_ssa(v.a, v.b); got != v.add {
+			fmt.Printf("add_int16 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
+			failed = true
+		}
+		if got := sub_int16_ssa(v.a, v.b); got != v.sub {
+			fmt.Printf("sub_int16 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
+			failed = true
+		}
+		if v.b != 0 {
+			if got := div_int16_ssa(v.a, v.b); got != v.div {
+				fmt.Printf("div_int16 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
+				failed = true
+			}
+
+		}
+		if got := mul_int16_ssa(v.a, v.b); got != v.mul {
+			fmt.Printf("mul_int16 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
+			failed = true
+		}
+	}
+	for _, v := range uint8_data {
+		if got := add_uint8_ssa(v.a, v.b); got != v.add {
+			fmt.Printf("add_uint8 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
+			failed = true
+		}
+		if got := sub_uint8_ssa(v.a, v.b); got != v.sub {
+			fmt.Printf("sub_uint8 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
+			failed = true
+		}
+		if v.b != 0 {
+			if got := div_uint8_ssa(v.a, v.b); got != v.div {
+				fmt.Printf("div_uint8 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
+				failed = true
+			}
+
+		}
+		if got := mul_uint8_ssa(v.a, v.b); got != v.mul {
+			fmt.Printf("mul_uint8 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
+			failed = true
+		}
+	}
+	for _, v := range int8_data {
+		if got := add_int8_ssa(v.a, v.b); got != v.add {
+			fmt.Printf("add_int8 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
+			failed = true
+		}
+		if got := sub_int8_ssa(v.a, v.b); got != v.sub {
+			fmt.Printf("sub_int8 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
+			failed = true
+		}
+		if v.b != 0 {
+			if got := div_int8_ssa(v.a, v.b); got != v.div {
+				fmt.Printf("div_int8 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
+				failed = true
+			}
+
+		}
+		if got := mul_int8_ssa(v.a, v.b); got != v.mul {
+			fmt.Printf("mul_int8 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
+			failed = true
+		}
+	}
+	if failed {
+		panic("tests failed")
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/TODO b/src/cmd/compile/internal/ssa/TODO
index d049bea872..1773dbbc98 100644
--- a/src/cmd/compile/internal/ssa/TODO
+++ b/src/cmd/compile/internal/ssa/TODO
@@ -5,7 +5,7 @@ Coverage
 --------
 - Floating point numbers
 - Complex numbers
-- Integer division
+- Integer division (remaining: HMUL & MOD)
 - Fat objects (strings/slices/interfaces) vs. Phi
 - Defer?
 - Closure args
@@ -49,6 +49,7 @@ Optimizations (better compiler)
 - OpStore uses 3 args.  Increase the size of Value.argstorage to 3?
 - Constant cache
 - Reuseable slices (e.g. []int of size NumValues()) cached in Func
+- Handle signed division overflow and sign extension earlier
 
 Regalloc
 --------
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 9ea9781d93..0cde6f26d4 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -36,6 +36,15 @@
 (Div32F x y) -> (DIVSS x y)
 (Div64F x y) -> (DIVSD x y)
 
+(Div64 x y) -> (DIVQ x y)
+(Div64u x y) -> (DIVQU x y)
+(Div32 x y) -> (DIVL x y)
+(Div32u x y) -> (DIVLU x y)
+(Div16 x y) -> (DIVW x y)
+(Div16u x y) -> (DIVWU x y)
+(Div8 x y) ->  (DIVW (SignExt8to16 <config.Frontend().TypeInt16()> x) (SignExt8to16 <config.Frontend().TypeInt16()> y))
+(Div8u x y) ->  (DIVWU (ZeroExt8to16 <config.Frontend().TypeUInt16()> x) (ZeroExt8to16 <config.Frontend().TypeUInt16()> y))
+
 (And64 x y) -> (ANDQ x y)
 (And32 x y) -> (ANDL x y)
 (And16 x y) -> (ANDW x y)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 5aa5e60e33..220e5b01cd 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -72,7 +72,9 @@ func init() {
 
 	// Common individual register masks
 	var (
+		ax         = buildReg("AX")
 		cx         = buildReg("CX")
+		dx         = buildReg("DX")
 		x15        = buildReg("X15")
 		gp         = buildReg("AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15")
 		fp         = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15")
@@ -97,6 +99,8 @@ func init() {
 		gp21      = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: gponly, clobbers: flags}
 		gp21sb    = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly, clobbers: flags}
 		gp21shift = regInfo{inputs: []regMask{gpsp, cx}, outputs: []regMask{gp &^ cx}, clobbers: flags}
+		gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
+			clobbers: dx | flags} // arg0 pinned to AX, arg1 anywhere in gpsp except DX; result in AX; DX and flags clobbered
 
 		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
 		gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
@@ -180,6 +184,14 @@ func init() {
 		{name: "MULWconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint
 		{name: "MULBconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint
 
+		{name: "DIVQ", reg: gp11div, asm: "IDIVQ"}, // signed arg0 / arg1
+		{name: "DIVL", reg: gp11div, asm: "IDIVL"}, // signed arg0 / arg1
+		{name: "DIVW", reg: gp11div, asm: "IDIVW"}, // signed arg0 / arg1
+
+		{name: "DIVQU", reg: gp11div, asm: "DIVQ"}, // unsigned arg0 / arg1
+		{name: "DIVLU", reg: gp11div, asm: "DIVL"}, // unsigned arg0 / arg1
+		{name: "DIVWU", reg: gp11div, asm: "DIVW"}, // unsigned arg0 / arg1
+
 		{name: "ANDQ", reg: gp21, asm: "ANDQ"},      // arg0 & arg1
 		{name: "ANDL", reg: gp21, asm: "ANDL"},      // arg0 & arg1
 		{name: "ANDW", reg: gp21, asm: "ANDW"},      // arg0 & arg1
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 1488e0f644..a0d8f8e000 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -37,6 +37,15 @@ var genericOps = []opData{
 	{name: "Div64F"},
 	// TODO: Div8, Div16, Div32, Div64 and unsigned
 
+	{name: "Div8"}, // arg0 / arg1
+	{name: "Div8u"},
+	{name: "Div16"},
+	{name: "Div16u"},
+	{name: "Div32"},
+	{name: "Div32u"},
+	{name: "Div64"},
+	{name: "Div64u"},
+
 	{name: "And8"}, // arg0 & arg1
 	{name: "And16"},
 	{name: "And32"},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index cbabbfade5..44fd6e3737 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -93,6 +93,12 @@ const (
 	OpAMD64MULLconst
 	OpAMD64MULWconst
 	OpAMD64MULBconst
+	OpAMD64DIVQ
+	OpAMD64DIVL
+	OpAMD64DIVW
+	OpAMD64DIVQU
+	OpAMD64DIVLU
+	OpAMD64DIVWU
 	OpAMD64ANDQ
 	OpAMD64ANDL
 	OpAMD64ANDW
@@ -239,6 +245,14 @@ const (
 	OpMul64F
 	OpDiv32F
 	OpDiv64F
+	OpDiv8
+	OpDiv8u
+	OpDiv16
+	OpDiv16u
+	OpDiv32
+	OpDiv32u
+	OpDiv64
+	OpDiv64u
 	OpAnd8
 	OpAnd16
 	OpAnd32
@@ -963,6 +977,90 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name: "DIVQ",
+		asm:  x86.AIDIVQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1},     // .AX
+				{1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+			},
+			clobbers: 8589934596, // .DX .FLAGS
+			outputs: []regMask{
+				1, // .AX
+			},
+		},
+	},
+	{
+		name: "DIVL",
+		asm:  x86.AIDIVL,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1},     // .AX
+				{1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+			},
+			clobbers: 8589934596, // .DX .FLAGS
+			outputs: []regMask{
+				1, // .AX
+			},
+		},
+	},
+	{
+		name: "DIVW",
+		asm:  x86.AIDIVW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1},     // .AX
+				{1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+			},
+			clobbers: 8589934596, // .DX .FLAGS
+			outputs: []regMask{
+				1, // .AX
+			},
+		},
+	},
+	{
+		name: "DIVQU",
+		asm:  x86.ADIVQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1},     // .AX
+				{1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+			},
+			clobbers: 8589934596, // .DX .FLAGS
+			outputs: []regMask{
+				1, // .AX
+			},
+		},
+	},
+	{
+		name: "DIVLU",
+		asm:  x86.ADIVL,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1},     // .AX
+				{1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+			},
+			clobbers: 8589934596, // .DX .FLAGS
+			outputs: []regMask{
+				1, // .AX
+			},
+		},
+	},
+	{
+		name: "DIVWU",
+		asm:  x86.ADIVW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1},     // .AX
+				{1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+			},
+			clobbers: 8589934596, // .DX .FLAGS
+			outputs: []regMask{
+				1, // .AX
+			},
+		},
+	},
 	{
 		name: "ANDQ",
 		asm:  x86.AANDQ,
@@ -2592,6 +2690,38 @@ var opcodeTable = [...]opInfo{
 		name:    "Div64F",
 		generic: true,
 	},
+	{
+		name:    "Div8",
+		generic: true,
+	},
+	{
+		name:    "Div8u",
+		generic: true,
+	},
+	{
+		name:    "Div16",
+		generic: true,
+	},
+	{
+		name:    "Div16u",
+		generic: true,
+	},
+	{
+		name:    "Div32",
+		generic: true,
+	},
+	{
+		name:    "Div32u",
+		generic: true,
+	},
+	{
+		name:    "Div64",
+		generic: true,
+	},
+	{
+		name:    "Div64u",
+		generic: true,
+	},
 	{
 		name:    "And8",
 		generic: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 75393ad58a..993838b537 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -1688,6 +1688,60 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
 		goto endc395c0a53eeccf597e225a07b53047d1
 	endc395c0a53eeccf597e225a07b53047d1:
 		;
+	case OpDiv16:
+		// match: (Div16 x y)
+		// cond:
+		// result: (DIVW x y)
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			v.Op = OpAMD64DIVW
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto endb60a86e606726640c84d3e1e5a5ce890
+	endb60a86e606726640c84d3e1e5a5ce890:
+		;
+	case OpDiv16u:
+		// match: (Div16u x y)
+		// cond:
+		// result: (DIVWU x y)
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			v.Op = OpAMD64DIVWU
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto end6af9e212a865593e506bfdf7db67c9ec
+	end6af9e212a865593e506bfdf7db67c9ec:
+		;
+	case OpDiv32:
+		// match: (Div32 x y)
+		// cond:
+		// result: (DIVL x y)
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			v.Op = OpAMD64DIVL
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto endf20ac71407e57c2904684d3cc33cf697
+	endf20ac71407e57c2904684d3cc33cf697:
+		;
 	case OpDiv32F:
 		// match: (Div32F x y)
 		// cond:
@@ -1706,6 +1760,42 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
 		goto enddca0462c7b176c4138854d7d5627ab5b
 	enddca0462c7b176c4138854d7d5627ab5b:
 		;
+	case OpDiv32u:
+		// match: (Div32u x y)
+		// cond:
+		// result: (DIVLU x y)
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			v.Op = OpAMD64DIVLU
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto enda22604d23eeb1298008c97b817f60bbd
+	enda22604d23eeb1298008c97b817f60bbd:
+		;
+	case OpDiv64:
+		// match: (Div64 x y)
+		// cond:
+		// result: (DIVQ x y)
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			v.Op = OpAMD64DIVQ
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto end86490d9b337333dfc09a413e1e0120a9
+	end86490d9b337333dfc09a413e1e0120a9:
+		;
 	case OpDiv64F:
 		// match: (Div64F x y)
 		// cond:
@@ -1724,6 +1814,72 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
 		goto end12299d76db5144a60f564d34ba97eb43
 	end12299d76db5144a60f564d34ba97eb43:
 		;
+	case OpDiv64u:
+		// match: (Div64u x y)
+		// cond:
+		// result: (DIVQU x y)
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			v.Op = OpAMD64DIVQU
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto endf871d8b397e5fad6a5b500cc0c759a8d
+	endf871d8b397e5fad6a5b500cc0c759a8d:
+		;
+	case OpDiv8:
+		// match: (Div8 x y)
+		// cond:
+		// result: (DIVW (SignExt8to16 <config.Frontend().TypeInt16()> x) (SignExt8to16 <config.Frontend().TypeInt16()> y))
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			v.Op = OpAMD64DIVW
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v0 := b.NewValue0(v.Line, OpSignExt8to16, TypeInvalid)
+			v0.Type = config.Frontend().TypeInt16()
+			v0.AddArg(x)
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpSignExt8to16, TypeInvalid)
+			v1.Type = config.Frontend().TypeInt16()
+			v1.AddArg(y)
+			v.AddArg(v1)
+			return true
+		}
+		goto ende25a7899b9c7a869f74226b4b6033084
+	ende25a7899b9c7a869f74226b4b6033084:
+		;
+	case OpDiv8u:
+		// match: (Div8u x y)
+		// cond:
+		// result: (DIVWU (ZeroExt8to16 <config.Frontend().TypeUInt16()> x) (ZeroExt8to16 <config.Frontend().TypeUInt16()> y))
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			v.Op = OpAMD64DIVWU
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v0 := b.NewValue0(v.Line, OpZeroExt8to16, TypeInvalid)
+			v0.Type = config.Frontend().TypeUInt16()
+			v0.AddArg(x)
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpZeroExt8to16, TypeInvalid)
+			v1.Type = config.Frontend().TypeUInt16()
+			v1.AddArg(y)
+			v.AddArg(v1)
+			return true
+		}
+		goto ende655b41d48feafc4d139b815a3b7b55c
+	ende655b41d48feafc4d139b815a3b7b55c:
+		;
 	case OpEq16:
 		// match: (Eq16 x y)
 		// cond: