From a45f2d8f2898d23804de473841d42670fcdda5dc Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 17 Aug 2015 17:46:06 -0500 Subject: [PATCH] [dev.ssa] cmd/compile/internal/ssa: implement ODIV Implement integer division for non-consts. Change-Id: If40cbde20e5f0ebb9993064def7be468e4eca076 Reviewed-on: https://go-review.googlesource.com/13644 Reviewed-by: Keith Randall --- src/cmd/compile/internal/gc/ssa.go | 80 +++ src/cmd/compile/internal/gc/ssa_test.go | 3 + .../internal/gc/testdata/arithBoundary_ssa.go | 640 ++++++++++++++++++ src/cmd/compile/internal/ssa/TODO | 3 +- src/cmd/compile/internal/ssa/gen/AMD64.rules | 9 + src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 12 + .../compile/internal/ssa/gen/genericOps.go | 9 + src/cmd/compile/internal/ssa/opGen.go | 130 ++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 156 +++++ 9 files changed, 1041 insertions(+), 1 deletion(-) create mode 100644 src/cmd/compile/internal/gc/testdata/arithBoundary_ssa.go diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index ef90ed40e7..90b29b9b09 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -779,6 +779,15 @@ var opToSSA = map[opAndType]ssa.Op{ opAndType{ODIV, TFLOAT32}: ssa.OpDiv32F, opAndType{ODIV, TFLOAT64}: ssa.OpDiv64F, + opAndType{ODIV, TINT8}: ssa.OpDiv8, + opAndType{ODIV, TUINT8}: ssa.OpDiv8u, + opAndType{ODIV, TINT16}: ssa.OpDiv16, + opAndType{ODIV, TUINT16}: ssa.OpDiv16u, + opAndType{ODIV, TINT32}: ssa.OpDiv32, + opAndType{ODIV, TUINT32}: ssa.OpDiv32u, + opAndType{ODIV, TINT64}: ssa.OpDiv64, + opAndType{ODIV, TUINT64}: ssa.OpDiv64u, + opAndType{OAND, TINT8}: ssa.OpAnd8, opAndType{OAND, TUINT8}: ssa.OpAnd8, opAndType{OAND, TINT16}: ssa.OpAnd16, @@ -2019,6 +2028,77 @@ func genValue(v *ssa.Value) { } opregreg(v.Op.Asm(), r, y) + case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW, + ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU: + + // Arg[0] is already in AX as it's the only register we 
allow + // and AX is the only output + x := regnum(v.Args[1]) + + // CPU faults upon signed overflow, which occurs when most + // negative int is divided by -1. So we check for division + // by -1 and negate the input. + var j *obj.Prog + if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL || + v.Op == ssa.OpAMD64DIVW { + + var c *obj.Prog + switch v.Op { + case ssa.OpAMD64DIVQ: + c = Prog(x86.ACMPQ) + case ssa.OpAMD64DIVL: + c = Prog(x86.ACMPL) + case ssa.OpAMD64DIVW: + c = Prog(x86.ACMPW) + } + c.From.Type = obj.TYPE_REG + c.From.Reg = x + c.To.Type = obj.TYPE_CONST + c.To.Offset = -1 + + j = Prog(x86.AJEQ) + j.To.Type = obj.TYPE_BRANCH + + } + + // dividend is in ax; signed ops sign extend it into + // dx:ax, unsigned ops zero dx, for the DIV input + switch v.Op { + case ssa.OpAMD64DIVQU: + fallthrough + case ssa.OpAMD64DIVLU: + fallthrough + case ssa.OpAMD64DIVWU: + c := Prog(x86.AXORQ) + c.From.Type = obj.TYPE_REG + c.From.Reg = x86.REG_DX + c.To.Type = obj.TYPE_REG + c.To.Reg = x86.REG_DX + case ssa.OpAMD64DIVQ: + Prog(x86.ACQO) + case ssa.OpAMD64DIVL: + Prog(x86.ACDQ) + case ssa.OpAMD64DIVW: + Prog(x86.ACWD) + } + + p := Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = x + + // signed division, rest of the check for -1 case + if j != nil { + j2 := Prog(obj.AJMP) + j2.To.Type = obj.TYPE_BRANCH + + n := Prog(x86.ANEGQ) + n.To.Type = obj.TYPE_REG + n.To.Reg = x86.REG_AX + + j.To.Val = n + j2.To.Val = Pc + } + case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB, ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB, ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB: diff --git a/src/cmd/compile/internal/gc/ssa_test.go b/src/cmd/compile/internal/gc/ssa_test.go index f0060cb12d..d4dfa5d5bf 100644 --- a/src/cmd/compile/internal/gc/ssa_test.go +++ b/src/cmd/compile/internal/gc/ssa_test.go @@ -48,3 +48,6 @@ func TestArithmetic(t *testing.T) { runTest(t, "arith_ssa.go") } // TestFP tests that both backends have the same result for floating 
point expressions. func TestFP(t *testing.T) { runTest(t, "fp_ssa.go") } + +// TestArithmeticBoundary tests boundary results for arithmetic operations. +func TestArithmeticBoundary(t *testing.T) { runTest(t, "arithBoundary_ssa.go") } diff --git a/src/cmd/compile/internal/gc/testdata/arithBoundary_ssa.go b/src/cmd/compile/internal/gc/testdata/arithBoundary_ssa.go new file mode 100644 index 0000000000..8f84026a5d --- /dev/null +++ b/src/cmd/compile/internal/gc/testdata/arithBoundary_ssa.go @@ -0,0 +1,640 @@ +package main + +import "fmt" + +type utd64 struct { + a, b uint64 + add, sub, mul, div uint64 +} +type itd64 struct { + a, b int64 + add, sub, mul, div int64 +} +type utd32 struct { + a, b uint32 + add, sub, mul, div uint32 +} +type itd32 struct { + a, b int32 + add, sub, mul, div int32 +} +type utd16 struct { + a, b uint16 + add, sub, mul, div uint16 +} +type itd16 struct { + a, b int16 + add, sub, mul, div int16 +} +type utd8 struct { + a, b uint8 + add, sub, mul, div uint8 +} +type itd8 struct { + a, b int8 + add, sub, mul, div int8 +} + +func add_uint64_ssa(a, b uint64) uint64 { + switch { + } + return a + b +} +func sub_uint64_ssa(a, b uint64) uint64 { + switch { + } + return a - b +} +func div_uint64_ssa(a, b uint64) uint64 { + switch { + } + return a / b +} +func mul_uint64_ssa(a, b uint64) uint64 { + switch { + } + return a * b +} +func add_int64_ssa(a, b int64) int64 { + switch { + } + return a + b +} +func sub_int64_ssa(a, b int64) int64 { + switch { + } + return a - b +} +func div_int64_ssa(a, b int64) int64 { + switch { + } + return a / b +} +func mul_int64_ssa(a, b int64) int64 { + switch { + } + return a * b +} +func add_uint32_ssa(a, b uint32) uint32 { + switch { + } + return a + b +} +func sub_uint32_ssa(a, b uint32) uint32 { + switch { + } + return a - b +} +func div_uint32_ssa(a, b uint32) uint32 { + switch { + } + return a / b +} +func mul_uint32_ssa(a, b uint32) uint32 { + switch { + } + return a * b +} +func add_int32_ssa(a, b int32) int32 { 
+ switch { + } + return a + b +} +func sub_int32_ssa(a, b int32) int32 { + switch { + } + return a - b +} +func div_int32_ssa(a, b int32) int32 { + switch { + } + return a / b +} +func mul_int32_ssa(a, b int32) int32 { + switch { + } + return a * b +} +func add_uint16_ssa(a, b uint16) uint16 { + switch { + } + return a + b +} +func sub_uint16_ssa(a, b uint16) uint16 { + switch { + } + return a - b +} +func div_uint16_ssa(a, b uint16) uint16 { + switch { + } + return a / b +} +func mul_uint16_ssa(a, b uint16) uint16 { + switch { + } + return a * b +} +func add_int16_ssa(a, b int16) int16 { + switch { + } + return a + b +} +func sub_int16_ssa(a, b int16) int16 { + switch { + } + return a - b +} +func div_int16_ssa(a, b int16) int16 { + switch { + } + return a / b +} +func mul_int16_ssa(a, b int16) int16 { + switch { + } + return a * b +} +func add_uint8_ssa(a, b uint8) uint8 { + switch { + } + return a + b +} +func sub_uint8_ssa(a, b uint8) uint8 { + switch { + } + return a - b +} +func div_uint8_ssa(a, b uint8) uint8 { + switch { + } + return a / b +} +func mul_uint8_ssa(a, b uint8) uint8 { + switch { + } + return a * b +} +func add_int8_ssa(a, b int8) int8 { + switch { + } + return a + b +} +func sub_int8_ssa(a, b int8) int8 { + switch { + } + return a - b +} +func div_int8_ssa(a, b int8) int8 { + switch { + } + return a / b +} +func mul_int8_ssa(a, b int8) int8 { + switch { + } + return a * b +} + +var uint64_data []utd64 = []utd64{utd64{a: 0, b: 0, add: 0, sub: 0, mul: 0}, + utd64{a: 0, b: 1, add: 1, sub: 18446744073709551615, mul: 0, div: 0}, + utd64{a: 0, b: 4294967296, add: 4294967296, sub: 18446744069414584320, mul: 0, div: 0}, + utd64{a: 0, b: 18446744073709551615, add: 18446744073709551615, sub: 1, mul: 0, div: 0}, + utd64{a: 1, b: 0, add: 1, sub: 1, mul: 0}, + utd64{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1}, + utd64{a: 1, b: 4294967296, add: 4294967297, sub: 18446744069414584321, mul: 4294967296, div: 0}, + utd64{a: 1, b: 18446744073709551615, add: 0, 
sub: 2, mul: 18446744073709551615, div: 0}, + utd64{a: 4294967296, b: 0, add: 4294967296, sub: 4294967296, mul: 0}, + utd64{a: 4294967296, b: 1, add: 4294967297, sub: 4294967295, mul: 4294967296, div: 4294967296}, + utd64{a: 4294967296, b: 4294967296, add: 8589934592, sub: 0, mul: 0, div: 1}, + utd64{a: 4294967296, b: 18446744073709551615, add: 4294967295, sub: 4294967297, mul: 18446744069414584320, div: 0}, + utd64{a: 18446744073709551615, b: 0, add: 18446744073709551615, sub: 18446744073709551615, mul: 0}, + utd64{a: 18446744073709551615, b: 1, add: 0, sub: 18446744073709551614, mul: 18446744073709551615, div: 18446744073709551615}, + utd64{a: 18446744073709551615, b: 4294967296, add: 4294967295, sub: 18446744069414584319, mul: 18446744069414584320, div: 4294967295}, + utd64{a: 18446744073709551615, b: 18446744073709551615, add: 18446744073709551614, sub: 0, mul: 1, div: 1}, +} +var int64_data []itd64 = []itd64{itd64{a: -9223372036854775808, b: -9223372036854775808, add: 0, sub: 0, mul: 0, div: 1}, + itd64{a: -9223372036854775808, b: -9223372036854775807, add: 1, sub: -1, mul: -9223372036854775808, div: 1}, + itd64{a: -9223372036854775808, b: -4294967296, add: 9223372032559808512, sub: -9223372032559808512, mul: 0, div: 2147483648}, + itd64{a: -9223372036854775808, b: -1, add: 9223372036854775807, sub: -9223372036854775807, mul: -9223372036854775808, div: -9223372036854775808}, + itd64{a: -9223372036854775808, b: 0, add: -9223372036854775808, sub: -9223372036854775808, mul: 0}, + itd64{a: -9223372036854775808, b: 1, add: -9223372036854775807, sub: 9223372036854775807, mul: -9223372036854775808, div: -9223372036854775808}, + itd64{a: -9223372036854775808, b: 4294967296, add: -9223372032559808512, sub: 9223372032559808512, mul: 0, div: -2147483648}, + itd64{a: -9223372036854775808, b: 9223372036854775806, add: -2, sub: 2, mul: 0, div: -1}, + itd64{a: -9223372036854775808, b: 9223372036854775807, add: -1, sub: 1, mul: -9223372036854775808, div: -1}, + itd64{a: 
-9223372036854775807, b: -9223372036854775808, add: 1, sub: 1, mul: -9223372036854775808, div: 0}, + itd64{a: -9223372036854775807, b: -9223372036854775807, add: 2, sub: 0, mul: 1, div: 1}, + itd64{a: -9223372036854775807, b: -4294967296, add: 9223372032559808513, sub: -9223372032559808511, mul: -4294967296, div: 2147483647}, + itd64{a: -9223372036854775807, b: -1, add: -9223372036854775808, sub: -9223372036854775806, mul: 9223372036854775807, div: 9223372036854775807}, + itd64{a: -9223372036854775807, b: 0, add: -9223372036854775807, sub: -9223372036854775807, mul: 0}, + itd64{a: -9223372036854775807, b: 1, add: -9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: -9223372036854775807}, + itd64{a: -9223372036854775807, b: 4294967296, add: -9223372032559808511, sub: 9223372032559808513, mul: 4294967296, div: -2147483647}, + itd64{a: -9223372036854775807, b: 9223372036854775806, add: -1, sub: 3, mul: 9223372036854775806, div: -1}, + itd64{a: -9223372036854775807, b: 9223372036854775807, add: 0, sub: 2, mul: -1, div: -1}, + itd64{a: -4294967296, b: -9223372036854775808, add: 9223372032559808512, sub: 9223372032559808512, mul: 0, div: 0}, + itd64{a: -4294967296, b: -9223372036854775807, add: 9223372032559808513, sub: 9223372032559808511, mul: -4294967296, div: 0}, + itd64{a: -4294967296, b: -4294967296, add: -8589934592, sub: 0, mul: 0, div: 1}, + itd64{a: -4294967296, b: -1, add: -4294967297, sub: -4294967295, mul: 4294967296, div: 4294967296}, + itd64{a: -4294967296, b: 0, add: -4294967296, sub: -4294967296, mul: 0}, + itd64{a: -4294967296, b: 1, add: -4294967295, sub: -4294967297, mul: -4294967296, div: -4294967296}, + itd64{a: -4294967296, b: 4294967296, add: 0, sub: -8589934592, mul: 0, div: -1}, + itd64{a: -4294967296, b: 9223372036854775806, add: 9223372032559808510, sub: 9223372032559808514, mul: 8589934592, div: 0}, + itd64{a: -4294967296, b: 9223372036854775807, add: 9223372032559808511, sub: 9223372032559808513, mul: 4294967296, 
div: 0}, + itd64{a: -1, b: -9223372036854775808, add: 9223372036854775807, sub: 9223372036854775807, mul: -9223372036854775808, div: 0}, + itd64{a: -1, b: -9223372036854775807, add: -9223372036854775808, sub: 9223372036854775806, mul: 9223372036854775807, div: 0}, + itd64{a: -1, b: -4294967296, add: -4294967297, sub: 4294967295, mul: 4294967296, div: 0}, + itd64{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1}, + itd64{a: -1, b: 0, add: -1, sub: -1, mul: 0}, + itd64{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1}, + itd64{a: -1, b: 4294967296, add: 4294967295, sub: -4294967297, mul: -4294967296, div: 0}, + itd64{a: -1, b: 9223372036854775806, add: 9223372036854775805, sub: -9223372036854775807, mul: -9223372036854775806, div: 0}, + itd64{a: -1, b: 9223372036854775807, add: 9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: 0}, + itd64{a: 0, b: -9223372036854775808, add: -9223372036854775808, sub: -9223372036854775808, mul: 0, div: 0}, + itd64{a: 0, b: -9223372036854775807, add: -9223372036854775807, sub: 9223372036854775807, mul: 0, div: 0}, + itd64{a: 0, b: -4294967296, add: -4294967296, sub: 4294967296, mul: 0, div: 0}, + itd64{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0}, + itd64{a: 0, b: 0, add: 0, sub: 0, mul: 0}, + itd64{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0}, + itd64{a: 0, b: 4294967296, add: 4294967296, sub: -4294967296, mul: 0, div: 0}, + itd64{a: 0, b: 9223372036854775806, add: 9223372036854775806, sub: -9223372036854775806, mul: 0, div: 0}, + itd64{a: 0, b: 9223372036854775807, add: 9223372036854775807, sub: -9223372036854775807, mul: 0, div: 0}, + itd64{a: 1, b: -9223372036854775808, add: -9223372036854775807, sub: -9223372036854775807, mul: -9223372036854775808, div: 0}, + itd64{a: 1, b: -9223372036854775807, add: -9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: 0}, + itd64{a: 1, b: -4294967296, add: -4294967295, sub: 4294967297, mul: -4294967296, div: 0}, + itd64{a: 1, b: -1, add: 0, 
sub: 2, mul: -1, div: -1}, + itd64{a: 1, b: 0, add: 1, sub: 1, mul: 0}, + itd64{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1}, + itd64{a: 1, b: 4294967296, add: 4294967297, sub: -4294967295, mul: 4294967296, div: 0}, + itd64{a: 1, b: 9223372036854775806, add: 9223372036854775807, sub: -9223372036854775805, mul: 9223372036854775806, div: 0}, + itd64{a: 1, b: 9223372036854775807, add: -9223372036854775808, sub: -9223372036854775806, mul: 9223372036854775807, div: 0}, + itd64{a: 4294967296, b: -9223372036854775808, add: -9223372032559808512, sub: -9223372032559808512, mul: 0, div: 0}, + itd64{a: 4294967296, b: -9223372036854775807, add: -9223372032559808511, sub: -9223372032559808513, mul: 4294967296, div: 0}, + itd64{a: 4294967296, b: -4294967296, add: 0, sub: 8589934592, mul: 0, div: -1}, + itd64{a: 4294967296, b: -1, add: 4294967295, sub: 4294967297, mul: -4294967296, div: -4294967296}, + itd64{a: 4294967296, b: 0, add: 4294967296, sub: 4294967296, mul: 0}, + itd64{a: 4294967296, b: 1, add: 4294967297, sub: 4294967295, mul: 4294967296, div: 4294967296}, + itd64{a: 4294967296, b: 4294967296, add: 8589934592, sub: 0, mul: 0, div: 1}, + itd64{a: 4294967296, b: 9223372036854775806, add: -9223372032559808514, sub: -9223372032559808510, mul: -8589934592, div: 0}, + itd64{a: 4294967296, b: 9223372036854775807, add: -9223372032559808513, sub: -9223372032559808511, mul: -4294967296, div: 0}, + itd64{a: 9223372036854775806, b: -9223372036854775808, add: -2, sub: -2, mul: 0, div: 0}, + itd64{a: 9223372036854775806, b: -9223372036854775807, add: -1, sub: -3, mul: 9223372036854775806, div: 0}, + itd64{a: 9223372036854775806, b: -4294967296, add: 9223372032559808510, sub: -9223372032559808514, mul: 8589934592, div: -2147483647}, + itd64{a: 9223372036854775806, b: -1, add: 9223372036854775805, sub: 9223372036854775807, mul: -9223372036854775806, div: -9223372036854775806}, + itd64{a: 9223372036854775806, b: 0, add: 9223372036854775806, sub: 9223372036854775806, mul: 0}, + itd64{a: 
9223372036854775806, b: 1, add: 9223372036854775807, sub: 9223372036854775805, mul: 9223372036854775806, div: 9223372036854775806}, + itd64{a: 9223372036854775806, b: 4294967296, add: -9223372032559808514, sub: 9223372032559808510, mul: -8589934592, div: 2147483647}, + itd64{a: 9223372036854775806, b: 9223372036854775806, add: -4, sub: 0, mul: 4, div: 1}, + itd64{a: 9223372036854775806, b: 9223372036854775807, add: -3, sub: -1, mul: -9223372036854775806, div: 0}, + itd64{a: 9223372036854775807, b: -9223372036854775808, add: -1, sub: -1, mul: -9223372036854775808, div: 0}, + itd64{a: 9223372036854775807, b: -9223372036854775807, add: 0, sub: -2, mul: -1, div: -1}, + itd64{a: 9223372036854775807, b: -4294967296, add: 9223372032559808511, sub: -9223372032559808513, mul: 4294967296, div: -2147483647}, + itd64{a: 9223372036854775807, b: -1, add: 9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: -9223372036854775807}, + itd64{a: 9223372036854775807, b: 0, add: 9223372036854775807, sub: 9223372036854775807, mul: 0}, + itd64{a: 9223372036854775807, b: 1, add: -9223372036854775808, sub: 9223372036854775806, mul: 9223372036854775807, div: 9223372036854775807}, + itd64{a: 9223372036854775807, b: 4294967296, add: -9223372032559808513, sub: 9223372032559808511, mul: -4294967296, div: 2147483647}, + itd64{a: 9223372036854775807, b: 9223372036854775806, add: -3, sub: 1, mul: -9223372036854775806, div: 1}, + itd64{a: 9223372036854775807, b: 9223372036854775807, add: -2, sub: 0, mul: 1, div: 1}, +} +var uint32_data []utd32 = []utd32{utd32{a: 0, b: 0, add: 0, sub: 0, mul: 0}, + utd32{a: 0, b: 1, add: 1, sub: 4294967295, mul: 0, div: 0}, + utd32{a: 0, b: 4294967295, add: 4294967295, sub: 1, mul: 0, div: 0}, + utd32{a: 1, b: 0, add: 1, sub: 1, mul: 0}, + utd32{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1}, + utd32{a: 1, b: 4294967295, add: 0, sub: 2, mul: 4294967295, div: 0}, + utd32{a: 4294967295, b: 0, add: 4294967295, sub: 4294967295, mul: 0}, + utd32{a: 
4294967295, b: 1, add: 0, sub: 4294967294, mul: 4294967295, div: 4294967295}, + utd32{a: 4294967295, b: 4294967295, add: 4294967294, sub: 0, mul: 1, div: 1}, +} +var int32_data []itd32 = []itd32{itd32{a: -2147483648, b: -2147483648, add: 0, sub: 0, mul: 0, div: 1}, + itd32{a: -2147483648, b: -2147483647, add: 1, sub: -1, mul: -2147483648, div: 1}, + itd32{a: -2147483648, b: -1, add: 2147483647, sub: -2147483647, mul: -2147483648, div: -2147483648}, + itd32{a: -2147483648, b: 0, add: -2147483648, sub: -2147483648, mul: 0}, + itd32{a: -2147483648, b: 1, add: -2147483647, sub: 2147483647, mul: -2147483648, div: -2147483648}, + itd32{a: -2147483648, b: 2147483647, add: -1, sub: 1, mul: -2147483648, div: -1}, + itd32{a: -2147483647, b: -2147483648, add: 1, sub: 1, mul: -2147483648, div: 0}, + itd32{a: -2147483647, b: -2147483647, add: 2, sub: 0, mul: 1, div: 1}, + itd32{a: -2147483647, b: -1, add: -2147483648, sub: -2147483646, mul: 2147483647, div: 2147483647}, + itd32{a: -2147483647, b: 0, add: -2147483647, sub: -2147483647, mul: 0}, + itd32{a: -2147483647, b: 1, add: -2147483646, sub: -2147483648, mul: -2147483647, div: -2147483647}, + itd32{a: -2147483647, b: 2147483647, add: 0, sub: 2, mul: -1, div: -1}, + itd32{a: -1, b: -2147483648, add: 2147483647, sub: 2147483647, mul: -2147483648, div: 0}, + itd32{a: -1, b: -2147483647, add: -2147483648, sub: 2147483646, mul: 2147483647, div: 0}, + itd32{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1}, + itd32{a: -1, b: 0, add: -1, sub: -1, mul: 0}, + itd32{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1}, + itd32{a: -1, b: 2147483647, add: 2147483646, sub: -2147483648, mul: -2147483647, div: 0}, + itd32{a: 0, b: -2147483648, add: -2147483648, sub: -2147483648, mul: 0, div: 0}, + itd32{a: 0, b: -2147483647, add: -2147483647, sub: 2147483647, mul: 0, div: 0}, + itd32{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0}, + itd32{a: 0, b: 0, add: 0, sub: 0, mul: 0}, + itd32{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0}, + itd32{a: 0, b: 
2147483647, add: 2147483647, sub: -2147483647, mul: 0, div: 0}, + itd32{a: 1, b: -2147483648, add: -2147483647, sub: -2147483647, mul: -2147483648, div: 0}, + itd32{a: 1, b: -2147483647, add: -2147483646, sub: -2147483648, mul: -2147483647, div: 0}, + itd32{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1}, + itd32{a: 1, b: 0, add: 1, sub: 1, mul: 0}, + itd32{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1}, + itd32{a: 1, b: 2147483647, add: -2147483648, sub: -2147483646, mul: 2147483647, div: 0}, + itd32{a: 2147483647, b: -2147483648, add: -1, sub: -1, mul: -2147483648, div: 0}, + itd32{a: 2147483647, b: -2147483647, add: 0, sub: -2, mul: -1, div: -1}, + itd32{a: 2147483647, b: -1, add: 2147483646, sub: -2147483648, mul: -2147483647, div: -2147483647}, + itd32{a: 2147483647, b: 0, add: 2147483647, sub: 2147483647, mul: 0}, + itd32{a: 2147483647, b: 1, add: -2147483648, sub: 2147483646, mul: 2147483647, div: 2147483647}, + itd32{a: 2147483647, b: 2147483647, add: -2, sub: 0, mul: 1, div: 1}, +} +var uint16_data []utd16 = []utd16{utd16{a: 0, b: 0, add: 0, sub: 0, mul: 0}, + utd16{a: 0, b: 1, add: 1, sub: 65535, mul: 0, div: 0}, + utd16{a: 0, b: 65535, add: 65535, sub: 1, mul: 0, div: 0}, + utd16{a: 1, b: 0, add: 1, sub: 1, mul: 0}, + utd16{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1}, + utd16{a: 1, b: 65535, add: 0, sub: 2, mul: 65535, div: 0}, + utd16{a: 65535, b: 0, add: 65535, sub: 65535, mul: 0}, + utd16{a: 65535, b: 1, add: 0, sub: 65534, mul: 65535, div: 65535}, + utd16{a: 65535, b: 65535, add: 65534, sub: 0, mul: 1, div: 1}, +} +var int16_data []itd16 = []itd16{itd16{a: -32768, b: -32768, add: 0, sub: 0, mul: 0, div: 1}, + itd16{a: -32768, b: -32767, add: 1, sub: -1, mul: -32768, div: 1}, + itd16{a: -32768, b: -1, add: 32767, sub: -32767, mul: -32768, div: -32768}, + itd16{a: -32768, b: 0, add: -32768, sub: -32768, mul: 0}, + itd16{a: -32768, b: 1, add: -32767, sub: 32767, mul: -32768, div: -32768}, + itd16{a: -32768, b: 32766, add: -2, sub: 2, mul: 0, div: -1}, + 
itd16{a: -32768, b: 32767, add: -1, sub: 1, mul: -32768, div: -1}, + itd16{a: -32767, b: -32768, add: 1, sub: 1, mul: -32768, div: 0}, + itd16{a: -32767, b: -32767, add: 2, sub: 0, mul: 1, div: 1}, + itd16{a: -32767, b: -1, add: -32768, sub: -32766, mul: 32767, div: 32767}, + itd16{a: -32767, b: 0, add: -32767, sub: -32767, mul: 0}, + itd16{a: -32767, b: 1, add: -32766, sub: -32768, mul: -32767, div: -32767}, + itd16{a: -32767, b: 32766, add: -1, sub: 3, mul: 32766, div: -1}, + itd16{a: -32767, b: 32767, add: 0, sub: 2, mul: -1, div: -1}, + itd16{a: -1, b: -32768, add: 32767, sub: 32767, mul: -32768, div: 0}, + itd16{a: -1, b: -32767, add: -32768, sub: 32766, mul: 32767, div: 0}, + itd16{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1}, + itd16{a: -1, b: 0, add: -1, sub: -1, mul: 0}, + itd16{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1}, + itd16{a: -1, b: 32766, add: 32765, sub: -32767, mul: -32766, div: 0}, + itd16{a: -1, b: 32767, add: 32766, sub: -32768, mul: -32767, div: 0}, + itd16{a: 0, b: -32768, add: -32768, sub: -32768, mul: 0, div: 0}, + itd16{a: 0, b: -32767, add: -32767, sub: 32767, mul: 0, div: 0}, + itd16{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0}, + itd16{a: 0, b: 0, add: 0, sub: 0, mul: 0}, + itd16{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0}, + itd16{a: 0, b: 32766, add: 32766, sub: -32766, mul: 0, div: 0}, + itd16{a: 0, b: 32767, add: 32767, sub: -32767, mul: 0, div: 0}, + itd16{a: 1, b: -32768, add: -32767, sub: -32767, mul: -32768, div: 0}, + itd16{a: 1, b: -32767, add: -32766, sub: -32768, mul: -32767, div: 0}, + itd16{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1}, + itd16{a: 1, b: 0, add: 1, sub: 1, mul: 0}, + itd16{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1}, + itd16{a: 1, b: 32766, add: 32767, sub: -32765, mul: 32766, div: 0}, + itd16{a: 1, b: 32767, add: -32768, sub: -32766, mul: 32767, div: 0}, + itd16{a: 32766, b: -32768, add: -2, sub: -2, mul: 0, div: 0}, + itd16{a: 32766, b: -32767, add: -1, sub: -3, mul: 32766, div: 0}, + itd16{a: 
32766, b: -1, add: 32765, sub: 32767, mul: -32766, div: -32766}, + itd16{a: 32766, b: 0, add: 32766, sub: 32766, mul: 0}, + itd16{a: 32766, b: 1, add: 32767, sub: 32765, mul: 32766, div: 32766}, + itd16{a: 32766, b: 32766, add: -4, sub: 0, mul: 4, div: 1}, + itd16{a: 32766, b: 32767, add: -3, sub: -1, mul: -32766, div: 0}, + itd16{a: 32767, b: -32768, add: -1, sub: -1, mul: -32768, div: 0}, + itd16{a: 32767, b: -32767, add: 0, sub: -2, mul: -1, div: -1}, + itd16{a: 32767, b: -1, add: 32766, sub: -32768, mul: -32767, div: -32767}, + itd16{a: 32767, b: 0, add: 32767, sub: 32767, mul: 0}, + itd16{a: 32767, b: 1, add: -32768, sub: 32766, mul: 32767, div: 32767}, + itd16{a: 32767, b: 32766, add: -3, sub: 1, mul: -32766, div: 1}, + itd16{a: 32767, b: 32767, add: -2, sub: 0, mul: 1, div: 1}, +} +var uint8_data []utd8 = []utd8{utd8{a: 0, b: 0, add: 0, sub: 0, mul: 0}, + utd8{a: 0, b: 1, add: 1, sub: 255, mul: 0, div: 0}, + utd8{a: 0, b: 255, add: 255, sub: 1, mul: 0, div: 0}, + utd8{a: 1, b: 0, add: 1, sub: 1, mul: 0}, + utd8{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1}, + utd8{a: 1, b: 255, add: 0, sub: 2, mul: 255, div: 0}, + utd8{a: 255, b: 0, add: 255, sub: 255, mul: 0}, + utd8{a: 255, b: 1, add: 0, sub: 254, mul: 255, div: 255}, + utd8{a: 255, b: 255, add: 254, sub: 0, mul: 1, div: 1}, +} +var int8_data []itd8 = []itd8{itd8{a: -128, b: -128, add: 0, sub: 0, mul: 0, div: 1}, + itd8{a: -128, b: -127, add: 1, sub: -1, mul: -128, div: 1}, + itd8{a: -128, b: -1, add: 127, sub: -127, mul: -128, div: -128}, + itd8{a: -128, b: 0, add: -128, sub: -128, mul: 0}, + itd8{a: -128, b: 1, add: -127, sub: 127, mul: -128, div: -128}, + itd8{a: -128, b: 126, add: -2, sub: 2, mul: 0, div: -1}, + itd8{a: -128, b: 127, add: -1, sub: 1, mul: -128, div: -1}, + itd8{a: -127, b: -128, add: 1, sub: 1, mul: -128, div: 0}, + itd8{a: -127, b: -127, add: 2, sub: 0, mul: 1, div: 1}, + itd8{a: -127, b: -1, add: -128, sub: -126, mul: 127, div: 127}, + itd8{a: -127, b: 0, add: -127, sub: -127, mul: 0}, 
+ itd8{a: -127, b: 1, add: -126, sub: -128, mul: -127, div: -127}, + itd8{a: -127, b: 126, add: -1, sub: 3, mul: 126, div: -1}, + itd8{a: -127, b: 127, add: 0, sub: 2, mul: -1, div: -1}, + itd8{a: -1, b: -128, add: 127, sub: 127, mul: -128, div: 0}, + itd8{a: -1, b: -127, add: -128, sub: 126, mul: 127, div: 0}, + itd8{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1}, + itd8{a: -1, b: 0, add: -1, sub: -1, mul: 0}, + itd8{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1}, + itd8{a: -1, b: 126, add: 125, sub: -127, mul: -126, div: 0}, + itd8{a: -1, b: 127, add: 126, sub: -128, mul: -127, div: 0}, + itd8{a: 0, b: -128, add: -128, sub: -128, mul: 0, div: 0}, + itd8{a: 0, b: -127, add: -127, sub: 127, mul: 0, div: 0}, + itd8{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0}, + itd8{a: 0, b: 0, add: 0, sub: 0, mul: 0}, + itd8{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0}, + itd8{a: 0, b: 126, add: 126, sub: -126, mul: 0, div: 0}, + itd8{a: 0, b: 127, add: 127, sub: -127, mul: 0, div: 0}, + itd8{a: 1, b: -128, add: -127, sub: -127, mul: -128, div: 0}, + itd8{a: 1, b: -127, add: -126, sub: -128, mul: -127, div: 0}, + itd8{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1}, + itd8{a: 1, b: 0, add: 1, sub: 1, mul: 0}, + itd8{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1}, + itd8{a: 1, b: 126, add: 127, sub: -125, mul: 126, div: 0}, + itd8{a: 1, b: 127, add: -128, sub: -126, mul: 127, div: 0}, + itd8{a: 126, b: -128, add: -2, sub: -2, mul: 0, div: 0}, + itd8{a: 126, b: -127, add: -1, sub: -3, mul: 126, div: 0}, + itd8{a: 126, b: -1, add: 125, sub: 127, mul: -126, div: -126}, + itd8{a: 126, b: 0, add: 126, sub: 126, mul: 0}, + itd8{a: 126, b: 1, add: 127, sub: 125, mul: 126, div: 126}, + itd8{a: 126, b: 126, add: -4, sub: 0, mul: 4, div: 1}, + itd8{a: 126, b: 127, add: -3, sub: -1, mul: -126, div: 0}, + itd8{a: 127, b: -128, add: -1, sub: -1, mul: -128, div: 0}, + itd8{a: 127, b: -127, add: 0, sub: -2, mul: -1, div: -1}, + itd8{a: 127, b: -1, add: 126, sub: -128, mul: -127, div: -127}, + 
itd8{a: 127, b: 0, add: 127, sub: 127, mul: 0}, + itd8{a: 127, b: 1, add: -128, sub: 126, mul: 127, div: 127}, + itd8{a: 127, b: 126, add: -3, sub: 1, mul: -126, div: 1}, + itd8{a: 127, b: 127, add: -2, sub: 0, mul: 1, div: 1}, +} +var failed bool + +func main() { + + for _, v := range uint64_data { + if got := add_uint64_ssa(v.a, v.b); got != v.add { + fmt.Printf("add_uint64 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add) + failed = true + } + if got := sub_uint64_ssa(v.a, v.b); got != v.sub { + fmt.Printf("sub_uint64 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub) + failed = true + } + if v.b != 0 { + if got := div_uint64_ssa(v.a, v.b); got != v.div { + fmt.Printf("div_uint64 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div) + failed = true + } + + } + if got := mul_uint64_ssa(v.a, v.b); got != v.mul { + fmt.Printf("mul_uint64 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul) + failed = true + } + } + for _, v := range int64_data { + if got := add_int64_ssa(v.a, v.b); got != v.add { + fmt.Printf("add_int64 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add) + failed = true + } + if got := sub_int64_ssa(v.a, v.b); got != v.sub { + fmt.Printf("sub_int64 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub) + failed = true + } + if v.b != 0 { + if got := div_int64_ssa(v.a, v.b); got != v.div { + fmt.Printf("div_int64 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div) + failed = true + } + + } + if got := mul_int64_ssa(v.a, v.b); got != v.mul { + fmt.Printf("mul_int64 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul) + failed = true + } + } + for _, v := range uint32_data { + if got := add_uint32_ssa(v.a, v.b); got != v.add { + fmt.Printf("add_uint32 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add) + failed = true + } + if got := sub_uint32_ssa(v.a, v.b); got != v.sub { + fmt.Printf("sub_uint32 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub) + failed = true + } + if v.b != 0 { + if got := div_uint32_ssa(v.a, v.b); got != v.div { + fmt.Printf("div_uint32 %d/%d = %d, wanted %d\n", v.a, 
v.b, got, v.div) + failed = true + } + + } + if got := mul_uint32_ssa(v.a, v.b); got != v.mul { + fmt.Printf("mul_uint32 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul) + failed = true + } + } + for _, v := range int32_data { + if got := add_int32_ssa(v.a, v.b); got != v.add { + fmt.Printf("add_int32 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add) + failed = true + } + if got := sub_int32_ssa(v.a, v.b); got != v.sub { + fmt.Printf("sub_int32 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub) + failed = true + } + if v.b != 0 { + if got := div_int32_ssa(v.a, v.b); got != v.div { + fmt.Printf("div_int32 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div) + failed = true + } + + } + if got := mul_int32_ssa(v.a, v.b); got != v.mul { + fmt.Printf("mul_int32 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul) + failed = true + } + } + for _, v := range uint16_data { + if got := add_uint16_ssa(v.a, v.b); got != v.add { + fmt.Printf("add_uint16 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add) + failed = true + } + if got := sub_uint16_ssa(v.a, v.b); got != v.sub { + fmt.Printf("sub_uint16 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub) + failed = true + } + if v.b != 0 { + if got := div_uint16_ssa(v.a, v.b); got != v.div { + fmt.Printf("div_uint16 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div) + failed = true + } + + } + if got := mul_uint16_ssa(v.a, v.b); got != v.mul { + fmt.Printf("mul_uint16 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul) + failed = true + } + } + for _, v := range int16_data { + if got := add_int16_ssa(v.a, v.b); got != v.add { + fmt.Printf("add_int16 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add) + failed = true + } + if got := sub_int16_ssa(v.a, v.b); got != v.sub { + fmt.Printf("sub_int16 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub) + failed = true + } + if v.b != 0 { + if got := div_int16_ssa(v.a, v.b); got != v.div { + fmt.Printf("div_int16 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div) + failed = true + } + + } + if got := mul_int16_ssa(v.a, v.b); got 
!= v.mul { + fmt.Printf("mul_int16 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul) + failed = true + } + } + for _, v := range uint8_data { + if got := add_uint8_ssa(v.a, v.b); got != v.add { + fmt.Printf("add_uint8 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add) + failed = true + } + if got := sub_uint8_ssa(v.a, v.b); got != v.sub { + fmt.Printf("sub_uint8 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub) + failed = true + } + if v.b != 0 { + if got := div_uint8_ssa(v.a, v.b); got != v.div { + fmt.Printf("div_uint8 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div) + failed = true + } + + } + if got := mul_uint8_ssa(v.a, v.b); got != v.mul { + fmt.Printf("mul_uint8 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul) + failed = true + } + } + for _, v := range int8_data { + if got := add_int8_ssa(v.a, v.b); got != v.add { + fmt.Printf("add_int8 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add) + failed = true + } + if got := sub_int8_ssa(v.a, v.b); got != v.sub { + fmt.Printf("sub_int8 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub) + failed = true + } + if v.b != 0 { + if got := div_int8_ssa(v.a, v.b); got != v.div { + fmt.Printf("div_int8 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div) + failed = true + } + + } + if got := mul_int8_ssa(v.a, v.b); got != v.mul { + fmt.Printf("mul_int8 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul) + failed = true + } + } + if failed { + panic("tests failed") + } +} diff --git a/src/cmd/compile/internal/ssa/TODO b/src/cmd/compile/internal/ssa/TODO index d049bea872..1773dbbc98 100644 --- a/src/cmd/compile/internal/ssa/TODO +++ b/src/cmd/compile/internal/ssa/TODO @@ -5,7 +5,7 @@ Coverage -------- - Floating point numbers - Complex numbers -- Integer division +- Integer division (HMUL & MOD) - Fat objects (strings/slices/interfaces) vs. Phi - Defer? - Closure args @@ -49,6 +49,7 @@ Optimizations (better compiler) - OpStore uses 3 args. Increase the size of Value.argstorage to 3? - Constant cache - Reuseable slices (e.g. 
[]int of size NumValues()) cached in Func +- Handle signed division overflow and sign extension earlier Regalloc -------- diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 9ea9781d93..0cde6f26d4 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -36,6 +36,15 @@ (Div32F x y) -> (DIVSS x y) (Div64F x y) -> (DIVSD x y) +(Div64 x y) -> (DIVQ x y) +(Div64u x y) -> (DIVQU x y) +(Div32 x y) -> (DIVL x y) +(Div32u x y) -> (DIVLU x y) +(Div16 x y) -> (DIVW x y) +(Div16u x y) -> (DIVWU x y) +(Div8 x y) -> (DIVW (SignExt8to16 x) (SignExt8to16 y)) +(Div8u x y) -> (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)) + (And64 x y) -> (ANDQ x y) (And32 x y) -> (ANDL x y) (And16 x y) -> (ANDW x y) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 5aa5e60e33..220e5b01cd 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -72,7 +72,9 @@ func init() { // Common individual register masks var ( + ax = buildReg("AX") cx = buildReg("CX") + dx = buildReg("DX") x15 = buildReg("X15") gp = buildReg("AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15") fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15") @@ -97,6 +99,8 @@ func init() { gp21 = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: gponly, clobbers: flags} gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly, clobbers: flags} gp21shift = regInfo{inputs: []regMask{gpsp, cx}, outputs: []regMask{gp &^ cx}, clobbers: flags} + gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, + clobbers: dx | flags} gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly} gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly} @@ -180,6 +184,14 @@ func init() { {name: "MULWconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint {name: "MULBconst", reg: 
gp11, asm: "IMULW"}, // arg0 * auxint + {name: "DIVQ", reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1 + {name: "DIVL", reg: gp11div, asm: "IDIVL"}, // arg0 / arg1 + {name: "DIVW", reg: gp11div, asm: "IDIVW"}, // arg0 / arg1 + + {name: "DIVQU", reg: gp11div, asm: "DIVQ"}, // arg0 / arg1 + {name: "DIVLU", reg: gp11div, asm: "DIVL"}, // arg0 / arg1 + {name: "DIVWU", reg: gp11div, asm: "DIVW"}, // arg0 / arg1 + {name: "ANDQ", reg: gp21, asm: "ANDQ"}, // arg0 & arg1 {name: "ANDL", reg: gp21, asm: "ANDL"}, // arg0 & arg1 {name: "ANDW", reg: gp21, asm: "ANDW"}, // arg0 & arg1 diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index 1488e0f644..a0d8f8e000 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -37,6 +37,15 @@ var genericOps = []opData{ {name: "Div64F"}, // TODO: Div8, Div16, Div32, Div64 and unsigned + {name: "Div8"}, // arg0 / arg1 + {name: "Div8u"}, + {name: "Div16"}, + {name: "Div16u"}, + {name: "Div32"}, + {name: "Div32u"}, + {name: "Div64"}, + {name: "Div64u"}, + {name: "And8"}, // arg0 & arg1 {name: "And16"}, {name: "And32"}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index cbabbfade5..44fd6e3737 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -93,6 +93,12 @@ const ( OpAMD64MULLconst OpAMD64MULWconst OpAMD64MULBconst + OpAMD64DIVQ + OpAMD64DIVL + OpAMD64DIVW + OpAMD64DIVQU + OpAMD64DIVLU + OpAMD64DIVWU OpAMD64ANDQ OpAMD64ANDL OpAMD64ANDW @@ -239,6 +245,14 @@ const ( OpMul64F OpDiv32F OpDiv64F + OpDiv8 + OpDiv8u + OpDiv16 + OpDiv16u + OpDiv32 + OpDiv32u + OpDiv64 + OpDiv64u OpAnd8 OpAnd16 OpAnd32 @@ -963,6 +977,90 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "DIVQ", + asm: x86.AIDIVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1}, // .AX + {1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + 
clobbers: 8589934596, // .DX .FLAGS + outputs: []regMask{ + 1, // .AX + }, + }, + }, + { + name: "DIVL", + asm: x86.AIDIVL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1}, // .AX + {1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + clobbers: 8589934596, // .DX .FLAGS + outputs: []regMask{ + 1, // .AX + }, + }, + }, + { + name: "DIVW", + asm: x86.AIDIVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1}, // .AX + {1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + clobbers: 8589934596, // .DX .FLAGS + outputs: []regMask{ + 1, // .AX + }, + }, + }, + { + name: "DIVQU", + asm: x86.ADIVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1}, // .AX + {1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + clobbers: 8589934596, // .DX .FLAGS + outputs: []regMask{ + 1, // .AX + }, + }, + }, + { + name: "DIVLU", + asm: x86.ADIVL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1}, // .AX + {1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + clobbers: 8589934596, // .DX .FLAGS + outputs: []regMask{ + 1, // .AX + }, + }, + }, + { + name: "DIVWU", + asm: x86.ADIVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1}, // .AX + {1, 65531}, // .AX .CX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + clobbers: 8589934596, // .DX .FLAGS + outputs: []regMask{ + 1, // .AX + }, + }, + }, { name: "ANDQ", asm: x86.AANDQ, @@ -2592,6 +2690,38 @@ var opcodeTable = [...]opInfo{ name: "Div64F", generic: true, }, + { + name: "Div8", + generic: true, + }, + { + name: "Div8u", + generic: true, + }, + { + name: "Div16", + generic: true, + }, + { + name: "Div16u", + generic: true, + }, + { + name: "Div32", + generic: true, + }, + { + name: "Div32u", + generic: true, + }, + { + name: "Div64", + generic: true, + }, + { + name: "Div64u", + generic: true, + }, { name: "And8", generic: true, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go 
b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 75393ad58a..993838b537 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1688,6 +1688,60 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto endc395c0a53eeccf597e225a07b53047d1 endc395c0a53eeccf597e225a07b53047d1: ; + case OpDiv16: + // match: (Div16 x y) + // cond: + // result: (DIVW x y) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64DIVW + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v.AddArg(x) + v.AddArg(y) + return true + } + goto endb60a86e606726640c84d3e1e5a5ce890 + endb60a86e606726640c84d3e1e5a5ce890: + ; + case OpDiv16u: + // match: (Div16u x y) + // cond: + // result: (DIVWU x y) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64DIVWU + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v.AddArg(x) + v.AddArg(y) + return true + } + goto end6af9e212a865593e506bfdf7db67c9ec + end6af9e212a865593e506bfdf7db67c9ec: + ; + case OpDiv32: + // match: (Div32 x y) + // cond: + // result: (DIVL x y) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64DIVL + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v.AddArg(x) + v.AddArg(y) + return true + } + goto endf20ac71407e57c2904684d3cc33cf697 + endf20ac71407e57c2904684d3cc33cf697: + ; case OpDiv32F: // match: (Div32F x y) // cond: @@ -1706,6 +1760,42 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto enddca0462c7b176c4138854d7d5627ab5b enddca0462c7b176c4138854d7d5627ab5b: ; + case OpDiv32u: + // match: (Div32u x y) + // cond: + // result: (DIVLU x y) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64DIVLU + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v.AddArg(x) + v.AddArg(y) + return true + } + goto enda22604d23eeb1298008c97b817f60bbd + enda22604d23eeb1298008c97b817f60bbd: + ; + case OpDiv64: + // match: (Div64 x y) + // cond: + // result: (DIVQ x y) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64DIVQ + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v.AddArg(x) + 
v.AddArg(y) + return true + } + goto end86490d9b337333dfc09a413e1e0120a9 + end86490d9b337333dfc09a413e1e0120a9: + ; case OpDiv64F: // match: (Div64F x y) // cond: @@ -1724,6 +1814,72 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto end12299d76db5144a60f564d34ba97eb43 end12299d76db5144a60f564d34ba97eb43: ; + case OpDiv64u: + // match: (Div64u x y) + // cond: + // result: (DIVQU x y) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64DIVQU + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v.AddArg(x) + v.AddArg(y) + return true + } + goto endf871d8b397e5fad6a5b500cc0c759a8d + endf871d8b397e5fad6a5b500cc0c759a8d: + ; + case OpDiv8: + // match: (Div8 x y) + // cond: + // result: (DIVW (SignExt8to16 x) (SignExt8to16 y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64DIVW + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpSignExt8to16, TypeInvalid) + v0.Type = config.Frontend().TypeInt16() + v0.AddArg(x) + v.AddArg(v0) + v1 := b.NewValue0(v.Line, OpSignExt8to16, TypeInvalid) + v1.Type = config.Frontend().TypeInt16() + v1.AddArg(y) + v.AddArg(v1) + return true + } + goto ende25a7899b9c7a869f74226b4b6033084 + ende25a7899b9c7a869f74226b4b6033084: + ; + case OpDiv8u: + // match: (Div8u x y) + // cond: + // result: (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64DIVWU + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpZeroExt8to16, TypeInvalid) + v0.Type = config.Frontend().TypeUInt16() + v0.AddArg(x) + v.AddArg(v0) + v1 := b.NewValue0(v.Line, OpZeroExt8to16, TypeInvalid) + v1.Type = config.Frontend().TypeUInt16() + v1.AddArg(y) + v.AddArg(v1) + return true + } + goto ende655b41d48feafc4d139b815a3b7b55c + ende655b41d48feafc4d139b815a3b7b55c: + ; case OpEq16: // match: (Eq16 x y) // cond: