1
0
mirror of https://github.com/golang/go synced 2024-10-05 19:21:21 -06:00

[dev.ssa] cmd/compile/internal/ssa: implement OHMUL

Adds support for high multiply, i.e. (arg0 * arg1) >> width, which the
frontend uses when rewriting division by a constant.  The frontend
currently only does this for 8, 16, and 32 bit integer arithmetic.

Change-Id: I9b6c6018f3be827a50ee6c185454ebc79b3094c8
Reviewed-on: https://go-review.googlesource.com/13696
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Todd Neal 2015-08-18 19:14:47 -05:00
parent a45f2d8f28
commit 67cbd5b51d
10 changed files with 4266 additions and 4 deletions

View File

@ -779,6 +779,13 @@ var opToSSA = map[opAndType]ssa.Op{
opAndType{ODIV, TFLOAT32}: ssa.OpDiv32F, opAndType{ODIV, TFLOAT32}: ssa.OpDiv32F,
opAndType{ODIV, TFLOAT64}: ssa.OpDiv64F, opAndType{ODIV, TFLOAT64}: ssa.OpDiv64F,
opAndType{OHMUL, TINT8}: ssa.OpHmul8,
opAndType{OHMUL, TUINT8}: ssa.OpHmul8u,
opAndType{OHMUL, TINT16}: ssa.OpHmul16,
opAndType{OHMUL, TUINT16}: ssa.OpHmul16u,
opAndType{OHMUL, TINT32}: ssa.OpHmul32,
opAndType{OHMUL, TUINT32}: ssa.OpHmul32u,
opAndType{ODIV, TINT8}: ssa.OpDiv8, opAndType{ODIV, TINT8}: ssa.OpDiv8,
opAndType{ODIV, TUINT8}: ssa.OpDiv8u, opAndType{ODIV, TUINT8}: ssa.OpDiv8u,
opAndType{ODIV, TINT16}: ssa.OpDiv16, opAndType{ODIV, TINT16}: ssa.OpDiv16,
@ -1201,7 +1208,7 @@ func (s *state) expr(n *Node) *ssa.Value {
a := s.expr(n.Left) a := s.expr(n.Left)
b := s.expr(n.Right) b := s.expr(n.Right)
return s.newValue2(s.ssaOp(n.Op, n.Left.Type), Types[TBOOL], a, b) return s.newValue2(s.ssaOp(n.Op, n.Left.Type), Types[TBOOL], a, b)
case OADD, OAND, OMUL, OOR, OSUB, ODIV, OXOR: case OADD, OAND, OMUL, OOR, OSUB, ODIV, OXOR, OHMUL:
a := s.expr(n.Left) a := s.expr(n.Left)
b := s.expr(n.Right) b := s.expr(n.Right)
return s.newValue2(s.ssaOp(n.Op, n.Type), a.Type, a, b) return s.newValue2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
@ -2099,6 +2106,27 @@ func genValue(v *ssa.Value) {
j2.To.Val = Pc j2.To.Val = Pc
} }
case ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
// the frontend rewrites constant division by 8/16/32 bit integers into
// HMUL by a constant
// Arg[0] is already in AX as it's the only register we allow
// and DX is the only output we care about (the high bits)
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[1])
// IMULB puts the high portion in AH instead of DL,
// so move it to DL for consistency
if v.Type.Size() == 1 {
m := Prog(x86.AMOVB)
m.From.Type = obj.TYPE_REG
m.From.Reg = x86.REG_AH
m.To.Type = obj.TYPE_REG
m.To.Reg = x86.REG_DX
}
case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB, case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB,
ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB, ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB: ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB:

View File

@ -51,3 +51,6 @@ func TestFP(t *testing.T) { runTest(t, "fp_ssa.go") }
// TestArithmeticBoundary tests boundary results for arithmetic operations. // TestArithmeticBoundary tests boundary results for arithmetic operations.
func TestArithmeticBoundary(t *testing.T) { runTest(t, "arithBoundary_ssa.go") } func TestArithmeticBoundary(t *testing.T) { runTest(t, "arithBoundary_ssa.go") }
// TestArithmeticConst runs arithConst_ssa.go, which tests results for
// arithmetic operations against constants.
func TestArithmeticConst(t *testing.T) { runTest(t, "arithConst_ssa.go") }

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
package main package main
// test64BitConstMulti tests that rewrite rules don't fold 64 bit constants // test64BitConstMult tests that rewrite rules don't fold 64 bit constants
// into multiply instructions. // into multiply instructions.
func test64BitConstMult() { func test64BitConstMult() {
want := int64(103079215109) want := int64(103079215109)

View File

@ -5,7 +5,7 @@ Coverage
-------- --------
- Floating point numbers - Floating point numbers
- Complex numbers - Complex numbers
- Integer division (HMUL & MOD) - Integer division (MOD)
- Fat objects (strings/slices/interfaces) vs. Phi - Fat objects (strings/slices/interfaces) vs. Phi
- Defer? - Defer?
- Closure args - Closure args
@ -50,6 +50,7 @@ Optimizations (better compiler)
- Constant cache - Constant cache
- Reuseable slices (e.g. []int of size NumValues()) cached in Func - Reuseable slices (e.g. []int of size NumValues()) cached in Func
- Handle signed division overflow and sign extension earlier - Handle signed division overflow and sign extension earlier
- Implement 64 bit const division with high multiply, maybe in the frontend?
Regalloc Regalloc
-------- --------

View File

@ -45,6 +45,13 @@
(Div8 x y) -> (DIVW (SignExt8to16 <config.Frontend().TypeInt16()> x) (SignExt8to16 <config.Frontend().TypeInt16()> y)) (Div8 x y) -> (DIVW (SignExt8to16 <config.Frontend().TypeInt16()> x) (SignExt8to16 <config.Frontend().TypeInt16()> y))
(Div8u x y) -> (DIVWU (ZeroExt8to16 <config.Frontend().TypeUInt16()> x) (ZeroExt8to16 <config.Frontend().TypeUInt16()> y)) (Div8u x y) -> (DIVWU (ZeroExt8to16 <config.Frontend().TypeUInt16()> x) (ZeroExt8to16 <config.Frontend().TypeUInt16()> y))
(Hmul32 x y) -> (HMULL x y)
(Hmul32u x y) -> (HMULLU x y)
(Hmul16 x y) -> (HMULW x y)
(Hmul16u x y) -> (HMULWU x y)
(Hmul8 x y) -> (HMULB x y)
(Hmul8u x y) -> (HMULBU x y)
(And64 x y) -> (ANDQ x y) (And64 x y) -> (ANDQ x y)
(And32 x y) -> (ANDL x y) (And32 x y) -> (ANDL x y)
(And16 x y) -> (ANDW x y) (And16 x y) -> (ANDW x y)

View File

@ -101,6 +101,8 @@ func init() {
gp21shift = regInfo{inputs: []regMask{gpsp, cx}, outputs: []regMask{gp &^ cx}, clobbers: flags} gp21shift = regInfo{inputs: []regMask{gpsp, cx}, outputs: []regMask{gp &^ cx}, clobbers: flags}
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
clobbers: dx | flags} clobbers: dx | flags}
gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx},
clobbers: ax | flags}
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly} gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly} gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
@ -184,10 +186,16 @@ func init() {
{name: "MULWconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint {name: "MULWconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint
{name: "MULBconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint {name: "MULBconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint
{name: "HMULL", reg: gp11hmul, asm: "IMULL"}, // (arg0 * arg1) >> width
{name: "HMULW", reg: gp11hmul, asm: "IMULW"}, // (arg0 * arg1) >> width
{name: "HMULB", reg: gp11hmul, asm: "IMULB"}, // (arg0 * arg1) >> width
{name: "HMULLU", reg: gp11hmul, asm: "MULL"}, // (arg0 * arg1) >> width
{name: "HMULWU", reg: gp11hmul, asm: "MULW"}, // (arg0 * arg1) >> width
{name: "HMULBU", reg: gp11hmul, asm: "MULB"}, // (arg0 * arg1) >> width
{name: "DIVQ", reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1 {name: "DIVQ", reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1
{name: "DIVL", reg: gp11div, asm: "IDIVL"}, // arg0 / arg1 {name: "DIVL", reg: gp11div, asm: "IDIVL"}, // arg0 / arg1
{name: "DIVW", reg: gp11div, asm: "IDIVW"}, // arg0 / arg1 {name: "DIVW", reg: gp11div, asm: "IDIVW"}, // arg0 / arg1
{name: "DIVQU", reg: gp11div, asm: "DIVQ"}, // arg0 / arg1 {name: "DIVQU", reg: gp11div, asm: "DIVQ"}, // arg0 / arg1
{name: "DIVLU", reg: gp11div, asm: "DIVL"}, // arg0 / arg1 {name: "DIVLU", reg: gp11div, asm: "DIVL"}, // arg0 / arg1
{name: "DIVWU", reg: gp11div, asm: "DIVW"}, // arg0 / arg1 {name: "DIVWU", reg: gp11div, asm: "DIVW"}, // arg0 / arg1

View File

@ -37,6 +37,14 @@ var genericOps = []opData{
{name: "Div64F"}, {name: "Div64F"},
// TODO: Div8, Div16, Div32, Div64 and unsigned // TODO: Div8, Div16, Div32, Div64 and unsigned
{name: "Hmul8"}, // (arg0 * arg1) >> width
{name: "Hmul8u"},
{name: "Hmul16"},
{name: "Hmul16u"},
{name: "Hmul32"},
{name: "Hmul32u"},
// frontend currently doesn't generate a 64 bit hmul
{name: "Div8"}, // arg0 / arg1 {name: "Div8"}, // arg0 / arg1
{name: "Div8u"}, {name: "Div8u"},
{name: "Div16"}, {name: "Div16"},

View File

@ -93,6 +93,12 @@ const (
OpAMD64MULLconst OpAMD64MULLconst
OpAMD64MULWconst OpAMD64MULWconst
OpAMD64MULBconst OpAMD64MULBconst
OpAMD64HMULL
OpAMD64HMULW
OpAMD64HMULB
OpAMD64HMULLU
OpAMD64HMULWU
OpAMD64HMULBU
OpAMD64DIVQ OpAMD64DIVQ
OpAMD64DIVL OpAMD64DIVL
OpAMD64DIVW OpAMD64DIVW
@ -245,6 +251,12 @@ const (
OpMul64F OpMul64F
OpDiv32F OpDiv32F
OpDiv64F OpDiv64F
OpHmul8
OpHmul8u
OpHmul16
OpHmul16u
OpHmul32
OpHmul32u
OpDiv8 OpDiv8
OpDiv8u OpDiv8u
OpDiv16 OpDiv16
@ -977,6 +989,90 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "HMULL",
asm: x86.AIMULL,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULW",
asm: x86.AIMULW,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULB",
asm: x86.AIMULB,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULLU",
asm: x86.AMULL,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULWU",
asm: x86.AMULW,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULBU",
asm: x86.AMULB,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{ {
name: "DIVQ", name: "DIVQ",
asm: x86.AIDIVQ, asm: x86.AIDIVQ,
@ -2690,6 +2786,30 @@ var opcodeTable = [...]opInfo{
name: "Div64F", name: "Div64F",
generic: true, generic: true,
}, },
{
name: "Hmul8",
generic: true,
},
{
name: "Hmul8u",
generic: true,
},
{
name: "Hmul16",
generic: true,
},
{
name: "Hmul16u",
generic: true,
},
{
name: "Hmul32",
generic: true,
},
{
name: "Hmul32u",
generic: true,
},
{ {
name: "Div8", name: "Div8",
generic: true, generic: true,

View File

@ -2335,6 +2335,114 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
goto end22eaafbcfe70447f79d9b3e6cc395bbd goto end22eaafbcfe70447f79d9b3e6cc395bbd
end22eaafbcfe70447f79d9b3e6cc395bbd: end22eaafbcfe70447f79d9b3e6cc395bbd:
; ;
case OpHmul16:
// match: (Hmul16 x y)
// cond:
// result: (HMULW x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULW
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto end1b9ff394bb3b06fc109637656b6875f5
end1b9ff394bb3b06fc109637656b6875f5:
;
case OpHmul16u:
// match: (Hmul16u x y)
// cond:
// result: (HMULWU x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULWU
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto endee9089e794a43f2ce1619a6ef61670f4
endee9089e794a43f2ce1619a6ef61670f4:
;
case OpHmul32:
// match: (Hmul32 x y)
// cond:
// result: (HMULL x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULL
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto end7c83c91ef2634f0b1da4f49350b437b1
end7c83c91ef2634f0b1da4f49350b437b1:
;
case OpHmul32u:
// match: (Hmul32u x y)
// cond:
// result: (HMULLU x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULLU
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto end3c4f36611dc8815aa2a63d4ec0eaa06d
end3c4f36611dc8815aa2a63d4ec0eaa06d:
;
case OpHmul8:
// match: (Hmul8 x y)
// cond:
// result: (HMULB x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULB
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto end51b2cc9f1ed15314e68fc81024f281a7
end51b2cc9f1ed15314e68fc81024f281a7:
;
case OpHmul8u:
// match: (Hmul8u x y)
// cond:
// result: (HMULBU x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULBU
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto ende68d7b3a3c774cedc3522af9d635c39d
ende68d7b3a3c774cedc3522af9d635c39d:
;
case OpITab: case OpITab:
// match: (ITab (Load ptr mem)) // match: (ITab (Load ptr mem))
// cond: // cond: