1
0
mirror of https://github.com/golang/go synced 2024-10-05 16:41:21 -06:00

[dev.ssa] cmd/compile/internal/ssa: implement OHMUL

Adds support for high multiply which is used by the frontend when
rewriting const division.  The frontend currently only does this for 8,
16, and 32 bit integer arithmetic.

Change-Id: I9b6c6018f3be827a50ee6c185454ebc79b3094c8
Reviewed-on: https://go-review.googlesource.com/13696
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Todd Neal 2015-08-18 19:14:47 -05:00
parent a45f2d8f28
commit 67cbd5b51d
10 changed files with 4266 additions and 4 deletions

View File

@ -779,6 +779,13 @@ var opToSSA = map[opAndType]ssa.Op{
opAndType{ODIV, TFLOAT32}: ssa.OpDiv32F,
opAndType{ODIV, TFLOAT64}: ssa.OpDiv64F,
opAndType{OHMUL, TINT8}: ssa.OpHmul8,
opAndType{OHMUL, TUINT8}: ssa.OpHmul8u,
opAndType{OHMUL, TINT16}: ssa.OpHmul16,
opAndType{OHMUL, TUINT16}: ssa.OpHmul16u,
opAndType{OHMUL, TINT32}: ssa.OpHmul32,
opAndType{OHMUL, TUINT32}: ssa.OpHmul32u,
opAndType{ODIV, TINT8}: ssa.OpDiv8,
opAndType{ODIV, TUINT8}: ssa.OpDiv8u,
opAndType{ODIV, TINT16}: ssa.OpDiv16,
@ -1201,7 +1208,7 @@ func (s *state) expr(n *Node) *ssa.Value {
a := s.expr(n.Left)
b := s.expr(n.Right)
return s.newValue2(s.ssaOp(n.Op, n.Left.Type), Types[TBOOL], a, b)
case OADD, OAND, OMUL, OOR, OSUB, ODIV, OXOR:
case OADD, OAND, OMUL, OOR, OSUB, ODIV, OXOR, OHMUL:
a := s.expr(n.Left)
b := s.expr(n.Right)
return s.newValue2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
@ -2099,6 +2106,27 @@ func genValue(v *ssa.Value) {
j2.To.Val = Pc
}
case ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
// the frontend rewrites division of 8/16/32 bit integers by a constant
// into HMUL by a constant
// Arg[0] is already in AX as it's the only register we allow
// and DX is the only output we care about (the high bits)
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[1])
// IMULB/MULB put the high portion in AH instead of DL,
// so move it to DL for consistency
if v.Type.Size() == 1 {
m := Prog(x86.AMOVB)
m.From.Type = obj.TYPE_REG
m.From.Reg = x86.REG_AH
m.To.Type = obj.TYPE_REG
m.To.Reg = x86.REG_DX
}
case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB,
ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB:

View File

@ -51,3 +51,6 @@ func TestFP(t *testing.T) { runTest(t, "fp_ssa.go") }
// TestArithmeticBoundary tests boundary results for arithmetic operations.
func TestArithmeticBoundary(t *testing.T) { runTest(t, "arithBoundary_ssa.go") }
// TestArithmeticConst tests results for arithmetic operations against constants.
func TestArithmeticConst(t *testing.T) { runTest(t, "arithConst_ssa.go") }

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
package main
// test64BitConstMulti tests that rewrite rules don't fold 64 bit constants
// test64BitConstMult tests that rewrite rules don't fold 64 bit constants
// into multiply instructions.
func test64BitConstMult() {
want := int64(103079215109)

View File

@ -5,7 +5,7 @@ Coverage
--------
- Floating point numbers
- Complex numbers
- Integer division (HMUL & MOD)
- Integer division (MOD)
- Fat objects (strings/slices/interfaces) vs. Phi
- Defer?
- Closure args
@ -50,6 +50,7 @@ Optimizations (better compiler)
- Constant cache
- Reusable slices (e.g. []int of size NumValues()) cached in Func
- Handle signed division overflow and sign extension earlier
- Implement 64 bit const division with high multiply, maybe in the frontend?
Regalloc
--------

View File

@ -45,6 +45,13 @@
(Div8 x y) -> (DIVW (SignExt8to16 <config.Frontend().TypeInt16()> x) (SignExt8to16 <config.Frontend().TypeInt16()> y))
(Div8u x y) -> (DIVWU (ZeroExt8to16 <config.Frontend().TypeUInt16()> x) (ZeroExt8to16 <config.Frontend().TypeUInt16()> y))
(Hmul32 x y) -> (HMULL x y)
(Hmul32u x y) -> (HMULLU x y)
(Hmul16 x y) -> (HMULW x y)
(Hmul16u x y) -> (HMULWU x y)
(Hmul8 x y) -> (HMULB x y)
(Hmul8u x y) -> (HMULBU x y)
(And64 x y) -> (ANDQ x y)
(And32 x y) -> (ANDL x y)
(And16 x y) -> (ANDW x y)

View File

@ -101,6 +101,8 @@ func init() {
gp21shift = regInfo{inputs: []regMask{gpsp, cx}, outputs: []regMask{gp &^ cx}, clobbers: flags}
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
clobbers: dx | flags}
gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx},
clobbers: ax | flags}
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
@ -184,10 +186,16 @@ func init() {
{name: "MULWconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint
{name: "MULBconst", reg: gp11, asm: "IMULW"}, // arg0 * auxint
{name: "HMULL", reg: gp11hmul, asm: "IMULL"}, // (arg0 * arg1) >> width
{name: "HMULW", reg: gp11hmul, asm: "IMULW"}, // (arg0 * arg1) >> width
{name: "HMULB", reg: gp11hmul, asm: "IMULB"}, // (arg0 * arg1) >> width
{name: "HMULLU", reg: gp11hmul, asm: "MULL"}, // (arg0 * arg1) >> width
{name: "HMULWU", reg: gp11hmul, asm: "MULW"}, // (arg0 * arg1) >> width
{name: "HMULBU", reg: gp11hmul, asm: "MULB"}, // (arg0 * arg1) >> width
{name: "DIVQ", reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1
{name: "DIVL", reg: gp11div, asm: "IDIVL"}, // arg0 / arg1
{name: "DIVW", reg: gp11div, asm: "IDIVW"}, // arg0 / arg1
{name: "DIVQU", reg: gp11div, asm: "DIVQ"}, // arg0 / arg1
{name: "DIVLU", reg: gp11div, asm: "DIVL"}, // arg0 / arg1
{name: "DIVWU", reg: gp11div, asm: "DIVW"}, // arg0 / arg1

View File

@ -37,6 +37,14 @@ var genericOps = []opData{
{name: "Div64F"},
// TODO: Div8, Div16, Div32, Div64 and unsigned
{name: "Hmul8"}, // (arg0 * arg1) >> width
{name: "Hmul8u"},
{name: "Hmul16"},
{name: "Hmul16u"},
{name: "Hmul32"},
{name: "Hmul32u"},
// frontend currently doesn't generate a 64 bit hmul
{name: "Div8"}, // arg0 / arg1
{name: "Div8u"},
{name: "Div16"},

View File

@ -93,6 +93,12 @@ const (
OpAMD64MULLconst
OpAMD64MULWconst
OpAMD64MULBconst
OpAMD64HMULL
OpAMD64HMULW
OpAMD64HMULB
OpAMD64HMULLU
OpAMD64HMULWU
OpAMD64HMULBU
OpAMD64DIVQ
OpAMD64DIVL
OpAMD64DIVW
@ -245,6 +251,12 @@ const (
OpMul64F
OpDiv32F
OpDiv64F
OpHmul8
OpHmul8u
OpHmul16
OpHmul16u
OpHmul32
OpHmul32u
OpDiv8
OpDiv8u
OpDiv16
@ -977,6 +989,90 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "HMULL",
asm: x86.AIMULL,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULW",
asm: x86.AIMULW,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULB",
asm: x86.AIMULB,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULLU",
asm: x86.AMULL,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULWU",
asm: x86.AMULW,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "HMULBU",
asm: x86.AMULB,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // .AX
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
clobbers: 8589934593, // .AX .FLAGS
outputs: []regMask{
4, // .DX
},
},
},
{
name: "DIVQ",
asm: x86.AIDIVQ,
@ -2690,6 +2786,30 @@ var opcodeTable = [...]opInfo{
name: "Div64F",
generic: true,
},
{
name: "Hmul8",
generic: true,
},
{
name: "Hmul8u",
generic: true,
},
{
name: "Hmul16",
generic: true,
},
{
name: "Hmul16u",
generic: true,
},
{
name: "Hmul32",
generic: true,
},
{
name: "Hmul32u",
generic: true,
},
{
name: "Div8",
generic: true,

View File

@ -2335,6 +2335,114 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
goto end22eaafbcfe70447f79d9b3e6cc395bbd
end22eaafbcfe70447f79d9b3e6cc395bbd:
;
case OpHmul16:
// match: (Hmul16 x y)
// cond:
// result: (HMULW x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULW
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto end1b9ff394bb3b06fc109637656b6875f5
end1b9ff394bb3b06fc109637656b6875f5:
;
case OpHmul16u:
// match: (Hmul16u x y)
// cond:
// result: (HMULWU x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULWU
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto endee9089e794a43f2ce1619a6ef61670f4
endee9089e794a43f2ce1619a6ef61670f4:
;
case OpHmul32:
// match: (Hmul32 x y)
// cond:
// result: (HMULL x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULL
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto end7c83c91ef2634f0b1da4f49350b437b1
end7c83c91ef2634f0b1da4f49350b437b1:
;
case OpHmul32u:
// match: (Hmul32u x y)
// cond:
// result: (HMULLU x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULLU
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto end3c4f36611dc8815aa2a63d4ec0eaa06d
end3c4f36611dc8815aa2a63d4ec0eaa06d:
;
case OpHmul8:
// match: (Hmul8 x y)
// cond:
// result: (HMULB x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULB
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto end51b2cc9f1ed15314e68fc81024f281a7
end51b2cc9f1ed15314e68fc81024f281a7:
;
case OpHmul8u:
// match: (Hmul8u x y)
// cond:
// result: (HMULBU x y)
{
x := v.Args[0]
y := v.Args[1]
v.Op = OpAMD64HMULBU
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AddArg(x)
v.AddArg(y)
return true
}
goto ende68d7b3a3c774cedc3522af9d635c39d
ende68d7b3a3c774cedc3522af9d635c39d:
;
case OpITab:
// match: (ITab (Load ptr mem))
// cond: