From 90e0b40ab24e9dbd2404e597c76635f4b4e1ba2f Mon Sep 17 00:00:00 2001 From: Ruixin Bao Date: Tue, 27 Aug 2019 15:22:28 -0400 Subject: [PATCH] cmd/internal/obj/s390x: use 12 bit load and store instruction when possible on s390x Originally, we default to use load and store instruction with 20 bit displacement. However, that is not necessary. Some instructions have a displacement smaller than 12 bit. This CL allows the usage of 12 bit load and store instruction when that happens. This change also reduces the size of .text section in go binary by 19 KB. Some tests are also added to verify the functionality of the change. Change-Id: I13edea06ca653d4b9ffeaefe8d010bc2f065c2ba Reviewed-on: https://go-review.googlesource.com/c/go/+/194857 Reviewed-by: Michael Munday Run-TryBot: Michael Munday TryBot-Result: Gobot Gobot --- src/cmd/asm/internal/asm/testdata/s390x.s | 44 ++++++++++++++++----- src/cmd/internal/obj/s390x/asmz.go | 47 ++++++++++++++++++++++- 2 files changed, 80 insertions(+), 11 deletions(-) diff --git a/src/cmd/asm/internal/asm/testdata/s390x.s b/src/cmd/asm/internal/asm/testdata/s390x.s index 4c1ca2361c..9952c5207f 100644 --- a/src/cmd/asm/internal/asm/testdata/s390x.s +++ b/src/cmd/asm/internal/asm/testdata/s390x.s @@ -33,12 +33,12 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- MOVWBR (R15), R9 // e390f000001e MOVD R1, n-8(SP) // e310f0100024 - MOVW R2, n-8(SP) // e320f0100050 - MOVH R3, n-8(SP) // e330f0100070 - MOVB R4, n-8(SP) // e340f0100072 - MOVWZ R5, n-8(SP) // e350f0100050 - MOVHZ R6, n-8(SP) // e360f0100070 - MOVBZ R7, n-8(SP) // e370f0100072 + MOVW R2, n-8(SP) // 5020f010 + MOVH R3, n-8(SP) // 4030f010 + MOVB R4, n-8(SP) // 4240f010 + MOVWZ R5, n-8(SP) // 5050f010 + MOVHZ R6, n-8(SP) // 4060f010 + MOVBZ R7, n-8(SP) // 4270f010 MOVDBR R8, n-8(SP) // e380f010002f MOVWBR R9, n-8(SP) // e390f010003e @@ -58,6 +58,20 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- MOVB $-128, -524288(R5) // eb8050008052 MOVB $1, -524289(R6) // c0a1fff7ffff41aa60009201a000 + // RX (12-bit displacement) and RXY (20-bit displacement) instruction encoding (e.g: ST vs STY) + MOVW R1, 4095(R2)(R3) // 50132fff + MOVW R1, 4096(R2)(R3) // e31320000150 + MOVWZ R1, 4095(R2)(R3) // 50132fff + MOVWZ R1, 4096(R2)(R3) // e31320000150 + MOVH R1, 4095(R2)(R3) // 40132fff + MOVHZ R1, 4095(R2)(R3) // 40132fff + MOVH R1, 4096(R2)(R3) // e31320000170 + MOVHZ R1, 4096(R2)(R3) // e31320000170 + MOVB R1, 4095(R2)(R3) // 42132fff + MOVBZ R1, 4095(R2)(R3) // 42132fff + MOVB R1, 4096(R2)(R3) // e31320000172 + MOVBZ R1, 4096(R2)(R3) // e31320000172 + ADD R1, R2 // b9e81022 ADD R1, R2, R3 // b9e81032 ADD $8192, R1 // a71b2000 @@ -300,10 +314,22 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- FMOVS $0, F11 // b37400b0 FMOVD $0, F12 // b37500c0 - FMOVS (R1)(R2*1), F0 // ed0210000064 - FMOVS n-8(SP), F15 // edf0f0100064 - FMOVD -9999999(R8)(R9*1), F8 // c0a1ff67698141aa9000ed8a80000065 + FMOVS (R1)(R2*1), F0 // 78021000 + FMOVS n-8(SP), F15 // 78f0f010 + FMOVD -9999999(R8)(R9*1), F8 // c0a1ff67698141aa9000688a8000 FMOVD F4, F5 // 2854 + + // RX (12-bit displacement) and RXY (20-bit displacement) instruction encoding (e.g. LD vs LDY) + FMOVD (R1), F0 // 68001000 + FMOVD 4095(R2), F13 // 68d02fff + FMOVD 4096(R2), F15 // edf020000165 + FMOVS 4095(R2)(R3), F13 // 78d32fff + FMOVS 4096(R2)(R4), F15 // edf420000164 + FMOVD F0, 4095(R1) // 60001fff + FMOVD F0, 4096(R1) // ed0010000167 + FMOVS F13, 4095(R2)(R3) // 70d32fff + FMOVS F13, 4096(R2)(R3) // edd320000166 + FADDS F0, F15 // b30a00f0 FADD F1, F14 // b31a00e1 FSUBS F2, F13 // b30b00d2 diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index d76eb25829..b6024ae481 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -3320,7 +3320,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { x2 = REGTMP d2 = 0 } - zRXY(c.zopstore(p.As), uint32(p.From.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + // Emits an RX instruction if an appropriate one exists and the displacement fits in 12 bits. Otherwise use an RXY instruction. + if op, ok := c.zopstore12(p.As); ok && isU12(d2) { + zRX(op, uint32(p.From.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + } else { + zRXY(c.zopstore(p.As), uint32(p.From.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + } case 36: // mov mem reg (no relocation) d2 := c.regoff(&p.From) @@ -3337,7 +3342,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { x2 = REGTMP d2 = 0 } - zRXY(c.zopload(p.As), uint32(p.To.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + // Emits an RX instruction if an appropriate one exists and the displacement fits in 12 bits. Otherwise use an RXY instruction. + if op, ok := c.zopload12(p.As); ok && isU12(d2) { + zRX(op, uint32(p.To.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + } else { + zRXY(c.zopload(p.As), uint32(p.To.Reg), uint32(x2), uint32(b2), uint32(d2), asm) + } case 40: // word/byte wd := uint32(c.regoff(&p.From)) @@ -4215,6 +4225,22 @@ func (c *ctxtz) regoff(a *obj.Addr) int32 { return int32(c.vregoff(a)) } +// find if the displacement is within 12 bit +func isU12(displacement int32) bool { + return displacement >= 0 && displacement < DISP12 +} + +// zopload12 returns the RX op with 12 bit displacement for the given load +func (c *ctxtz) zopload12(a obj.As) (uint32, bool) { + switch a { + case AFMOVD: + return op_LD, true + case AFMOVS: + return op_LE, true + } + return 0, false +} + // zopload returns the RXY op for the given load func (c *ctxtz) zopload(a obj.As) uint32 { switch a { @@ -4253,6 +4279,23 @@ func (c *ctxtz) zopload(a obj.As) uint32 { return 0 } +// zopstore12 returns the RX op with 12 bit displacement for the given store +func (c *ctxtz) zopstore12(a obj.As) (uint32, bool) { + switch a { + case AFMOVD: + return op_STD, true + case AFMOVS: + return op_STE, true + case AMOVW, AMOVWZ: + return op_ST, true + case AMOVH, AMOVHZ: + return op_STH, true + case AMOVB, AMOVBZ: + return op_STC, true + } + return 0, false +} + // zopstore returns the RXY op for the given store func (c *ctxtz) zopstore(a obj.As) uint32 { switch a {