From 3ef07c412f068144554648c0d209bef444a2ee27 Mon Sep 17 00:00:00 2001
From: Michael Munday <munday@ca.ibm.com>
Date: Sun, 23 Oct 2016 16:11:13 -0400
Subject: [PATCH] cmd, runtime: remove s390x 3 operand immediate logical ops

These are emulated by the assembler and we don't need them.

Change-Id: I2b07c5315a5b642fdb5e50b468453260ae121164
Reviewed-on: https://go-review.googlesource.com/31758
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/cmd/compile/internal/s390x/ssa.go   | 11 ++--
 src/cmd/internal/obj/s390x/asmz.go      | 77 +++++++++----------------
 src/runtime/asm_s390x.s                 |  9 ++-
 src/runtime/internal/atomic/asm_s390x.s |  6 +-
 4 files changed, 43 insertions(+), 60 deletions(-)

diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go
index e8b7b4ba6e2..e2d3c2849ba 100644
--- a/src/cmd/compile/internal/s390x/ssa.go
+++ b/src/cmd/compile/internal/s390x/ssa.go
@@ -809,12 +809,11 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
 		// defer returns in R3:
 		// 0 if we should continue executing
 		// 1 if we should jump to deferreturn call
-		p := gc.Prog(s390x.AAND)
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = 0xFFFFFFFF
-		p.Reg = s390x.REG_R3
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = s390x.REG_R3
+		p := gc.Prog(s390x.ACMPW)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = s390x.REG_R3
+		p.To.Type = obj.TYPE_CONST
+		p.To.Offset = 0
 		p = gc.Prog(s390x.ABNE)
 		p.To.Type = obj.TYPE_BRANCH
 		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go
index 80ae303f4f9..cc039bd1d22 100644
--- a/src/cmd/internal/obj/s390x/asmz.go
+++ b/src/cmd/internal/obj/s390x/asmz.go
@@ -161,7 +161,6 @@ var optab = []Optab{
 	Optab{AAND, C_REG, C_REG, C_NONE, C_REG, 6, 0},
 	Optab{AAND, C_REG, C_NONE, C_NONE, C_REG, 6, 0},
 	Optab{AAND, C_LCON, C_NONE, C_NONE, C_REG, 23, 0},
-	Optab{AAND, C_LCON, C_REG, C_NONE, C_REG, 23, 0},
 	Optab{AAND, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
 	Optab{AAND, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
 	Optab{AANDW, C_REG, C_REG, C_NONE, C_REG, 6, 0},
@@ -3063,57 +3062,37 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
 			zRIE(_d, oprie, uint32(p.To.Reg), uint32(r), uint32(v), 0, 0, 0, 0, asm)
 		}
 
-	case 23: // 64-bit logical op $constant [reg] reg
-		// TODO(mundaym): remove the optional register and merge with case 24.
+	case 23: // 64-bit logical op $constant reg
+		// TODO(mundaym): merge with case 24.
 		v := vregoff(ctxt, &p.From)
-		var opcode uint32
-		r := p.Reg
-		if r == 0 {
-			r = p.To.Reg
-		}
-		if r == p.To.Reg {
-			switch p.As {
-			default:
-				ctxt.Diag("%v is not supported", p)
-			case AAND:
-				if v >= 0 { // needs zero extend
-					zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
-					zRRE(op_NGR, uint32(p.To.Reg), REGTMP, asm)
-				} else if int64(int16(v)) == v {
-					zRI(op_NILL, uint32(p.To.Reg), uint32(v), asm)
-				} else { //  r.To.Reg & 0xffffffff00000000 & uint32(v)
-					zRIL(_a, op_NILF, uint32(p.To.Reg), uint32(v), asm)
-				}
-			case AOR:
-				if int64(uint32(v)) != v { // needs sign extend
-					zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
-					zRRE(op_OGR, uint32(p.To.Reg), REGTMP, asm)
-				} else if int64(uint16(v)) == v {
-					zRI(op_OILL, uint32(p.To.Reg), uint32(v), asm)
-				} else {
-					zRIL(_a, op_OILF, uint32(p.To.Reg), uint32(v), asm)
-				}
-			case AXOR:
-				if int64(uint32(v)) != v { // needs sign extend
-					zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
-					zRRE(op_XGR, uint32(p.To.Reg), REGTMP, asm)
-				} else {
-					zRIL(_a, op_XILF, uint32(p.To.Reg), uint32(v), asm)
-				}
+		switch p.As {
+		default:
+			ctxt.Diag("%v is not supported", p)
+		case AAND:
+			if v >= 0 { // needs zero extend
+				zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
+				zRRE(op_NGR, uint32(p.To.Reg), REGTMP, asm)
+			} else if int64(int16(v)) == v {
+				zRI(op_NILL, uint32(p.To.Reg), uint32(v), asm)
+			} else { //  r.To.Reg & 0xffffffff00000000 & uint32(v)
+				zRIL(_a, op_NILF, uint32(p.To.Reg), uint32(v), asm)
 			}
-		} else {
-			switch p.As {
-			default:
-				ctxt.Diag("%v is not supported", p)
-			case AAND:
-				opcode = op_NGRK
-			case AOR:
-				opcode = op_OGRK
-			case AXOR:
-				opcode = op_XGRK
+		case AOR:
+			if int64(uint32(v)) != v { // needs sign extend
+				zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
+				zRRE(op_OGR, uint32(p.To.Reg), REGTMP, asm)
+			} else if int64(uint16(v)) == v {
+				zRI(op_OILL, uint32(p.To.Reg), uint32(v), asm)
+			} else {
+				zRIL(_a, op_OILF, uint32(p.To.Reg), uint32(v), asm)
+			}
+		case AXOR:
+			if int64(uint32(v)) != v { // needs sign extend
+				zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
+				zRRE(op_XGR, uint32(p.To.Reg), REGTMP, asm)
+			} else {
+				zRIL(_a, op_XILF, uint32(p.To.Reg), uint32(v), asm)
 			}
-			zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
-			zRRF(opcode, uint32(r), 0, uint32(p.To.Reg), REGTMP, asm)
 		}
 
 	case 24: // 32-bit logical op $constant reg
diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s
index 198c565b79b..36fe56f60bd 100644
--- a/src/runtime/asm_s390x.s
+++ b/src/runtime/asm_s390x.s
@@ -918,12 +918,14 @@ notfoundr0:
 
 vectorimpl:
 	//if the address is not 16byte aligned, use loop for the header
-	AND	$15, R3, R8
+	MOVD	R3, R8
+	AND	$15, R8
 	CMPBGT	R8, $0, notaligned
 
 aligned:
 	ADD	R6, R4, R8
-	AND	$-16, R8, R7
+	MOVD	R8, R7
+	AND	$-16, R7
 	// replicate c across V17
 	VLVGB	$0, R5, V19
 	VREPB	$0, V19, V17
@@ -944,7 +946,8 @@ vectorloop:
 	RET
 
 notaligned:
-	AND	$-16, R3, R8
+	MOVD	R3, R8
+	AND	$-16, R8
 	ADD     $16, R8
 notalignedloop:
 	CMPBEQ	R3, R8, aligned
diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s
index c84718cb8f0..4884d1c620f 100644
--- a/src/runtime/internal/atomic/asm_s390x.s
+++ b/src/runtime/internal/atomic/asm_s390x.s
@@ -141,7 +141,8 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
 	MOVD    ptr+0(FP), R3
 	MOVBZ   val+8(FP), R4
 	// Calculate shift.
-	AND	$3, R3, R5
+	MOVD	R3, R5
+	AND	$3, R5
 	XOR	$3, R5 // big endian - flip direction
 	SLD	$3, R5 // MUL $8, R5
 	SLD	R5, R4
@@ -159,7 +160,8 @@ TEXT ·And8(SB), NOSPLIT, $0-9
 	MOVD    ptr+0(FP), R3
 	MOVBZ   val+8(FP), R4
 	// Calculate shift.
-	AND	$3, R3, R5
+	MOVD	R3, R5
+	AND	$3, R5
 	XOR	$3, R5 // big endian - flip direction
 	SLD	$3, R5 // MUL $8, R5
 	OR	$-256, R4 // create 0xffffffffffffffxx