From 4a1140472b3093edf9cd653666242393d7f4c2bd Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Seo Date: Thu, 30 Mar 2017 18:30:07 -0300 Subject: [PATCH] math/big: Unify divWW implementation for ppc64 and ppc64le. Starting in go1.9, the minimum processor requirement for ppc64 is POWER8. So it may now use the same divWW implementation as ppc64le. Updates #19074 Change-Id: If1a85f175cda89eee06a1024ccd468da6124c844 Reviewed-on: https://go-review.googlesource.com/39010 Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick Reviewed-by: Lynn Boger --- src/math/big/arith_ppc64.s | 14 ---------- src/math/big/arith_ppc64le.s | 50 ------------------------------------ src/math/big/arith_ppc64x.s | 39 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 64 deletions(-) delete mode 100644 src/math/big/arith_ppc64.s delete mode 100644 src/math/big/arith_ppc64le.s diff --git a/src/math/big/arith_ppc64.s b/src/math/big/arith_ppc64.s deleted file mode 100644 index 47fe8f16fa..0000000000 --- a/src/math/big/arith_ppc64.s +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !math_big_pure_go,ppc64 - -#include "textflag.h" - -// This file provides fast assembly versions for the elementary -// arithmetic operations on vectors implemented in arith.go. - -TEXT ·divWW(SB), NOSPLIT, $0 - BR ·divWW_g(SB) - diff --git a/src/math/big/arith_ppc64le.s b/src/math/big/arith_ppc64le.s deleted file mode 100644 index b78cdfed9f..0000000000 --- a/src/math/big/arith_ppc64le.s +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !math_big_pure_go,ppc64le - -#include "textflag.h" - -// This file provides fast assembly versions for the elementary -// arithmetic operations on vectors implemented in arith.go. - -// func divWW(x1, x0, y Word) (q, r Word) -TEXT ·divWW(SB), NOSPLIT, $0 - MOVD x1+0(FP), R4 - MOVD x0+8(FP), R5 - MOVD y+16(FP), R6 - - CMPU R4, R6 - BGE divbigger - - // from the programmer's note in ch. 3 of the ISA manual, p.74 - DIVDEU R6, R4, R3 - DIVDU R6, R5, R7 - MULLD R6, R3, R8 - MULLD R6, R7, R20 - SUB R20, R5, R10 - ADD R7, R3, R3 - SUB R8, R10, R4 - CMPU R4, R10 - BLT adjust - CMPU R4, R6 - BLT end - -adjust: - MOVD $1, R21 - ADD R21, R3, R3 - SUB R6, R4, R4 - -end: - MOVD R3, q+24(FP) - MOVD R4, r+32(FP) - - RET - -divbigger: - MOVD $-1, R7 - MOVD R7, q+24(FP) - MOVD R7, r+32(FP) - RET - diff --git a/src/math/big/arith_ppc64x.s b/src/math/big/arith_ppc64x.s index 3606dae068..ba4e4ab63a 100644 --- a/src/math/big/arith_ppc64x.s +++ b/src/math/big/arith_ppc64x.s @@ -173,5 +173,44 @@ end: MOVD R4, c+56(FP) RET +// func divWW(x1, x0, y Word) (q, r Word) +TEXT ·divWW(SB), NOSPLIT, $0 + MOVD x1+0(FP), R4 + MOVD x0+8(FP), R5 + MOVD y+16(FP), R6 + + CMPU R4, R6 + BGE divbigger + + // from the programmer's note in ch. 3 of the ISA manual, p.74 + DIVDEU R6, R4, R3 + DIVDU R6, R5, R7 + MULLD R6, R3, R8 + MULLD R6, R7, R20 + SUB R20, R5, R10 + ADD R7, R3, R3 + SUB R8, R10, R4 + CMPU R4, R10 + BLT adjust + CMPU R4, R6 + BLT end + +adjust: + MOVD $1, R21 + ADD R21, R3, R3 + SUB R6, R4, R4 + +end: + MOVD R3, q+24(FP) + MOVD R4, r+32(FP) + + RET + +divbigger: + MOVD $-1, R7 + MOVD R7, q+24(FP) + MOVD R7, r+32(FP) + RET + TEXT ·divWVW(SB), NOSPLIT, $0 BR ·divWVW_g(SB)