From 90391c2e8ad8da167aed53bad5857008a410d0c1 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Mon, 22 Jul 2024 15:53:37 +0800 Subject: [PATCH] math: add round assembly implementations on riscv64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL reapplies CL 504737 and adds integer precision limitation check, since CL 504737 only checks whether floating point number is +-Inf or NaN. This CL is also ~7% faster than CL 504737. Updates #68322 goos: linux goarch: riscv64 pkg: math │ math.old.bench │ math.new.bench │ │ sec/op │ sec/op vs base │ Ceil 54.09n ± 0% 18.72n ± 0% -65.39% (p=0.000 n=10) Floor 40.72n ± 0% 18.72n ± 0% -54.03% (p=0.000 n=10) Round 20.73n ± 0% 20.73n ± 0% ~ (p=1.000 n=10) RoundToEven 24.07n ± 0% 24.07n ± 0% ~ (p=1.000 n=10) Trunc 38.72n ± 0% 18.72n ± 0% -51.65% (p=0.000 n=10) geomean 33.56n 20.09n -40.13% Change-Id: I06cfe2cb9e2535cd705d40b6650a7e71fedd906c Reviewed-on: https://go-review.googlesource.com/c/go/+/600075 Reviewed-by: Keith Randall Reviewed-by: Joel Sing Reviewed-by: Keith Randall Reviewed-by: Michael Knyszek LUCI-TryBot-Result: Go LUCI --- src/math/floor_asm.go | 2 +- src/math/floor_noasm.go | 2 +- src/math/floor_riscv64.s | 48 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 src/math/floor_riscv64.s diff --git a/src/math/floor_asm.go b/src/math/floor_asm.go index fb419d6da2f..5cb45f5a7e8 100644 --- a/src/math/floor_asm.go +++ b/src/math/floor_asm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build 386 || amd64 || arm64 || ppc64 || ppc64le || s390x || wasm +//go:build 386 || amd64 || arm64 || ppc64 || ppc64le || riscv64 || s390x || wasm package math diff --git a/src/math/floor_noasm.go b/src/math/floor_noasm.go index 5641c7ea0a4..6754ca8fc80 100644 --- a/src/math/floor_noasm.go +++ b/src/math/floor_noasm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !s390x && !wasm +//go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !riscv64 && !s390x && !wasm package math diff --git a/src/math/floor_riscv64.s b/src/math/floor_riscv64.s new file mode 100644 index 00000000000..d9fe0ed8e28 --- /dev/null +++ b/src/math/floor_riscv64.s @@ -0,0 +1,48 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// RISC-V offered floating-point (FP) rounding by FP conversion instructions (FCVT) +// with rounding mode field. +// As Go spec expects FP rounding result in FP, we have to use FCVT integer +// back to FP (fp -> int -> fp). +// RISC-V only set Inexact flag during invalid FP-integer conversion without changing any data, +// on the other hand, RISC-V sets out of integer represent range yet valid FP into NaN. +// When it comes to integer-FP conversion, invalid FP like NaN, +-Inf will be +// converted into the closest valid FP, for example: +// +// `Floor(-Inf) -> int64(0x7fffffffffffffff) -> float64(9.22e+18)` +// `Floor(18446744073709549568.0) -> int64(0x7fffffffffffffff) -> float64(9.22e+18)` +// +// This ISA conversion limitation requires we skip all invalid or out of range FP +// before any normal rounding operations. + +#define ROUNDFN(NAME, MODE) \ +TEXT NAME(SB),NOSPLIT,$0; \ + MOVD x+0(FP), F10; \ + FMVXD F10, X10; \ + /* Drop all fraction bits */;\ + SRL $52, X10, X12; \ + /* Remove sign bit */; \ + AND $0x7FF, X12, X12;\ + /* Return either input is +-Inf, NaN(0x7FF) or out of precision limitation */;\ + /* 1023: bias of exponent, [-2^53, 2^53]: exactly integer represent range */;\ + MOV $1023+53, X11; \ + BLTU X11, X12, 4(PC);\ + FCVTLD.MODE F10, X11; \ + FCVTDL X11, F11; \ + /* RISC-V rounds negative values to +0, restore original sign */;\ + FSGNJD F10, F11, F10; \ + MOVD F10, ret+8(FP); \ + RET + +// func archFloor(x float64) float64 +ROUNDFN(·archFloor, RDN) + +// func archCeil(x float64) float64 +ROUNDFN(·archCeil, RUP) + +// func archTrunc(x float64) float64 +ROUNDFN(·archTrunc, RTZ)