From 90391c2e8ad8da167aed53bad5857008a410d0c1 Mon Sep 17 00:00:00 2001
From: Meng Zhuo <mzh@golangcn.org>
Date: Mon, 22 Jul 2024 15:53:37 +0800
Subject: [PATCH] math: add round assembly implementations on riscv64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This CL reapplies CL 504737 and adds integer precision
limitation check, since CL 504737 only checks whether
floating point number is +-Inf or NaN.

This CL is also ~7% faster than CL 504737.

Updates #68322

goos: linux
goarch: riscv64
pkg: math
            │ math.old.bench │           math.new.bench            │
            │     sec/op     │   sec/op     vs base                │
Ceil             54.09n ± 0%   18.72n ± 0%  -65.39% (p=0.000 n=10)
Floor            40.72n ± 0%   18.72n ± 0%  -54.03% (p=0.000 n=10)
Round            20.73n ± 0%   20.73n ± 0%        ~ (p=1.000 n=10)
RoundToEven      24.07n ± 0%   24.07n ± 0%        ~ (p=1.000 n=10)
Trunc            38.72n ± 0%   18.72n ± 0%  -51.65% (p=0.000 n=10)
geomean          33.56n        20.09n       -40.13%

Change-Id: I06cfe2cb9e2535cd705d40b6650a7e71fedd906c
Reviewed-on: https://go-review.googlesource.com/c/go/+/600075
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
---
 src/math/floor_asm.go    |  2 +-
 src/math/floor_noasm.go  |  2 +-
 src/math/floor_riscv64.s | 48 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 src/math/floor_riscv64.s

diff --git a/src/math/floor_asm.go b/src/math/floor_asm.go
index fb419d6da2f..5cb45f5a7e8 100644
--- a/src/math/floor_asm.go
+++ b/src/math/floor_asm.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build 386 || amd64 || arm64 || ppc64 || ppc64le || s390x || wasm
+//go:build 386 || amd64 || arm64 || ppc64 || ppc64le || riscv64 || s390x || wasm
 
 package math
 
diff --git a/src/math/floor_noasm.go b/src/math/floor_noasm.go
index 5641c7ea0a4..6754ca8fc80 100644
--- a/src/math/floor_noasm.go
+++ b/src/math/floor_noasm.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !s390x && !wasm
+//go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !riscv64 && !s390x && !wasm
 
 package math
 
diff --git a/src/math/floor_riscv64.s b/src/math/floor_riscv64.s
new file mode 100644
index 00000000000..d9fe0ed8e28
--- /dev/null
+++ b/src/math/floor_riscv64.s
@@ -0,0 +1,48 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// RISC-V offered floating-point (FP) rounding by FP conversion instructions (FCVT)
+// with rounding mode field.
+// As Go spec expects FP rounding result in FP, we have to use FCVT integer
+// back to FP (fp -> int -> fp).
+// RISC-V only set Inexact flag during invalid FP-integer conversion without changing any data,
+// on the other hand, RISC-V sets out of integer represent range yet valid FP into NaN.
+// When it comes to integer-FP conversion, invalid FP like NaN, +-Inf will be
+// converted into the closest valid FP, for example:
+//
+// `Floor(-Inf) -> int64(0x7fffffffffffffff) -> float64(9.22e+18)`
+// `Floor(18446744073709549568.0) -> int64(0x7fffffffffffffff) -> float64(9.22e+18)`
+//
+// This ISA conversion limitation requires we skip all invalid or out of range FP
+// before any normal rounding operations.
+
+#define ROUNDFN(NAME, MODE) 	\
+TEXT NAME(SB),NOSPLIT,$0; 	\
+	MOVD	x+0(FP), F10; 	\
+	FMVXD	F10, X10;	\
+	/* Drop all fraction bits */;\
+	SRL	$52, X10, X12;	\
+	/* Remove sign bit */;	\
+	AND	$0x7FF, X12, X12;\
+	/* Return either input is +-Inf, NaN(0x7FF) or out of precision limitation */;\
+	/* 1023: bias of exponent, [-2^53, 2^53]: exactly integer represent range */;\
+	MOV	$1023+53, X11;	\
+	BLTU	X11, X12, 4(PC);\
+	FCVTLD.MODE F10, X11;	\
+	FCVTDL	X11, F11;	\
+	/* RISC-V rounds negative values to +0, restore original sign */;\
+	FSGNJD	F10, F11, F10;	\
+	MOVD	F10, ret+8(FP); \
+	RET
+
+// func archFloor(x float64) float64
+ROUNDFN(·archFloor, RDN)
+
+// func archCeil(x float64) float64
+ROUNDFN(·archCeil, RUP)
+
+// func archTrunc(x float64) float64
+ROUNDFN(·archTrunc, RTZ)