1
0
mirror of https://github.com/golang/go synced 2024-10-02 08:18:32 -06:00
go/src/runtime/os_darwin_arm64.go

16 lines
495 B
Go
Raw Normal View History

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
hash/crc32: optimize arm64 crc32 implementation ARMv8 defines crc32 instruction. Comparing to the original crc32 calculation, this patch makes use of crc32 instructions to do crc32 calculation instead of the multiple lookup table algorithms. ARMv8 provides IEEE and Castagnoli polynomials for crc32 calculation so that the perfomance of these two types of crc32 get significant improved. name old time/op new time/op delta CRC32/poly=IEEE/size=15/align=0-32 117ns ± 0% 38ns ± 0% -67.44% CRC32/poly=IEEE/size=15/align=1-32 117ns ± 0% 38ns ± 0% -67.52% CRC32/poly=IEEE/size=40/align=0-32 129ns ± 0% 41ns ± 0% -68.37% CRC32/poly=IEEE/size=40/align=1-32 129ns ± 0% 41ns ± 0% -68.29% CRC32/poly=IEEE/size=512/align=0-32 828ns ± 0% 246ns ± 0% -70.29% CRC32/poly=IEEE/size=512/align=1-32 828ns ± 0% 132ns ± 0% -84.06% CRC32/poly=IEEE/size=1kB/align=0-32 1.58µs ± 0% 0.46µs ± 0% -70.98% CRC32/poly=IEEE/size=1kB/align=1-32 1.58µs ± 0% 0.46µs ± 0% -70.92% CRC32/poly=IEEE/size=4kB/align=0-32 6.06µs ± 0% 1.74µs ± 0% -71.27% CRC32/poly=IEEE/size=4kB/align=1-32 6.10µs ± 0% 1.74µs ± 0% -71.44% CRC32/poly=IEEE/size=32kB/align=0-32 48.3µs ± 0% 13.7µs ± 0% -71.61% CRC32/poly=IEEE/size=32kB/align=1-32 48.3µs ± 0% 13.7µs ± 0% -71.60% CRC32/poly=Castagnoli/size=15/align=0-32 116ns ± 0% 38ns ± 0% -67.07% CRC32/poly=Castagnoli/size=15/align=1-32 116ns ± 0% 38ns ± 0% -66.90% CRC32/poly=Castagnoli/size=40/align=0-32 127ns ± 0% 40ns ± 0% -68.11% CRC32/poly=Castagnoli/size=40/align=1-32 127ns ± 0% 40ns ± 0% -68.11% CRC32/poly=Castagnoli/size=512/align=0-32 828ns ± 0% 132ns ± 0% -84.06% CRC32/poly=Castagnoli/size=512/align=1-32 827ns ± 0% 132ns ± 0% -84.04% CRC32/poly=Castagnoli/size=1kB/align=0-32 1.59µs ± 0% 0.22µs ± 0% -85.89% CRC32/poly=Castagnoli/size=1kB/align=1-32 1.58µs ± 0% 0.22µs ± 0% -85.79% CRC32/poly=Castagnoli/size=4kB/align=0-32 6.14µs ± 0% 0.77µs ± 0% -87.40% CRC32/poly=Castagnoli/size=4kB/align=1-32 6.06µs ± 0% 0.77µs ± 0% -87.25% CRC32/poly=Castagnoli/size=32kB/align=0-32 48.3µs ± 0% 5.9µs ± 0% -87.71% CRC32/poly=Castagnoli/size=32kB/align=1-32 48.4µs ± 0% 6.0µs ± 0% -87.69% CRC32/poly=Koopman/size=15/align=0-32 104ns ± 0% 104ns ± 0% +0.00% CRC32/poly=Koopman/size=15/align=1-32 104ns ± 0% 104ns ± 0% +0.00% CRC32/poly=Koopman/size=40/align=0-32 235ns ± 0% 235ns ± 0% +0.00% CRC32/poly=Koopman/size=40/align=1-32 235ns ± 0% 235ns ± 0% +0.00% CRC32/poly=Koopman/size=512/align=0-32 2.71µs ± 0% 2.71µs ± 0% -0.07% CRC32/poly=Koopman/size=512/align=1-32 2.71µs ± 0% 2.71µs ± 0% -0.04% CRC32/poly=Koopman/size=1kB/align=0-32 5.40µs ± 0% 5.39µs ± 0% -0.06% CRC32/poly=Koopman/size=1kB/align=1-32 5.40µs ± 0% 5.40µs ± 0% +0.02% CRC32/poly=Koopman/size=4kB/align=0-32 21.5µs ± 0% 21.5µs ± 0% -0.16% CRC32/poly=Koopman/size=4kB/align=1-32 21.5µs ± 0% 21.5µs ± 0% -0.05% CRC32/poly=Koopman/size=32kB/align=0-32 172µs ± 0% 172µs ± 0% -0.07% CRC32/poly=Koopman/size=32kB/align=1-32 172µs ± 0% 172µs ± 0% -0.01% name old speed new speed delta CRC32/poly=IEEE/size=15/align=0-32 128MB/s ± 0% 394MB/s ± 0% +207.95% CRC32/poly=IEEE/size=15/align=1-32 128MB/s ± 0% 394MB/s ± 0% +208.09% CRC32/poly=IEEE/size=40/align=0-32 310MB/s ± 0% 979MB/s ± 0% +216.07% CRC32/poly=IEEE/size=40/align=1-32 310MB/s ± 0% 979MB/s ± 0% +216.16% CRC32/poly=IEEE/size=512/align=0-32 618MB/s ± 0% 2074MB/s ± 0% +235.72% CRC32/poly=IEEE/size=512/align=1-32 618MB/s ± 0% 3852MB/s ± 0% +523.55% CRC32/poly=IEEE/size=1kB/align=0-32 646MB/s ± 0% 2225MB/s ± 0% +244.57% CRC32/poly=IEEE/size=1kB/align=1-32 647MB/s ± 0% 2225MB/s ± 0% +243.87% CRC32/poly=IEEE/size=4kB/align=0-32 676MB/s ± 0% 2352MB/s ± 0% +248.02% CRC32/poly=IEEE/size=4kB/align=1-32 672MB/s ± 0% 2352MB/s ± 0% +250.15% CRC32/poly=IEEE/size=32kB/align=0-32 678MB/s ± 0% 2387MB/s ± 0% +252.17% CRC32/poly=IEEE/size=32kB/align=1-32 678MB/s ± 0% 2388MB/s ± 0% +252.11% CRC32/poly=Castagnoli/size=15/align=0-32 129MB/s ± 0% 393MB/s ± 0% +205.51% CRC32/poly=Castagnoli/size=15/align=1-32 129MB/s ± 0% 390MB/s ± 0% +203.41% CRC32/poly=Castagnoli/size=40/align=0-32 314MB/s ± 0% 988MB/s ± 0% +215.04% CRC32/poly=Castagnoli/size=40/align=1-32 314MB/s ± 0% 987MB/s ± 0% +214.68% CRC32/poly=Castagnoli/size=512/align=0-32 618MB/s ± 0% 3860MB/s ± 0% +524.32% CRC32/poly=Castagnoli/size=512/align=1-32 619MB/s ± 0% 3859MB/s ± 0% +523.66% CRC32/poly=Castagnoli/size=1kB/align=0-32 645MB/s ± 0% 4568MB/s ± 0% +608.56% CRC32/poly=Castagnoli/size=1kB/align=1-32 650MB/s ± 0% 4567MB/s ± 0% +602.94% CRC32/poly=Castagnoli/size=4kB/align=0-32 667MB/s ± 0% 5297MB/s ± 0% +693.81% CRC32/poly=Castagnoli/size=4kB/align=1-32 676MB/s ± 0% 5297MB/s ± 0% +684.00% CRC32/poly=Castagnoli/size=32kB/align=0-32 678MB/s ± 0% 5519MB/s ± 0% +713.83% CRC32/poly=Castagnoli/size=32kB/align=1-32 677MB/s ± 0% 5497MB/s ± 0% +712.04% CRC32/poly=Koopman/size=15/align=0-32 143MB/s ± 0% 144MB/s ± 0% +0.27% CRC32/poly=Koopman/size=15/align=1-32 143MB/s ± 0% 144MB/s ± 0% +0.33% CRC32/poly=Koopman/size=40/align=0-32 169MB/s ± 0% 170MB/s ± 0% +0.12% CRC32/poly=Koopman/size=40/align=1-32 170MB/s ± 0% 170MB/s ± 0% +0.08% CRC32/poly=Koopman/size=512/align=0-32 189MB/s ± 0% 189MB/s ± 0% +0.07% CRC32/poly=Koopman/size=512/align=1-32 189MB/s ± 0% 189MB/s ± 0% +0.04% CRC32/poly=Koopman/size=1kB/align=0-32 190MB/s ± 0% 190MB/s ± 0% +0.05% CRC32/poly=Koopman/size=1kB/align=1-32 190MB/s ± 0% 190MB/s ± 0% -0.01% CRC32/poly=Koopman/size=4kB/align=0-32 190MB/s ± 0% 190MB/s ± 0% +0.15% CRC32/poly=Koopman/size=4kB/align=1-32 190MB/s ± 0% 191MB/s ± 0% +0.05% CRC32/poly=Koopman/size=32kB/align=0-32 191MB/s ± 0% 191MB/s ± 0% +0.06% CRC32/poly=Koopman/size=32kB/align=1-32 191MB/s ± 0% 191MB/s ± 0% +0.02% Also fix a bug of arm64 assembler The optimization is mainly contributed by Fangming.Fang <fangming.fang@arm.com> Change-Id: I900678c2e445d7e8ad9e2a9ab3305d649230905f Reviewed-on: https://go-review.googlesource.com/40074 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-16 21:31:09 -06:00
var supportCRC32 = false
//go:nosplit
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
// TODO: need more entropy to better seed fastrand.
return nanotime()
}