mirror of
https://github.com/golang/go
synced 2024-11-17 19:54:45 -07:00
math/big: add fast path for amd64 addVW for large z
This matches the pure Go fast path added in the previous commit. I will leave other architectures to those with ready access to hardware. name old time/op new time/op delta AddVW/1-8 3.60ns ± 3% 3.59ns ± 1% ~ (p=0.147 n=91+86) AddVW/2-8 3.92ns ± 1% 3.91ns ± 2% -0.36% (p=0.000 n=86+92) AddVW/3-8 4.33ns ± 5% 4.46ns ± 5% +2.94% (p=0.000 n=96+97) AddVW/4-8 4.76ns ± 5% 4.82ns ± 5% +1.28% (p=0.000 n=95+92) AddVW/5-8 5.40ns ± 1% 5.42ns ± 0% +0.47% (p=0.000 n=76+71) AddVW/10-8 8.03ns ± 1% 7.80ns ± 5% -2.90% (p=0.000 n=73+96) AddVW/100-8 43.8ns ± 5% 17.9ns ± 1% -59.12% (p=0.000 n=94+81) AddVW/1000-8 428ns ± 4% 85ns ± 6% -80.20% (p=0.000 n=96+99) AddVW/10000-8 4.22µs ± 2% 1.80µs ± 3% -57.32% (p=0.000 n=69+92) AddVW/100000-8 44.8µs ± 8% 31.5µs ± 3% -29.76% (p=0.000 n=99+90) name old time/op new time/op delta SubVW/1-8 3.53ns ± 2% 3.63ns ± 5% +2.97% (p=0.000 n=94+93) SubVW/2-8 4.33ns ± 5% 4.01ns ± 2% -7.36% (p=0.000 n=90+85) SubVW/3-8 4.32ns ± 2% 4.32ns ± 5% ~ (p=0.084 n=87+97) SubVW/4-8 4.70ns ± 2% 4.83ns ± 6% +2.77% (p=0.000 n=85+96) SubVW/5-8 5.84ns ± 1% 5.35ns ± 1% -8.35% (p=0.000 n=87+87) SubVW/10-8 8.01ns ± 4% 7.54ns ± 4% -5.84% (p=0.000 n=98+97) SubVW/100-8 43.9ns ± 5% 17.9ns ± 1% -59.20% (p=0.000 n=98+76) SubVW/1000-8 426ns ± 2% 85ns ± 3% -80.13% (p=0.000 n=90+98) SubVW/10000-8 4.24µs ± 2% 1.81µs ± 3% -57.28% (p=0.000 n=74+91) SubVW/100000-8 44.5µs ± 4% 31.5µs ± 2% -29.33% (p=0.000 n=84+91) Change-Id: I10dd361cbaca22197c27e7734c0f50065292afbb Reviewed-on: https://go-review.googlesource.com/c/go/+/164969 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
parent
fe24837c4d
commit
4d10aba35e
@ -143,6 +143,8 @@ E2: NEGQ CX
|
||||
// func addVW(z, x []Word, y Word) (c Word)
|
||||
TEXT ·addVW(SB),NOSPLIT,$0
|
||||
MOVQ z_len+8(FP), DI
|
||||
CMPQ DI, $32
|
||||
JG large
|
||||
MOVQ x+24(FP), R8
|
||||
MOVQ y+48(FP), CX // c = y
|
||||
MOVQ z+0(FP), R10
|
||||
@ -189,12 +191,16 @@ L3: // n > 0
|
||||
|
||||
E3: MOVQ CX, c+56(FP) // return c
|
||||
RET
|
||||
large:
|
||||
JMP ·addVWlarge(SB)
|
||||
|
||||
|
||||
// func subVW(z, x []Word, y Word) (c Word)
|
||||
// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
|
||||
TEXT ·subVW(SB),NOSPLIT,$0
|
||||
MOVQ z_len+8(FP), DI
|
||||
CMPQ DI, $32
|
||||
JG large
|
||||
MOVQ x+24(FP), R8
|
||||
MOVQ y+48(FP), CX // c = y
|
||||
MOVQ z+0(FP), R10
|
||||
@ -242,6 +248,8 @@ L4: // n > 0
|
||||
|
||||
E4: MOVQ CX, c+56(FP) // return c
|
||||
RET
|
||||
large:
|
||||
JMP ·subVWlarge(SB)
|
||||
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
|
||||
|
Loading…
Reference in New Issue
Block a user