mirror of
https://github.com/golang/go
synced 2024-11-24 21:00:09 -07:00
runtime: optimize multiple EOR with EOR3 on ARM64
The ARM64 architecture provides the NEON instruction EOR3, which computes the exclusive OR of three operands in a single instruction. Using it in place of chains of two-operand EOR instructions reduces the number of assembly instructions and improves performance at the same time.
This commit is contained in:
parent
63daa774b5
commit
5e38af03ea
@ -632,8 +632,7 @@ aes33to64:
|
||||
AESE V2.B16, V6.B16
|
||||
AESE V3.B16, V7.B16
|
||||
|
||||
VEOR V6.B16, V4.B16, V4.B16
|
||||
VEOR V7.B16, V5.B16, V5.B16
|
||||
VEOR3 V6.B16, V4.B16, V7.B16, V4.B16
|
||||
VEOR V5.B16, V4.B16, V4.B16
|
||||
|
||||
VST1 [V4.D1], (R2)
|
||||
@ -703,13 +702,10 @@ aes65to128:
|
||||
AESE V6.B16, V14.B16
|
||||
AESE V7.B16, V15.B16
|
||||
|
||||
VEOR V12.B16, V8.B16, V8.B16
|
||||
VEOR V13.B16, V9.B16, V9.B16
|
||||
VEOR V14.B16, V10.B16, V10.B16
|
||||
VEOR V15.B16, V11.B16, V11.B16
|
||||
VEOR V10.B16, V8.B16, V8.B16
|
||||
VEOR V11.B16, V9.B16, V9.B16
|
||||
VEOR V9.B16, V8.B16, V8.B16
|
||||
VEOR3 V8.B16, V9.B16, V10.B16, V8.B16
|
||||
VEOR3 V11.B16, V12.B16, V13.B16, V11.B16
|
||||
VEOR3 V8.B16, V11.B16, V14.B16, V8.B16
|
||||
VEOR V8.B16, V15.B16, V8.B16
|
||||
|
||||
VST1 [V8.D1], (R2)
|
||||
RET
|
||||
@ -822,13 +818,10 @@ aesloop:
|
||||
AESE V14.B16, V6.B16
|
||||
AESE V15.B16, V7.B16
|
||||
|
||||
VEOR V0.B16, V1.B16, V0.B16
|
||||
VEOR V2.B16, V3.B16, V2.B16
|
||||
VEOR V4.B16, V5.B16, V4.B16
|
||||
VEOR V6.B16, V7.B16, V6.B16
|
||||
VEOR V0.B16, V2.B16, V0.B16
|
||||
VEOR V4.B16, V6.B16, V4.B16
|
||||
VEOR V4.B16, V0.B16, V0.B16
|
||||
VEOR3 V0.B16, V1.B16, V2.B16, V0.B16
|
||||
VEOR3 V3.B16, V4.B16, V5.B16, V3.B16
|
||||
VEOR3 V0.B16, V3.B16, V6.B16, V0.B16
|
||||
VEOR V0.B16, V7.B16, V0.B16
|
||||
|
||||
VST1 [V0.D1], (R2)
|
||||
RET
|
||||
|
Loading…
Reference in New Issue
Block a user