mirror of
https://github.com/golang/go
synced 2024-11-23 20:50:04 -07:00
crypto,internal/bytealg: fix assembly that clobbers BP
BP should be callee-saved. It is saved automatically if the function has a nonzero frame size. Otherwise, the assembly must avoid using this register.

Change-Id: If3f551efa42d830c8793d9f0183cb8daad7a2ab5
Reviewed-on: https://go-review.googlesource.com/c/go/+/248260
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
d303630622
commit
c6a11f0dd2
@ -1336,7 +1336,7 @@ TEXT p256SubInternal(SB),NOSPLIT,$0
|
||||
|
||||
RET
|
||||
/* ---------------------------------------*/
|
||||
TEXT p256MulInternal(SB),NOSPLIT,$0
|
||||
TEXT p256MulInternal(SB),NOSPLIT,$8
|
||||
MOVQ acc4, mul0
|
||||
MULQ t0
|
||||
MOVQ mul0, acc0
|
||||
@ -1519,7 +1519,7 @@ TEXT p256MulInternal(SB),NOSPLIT,$0
|
||||
|
||||
RET
|
||||
/* ---------------------------------------*/
|
||||
TEXT p256SqrInternal(SB),NOSPLIT,$0
|
||||
TEXT p256SqrInternal(SB),NOSPLIT,$8
|
||||
|
||||
MOVQ acc4, mul0
|
||||
MULQ acc5
|
||||
@ -2345,4 +2345,3 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$256-48
|
||||
|
||||
RET
|
||||
/* ---------------------------------------*/
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
// Licence: I hereby disclaim the copyright on this code and place it
|
||||
// in the public domain.
|
||||
|
||||
TEXT ·block(SB),NOSPLIT,$0-32
|
||||
TEXT ·block(SB),NOSPLIT,$8-32
|
||||
MOVQ dig+0(FP), BP
|
||||
MOVQ p+8(FP), SI
|
||||
MOVQ p_len+16(FP), DX
|
||||
|
@ -8,7 +8,7 @@
|
||||
TEXT ·Index(SB),NOSPLIT,$0-56
|
||||
MOVQ a_base+0(FP), DI
|
||||
MOVQ a_len+8(FP), DX
|
||||
MOVQ b_base+24(FP), BP
|
||||
MOVQ b_base+24(FP), R8
|
||||
MOVQ b_len+32(FP), AX
|
||||
MOVQ DI, R10
|
||||
LEAQ ret+48(FP), R11
|
||||
@ -17,7 +17,7 @@ TEXT ·Index(SB),NOSPLIT,$0-56
|
||||
TEXT ·IndexString(SB),NOSPLIT,$0-40
|
||||
MOVQ a_base+0(FP), DI
|
||||
MOVQ a_len+8(FP), DX
|
||||
MOVQ b_base+16(FP), BP
|
||||
MOVQ b_base+16(FP), R8
|
||||
MOVQ b_len+24(FP), AX
|
||||
MOVQ DI, R10
|
||||
LEAQ ret+32(FP), R11
|
||||
@ -26,7 +26,7 @@ TEXT ·IndexString(SB),NOSPLIT,$0-40
|
||||
// AX: length of string, that we are searching for
|
||||
// DX: length of string, in which we are searching
|
||||
// DI: pointer to string, in which we are searching
|
||||
// BP: pointer to string, that we are searching for
|
||||
// R8: pointer to string, that we are searching for
|
||||
// R11: address, where to put return value
|
||||
// Note: We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
||||
TEXT indexbody<>(SB),NOSPLIT,$0
|
||||
@ -37,11 +37,11 @@ TEXT indexbody<>(SB),NOSPLIT,$0
|
||||
no_sse42:
|
||||
CMPQ AX, $2
|
||||
JA _3_or_more
|
||||
MOVW (BP), BP
|
||||
MOVW (R8), R8
|
||||
LEAQ -1(DI)(DX*1), DX
|
||||
loop2:
|
||||
MOVW (DI), SI
|
||||
CMPW SI,BP
|
||||
CMPW SI,R8
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
@ -50,12 +50,12 @@ loop2:
|
||||
_3_or_more:
|
||||
CMPQ AX, $3
|
||||
JA _4_or_more
|
||||
MOVW 1(BP), BX
|
||||
MOVW (BP), BP
|
||||
MOVW 1(R8), BX
|
||||
MOVW (R8), R8
|
||||
LEAQ -2(DI)(DX*1), DX
|
||||
loop3:
|
||||
MOVW (DI), SI
|
||||
CMPW SI,BP
|
||||
CMPW SI,R8
|
||||
JZ partial_success3
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
@ -72,11 +72,11 @@ partial_success3:
|
||||
_4_or_more:
|
||||
CMPQ AX, $4
|
||||
JA _5_or_more
|
||||
MOVL (BP), BP
|
||||
MOVL (R8), R8
|
||||
LEAQ -3(DI)(DX*1), DX
|
||||
loop4:
|
||||
MOVL (DI), SI
|
||||
CMPL SI,BP
|
||||
CMPL SI,R8
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
@ -87,11 +87,11 @@ _5_or_more:
|
||||
JA _8_or_more
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
MOVL -4(BP)(AX*1), BX
|
||||
MOVL (BP), BP
|
||||
MOVL -4(R8)(AX*1), BX
|
||||
MOVL (R8), R8
|
||||
loop5to7:
|
||||
MOVL (DI), SI
|
||||
CMPL SI,BP
|
||||
CMPL SI,R8
|
||||
JZ partial_success5to7
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
@ -108,11 +108,11 @@ partial_success5to7:
|
||||
_8_or_more:
|
||||
CMPQ AX, $8
|
||||
JA _9_or_more
|
||||
MOVQ (BP), BP
|
||||
MOVQ (R8), R8
|
||||
LEAQ -7(DI)(DX*1), DX
|
||||
loop8:
|
||||
MOVQ (DI), SI
|
||||
CMPQ SI,BP
|
||||
CMPQ SI,R8
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
@ -123,11 +123,11 @@ _9_or_more:
|
||||
JA _16_or_more
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
MOVQ -8(BP)(AX*1), BX
|
||||
MOVQ (BP), BP
|
||||
MOVQ -8(R8)(AX*1), BX
|
||||
MOVQ (R8), R8
|
||||
loop9to15:
|
||||
MOVQ (DI), SI
|
||||
CMPQ SI,BP
|
||||
CMPQ SI,R8
|
||||
JZ partial_success9to15
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
@ -144,7 +144,7 @@ partial_success9to15:
|
||||
_16_or_more:
|
||||
CMPQ AX, $16
|
||||
JA _17_or_more
|
||||
MOVOU (BP), X1
|
||||
MOVOU (R8), X1
|
||||
LEAQ -15(DI)(DX*1), DX
|
||||
loop16:
|
||||
MOVOU (DI), X2
|
||||
@ -161,8 +161,8 @@ _17_or_more:
|
||||
JA _32_or_more
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
MOVOU -16(BP)(AX*1), X0
|
||||
MOVOU (BP), X1
|
||||
MOVOU -16(R8)(AX*1), X0
|
||||
MOVOU (R8), X1
|
||||
loop17to31:
|
||||
MOVOU (DI), X2
|
||||
PCMPEQB X1,X2
|
||||
@ -188,7 +188,7 @@ partial_success17to31:
|
||||
_32_or_more:
|
||||
CMPQ AX, $32
|
||||
JA _33_to_63
|
||||
VMOVDQU (BP), Y1
|
||||
VMOVDQU (R8), Y1
|
||||
LEAQ -31(DI)(DX*1), DX
|
||||
loop32:
|
||||
VMOVDQU (DI), Y2
|
||||
@ -203,8 +203,8 @@ loop32:
|
||||
_33_to_63:
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
VMOVDQU -32(BP)(AX*1), Y0
|
||||
VMOVDQU (BP), Y1
|
||||
VMOVDQU -32(R8)(AX*1), Y0
|
||||
VMOVDQU (R8), Y1
|
||||
loop33to63:
|
||||
VMOVDQU (DI), Y2
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
@ -241,10 +241,10 @@ sse42:
|
||||
// This value was determined experimentally and is the ~same
|
||||
// on Nehalem (first with SSE42) and Haswell.
|
||||
JAE _9_or_more
|
||||
LEAQ 16(BP), SI
|
||||
LEAQ 16(R8), SI
|
||||
TESTW $0xff0, SI
|
||||
JEQ no_sse42
|
||||
MOVOU (BP), X1
|
||||
MOVOU (R8), X1
|
||||
LEAQ -15(DI)(DX*1), SI
|
||||
MOVQ $16, R9
|
||||
SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
|
||||
|
@ -212,7 +212,7 @@ TEXT runtime·walltime1(SB),NOSPLIT,$16-12
|
||||
// due to stack probes inserted to avoid stack/heap collisions.
|
||||
// See issue #20427.
|
||||
|
||||
MOVQ SP, BP // Save old SP; BP unchanged by C code.
|
||||
MOVQ SP, R12 // Save old SP; R12 unchanged by C code.
|
||||
|
||||
get_tls(CX)
|
||||
MOVQ g(CX), AX
|
||||
@ -250,7 +250,7 @@ noswitch:
|
||||
MOVQ 0(SP), AX // sec
|
||||
MOVQ 8(SP), DX // nsec
|
||||
ret:
|
||||
MOVQ BP, SP // Restore real SP
|
||||
MOVQ R12, SP // Restore real SP
|
||||
// Restore vdsoPC, vdsoSP
|
||||
// We don't worry about being signaled between the two stores.
|
||||
// If we are not in a signal handler, we'll restore vdsoSP to 0,
|
||||
@ -277,7 +277,7 @@ fallback:
|
||||
TEXT runtime·nanotime1(SB),NOSPLIT,$16-8
|
||||
// Switch to g0 stack. See comment above in runtime·walltime.
|
||||
|
||||
MOVQ SP, BP // Save old SP; BP unchanged by C code.
|
||||
MOVQ SP, R12 // Save old SP; R12 unchanged by C code.
|
||||
|
||||
get_tls(CX)
|
||||
MOVQ g(CX), AX
|
||||
@ -315,7 +315,7 @@ noswitch:
|
||||
MOVQ 0(SP), AX // sec
|
||||
MOVQ 8(SP), DX // nsec
|
||||
ret:
|
||||
MOVQ BP, SP // Restore real SP
|
||||
MOVQ R12, SP // Restore real SP
|
||||
// Restore vdsoPC, vdsoSP
|
||||
// We don't worry about being signaled between the two stores.
|
||||
// If we are not in a signal handler, we'll restore vdsoSP to 0,
|
||||
|
Loading…
Reference in New Issue
Block a user