
runtime: ensure memmove writes pointers atomically on ARM64

If a pointer write is not atomic and the GC is running
concurrently, the GC may observe a partially updated pointer,
which may point to unallocated or already dead memory. Most
pointer writes, like the store instructions generated by the
compiler, are already atomic. But we still need to be careful in
places like memmove. In memmove we don't know which bits are
pointers (or it would be too expensive to find out), so we ensure
that all aligned pointer-sized units are written atomically.

Fixes #36101.

Change-Id: I1b3ca24c6b1ac8a8aaf9ee470115e9a89ec1b00b
Reviewed-on: https://go-review.googlesource.com/c/go/+/212626
Reviewed-by: Austin Clements <austin@google.com>
Cherry Zhang 2019-12-27 12:02:00 -05:00
parent a4c579e8f7
commit ffbc02761a

src/runtime/memmove_arm64.s
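
To make the hazard concrete, here is a minimal Go sketch of the situation the fix protects against. The types and sizes are hypothetical; the real hazard lives inside runtime·memmove itself, not in user code:

package main

import "runtime"

type node struct{ next *node }

func main() {
	src := make([]*node, 1024)
	for i := range src {
		src[i] = &node{}
	}
	dst := make([]*node, 1024)

	// copy compiles down to a call to runtime·memmove. Each 8-byte
	// pointer slot of dst must be filled by one aligned store: if
	// memmove wrote it as two 4-byte halves, a concurrently running
	// GC could scan dst between the halves and follow a "pointer"
	// into unallocated or already freed memory.
	copy(dst, src)

	runtime.GC() // in a real program the GC can be running at any time
	_ = dst
}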

@@ -22,7 +22,7 @@ check:
 	CMP	R3, R4
 	BLT	backward
 
-	// Copying forward proceeds by copying R7/8 words then copying R6 bytes.
+	// Copying forward proceeds by copying R7/32 quadwords then R6 <= 31 tail bytes.
 	// R3 and R4 are advanced as we copy.
 
 	// (There may be implementations of armv8 where copying by bytes until
@@ -30,11 +30,12 @@ check:
 	// optimization, but the on the one tested so far (xgene) it did not
 	// make a significance difference.)
 
-	CBZ	R7, noforwardlarge // Do we need to do any doubleword-by-doubleword copying?
+	CBZ	R7, noforwardlarge // Do we need to do any quadword copying?
 
 	ADD	R3, R7, R9 // R9 points just past where we copy by word
 
 forwardlargeloop:
+	// Copy 32 bytes at a time.
 	LDP.P	32(R4), (R8, R10)
 	STP.P	(R8, R10), 32(R3)
 	LDP	-16(R4), (R11, R12)
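
The loop above moves 32 bytes per iteration with two LDP/STP pairs, and aligned 64-bit accesses are single-copy atomic on ARM64, so every aligned pointer-sized unit still moves in one store. A rough Go model of the loop, with encoding/binary standing in for the 8-byte loads and stores (a sketch under that assumption, not the runtime's code; forwardLarge is a made-up name):

package sketch

import "encoding/binary"

// forwardLarge models forwardlargeloop above: quadBytes is the byte
// count handled in 32-byte chunks (R7). Each chunk is moved as four
// aligned 8-byte words, mirroring the two LDP/STP pairs, so no
// pointer-sized unit is ever written in two halves.
func forwardLarge(dst, src []byte, quadBytes int) {
	for off := 0; off < quadBytes; off += 32 {
		for i := 0; i < 32; i += 8 {
			w := binary.LittleEndian.Uint64(src[off+i:])
			binary.LittleEndian.PutUint64(dst[off+i:], w)
		}
	}
}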
@@ -43,10 +44,26 @@ forwardlargeloop:
 	CBNZ	R7, forwardlargeloop
 
 noforwardlarge:
-	CBNZ	R6, forwardtail // Do we need to do any byte-by-byte copying?
+	CBNZ	R6, forwardtail // Do we need to copy any tail bytes?
 	RET
 
 forwardtail:
+	// There are R6 <= 31 bytes remaining to copy.
+	// This is large enough to still contain pointers,
+	// which must be copied atomically.
+	// Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
+	TBZ	$4, R6, 3(PC)	// write 16 bytes if R6&16 != 0
+	LDP.P	16(R4), (R8, R10)
+	STP.P	(R8, R10), 16(R3)
+
+	TBZ	$3, R6, 3(PC)	// write 8 bytes if R6&8 != 0
+	MOVD.P	8(R4), R8
+	MOVD.P	R8, 8(R3)
+
+	AND	$7, R6
+	CBNZ	R6, 2(PC)
+	RET
+
 	ADD	R3, R6, R9 // R9 points just past the destination memory
 
 forwardtailloop:
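
This new forwardtail is the core of the change: the R6 <= 31 remaining bytes are peeled as a 16-byte chunk if bit 4 of R6 is set (TBZ $4), then an 8-byte chunk if bit 3 is set (TBZ $3), and only the final R6&7 bytes, which are too small to hold a pointer, are copied byte by byte. A Go rendering of that decomposition (illustrative only; it models the offset arithmetic, not the store-level atomicity, and forwardTail is a made-up name):

package sketch

import "encoding/binary"

// forwardTail mirrors the new forwardtail logic for the n <= 31
// bytes that remain after the 32-byte loop. Callers guarantee
// len(dst) >= n and len(src) >= n.
func forwardTail(dst, src []byte, n int) {
	i := 0
	if n&16 != 0 { // TBZ $4: move 16 bytes as two 8-byte words
		binary.LittleEndian.PutUint64(dst[i:], binary.LittleEndian.Uint64(src[i:]))
		binary.LittleEndian.PutUint64(dst[i+8:], binary.LittleEndian.Uint64(src[i+8:]))
		i += 16
	}
	if n&8 != 0 { // TBZ $3: move one pointer-sized word in one store
		binary.LittleEndian.PutUint64(dst[i:], binary.LittleEndian.Uint64(src[i:]))
		i += 8
	}
	for ; i < n; i++ { // n&7 < 8 bytes remain: too small to hold a pointer
		dst[i] = src[i]
	}
}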
@@ -90,7 +107,7 @@ copy1:
 	RET
 
 backward:
-	// Copying backwards proceeds by copying R6 bytes then copying R7/8 words.
+	// Copying backwards first copies R6 <= 31 tail bytes, then R7/32 quadwords.
 	// R3 and R4 are advanced to the end of the destination/source buffers
 	// respectively and moved back as we copy.
@@ -99,13 +116,28 @@ backward:
 
 	CBZ	R6, nobackwardtail // Do we need to do any byte-by-byte copying?
 
-	SUB	R6, R3, R9 // R9 points at the lowest destination byte that should be copied by byte.
+	AND	$7, R6, R12
+	CBZ	R12, backwardtaillarge
+
+	SUB	R12, R3, R9 // R9 points at the lowest destination byte that should be copied by byte.
 backwardtailloop:
+	// Copy sub-pointer-size tail.
 	MOVBU.W	-1(R4), R8
 	MOVBU.W	R8, -1(R3)
 	CMP	R9, R3
 	BNE	backwardtailloop
 
+backwardtaillarge:
+	// Do 8/16-byte write if possible.
+	// See comment at forwardtail.
+	TBZ	$3, R6, 3(PC)
+	MOVD.W	-8(R4), R8
+	MOVD.W	R8, -8(R3)
+
+	TBZ	$4, R6, 3(PC)
+	LDP.W	-16(R4), (R8, R10)
+	STP.W	(R8, R10), -16(R3)
+
 nobackwardtail:
 	CBNZ	R7, backwardlarge // Do we need to do any doubleword-by-doubleword copying?
 	RET
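
The backward tail is the mirror image of forwardtail: the R6&7 sub-pointer-size bytes are copied byte by byte from the high end first, then the 8-byte chunk (bit 3) and the 16-byte chunk (bit 4) below them, so each chunk lands at the same offsets the forward path would use. A matching Go sketch under the same assumptions as above (backwardTail is a made-up name):

package sketch

import "encoding/binary"

// backwardTail mirrors the new backward tail logic for n <= 31 bytes,
// working down from the high end of the buffers.
func backwardTail(dst, src []byte, n int) {
	j := n
	for k := 0; k < n&7; k++ { // backwardtailloop: sub-word bytes first
		j--
		dst[j] = src[j]
	}
	if n&8 != 0 { // MOVD.W pair: one pointer-sized store
		j -= 8
		binary.LittleEndian.PutUint64(dst[j:], binary.LittleEndian.Uint64(src[j:]))
	}
	if n&16 != 0 { // LDP.W/STP.W pair: two 8-byte words
		j -= 16
		binary.LittleEndian.PutUint64(dst[j:], binary.LittleEndian.Uint64(src[j:]))
		binary.LittleEndian.PutUint64(dst[j+8:], binary.LittleEndian.Uint64(src[j+8:]))
	}
}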