mirror of
https://github.com/golang/go
synced 2024-11-07 14:46:14 -07:00
289d34a465
Change-Id: Ib0469232a2b69a869e58d5d24990ad74ac96ea56
GitHub-Last-Rev: eb38e049ee
GitHub-Pull-Request: golang/go#44805
Reviewed-on: https://go-review.googlesource.com/c/go/+/299109
Trust: Emmanuel Odeke <emmanuel@orijtech.com>
Run-TryBot: Emmanuel Odeke <emmanuel@orijtech.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
173 lines
4.1 KiB
ArmAsm
173 lines
4.1 KiB
ArmAsm
// Copyright 2014 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build ppc64 ppc64le
|
|
|
|
#include "textflag.h"
|
|
|
|
// See memmove Go doc for important implementation constraints.
|
|
|
|
// func memmove(to, from unsafe.Pointer, n uintptr)
|
|
|
|
// destination address (the `to` argument); advanced as data is copied
|
|
#define TGT R3
|
|
// source address (the `from` argument); advanced as data is copied
|
|
#define SRC R4
|
|
// total number of bytes to move (the `n` argument)
|
|
#define LEN R5
|
|
// number of 8-byte doublewords to move (LEN >> 3)
|
|
#define DWORDS R6
|
|
// number of tail bytes left after the doublewords (LEN & 7, so always < 8)
|
|
#define BYTES R7
|
|
// constant 16, used as the index register for the second 16-byte half of a 32-byte chunk
|
|
#define IDX16 R8
|
|
// scratch register: holds the data in flight for scalar copies and the dest/src distance
|
|
#define TMP R9
|
|
// number of 32-byte chunks (DWORDS >> 2); despite the name these are not 16-byte quadwords
|
|
#define QWORDS R10
|
|
|
|
// runtime·memmove copies n bytes from `from` to `to`. The regions may overlap.
//
// Direction:
//   - If (to - from), taken as an unsigned value, is < n, the destination
//     starts inside the source, so a forward copy would overwrite source
//     bytes before they are read. Copy backward from the ends of the buffers.
//   - Otherwise copy forward.
//
// Both directions move 32-byte chunks with VSX loads/stores where possible,
// then 8-byte doublewords, then the 0-7 byte tail.
//
// Condition register fields:
//   CR0: scratch; on entry to `backward` it still holds the LEN&7 result
//   CR1: DWORDS compared with 0
//   CR2: unsigned (to - from) compared with n
//   CR3: copy of CR0 after LEN&7 (EQ set => no tail bytes); used by the
//        forward path because CR0 is overwritten there
//
// R0 is used as the zero index register in the indexed VSX accesses.
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), TGT
	MOVD	from+8(FP), SRC
	MOVD	n+16(FP), LEN

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, LEN, BYTES		// BYTES = LEN & 7; CR0[EQ] set if no tail bytes
	SRD	$3, LEN, DWORDS		// DWORDS = LEN >> 3
	MOVFL	CR0, CR3		// save CR0 from ANDCC; the forward path clobbers CR0
	CMP	DWORDS, $0, CR1		// CR1[EQ] set if no doublewords to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. The unsigned compare means a destination below the source
	// (negative difference) is treated as a huge value and copied forward;
	// this also avoids needless backward moves when src and dest live in
	// unrelated storage such as stack and static data.

	SUB	SRC, TGT, TMP		// TMP = dest - src
	CMPU	TMP, LEN, CR2		// unsigned: dest - src < len?
	BC	12, 8, backward		// BLT CR2, backward

	// Copying forward if no overlap.

	BC	12, 6, checkbytes	// BEQ CR1, checkbytes (no doublewords)
	SRDCC	$2, DWORDS, QWORDS	// QWORDS = DWORDS >> 2; CR0[EQ] if no 32-byte chunk
	BEQ	lt32gt8			// fewer than 32 bytes of doublewords

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(TGT)			// prepare data cache
	DCBT	(SRC)
	MOVD	QWORDS, CTR		// number of 32-byte chunks
	MOVD	$16, IDX16		// index of the second 16-byte half

forward32:
	LXVD2X	(R0)(SRC), VS32		// load bytes 0-15
	LXVD2X	(IDX16)(SRC), VS33	// load bytes 16-31
	ADD	$32, SRC
	STXVD2X	VS32, (R0)(TGT)		// store bytes 0-15
	STXVD2X	VS33, (IDX16)(TGT)	// store bytes 16-31
	ADD	$32, TGT		// bump up for next set
	BC	16, 0, forward32	// bdnz: loop while chunks remain
	ANDCC	$3, DWORDS		// DWORDS = doublewords left over (0-3)
	BEQ	checkbytes		// only tail bytes remain

lt32gt8:
	// At this point 1 <= DWORDS <= 3 (8 to 24 bytes of doublewords).
	// Move 16 bytes if possible.
	CMP	DWORDS, $2
	BLT	lt16
	LXVD2X	(R0)(SRC), VS32
	ADD	$-2, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	ADD	$16, SRC
	ADD	$16, TGT

lt16:	// Move 8 bytes if possible
	CMP	DWORDS, $1
	BLT	checkbytes
	MOVD	0(SRC), TMP
	ADD	$8, SRC
	MOVD	TMP, 0(TGT)
	ADD	$8, TGT

checkbytes:
	BC	12, 14, LR		// BEQ CR3, LR: return if there are no tail bytes

lt8:	// Move word if possible
	CMP	BYTES, $4
	BLT	lt4
	MOVWZ	0(SRC), TMP
	ADD	$-4, BYTES
	MOVW	TMP, 0(TGT)
	ADD	$4, SRC
	ADD	$4, TGT

lt4:	// Move halfword if possible
	CMP	BYTES, $2
	BLT	lt2
	MOVHZ	0(SRC), TMP
	ADD	$-2, BYTES
	MOVH	TMP, 0(TGT)
	ADD	$2, SRC
	ADD	$2, TGT

lt2:	// Move last byte if 1 left
	CMP	BYTES, $1
	BC	12, 0, LR		// BLT CR0, LR: return if no byte is left
	MOVBZ	0(SRC), TMP
	MOVBZ	TMP, 0(TGT)
	RET

backward:
	// Copying backwards proceeds by copying BYTES tail bytes one at a time,
	// then copying DWORDS doublewords. TGT and SRC are advanced to the end
	// of the destination/source buffers respectively and moved back as we
	// copy.

	ADD	LEN, SRC, SRC		// end of source
	ADD	TGT, LEN, TGT		// end of dest

	// CR0 still holds the result of ANDCC $7 above: nothing executed since
	// then on this path writes CR0.
	BEQ	nobackwardtail		// BYTES == 0

	MOVD	BYTES, CTR		// bytes to move

backwardtailloop:
	MOVBZ	-1(SRC), TMP		// byte just below the current end
	SUB	$1, SRC
	MOVBZ	TMP, -1(TGT)
	SUB	$1, TGT
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// BLE CR1, LR: return if there are no doublewords

backwardlarge:
	MOVD	DWORDS, CTR		// default: copy every doubleword in the 8-byte loop
	// Use VSX only when dest and src are at least 32 bytes apart and there
	// is at least one 32-byte chunk. The distance must be dest - src: on
	// this path dest >= src, so the opposite subtraction is never positive
	// and would make the VSX path unreachable.
	SUB	SRC, TGT, TMP		// TMP = dest - src (unchanged by adding LEN to both)
	CMP	TMP, $32
	BLT	backwardlargeloop	// distance < 32: stay with doublewords
	SRDCC	$2, DWORDS, QWORDS	// QWORDS = DWORDS >> 2; CR0[EQ] if none
	BNE	backward32setup

backwardlargeloop:
	MOVD	-8(SRC), TMP
	SUB	$8, SRC
	MOVD	TMP, -8(TGT)
	SUB	$8, TGT
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	QWORDS, CTR		// number of 32-byte chunks
	MOVD	$16, IDX16		// index of the second 16-byte half

backward32loop:
	SUB	$32, TGT
	SUB	$32, SRC
	// Both loads are issued before either store, so a chunk that overlaps
	// its own destination is still read intact.
	LXVD2X	(R0)(SRC), VS32		// load bytes 0-15 from the source
	LXVD2X	(IDX16)(SRC), VS33	// load bytes 16-31 from the source
	STXVD2X	VS32, (R0)(TGT)		// store bytes 0-15 to the destination
	STXVD2X	VS33, (IDX16)(TGT)	// store bytes 16-31 to the destination
	BC	16, 0, backward32loop	// bdnz
	// Only the doublewords not covered by the 32-byte chunks remain. CTR
	// must not be loaded with 0: bdnz would then wrap and keep looping.
	ANDCC	$3, DWORDS		// DWORDS = doublewords left over (0-3)
	BC	12, 2, LR		// BEQ CR0, LR: nothing left to copy
	MOVD	DWORDS, CTR
	BR	backwardlargeloop
|