1
0
mirror of https://github.com/golang/go synced 2024-11-20 03:14:43 -07:00
go/src/runtime/memmove_arm.s
Rob Pike 69ddb7a408 [dev.cc] all: edit assembly source for ARM to be more regular
Several .s files for ARM had properties that the new assembler will not support.
These include:

- mentioning SP or PC as a hardware register
	These are always pseudo-registers except that in some contexts
	they're not, and it's confusing because the context should not affect
	which register you mean. Change the references to the hardware
	registers to be explicit: R13 for SP, R15 for PC.
- constant creation using assignment
	The files say a=b when they could instead say #define a b.
	There is no reason to have both mechanisms.
- R(0) to refer to R0.
	Some macros use this to a great extent. Again, it's easy just to
	use a #define to rename a register.

Change-Id: I002335ace8e876c5b63c71c2560533eb835346d2
Reviewed-on: https://go-review.googlesource.com/4822
Reviewed-by: Dave Cheney <dave@cheney.net>
2015-02-13 23:08:51 +00:00

262 lines
5.8 KiB
ArmAsm

// Inferno's libkern/memmove-arm.s
// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "textflag.h"

// Register plan for memmove. Several names alias the same hardware
// register; the aliases are safe because the corresponding live ranges
// never overlap (noted per define below).
//
// TS = "to start" pointer, TE = "to end" pointer.
// TE or TS are spilled to the stack during bulk register moves.
#define TS R0
#define TE R8
// Warning: the linker will use R11 to synthesize certain instructions. Please
// take care and double check with objdump.
#define FROM R11
#define N R12
#define TMP R12 /* N and TMP don't overlap */
#define TMP1 R5
// RSHIFT/LSHIFT/OFFSET are only live in the unaligned-copy paths,
// where TMP1 (also R5) is not used.
#define RSHIFT R5
#define LSHIFT R6
#define OFFSET R7
// Block registers for the 16-byte unaligned copy loops.
// BRn = backward-copy read, BWn = backward-copy write;
// FRn = forward-copy read, FWn = forward-copy write.
// Read and write registers are deliberately staggered (BW1 == BR2's
// predecessor, etc.) so each output word can be assembled by ORing the
// shifted halves of two adjacent input words in place.
#define BR0 R0 /* shared with TS */
#define BW0 R1
#define BR1 R1
#define BW1 R2
#define BR2 R2
#define BW2 R3
#define BR3 R3
#define BW3 R4
#define FW0 R1
#define FR0 R2
#define FW1 R2
#define FR1 R3
#define FW2 R3
#define FR2 R4
#define FW3 R4
#define FR3 R8 /* shared with TE */
// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from "from" to "to"; the regions may overlap.
// Direction is chosen so overlapping bytes are read before they are
// overwritten: forward when to <= from (unsigned), backward (from the
// high ends of both regions) otherwise.
//
// Both directions use the same staged strategy:
//   1. copy single bytes until the destination is 4-byte aligned;
//   2. if the source is then also word-aligned, copy 32-byte blocks
//      with MOVM (load/store multiple), then single words, then a
//      byte tail;
//   3. if the source is misaligned relative to the destination, copy
//      16-byte blocks: read aligned source words and reassemble each
//      output word from two adjacent inputs via LSHIFT/RSHIFT and ORR.
//
// Frame is $4-12: 4 bytes of locals (the savedts/savedte spill slot —
// R0/TS and R8/TE double as MOVM block registers) and 12 bytes of
// arguments.
TEXT runtime·memmove(SB), NOSPLIT, $4-12
_memmove:
MOVW to+0(FP), TS
MOVW from+4(FP), FROM
MOVW n+8(FP), N
ADD N, TS, TE /* to end pointer */
CMP FROM, TS
BLS _forward /* to <= from: forward copy cannot clobber unread source */
// Backward copy: TE and the (advanced) FROM walk down toward TS.
_back:
ADD N, FROM /* from end pointer */
CMP $4, N /* need at least 4 bytes to copy */
BLT _b1tail
_b4align: /* align destination on 4 */
AND.S $3, TE, TMP
BEQ _b4aligned
MOVBU.W -1(FROM), TMP /* pre-indexed */
MOVBU.W TMP, -1(TE) /* pre-indexed */
B _b4align
_b4aligned: /* is source now aligned? */
AND.S $3, FROM, TMP
BNE _bunaligned
ADD $31, TS, TMP /* do 32-byte chunks if possible */
MOVW TS, savedts-4(SP) /* spill TS: R0 is about to be used by MOVM */
_b32loop:
CMP TMP, TE
BLS _b4tail /* fewer than 32 bytes remain above TS */
MOVM.DB.W (FROM), [R0-R7] /* load 8 words, decrement-before, writeback */
MOVM.DB.W [R0-R7], (TE) /* store 8 words */
B _b32loop
_b4tail: /* do remaining words if possible */
MOVW savedts-4(SP), TS /* restore TS (R0 was clobbered above) */
ADD $3, TS, TMP
_b4loop:
CMP TMP, TE
BLS _b1tail /* fewer than 4 bytes remain */
MOVW.W -4(FROM), TMP1 /* pre-indexed */
MOVW.W TMP1, -4(TE) /* pre-indexed */
B _b4loop
_b1tail: /* remaining bytes */
CMP TE, TS
BEQ _return /* TE has met TS: done */
MOVBU.W -1(FROM), TMP /* pre-indexed */
MOVBU.W TMP, -1(TE) /* pre-indexed */
B _b1tail
// Forward copy: TS and FROM walk up toward TE.
_forward:
CMP $4, N /* need at least 4 bytes to copy */
BLT _f1tail
_f4align: /* align destination on 4 */
AND.S $3, TS, TMP
BEQ _f4aligned
MOVBU.P 1(FROM), TMP /* implicit write back */
MOVBU.P TMP, 1(TS) /* implicit write back */
B _f4align
_f4aligned: /* is source now aligned? */
AND.S $3, FROM, TMP
BNE _funaligned
SUB $31, TE, TMP /* do 32-byte chunks if possible */
MOVW TE, savedte-4(SP) /* spill TE: R8 is about to be used by MOVM */
_f32loop:
CMP TMP, TS
BHS _f4tail /* fewer than 32 bytes remain below TE */
MOVM.IA.W (FROM), [R1-R8] /* load 8 words, increment-after, writeback */
MOVM.IA.W [R1-R8], (TS) /* store 8 words */
B _f32loop
_f4tail:
MOVW savedte-4(SP), TE /* restore TE (R8 was clobbered above) */
SUB $3, TE, TMP /* do remaining words if possible */
_f4loop:
CMP TMP, TS
BHS _f1tail /* fewer than 4 bytes remain */
MOVW.P 4(FROM), TMP1 /* implicit write back */
MOVW.P TMP1, 4(TS) /* implicit write back */
B _f4loop
_f1tail: /* remaining bytes */
CMP TS, TE
BEQ _return /* TS has met TE: done */
MOVBU.P 1(FROM), TMP /* implicit write back */
MOVBU.P TMP, 1(TS) /* implicit write back */
B _f1tail
_return:
MOVW to+0(FP), R0 /* reload "to" into R0 (C-style memmove result) */
RET
// Backward copy with source misaligned by TMP (1..3) bytes relative to
// the word-aligned destination. Select shift amounts and the residual
// byte offset from the misalignment, then copy 16 bytes per iteration
// by splicing adjacent aligned source words.
_bunaligned:
CMP $2, TMP /* is TMP < 2 ? */
MOVW.LT $8, RSHIFT /* (R(n)<<24)|(R(n-1)>>8) */
MOVW.LT $24, LSHIFT
MOVW.LT $1, OFFSET
MOVW.EQ $16, RSHIFT /* (R(n)<<16)|(R(n-1)>>16) */
MOVW.EQ $16, LSHIFT
MOVW.EQ $2, OFFSET
MOVW.GT $24, RSHIFT /* (R(n)<<8)|(R(n-1)>>24) */
MOVW.GT $8, LSHIFT
MOVW.GT $3, OFFSET
ADD $16, TS, TMP /* do 16-byte chunks if possible */
CMP TMP, TE
BLS _b1tail /* too short: finish bytewise */
BIC $3, FROM /* align source */
MOVW TS, savedts-4(SP) /* spill TS: R0 doubles as BR0 below */
MOVW (FROM), BR0 /* prime first block register */
_bu16loop:
CMP TMP, TE
BLS _bu1tail
MOVW BR0<<LSHIFT, BW3 /* start output word from previous load */
MOVM.DB.W (FROM), [BR0-BR3] /* load next 4 aligned source words */
ORR BR3>>RSHIFT, BW3 /* splice in the adjacent word's other half */
MOVW BR3<<LSHIFT, BW2
ORR BR2>>RSHIFT, BW2
MOVW BR2<<LSHIFT, BW1
ORR BR1>>RSHIFT, BW1
MOVW BR1<<LSHIFT, BW0
ORR BR0>>RSHIFT, BW0
MOVM.DB.W [BW0-BW3], (TE) /* store 16 assembled bytes */
B _bu16loop
_bu1tail:
MOVW savedts-4(SP), TS /* restore TS for the byte tail */
ADD OFFSET, FROM /* re-apply the byte offset removed by BIC */
B _b1tail
// Forward analogue of _bunaligned: same shift/splice scheme, walking up.
_funaligned:
CMP $2, TMP
MOVW.LT $8, RSHIFT /* (R(n+1)<<24)|(R(n)>>8) */
MOVW.LT $24, LSHIFT
MOVW.LT $3, OFFSET
MOVW.EQ $16, RSHIFT /* (R(n+1)<<16)|(R(n)>>16) */
MOVW.EQ $16, LSHIFT
MOVW.EQ $2, OFFSET
MOVW.GT $24, RSHIFT /* (R(n+1)<<8)|(R(n)>>24) */
MOVW.GT $8, LSHIFT
MOVW.GT $1, OFFSET
SUB $16, TE, TMP /* do 16-byte chunks if possible */
CMP TMP, TS
BHS _f1tail /* too short: finish bytewise */
BIC $3, FROM /* align source */
MOVW TE, savedte-4(SP) /* spill TE: R8 doubles as FR3 below */
MOVW.P 4(FROM), FR3 /* prime last block register, implicit write back */
_fu16loop:
CMP TMP, TS
BHS _fu1tail
MOVW FR3>>RSHIFT, FW0 /* start output word from previous load */
MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3] /* load next 4 aligned source words */
ORR FR0<<LSHIFT, FW0 /* splice in the adjacent word's other half */
MOVW FR0>>RSHIFT, FW1
ORR FR1<<LSHIFT, FW1
MOVW FR1>>RSHIFT, FW2
ORR FR2<<LSHIFT, FW2
MOVW FR2>>RSHIFT, FW3
ORR FR3<<LSHIFT, FW3
MOVM.IA.W [FW0,FW1,FW2,FW3], (TS) /* store 16 assembled bytes */
B _fu16loop
_fu1tail:
MOVW savedte-4(SP), TE /* restore TE for the byte tail */
SUB OFFSET, FROM /* step FROM back to the true next source byte */
B _f1tail