mirror of
https://github.com/golang/go
synced 2024-10-04 16:31:22 -06:00
8303a13bb8
On Plan 9, the kernel disallows the use of floating point instructions while handling a note. Previously, we worked around this by using a simple loop in place of memmove. When I added that work-around, I verified that all paths from the note handler didn't end up calling memmove. Now that memclr is using SSE instructions, the same process will have to be done again. Instead of doing that, however, this CL just punts and uses unoptimized functions everywhere on Plan 9. LGTM=rsc R=rsc, 0intro CC=golang-codereviews https://golang.org/cl/73830044
128 lines
2.3 KiB
ArmAsm
128 lines
2.3 KiB
ArmAsm
// Copyright 2014 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build !plan9
|
|
|
|
#include "../../cmd/ld/textflag.h"
|
|
|
|
// void runtime·memclr(void*, uintptr)
//
// Zeroes n bytes starting at ptr. The arguments are read from the frame as
// ptr+0(FP) and n+4(FP); no result is written.
//
// Register roles inside this function:
//   DI = current destination address
//   BX = number of bytes still to clear
//   AX = 0, source for the 1/2/4-byte stores and for REP STOSL
//   CX = dword count, used only on the nosse2 path
//   X0 = 0, source for the 16-byte MOVOU stores
//
// Each fixed-size case below clears its range with one group of stores
// anchored at the start of the buffer and one group anchored at its end
// (DI+BX). The two groups may overlap; that is harmless because every
// store writes zero.
|
|
// $0-8: no local frame, 8 bytes of arguments.
TEXT runtime·memclr(SB), NOSPLIT, $0-8
|
|
MOVL ptr+0(FP), DI // DI = destination pointer
|
|
MOVL n+4(FP), BX // BX = byte count
|
|
XORL AX, AX // AX = 0
|
|
|
|
// MOVOU seems always faster than REP STOSL.
|
|
// Size dispatch on BX (unsigned compares). Also re-entered from clr_loop
// with fewer than 256 bytes left, and from nosse2 with 1..3 bytes left.
clr_tail:
|
|
TESTL BX, BX
|
|
JEQ clr_0 // nothing (left) to clear
|
|
CMPL BX, $2
|
|
JBE clr_1or2
|
|
CMPL BX, $4
|
|
JBE clr_3or4
|
|
CMPL BX, $8
|
|
JBE clr_5through8
|
|
CMPL BX, $16
|
|
JBE clr_9through16
|
|
// More than 16 bytes remain: the cases below need X0, so test for SSE2 first.
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 (bit 26 of runtime·cpuid_edx)
|
|
JEQ nosse2 // bit clear: fall back to REP STOSL
|
|
PXOR X0, X0 // X0 = 0
|
|
CMPL BX, $32
|
|
JBE clr_17through32
|
|
CMPL BX, $64
|
|
JBE clr_33through64
|
|
CMPL BX, $128
|
|
JBE clr_65through128
|
|
CMPL BX, $256
|
|
JBE clr_129through256
|
|
// TODO: use branch table and BSR to make this just a single dispatch
|
|
|
|
// Bulk loop: reached by fallthrough only when BX > 256, and repeated while
// BX >= 256. Each iteration clears 256 bytes with sixteen 16-byte stores.
clr_loop:
|
|
MOVOU X0, 0(DI)
|
|
MOVOU X0, 16(DI)
|
|
MOVOU X0, 32(DI)
|
|
MOVOU X0, 48(DI)
|
|
MOVOU X0, 64(DI)
|
|
MOVOU X0, 80(DI)
|
|
MOVOU X0, 96(DI)
|
|
MOVOU X0, 112(DI)
|
|
MOVOU X0, 128(DI)
|
|
MOVOU X0, 144(DI)
|
|
MOVOU X0, 160(DI)
|
|
MOVOU X0, 176(DI)
|
|
MOVOU X0, 192(DI)
|
|
MOVOU X0, 208(DI)
|
|
MOVOU X0, 224(DI)
|
|
MOVOU X0, 240(DI)
|
|
SUBL $256, BX // 256 fewer bytes remaining
|
|
ADDL $256, DI // advance destination past the cleared chunk
|
|
CMPL BX, $256
|
|
JAE clr_loop // at least one more full chunk
|
|
JMP clr_tail // 0..255 bytes left: re-dispatch on the remainder
|
|
|
|
// BX is 1 or 2: clear the first byte and the last byte (the same byte when BX == 1).
clr_1or2:
|
|
MOVB AX, (DI)
|
|
MOVB AX, -1(DI)(BX*1)
|
|
// BX is 0: shared return for the empty case and for clr_1or2's fallthrough.
clr_0:
|
|
RET
|
|
// BX is 3 or 4: clear the first two bytes and the last two bytes.
clr_3or4:
|
|
MOVW AX, (DI)
|
|
MOVW AX, -2(DI)(BX*1)
|
|
RET
|
|
// BX is 5..8: clear the first four bytes and the last four bytes.
clr_5through8:
|
|
MOVL AX, (DI)
|
|
MOVL AX, -4(DI)(BX*1)
|
|
RET
|
|
// BX is 9..16: clear the first eight and the last eight bytes, four at a time.
clr_9through16:
|
|
MOVL AX, (DI)
|
|
MOVL AX, 4(DI)
|
|
MOVL AX, -8(DI)(BX*1)
|
|
MOVL AX, -4(DI)(BX*1)
|
|
RET
|
|
// BX is 17..32: clear the first 16 and the last 16 bytes.
clr_17through32:
|
|
MOVOU X0, (DI)
|
|
MOVOU X0, -16(DI)(BX*1)
|
|
RET
|
|
// BX is 33..64: clear the first 32 and the last 32 bytes.
clr_33through64:
|
|
MOVOU X0, (DI)
|
|
MOVOU X0, 16(DI)
|
|
MOVOU X0, -32(DI)(BX*1)
|
|
MOVOU X0, -16(DI)(BX*1)
|
|
RET
|
|
// BX is 65..128: clear the first 64 and the last 64 bytes.
clr_65through128:
|
|
MOVOU X0, (DI)
|
|
MOVOU X0, 16(DI)
|
|
MOVOU X0, 32(DI)
|
|
MOVOU X0, 48(DI)
|
|
MOVOU X0, -64(DI)(BX*1)
|
|
MOVOU X0, -48(DI)(BX*1)
|
|
MOVOU X0, -32(DI)(BX*1)
|
|
MOVOU X0, -16(DI)(BX*1)
|
|
RET
|
|
// BX is 129..256: clear the first 128 and the last 128 bytes.
clr_129through256:
|
|
MOVOU X0, (DI)
|
|
MOVOU X0, 16(DI)
|
|
MOVOU X0, 32(DI)
|
|
MOVOU X0, 48(DI)
|
|
MOVOU X0, 64(DI)
|
|
MOVOU X0, 80(DI)
|
|
MOVOU X0, 96(DI)
|
|
MOVOU X0, 112(DI)
|
|
MOVOU X0, -128(DI)(BX*1)
|
|
MOVOU X0, -112(DI)(BX*1)
|
|
MOVOU X0, -96(DI)(BX*1)
|
|
MOVOU X0, -80(DI)(BX*1)
|
|
MOVOU X0, -64(DI)(BX*1)
|
|
MOVOU X0, -48(DI)(BX*1)
|
|
MOVOU X0, -32(DI)(BX*1)
|
|
MOVOU X0, -16(DI)(BX*1)
|
|
RET
|
|
// Fallback without SSE2; only reached with BX > 16. Clears whole dwords
// with REP STOSL, then hands the 0..3 leftover bytes back to clr_tail.
// NOTE(review): REP STOSL moves DI forward only if the direction flag is
// clear; this function never sets or clears it — confirm the caller-side
// convention guarantees DF = 0.
nosse2:
|
|
MOVL BX, CX
|
|
SHRL $2, CX // CX = number of whole 4-byte words
|
|
REP
|
|
STOSL // store AX (0) at (DI), CX times; DI ends past the cleared dwords
|
|
ANDL $3, BX // BX = leftover byte count; sets ZF when it is zero
|
|
JNE clr_tail // 1..3 bytes left, DI already points at them
|
|
RET
|