mirror of
https://github.com/golang/go
synced 2024-11-25 07:57:56 -07:00
bytes: move IndexByte assembly to pkg runtime
Per suggestion from Russ in February. Then strings.IndexByte can be implemented in terms of the shared code in pkg runtime. Update #3751 R=golang-dev, r CC=golang-dev https://golang.org/cl/12289043
This commit is contained in:
parent
39679ca88f
commit
e2a1bd68b3
@ -1,17 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
TEXT ·IndexByte(SB),7,$0
|
||||
MOVL s+0(FP), SI
|
||||
MOVL s_len+4(FP), CX
|
||||
MOVB c+12(FP), AL
|
||||
MOVL SI, DI
|
||||
CLD; REPN; SCASB
|
||||
JZ 3(PC)
|
||||
MOVL $-1, ret+16(FP)
|
||||
RET
|
||||
SUBL SI, DI
|
||||
SUBL $1, DI
|
||||
MOVL DI, ret+16(FP)
|
||||
RET
|
@ -1,91 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
TEXT ·IndexByte(SB),7,$0
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), BX
|
||||
MOVB c+24(FP), AL
|
||||
MOVQ SI, DI
|
||||
|
||||
CMPQ BX, $16
|
||||
JLT small
|
||||
|
||||
// round up to first 16-byte boundary
|
||||
TESTQ $15, SI
|
||||
JZ aligned
|
||||
MOVQ SI, CX
|
||||
ANDQ $~15, CX
|
||||
ADDQ $16, CX
|
||||
|
||||
// search the beginning
|
||||
SUBQ SI, CX
|
||||
REPN; SCASB
|
||||
JZ success
|
||||
|
||||
// DI is 16-byte aligned; get ready to search using SSE instructions
|
||||
aligned:
|
||||
// round down to last 16-byte boundary
|
||||
MOVQ BX, R11
|
||||
ADDQ SI, R11
|
||||
ANDQ $~15, R11
|
||||
|
||||
// shuffle X0 around so that each byte contains c
|
||||
MOVD AX, X0
|
||||
PUNPCKLBW X0, X0
|
||||
PUNPCKLBW X0, X0
|
||||
PSHUFL $0, X0, X0
|
||||
JMP condition
|
||||
|
||||
sse:
|
||||
// move the next 16-byte chunk of the buffer into X1
|
||||
MOVO (DI), X1
|
||||
// compare bytes in X0 to X1
|
||||
PCMPEQB X0, X1
|
||||
// take the top bit of each byte in X1 and put the result in DX
|
||||
PMOVMSKB X1, DX
|
||||
TESTL DX, DX
|
||||
JNZ ssesuccess
|
||||
ADDQ $16, DI
|
||||
|
||||
condition:
|
||||
CMPQ DI, R11
|
||||
JLT sse
|
||||
|
||||
// search the end
|
||||
MOVQ SI, CX
|
||||
ADDQ BX, CX
|
||||
SUBQ R11, CX
|
||||
// if CX == 0, the zero flag will be set and we'll end up
|
||||
// returning a false success
|
||||
JZ failure
|
||||
REPN; SCASB
|
||||
JZ success
|
||||
|
||||
failure:
|
||||
MOVQ $-1, ret+32(FP)
|
||||
RET
|
||||
|
||||
// handle for lengths < 16
|
||||
small:
|
||||
MOVQ BX, CX
|
||||
REPN; SCASB
|
||||
JZ success
|
||||
MOVQ $-1, ret+32(FP)
|
||||
RET
|
||||
|
||||
// we've found the chunk containing the byte
|
||||
// now just figure out which specific byte it is
|
||||
ssesuccess:
|
||||
// get the index of the least significant set bit
|
||||
BSFW DX, DX
|
||||
SUBQ SI, DI
|
||||
ADDQ DI, DX
|
||||
MOVQ DX, ret+32(FP)
|
||||
RET
|
||||
|
||||
success:
|
||||
SUBQ SI, DI
|
||||
SUBL $1, DI
|
||||
MOVQ DI, ret+32(FP)
|
||||
RET
|
@ -1,56 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
TEXT ·IndexByte(SB),7,$0
|
||||
MOVW s+0(FP), R0
|
||||
MOVW s_len+4(FP), R1
|
||||
MOVBU c+12(FP), R2 // byte to find
|
||||
MOVW R0, R4 // store base for later
|
||||
ADD R0, R1 // end
|
||||
|
||||
_loop:
|
||||
CMP R0, R1
|
||||
B.EQ _notfound
|
||||
MOVBU.P 1(R0), R3
|
||||
CMP R2, R3
|
||||
B.NE _loop
|
||||
|
||||
SUB $1, R0 // R0 will be one beyond the position we want
|
||||
SUB R4, R0 // remove base
|
||||
MOVW R0, ret+16(FP)
|
||||
RET
|
||||
|
||||
_notfound:
|
||||
MOVW $-1, R0
|
||||
MOVW R0, ret+16(FP)
|
||||
RET
|
||||
|
||||
TEXT ·Equal(SB),7,$0
|
||||
MOVW a_len+4(FP), R1
|
||||
MOVW b_len+16(FP), R3
|
||||
|
||||
CMP R1, R3 // unequal lengths are not equal
|
||||
B.NE _notequal
|
||||
|
||||
MOVW a+0(FP), R0
|
||||
MOVW b+12(FP), R2
|
||||
ADD R0, R1 // end
|
||||
|
||||
_next:
|
||||
CMP R0, R1
|
||||
B.EQ _equal // reached the end
|
||||
MOVBU.P 1(R0), R4
|
||||
MOVBU.P 1(R2), R5
|
||||
CMP R4, R5
|
||||
B.EQ _next
|
||||
|
||||
_notequal:
|
||||
MOVW $0, R0
|
||||
MOVBU R0, ret+24(FP)
|
||||
RET
|
||||
|
||||
_equal:
|
||||
MOVW $1, R0
|
||||
MOVBU R0, ret+24(FP)
|
||||
RET
|
5
src/pkg/bytes/bytes.s
Normal file
5
src/pkg/bytes/bytes.s
Normal file
@ -0,0 +1,5 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file is here just to make the go tool happy.
|
@ -7,13 +7,13 @@ package bytes
|
||||
//go:noescape
|
||||
|
||||
// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
|
||||
func IndexByte(s []byte, c byte) int // asm_$GOARCH.s
|
||||
func IndexByte(s []byte, c byte) int // ../runtime/asm_$GOARCH.s
|
||||
|
||||
//go:noescape
|
||||
|
||||
// Equal returns a boolean reporting whether a == b.
|
||||
// A nil argument is equivalent to an empty slice.
|
||||
func Equal(a, b []byte) bool // asm_arm.s or ../runtime/asm_{386,amd64}.s
|
||||
func Equal(a, b []byte) bool // ../runtime/asm_$GOARCH.s
|
||||
|
||||
//go:noescape
|
||||
|
||||
|
@ -1117,6 +1117,20 @@ TEXT bytes·Compare(SB),7,$0-28
|
||||
MOVL AX, res+24(FP)
|
||||
RET
|
||||
|
||||
TEXT bytes·IndexByte(SB),7,$0
|
||||
MOVL s+0(FP), SI
|
||||
MOVL s_len+4(FP), CX
|
||||
MOVB c+12(FP), AL
|
||||
MOVL SI, DI
|
||||
CLD; REPN; SCASB
|
||||
JZ 3(PC)
|
||||
MOVL $-1, ret+16(FP)
|
||||
RET
|
||||
SUBL SI, DI
|
||||
SUBL $1, DI
|
||||
MOVL DI, ret+16(FP)
|
||||
RET
|
||||
|
||||
// input:
|
||||
// SI = a
|
||||
// DI = b
|
||||
|
@ -908,19 +908,6 @@ TEXT runtime·memeq(SB),7,$0-24
|
||||
MOVQ count+16(FP), BX
|
||||
JMP runtime·memeqbody(SB)
|
||||
|
||||
TEXT bytes·Equal(SB),7,$0-49
|
||||
MOVQ a_len+8(FP), BX
|
||||
MOVQ b_len+32(FP), CX
|
||||
XORQ AX, AX
|
||||
CMPQ BX, CX
|
||||
JNE eqret
|
||||
MOVQ a+0(FP), SI
|
||||
MOVQ b+24(FP), DI
|
||||
CALL runtime·memeqbody(SB)
|
||||
eqret:
|
||||
MOVB AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
// a in SI
|
||||
// b in DI
|
||||
// count in BX
|
||||
@ -1142,3 +1129,104 @@ cmp_allsame:
|
||||
SETEQ CX // 1 if alen == blen
|
||||
LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
|
||||
RET
|
||||
|
||||
TEXT bytes·IndexByte(SB),7,$0
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), BX
|
||||
MOVB c+24(FP), AL
|
||||
MOVQ SI, DI
|
||||
|
||||
CMPQ BX, $16
|
||||
JLT indexbyte_small
|
||||
|
||||
// round up to first 16-byte boundary
|
||||
TESTQ $15, SI
|
||||
JZ aligned
|
||||
MOVQ SI, CX
|
||||
ANDQ $~15, CX
|
||||
ADDQ $16, CX
|
||||
|
||||
// search the beginning
|
||||
SUBQ SI, CX
|
||||
REPN; SCASB
|
||||
JZ success
|
||||
|
||||
// DI is 16-byte aligned; get ready to search using SSE instructions
|
||||
aligned:
|
||||
// round down to last 16-byte boundary
|
||||
MOVQ BX, R11
|
||||
ADDQ SI, R11
|
||||
ANDQ $~15, R11
|
||||
|
||||
// shuffle X0 around so that each byte contains c
|
||||
MOVD AX, X0
|
||||
PUNPCKLBW X0, X0
|
||||
PUNPCKLBW X0, X0
|
||||
PSHUFL $0, X0, X0
|
||||
JMP condition
|
||||
|
||||
sse:
|
||||
// move the next 16-byte chunk of the buffer into X1
|
||||
MOVO (DI), X1
|
||||
// compare bytes in X0 to X1
|
||||
PCMPEQB X0, X1
|
||||
// take the top bit of each byte in X1 and put the result in DX
|
||||
PMOVMSKB X1, DX
|
||||
TESTL DX, DX
|
||||
JNZ ssesuccess
|
||||
ADDQ $16, DI
|
||||
|
||||
condition:
|
||||
CMPQ DI, R11
|
||||
JLT sse
|
||||
|
||||
// search the end
|
||||
MOVQ SI, CX
|
||||
ADDQ BX, CX
|
||||
SUBQ R11, CX
|
||||
// if CX == 0, the zero flag will be set and we'll end up
|
||||
// returning a false success
|
||||
JZ failure
|
||||
REPN; SCASB
|
||||
JZ success
|
||||
|
||||
failure:
|
||||
MOVQ $-1, ret+32(FP)
|
||||
RET
|
||||
|
||||
// handle for lengths < 16
|
||||
indexbyte_small:
|
||||
MOVQ BX, CX
|
||||
REPN; SCASB
|
||||
JZ success
|
||||
MOVQ $-1, ret+32(FP)
|
||||
RET
|
||||
|
||||
// we've found the chunk containing the byte
|
||||
// now just figure out which specific byte it is
|
||||
ssesuccess:
|
||||
// get the index of the least significant set bit
|
||||
BSFW DX, DX
|
||||
SUBQ SI, DI
|
||||
ADDQ DI, DX
|
||||
MOVQ DX, ret+32(FP)
|
||||
RET
|
||||
|
||||
success:
|
||||
SUBQ SI, DI
|
||||
SUBL $1, DI
|
||||
MOVQ DI, ret+32(FP)
|
||||
RET
|
||||
|
||||
TEXT bytes·Equal(SB),7,$0-49
|
||||
MOVQ a_len+8(FP), BX
|
||||
MOVQ b_len+32(FP), CX
|
||||
XORQ AX, AX
|
||||
CMPQ BX, CX
|
||||
JNE eqret
|
||||
MOVQ a+0(FP), SI
|
||||
MOVQ b+24(FP), DI
|
||||
CALL runtime·memeqbody(SB)
|
||||
eqret:
|
||||
MOVB AX, ret+48(FP)
|
||||
RET
|
||||
|
@ -514,3 +514,57 @@ _next:
|
||||
|
||||
MOVW $0, R0
|
||||
RET
|
||||
|
||||
// TODO: share code with memeq?
|
||||
TEXT bytes·Equal(SB),7,$0
|
||||
MOVW a_len+4(FP), R1
|
||||
MOVW b_len+16(FP), R3
|
||||
|
||||
CMP R1, R3 // unequal lengths are not equal
|
||||
B.NE _notequal
|
||||
|
||||
MOVW a+0(FP), R0
|
||||
MOVW b+12(FP), R2
|
||||
ADD R0, R1 // end
|
||||
|
||||
_byteseq_next:
|
||||
CMP R0, R1
|
||||
B.EQ _equal // reached the end
|
||||
MOVBU.P 1(R0), R4
|
||||
MOVBU.P 1(R2), R5
|
||||
CMP R4, R5
|
||||
B.EQ _byteseq_next
|
||||
|
||||
_notequal:
|
||||
MOVW $0, R0
|
||||
MOVBU R0, ret+24(FP)
|
||||
RET
|
||||
|
||||
_equal:
|
||||
MOVW $1, R0
|
||||
MOVBU R0, ret+24(FP)
|
||||
RET
|
||||
|
||||
TEXT bytes·IndexByte(SB),7,$0
|
||||
MOVW s+0(FP), R0
|
||||
MOVW s_len+4(FP), R1
|
||||
MOVBU c+12(FP), R2 // byte to find
|
||||
MOVW R0, R4 // store base for later
|
||||
ADD R0, R1 // end
|
||||
|
||||
_loop:
|
||||
CMP R0, R1
|
||||
B.EQ _notfound
|
||||
MOVBU.P 1(R0), R3
|
||||
CMP R2, R3
|
||||
B.NE _loop
|
||||
|
||||
SUB $1, R0 // R0 will be one beyond the position we want
|
||||
SUB R4, R0 // remove base
|
||||
MOVW R0, ret+16(FP)
|
||||
RET
|
||||
|
||||
_notfound:
|
||||
MOVW $-1, R0
|
||||
MOVW R0, ret+16(FP)
|
||||
RET
|
||||
|
Loading…
Reference in New Issue
Block a user