diff --git a/src/internal/bytealg/indexbyte_riscv64.s b/src/internal/bytealg/indexbyte_riscv64.s index de00983c7b9..fde00da0eac 100644 --- a/src/internal/bytealg/indexbyte_riscv64.s +++ b/src/internal/bytealg/indexbyte_riscv64.s @@ -10,33 +10,14 @@ TEXT ·IndexByte(SB),NOSPLIT,$0-40 // X11 = b_len // X12 = b_cap (unused) // X13 = byte to find - AND $0xff, X13 - MOV X10, X12 // store base for later - ADD X10, X11 // end - SUB $1, X10 - -loop: - ADD $1, X10 - BEQ X10, X11, notfound - MOVBU (X10), X14 - BNE X13, X14, loop - - SUB X12, X10 // remove base - RET - -notfound: - MOV $-1, X10 - RET - -TEXT ·IndexByteString(SB),NOSPLIT,$0-32 - // X10 = b_base - // X11 = b_len - // X12 = byte to find - AND $0xff, X12 + AND $0xff, X13, X12 // x12 byte to look for MOV X10, X13 // store base for later - ADD X10, X11 // end - SUB $1, X10 + SLTI $24, X11, X14 + ADD X10, X11 // end + BEQZ X14, bigBody + + SUB $1, X10 loop: ADD $1, X10 BEQ X10, X11, notfound @@ -49,3 +30,110 @@ loop: notfound: MOV $-1, X10 RET + +bigBody: + JMP indexByteBig<>(SB) + +TEXT ·IndexByteString(SB),NOSPLIT,$0-32 + // X10 = b_base + // X11 = b_len + // X12 = byte to find + + AND $0xff, X12 // x12 byte to look for + MOV X10, X13 // store base for later + + SLTI $24, X11, X14 + ADD X10, X11 // end + BEQZ X14, bigBody + + SUB $1, X10 +loop: + ADD $1, X10 + BEQ X10, X11, notfound + MOVBU (X10), X14 + BNE X12, X14, loop + + SUB X13, X10 // remove base + RET + +notfound: + MOV $-1, X10 + RET + +bigBody: + JMP indexByteBig<>(SB) + +TEXT indexByteBig<>(SB),NOSPLIT|NOFRAME,$0 + // On entry + // X10 = b_base + // X11 = end + // X12 = byte to find + // X13 = b_base + // X11 is at least 16 bytes > X10 + + // On exit + // X10 = index of first instance of sought byte, if found, or -1 otherwise + + // Process the first few bytes until we get to an 8 byte boundary + // No need to check for end here as we have at least 16 bytes in + // the buffer. + +unalignedloop: + AND $7, X10, X14 + BEQZ X14, aligned + MOVBU (X10), X14 + BEQ X12, X14, found + ADD $1, X10 + JMP unalignedloop + +aligned: + AND $~7, X11, X15 // X15 = end of aligned data + + // We have at least 9 bytes left + + // Use 'Determine if a word has a byte equal to n' bit hack from + // https://graphics.stanford.edu/~seander/bithacks.html to determine + // whether the byte is present somewhere in the next 8 bytes of the + // array. + + MOV $0x0101010101010101, X16 + SLLI $7, X16, X17 // X17 = 0x8080808080808080 + + MUL X12, X16, X18 // broadcast X12 to every byte in X18 + +alignedloop: + MOV (X10), X14 + XOR X14, X18, X19 + + // If the LSB in X12 is present somewhere in the 8 bytes we've just + // loaded into X14 then at least one of the bytes in X19 will be 0 + // after the XOR. If any of the bytes in X19 are zero then + // + // ((X19 - X16) & (~X19) & X17) + // + // will be non-zero. The expression will evaluate to zero if none of + // the bytes in X19 are zero, i.e., X12 is not present in X14. + + SUB X16, X19, X20 + ANDN X19, X17, X21 + AND X20, X21 + BNEZ X21, tailloop // If X21 != 0 X12 is present in X14 + ADD $8, X10 + BNE X10, X15, alignedloop + +tailloop: + SUB $1, X10 + +loop: + ADD $1, X10 + BEQ X10, X11, notfound + MOVBU (X10), X14 + BNE X12, X14, loop + +found: + SUB X13, X10 // remove base + RET + +notfound: + MOV $-1, X10 + RET