mirror of
https://github.com/golang/go
synced 2024-11-19 16:24:45 -07:00
runtime: improve IndexByte for ppc64x
This change adds a better implementation of IndexByte in asm that uses
the vector registers/instructions on ppc64x.

benchmark                      old ns/op     new ns/op     delta
BenchmarkIndexByte/10-8        9.70          9.37          -3.40%
BenchmarkIndexByte/32-8        10.9          10.9          +0.00%
BenchmarkIndexByte/4K-8        254           92.8          -63.46%
BenchmarkIndexByte/4M-8        249246        118435        -52.48%
BenchmarkIndexByte/64M-8       10737987      7383096       -31.24%

benchmark                      old MB/s     new MB/s     speedup
BenchmarkIndexByte/10-8        1030.63      1067.24      1.04x
BenchmarkIndexByte/32-8        2922.69      2928.53      1.00x
BenchmarkIndexByte/4K-8        16065.95     44156.45     2.75x
BenchmarkIndexByte/4M-8        16827.96     35414.21     2.10x
BenchmarkIndexByte/64M-8       6249.67      9089.53      1.45x

Change-Id: I81dbdd620f7bb4e395ce4d1f2a14e8e91e39f9a1
Reviewed-on: https://go-review.googlesource.com/71710
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
parent
4fcc835971
commit
be943df588
6
src/cmd/asm/internal/asm/testdata/ppc64.s
vendored
6
src/cmd/asm/internal/asm/testdata/ppc64.s
vendored
@ -932,6 +932,12 @@ label1:
|
|||||||
// <mnemonic> VRT,VRA,VRB,VRC
|
// <mnemonic> VRT,VRA,VRB,VRC
|
||||||
VPERM V3, V2, V1, V0
|
VPERM V3, V2, V1, V0
|
||||||
|
|
||||||
|
// Vector bit permute, VX-form
|
||||||
|
// <MNEMONIC> VRA,VRB,VRT produces
|
||||||
|
// <mnemonic> VRT,VRA,VRB
|
||||||
|
VBPERMQ V3,V1,V2
|
||||||
|
VBPERMD V3,V1,V2
|
||||||
|
|
||||||
// Vector select, VA-form
|
// Vector select, VA-form
|
||||||
// <MNEMONIC> VRA,VRB,VRC,VRT produces
|
// <MNEMONIC> VRA,VRB,VRC,VRT produces
|
||||||
// <mnemonic> VRT,VRA,VRB,VRC
|
// <mnemonic> VRT,VRA,VRB,VRC
|
||||||
|
@ -859,6 +859,8 @@ const (
|
|||||||
AVCMPNEZB
|
AVCMPNEZB
|
||||||
AVCMPNEZBCC
|
AVCMPNEZBCC
|
||||||
AVPERM
|
AVPERM
|
||||||
|
AVBPERMQ
|
||||||
|
AVBPERMD
|
||||||
AVSEL
|
AVSEL
|
||||||
AVSPLT
|
AVSPLT
|
||||||
AVSPLTB
|
AVSPLTB
|
||||||
|
@ -474,6 +474,8 @@ var Anames = []string{
|
|||||||
"VCMPNEZB",
|
"VCMPNEZB",
|
||||||
"VCMPNEZBCC",
|
"VCMPNEZBCC",
|
||||||
"VPERM",
|
"VPERM",
|
||||||
|
"VBPERMQ",
|
||||||
|
"VBPERMD",
|
||||||
"VSEL",
|
"VSEL",
|
||||||
"VSPLT",
|
"VSPLT",
|
||||||
"VSPLTB",
|
"VSPLTB",
|
||||||
|
@ -421,6 +421,9 @@ var optab = []Optab{
|
|||||||
/* Vector permute */
|
/* Vector permute */
|
||||||
{AVPERM, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector permute, va-form */
|
{AVPERM, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector permute, va-form */
|
||||||
|
|
||||||
|
/* Vector bit permute */
|
||||||
|
{AVBPERMQ, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector bit permute, vx-form */
|
||||||
|
|
||||||
/* Vector select */
|
/* Vector select */
|
||||||
{AVSEL, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector select, va-form */
|
{AVSEL, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector select, va-form */
|
||||||
|
|
||||||
@ -1378,6 +1381,9 @@ func buildop(ctxt *obj.Link) {
|
|||||||
case AVPERM: /* vperm */
|
case AVPERM: /* vperm */
|
||||||
opset(AVPERM, r0)
|
opset(AVPERM, r0)
|
||||||
|
|
||||||
|
case AVBPERMQ: /* vbpermq, vbpermd */
|
||||||
|
opset(AVBPERMD, r0)
|
||||||
|
|
||||||
case AVSEL: /* vsel */
|
case AVSEL: /* vsel */
|
||||||
opset(AVSEL, r0)
|
opset(AVSEL, r0)
|
||||||
|
|
||||||
@ -4165,6 +4171,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
|
|||||||
case AVSRAD:
|
case AVSRAD:
|
||||||
return OPVX(4, 964, 0, 0) /* vsrad - v2.07 */
|
return OPVX(4, 964, 0, 0) /* vsrad - v2.07 */
|
||||||
|
|
||||||
|
case AVBPERMQ:
|
||||||
|
return OPVC(4, 1356, 0, 0) /* vbpermq - v2.07 */
|
||||||
|
case AVBPERMD:
|
||||||
|
return OPVC(4, 1484, 0, 0) /* vbpermd - v3.00 */
|
||||||
|
|
||||||
case AVCLZB:
|
case AVCLZB:
|
||||||
return OPVX(4, 1794, 0, 0) /* vclzb - v2.07 */
|
return OPVX(4, 1794, 0, 0) /* vclzb - v2.07 */
|
||||||
case AVCLZH:
|
case AVCLZH:
|
||||||
|
@ -1084,24 +1084,17 @@ TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
|
|||||||
|
|
||||||
TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
|
TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||||
DCBT (R3) // Prepare cache line.
|
DCBT (R3) // Prepare cache line.
|
||||||
MOVD R3,R10 // Save base address for calculating the index later.
|
MOVD R3,R17 // Save base address for calculating the index later.
|
||||||
RLDICR $0,R3,$60,R8 // Align address to doubleword boundary in R8.
|
RLDICR $0,R3,$60,R8 // Align address to doubleword boundary in R8.
|
||||||
RLDIMI $8,R5,$48,R5 // Replicating the byte across the register.
|
RLDIMI $8,R5,$48,R5 // Replicating the byte across the register.
|
||||||
|
ADD R4,R3,R7 // Last acceptable address in R7.
|
||||||
// Calculate last acceptable address and check for possible overflow
|
|
||||||
// using a saturated add.
|
|
||||||
// Overflows set last acceptable address to 0xffffffffffffffff.
|
|
||||||
ADD R4,R3,R7
|
|
||||||
SUBC R3,R7,R6
|
|
||||||
SUBE R0,R0,R9
|
|
||||||
MOVW R9,R6
|
|
||||||
OR R6,R7,R7
|
|
||||||
|
|
||||||
RLDIMI $16,R5,$32,R5
|
RLDIMI $16,R5,$32,R5
|
||||||
CMPU R4,$32 // Check if it's a small string (<=32 bytes). Those will be processed differently.
|
CMPU R4,$32 // Check if it's a small string (<=32 bytes). Those will be processed differently.
|
||||||
MOVD $-1,R9
|
MOVD $-1,R9
|
||||||
WORD $0x54661EB8 // Calculate padding in R6 (rlwinm r6,r3,3,26,28).
|
WORD $0x54661EB8 // Calculate padding in R6 (rlwinm r6,r3,3,26,28).
|
||||||
RLDIMI $32,R5,$0,R5
|
RLDIMI $32,R5,$0,R5
|
||||||
|
MOVD R7,R10 // Save last acceptable address in R10 for later.
|
||||||
ADD $-1,R7,R7
|
ADD $-1,R7,R7
|
||||||
#ifdef GOARCH_ppc64le
|
#ifdef GOARCH_ppc64le
|
||||||
SLD R6,R9,R9 // Prepare mask for Little Endian
|
SLD R6,R9,R9 // Prepare mask for Little Endian
|
||||||
@ -1110,56 +1103,142 @@ TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
|
|||||||
#endif
|
#endif
|
||||||
BLE small_string // Jump to the small string case if it's <=32 bytes.
|
BLE small_string // Jump to the small string case if it's <=32 bytes.
|
||||||
|
|
||||||
// Case for length >32 bytes
|
// If we are 64-byte aligned, branch to qw_align just to get the auxiliary values
|
||||||
|
// in V0, V1 and V10, then branch to the preloop.
|
||||||
|
ANDCC $63,R3,R11
|
||||||
|
BEQ CR0,qw_align
|
||||||
|
RLDICL $0,R3,$61,R11
|
||||||
|
|
||||||
MOVD 0(R8),R12 // Load one doubleword from the aligned address in R8.
|
MOVD 0(R8),R12 // Load one doubleword from the aligned address in R8.
|
||||||
CMPB R12,R5,R3 // Check for a match.
|
CMPB R12,R5,R3 // Check for a match.
|
||||||
AND R9,R3,R3 // Mask bytes below s_base
|
AND R9,R3,R3 // Mask bytes below s_base
|
||||||
RLDICL $0,R7,$61,R4 // length-1
|
RLDICL $0,R7,$61,R6 // length-1
|
||||||
RLDICR $0,R7,$60,R7 // Last doubleword in R7
|
RLDICR $0,R7,$60,R7 // Last doubleword in R7
|
||||||
CMPU R3,$0,CR7 // If we have a match, jump to the final computation
|
CMPU R3,$0,CR7 // If we have a match, jump to the final computation
|
||||||
BNE CR7,done
|
BNE CR7,done
|
||||||
|
ADD $8,R8,R8
|
||||||
|
ADD $-8,R4,R4
|
||||||
|
ADD R4,R11,R4
|
||||||
|
|
||||||
// Check for doubleword alignment and jump to the loop setup if aligned.
|
// Check for quadword alignment
|
||||||
MOVFL R8,CR7
|
ANDCC $15,R8,R11
|
||||||
BC 12,28,loop_setup
|
BEQ CR0,qw_align
|
||||||
|
|
||||||
// Not aligned, so handle the second doubleword
|
// Not aligned, so handle the next doubleword
|
||||||
MOVDU 8(R8),R12
|
MOVD 0(R8),R12
|
||||||
CMPB R12,R5,R3
|
CMPB R12,R5,R3
|
||||||
CMPU R3,$0,CR7
|
CMPU R3,$0,CR7
|
||||||
BNE CR7,done
|
BNE CR7,done
|
||||||
|
ADD $8,R8,R8
|
||||||
|
ADD $-8,R4,R4
|
||||||
|
|
||||||
loop_setup:
|
// Either quadword aligned or 64-byte at this point. We can use LVX.
|
||||||
// We are now aligned to a 16-byte boundary. We will load two doublewords
|
qw_align:
|
||||||
// per loop iteration. The last doubleword is in R7, so our loop counter
|
|
||||||
// starts at (R7-R8)/16.
|
|
||||||
SUB R8,R7,R6
|
|
||||||
SRD $4,R6,R6
|
|
||||||
MOVD R6,CTR
|
|
||||||
|
|
||||||
// Note: when we have an align directive, align this loop to 32 bytes so
|
// Set up auxiliary data for the vectorized algorithm.
|
||||||
// it fits in a single icache sector.
|
VSPLTISB $0,V0 // Replicate 0 across V0
|
||||||
|
VSPLTISB $3,V10 // Use V10 as control for VBPERMQ
|
||||||
|
MTVRD R5,V1
|
||||||
|
LVSL (R0+R0),V11
|
||||||
|
VSLB V11,V10,V10
|
||||||
|
VSPLTB $7,V1,V1 // Replicate byte across V1
|
||||||
|
CMPU R4, $64 // If len <= 64, don't use the vectorized loop
|
||||||
|
BLE tail
|
||||||
|
|
||||||
|
// We will load 4 quadwords per iteration in the loop, so check for
|
||||||
|
// 64-byte alignment. If 64-byte aligned, then branch to the preloop.
|
||||||
|
ANDCC $63,R8,R11
|
||||||
|
BEQ CR0,preloop
|
||||||
|
|
||||||
|
// Not 64-byte aligned. Load one quadword at a time until aligned.
|
||||||
|
LVX (R8+R0),V4
|
||||||
|
VCMPEQUBCC V1,V4,V6 // Check for byte in V4
|
||||||
|
BNE CR6,found_qw_align
|
||||||
|
ADD $16,R8,R8
|
||||||
|
ADD $-16,R4,R4
|
||||||
|
|
||||||
|
ANDCC $63,R8,R11
|
||||||
|
BEQ CR0,preloop
|
||||||
|
LVX (R8+R0),V4
|
||||||
|
VCMPEQUBCC V1,V4,V6 // Check for byte in V4
|
||||||
|
BNE CR6,found_qw_align
|
||||||
|
ADD $16,R8,R8
|
||||||
|
ADD $-16,R4,R4
|
||||||
|
|
||||||
|
ANDCC $63,R8,R11
|
||||||
|
BEQ CR0,preloop
|
||||||
|
LVX (R8+R0),V4
|
||||||
|
VCMPEQUBCC V1,V4,V6 // Check for byte in V4
|
||||||
|
BNE CR6,found_qw_align
|
||||||
|
ADD $-16,R4,R4
|
||||||
|
ADD $16,R8,R8
|
||||||
|
|
||||||
|
// 64-byte aligned. Prepare for the main loop.
|
||||||
|
preloop:
|
||||||
|
CMPU R4,$64
|
||||||
|
BLE tail // If len <= 64, don't use the vectorized loop
|
||||||
|
|
||||||
|
// We are now aligned to a 64-byte boundary. We will load 4 quadwords
|
||||||
|
// per loop iteration. The last acceptable address is in R10, so our loop counter
|
||||||
|
// starts at (R10-R8)/64.
|
||||||
|
SUB R8,R10,R6
|
||||||
|
SRD $6,R6,R9 // Loop counter in R9
|
||||||
|
MOVD R9,CTR
|
||||||
|
|
||||||
|
MOVD $16,R11 // Load offsets for the vector loads
|
||||||
|
MOVD $32,R9
|
||||||
|
MOVD $48,R7
|
||||||
|
|
||||||
|
// Main loop: we will load 64 bytes per iteration
|
||||||
loop:
|
loop:
|
||||||
// Load two doublewords, then compare and merge in a single register. We
|
LVX (R8+R0),V2 // Load 4 16-byte vectors
|
||||||
// will check two doublewords per iteration, then find out which of them
|
LVX (R11+R8),V3
|
||||||
// contains the byte later. This speeds up the search.
|
LVX (R9+R8),V4
|
||||||
MOVD 8(R8),R12
|
LVX (R7+R8),V5
|
||||||
MOVDU 16(R8),R11
|
VCMPEQUB V1,V2,V6 // Look for byte in each vector
|
||||||
CMPB R12,R5,R3
|
VCMPEQUB V1,V3,V7
|
||||||
CMPB R11,R5,R9
|
VCMPEQUB V1,V4,V8
|
||||||
OR R3,R9,R6
|
VCMPEQUB V1,V5,V9
|
||||||
CMPU R6,$0,CR7
|
VOR V6,V7,V11 // Compress the result in a single vector
|
||||||
BNE CR7,found
|
VOR V8,V9,V12
|
||||||
BC 16,0,loop
|
VOR V11,V12,V11
|
||||||
|
VCMPEQUBCC V0,V11,V11 // Check for byte
|
||||||
|
BGE CR6,found
|
||||||
|
ADD $64,R8,R8
|
||||||
|
BC 16,0,loop // bdnz loop
|
||||||
|
|
||||||
// Counter zeroed, but we may have another doubleword to read
|
// Handle the trailing bytes or R4 <= 64
|
||||||
CMPU R8,R7
|
RLDICL $0,R6,$58,R4
|
||||||
BEQ notfound
|
tail:
|
||||||
|
CMPU R4,$0
|
||||||
|
BEQ notfound
|
||||||
|
LVX (R8+R0),V4
|
||||||
|
VCMPEQUBCC V1,V4,V6
|
||||||
|
BNE CR6,found_qw_align
|
||||||
|
ADD $16,R8,R8
|
||||||
|
CMPU R4,$16,CR6
|
||||||
|
BLE CR6,notfound
|
||||||
|
ADD $-16,R4,R4
|
||||||
|
|
||||||
MOVDU 8(R8),R12
|
LVX (R8+R0),V4
|
||||||
CMPB R12,R5,R3
|
VCMPEQUBCC V1,V4,V6
|
||||||
CMPU R3,$0,CR6
|
BNE CR6,found_qw_align
|
||||||
BNE CR6,done
|
ADD $16,R8,R8
|
||||||
|
CMPU R4,$16,CR6
|
||||||
|
BLE CR6,notfound
|
||||||
|
ADD $-16,R4,R4
|
||||||
|
|
||||||
|
LVX (R8+R0),V4
|
||||||
|
VCMPEQUBCC V1,V4,V6
|
||||||
|
BNE CR6,found_qw_align
|
||||||
|
ADD $16,R8,R8
|
||||||
|
CMPU R4,$16,CR6
|
||||||
|
BLE CR6,notfound
|
||||||
|
ADD $-16,R4,R4
|
||||||
|
|
||||||
|
LVX (R8+R0),V4
|
||||||
|
VCMPEQUBCC V1,V4,V6
|
||||||
|
BNE CR6,found_qw_align
|
||||||
|
|
||||||
notfound:
|
notfound:
|
||||||
MOVD $-1,R3
|
MOVD $-1,R3
|
||||||
@ -1167,15 +1246,68 @@ notfound:
|
|||||||
RET
|
RET
|
||||||
|
|
||||||
found:
|
found:
|
||||||
// One of the doublewords from the loop contains the byte we are looking
|
// We will now compress the results into a single doubleword,
|
||||||
// for. Check the first doubleword and adjust the address if found.
|
// so it can be moved to a GPR for the final index calculation.
|
||||||
CMPU R3,$0,CR6
|
|
||||||
ADD $-8,R8,R8
|
|
||||||
BNE CR6,done
|
|
||||||
|
|
||||||
// Not found, so it must be in the second doubleword of the merged pair.
|
// The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the
|
||||||
MOVD R9,R3
|
// first bit of each byte into bits 48-63.
|
||||||
ADD $8,R8,R8
|
VBPERMQ V6,V10,V6
|
||||||
|
VBPERMQ V7,V10,V7
|
||||||
|
VBPERMQ V8,V10,V8
|
||||||
|
VBPERMQ V9,V10,V9
|
||||||
|
|
||||||
|
// Shift each 16-bit component into its correct position for
|
||||||
|
// merging into a single doubleword.
|
||||||
|
#ifdef GOARCH_ppc64le
|
||||||
|
VSLDOI $2,V7,V7,V7
|
||||||
|
VSLDOI $4,V8,V8,V8
|
||||||
|
VSLDOI $6,V9,V9,V9
|
||||||
|
#else
|
||||||
|
VSLDOI $6,V6,V6,V6
|
||||||
|
VSLDOI $4,V7,V7,V7
|
||||||
|
VSLDOI $2,V8,V8,V8
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Merge V6-V9 into a single doubleword and move to a GPR.
|
||||||
|
VOR V6,V7,V11
|
||||||
|
VOR V8,V9,V4
|
||||||
|
VOR V4,V11,V4
|
||||||
|
MFVRD V4,R3
|
||||||
|
|
||||||
|
#ifdef GOARCH_ppc64le
|
||||||
|
ADD $-1,R3,R11
|
||||||
|
ANDN R3,R11,R11
|
||||||
|
POPCNTD R11,R11 // Count trailing zeros (Little Endian).
|
||||||
|
#else
|
||||||
|
CNTLZD R3,R11 // Count leading zeros (Big Endian).
|
||||||
|
#endif
|
||||||
|
ADD R8,R11,R3 // Calculate byte address
|
||||||
|
|
||||||
|
return:
|
||||||
|
SUB R17,R3
|
||||||
|
MOVD R3,(R14)
|
||||||
|
RET
|
||||||
|
|
||||||
|
found_qw_align:
|
||||||
|
// Use the same algorithm as above. Compress the result into
|
||||||
|
// a single doubleword and move it to a GPR for the final
|
||||||
|
// calculation.
|
||||||
|
VBPERMQ V6,V10,V6
|
||||||
|
|
||||||
|
#ifdef GOARCH_ppc64le
|
||||||
|
MFVRD V6,R3
|
||||||
|
ADD $-1,R3,R11
|
||||||
|
ANDN R3,R11,R11
|
||||||
|
POPCNTD R11,R11
|
||||||
|
#else
|
||||||
|
VSLDOI $6,V6,V6,V6
|
||||||
|
MFVRD V6,R3
|
||||||
|
CNTLZD R3,R11
|
||||||
|
#endif
|
||||||
|
ADD R8,R11,R3
|
||||||
|
CMPU R11,R4
|
||||||
|
BLT return
|
||||||
|
BR notfound
|
||||||
|
|
||||||
done:
|
done:
|
||||||
// At this point, R3 has 0xFF in the same position as the byte we are
|
// At this point, R3 has 0xFF in the same position as the byte we are
|
||||||
@ -1191,17 +1323,10 @@ done:
|
|||||||
CMPU R8,R7 // Check if we are at the last doubleword.
|
CMPU R8,R7 // Check if we are at the last doubleword.
|
||||||
SRD $3,R11 // Convert trailing zeros to bytes.
|
SRD $3,R11 // Convert trailing zeros to bytes.
|
||||||
ADD R11,R8,R3
|
ADD R11,R8,R3
|
||||||
CMPU R11,R4,CR7 // If at the last doubleword, check the byte offset.
|
CMPU R11,R6,CR7 // If at the last doubleword, check the byte offset.
|
||||||
BNE return
|
BNE return
|
||||||
BLE CR7,return
|
BLE CR7,return
|
||||||
MOVD $-1,R3
|
BR notfound
|
||||||
MOVD R3,(R14)
|
|
||||||
RET
|
|
||||||
|
|
||||||
return:
|
|
||||||
SUB R10,R3 // Calculate index.
|
|
||||||
MOVD R3,(R14)
|
|
||||||
RET
|
|
||||||
|
|
||||||
small_string:
|
small_string:
|
||||||
// We unroll this loop for better performance.
|
// We unroll this loop for better performance.
|
||||||
@ -1212,9 +1337,9 @@ small_string:
|
|||||||
CMPB R12,R5,R3 // Check for a match.
|
CMPB R12,R5,R3 // Check for a match.
|
||||||
AND R9,R3,R3 // Mask bytes below s_base.
|
AND R9,R3,R3 // Mask bytes below s_base.
|
||||||
CMPU R3,$0,CR7 // If we have a match, jump to the final computation.
|
CMPU R3,$0,CR7 // If we have a match, jump to the final computation.
|
||||||
RLDICL $0,R7,$61,R4 // length-1
|
RLDICL $0,R7,$61,R6 // length-1
|
||||||
RLDICR $0,R7,$60,R7 // Last doubleword in R7.
|
RLDICR $0,R7,$60,R7 // Last doubleword in R7.
|
||||||
CMPU R8,R7
|
CMPU R8,R7
|
||||||
BNE CR7,done
|
BNE CR7,done
|
||||||
BEQ notfound // Hit length.
|
BEQ notfound // Hit length.
|
||||||
|
|
||||||
@ -1242,7 +1367,6 @@ small_string:
|
|||||||
MOVDU 8(R8),R12
|
MOVDU 8(R8),R12
|
||||||
CMPB R12,R5,R3
|
CMPB R12,R5,R3
|
||||||
CMPU R3,$0,CR6
|
CMPU R3,$0,CR6
|
||||||
CMPU R8,R7
|
|
||||||
BNE CR6,done
|
BNE CR6,done
|
||||||
BR notfound
|
BR notfound
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user