diff --git a/src/crypto/aes/asm_ppc64le.s b/src/crypto/aes/asm_ppc64le.s
index a69cb78c8b..f3a96a3a17 100644
--- a/src/crypto/aes/asm_ppc64le.s
+++ b/src/crypto/aes/asm_ppc64le.s
@@ -13,8 +13,8 @@
 // Original code can be found at the link below:
 // https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl
-// I changed some function names in order to be more likely to go standards.
-// For instance, function aes_p8_set_{en,de}crypt_key become
+// Some function names were changed to be consistent with Go function
+// names. For instance, the functions aes_p8_set_{en,de}crypt_key become
 // set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts
 // and a new session was created (doEncryptKeyAsm). This was necessary to
 // avoid arguments overwriting when setDecryptKeyAsm calls setEncryptKeyAsm.
@@ -50,452 +50,451 @@
 #define BLK_ROUNDS R6
 #define BLK_IDX R7
-DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
-DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
-DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
-DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
-DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
-DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
-DATA ·rcon+0x30(SB)/8, $0x0000000000000000
-DATA ·rcon+0x38(SB)/8, $0x0000000000000000
+DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
+DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
+DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
+DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
+DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
+DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
+DATA ·rcon+0x30(SB)/8, $0x0000000000000000
+DATA ·rcon+0x38(SB)/8, $0x0000000000000000
 GLOBL ·rcon(SB), RODATA, $64
 // func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
-TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
+TEXT ·setEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
 	// Load the arguments inside the registers
-	MOVD key+0(FP), INP
-	MOVD keylen+8(FP), BITS
-	MOVD enc+16(FP), OUT
-	JMP ·doEncryptKeyAsm(SB)
+	MOVD key+0(FP), INP
+	MOVD keylen+8(FP), BITS
+	MOVD enc+16(FP), OUT
+	JMP ·doEncryptKeyAsm(SB)
 // This text is used both setEncryptKeyAsm and setDecryptKeyAsm
-TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
+TEXT ·doEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
 	// Do not change R10 since it's storing the LR value in setDecryptKeyAsm
 	// Check arguments
-	MOVD $-1, PTR // li 6,-1 exit code to -1 (255)
-	CMPU INP, $0 // cmpldi r3,0 input key pointer set?
-	BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
-	CMPU OUT, $0 // cmpldi r5,0 output key pointer set?
-	BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
-	MOVD $-2, PTR // li 6,-2 exit code to -2 (254)
-	CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
-	BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort
-	CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256
-	BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort
-	ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64
-	BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort
+	MOVD $-1, PTR // li 6,-1 exit code to -1 (255)
+	CMPU INP, $0 // cmpldi r3,0 input key pointer set?
+	BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
+	CMPU OUT, $0 // cmpldi r5,0 output key pointer set?
+	BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
+	MOVD $-2, PTR // li 6,-2 exit code to -2 (254)
+	CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
+	BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort
+	CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256
+	BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort
+	ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64
+	BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort
-	MOVD $·rcon(SB), PTR // PTR point to rcon addr
+	MOVD $·rcon(SB), PTR // PTR points to rcon addr
 	// Get key from memory and write aligned into VR
-	NEG INP, R9 // neg 9,3 R9 is ~INP + 1
-	LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0
-	ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr
-	LVSR (R9)(R0), KEY // lvsr 3,0,9
-	MOVD $0x20, R8 // li 8,0x20 R8 = 32
-	CMPW BITS, $192 // cmpwi 4,192 Key size == 192?
-	LVX (INP)(R0), IN1 // lvx 2,0,3
-	VSPLTISB $0x0f, MASK // vspltisb 5,0x0f 0x0f0f0f0f... mask
-	LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
-	VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap
-	LVX (PTR)(R8), MASK // lvx 5,8,6
-	ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
-	VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align
-	MOVD $8, CNT // li 7,8 CNT = 8
-	VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
-	MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)
+	NEG INP, R9 // neg 9,3 R9 is ~INP + 1
+	LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0
+	ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr
+	LVSR (R9)(R0), KEY // lvsr 3,0,9
+	MOVD $0x20, R8 // li 8,0x20 R8 = 32
+	CMPW BITS, $192 // cmpwi 4,192 Key size == 192?
+	LVX (INP)(R0), IN1 // lvx 2,0,3
+	VSPLTISB $0x0f, MASK // vspltisb 5,0x0f 0x0f0f0f0f... mask
+	LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
+	VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap
+	LVX (PTR)(R8), MASK // lvx 5,8,6
+	ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
+	VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align
+	MOVD $8, CNT // li 7,8 CNT = 8
+	VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
+	MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)
-	LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5
-	VSPLTISB $-1, OUTMASK // vspltisb 9,-1
-	LVX (OUT)(R0), OUTHEAD // lvx 10,0,5
-	VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8
+	LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5
+	VSPLTISB $-1, OUTMASK // vspltisb 9,-1
+	LVX (OUT)(R0), OUTHEAD // lvx 10,0,5
+	VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8
-	BLT loop128 // blt .Loop128
-	ADD $8, INP, INP // addi 3,3,8
-	BEQ l192 // beq .L192
-	ADD $8, INP, INP // addi 3,3,8
-	JMP l256 // b .L256
+	BLT loop128 // blt .Loop128
+	ADD $8, INP, INP // addi 3,3,8
+	BEQ l192 // beq .L192
+	ADD $8, INP, INP // addi 3,3,8
+	JMP l256 // b .L256
 loop128:
 	// Key schedule (Round 1 to 8)
-	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
-	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
-	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
-	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
-	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
-	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
-	STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output
-	ADD $16, OUT, OUT // addi 5,5,16 Point to the next round
+	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
+	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
+	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
+	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
+	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
+	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
+	STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output
+	ADD $16, OUT, OUT // addi 5,5,16 Point to the next round
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
-	VXOR IN0, KEY, IN0 // vxor 1,1,3
-	BC 0x10, 0, loop128 // bdnz .Loop128
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
+	VXOR IN0, KEY, IN0 // vxor 1,1,3
+	BC 0x10, 0, loop128 // bdnz .Loop128
-	LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys
+	LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys
 	// Key schedule (Round 9)
-	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-spat
-	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
-	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
-	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
-	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
-	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
-	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9
-	ADD $16, OUT, OUT // addi 5,5,16
+	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
+	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
+	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
+	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
+	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
+	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
+	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9
+	ADD $16, OUT, OUT // addi 5,5,16
 	// Key schedule (Round 10)
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
-	VXOR IN0, KEY, IN0 // vxor 1,1,3
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
+	VXOR IN0, KEY, IN0 // vxor 1,1,3
-	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
-	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
-	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
-	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
-	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
-	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
-	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10
-	ADD $16, OUT, OUT // addi 5,5,16
+	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
+	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
+	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
+	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
+	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
+	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
+	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10
+	ADD $16, OUT, OUT // addi 5,5,16
 	// Key schedule (Round 11)
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
-	VXOR IN0, TMP, IN0 // vxor 1,1,6
-	VXOR IN0, KEY, IN0 // vxor 1,1,3
-	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
-	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
-	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
-	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
+	VXOR IN0, TMP, IN0 // vxor 1,1,6
+	VXOR IN0, KEY, IN0 // vxor 1,1,3
+	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
+	VSEL OUTHEAD,
OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11 - ADD $15, OUT, INP // addi 3,5,15 - ADD $0x50, OUT, OUT // addi 5,5,0x50 + ADD $15, OUT, INP // addi 3,5,15 + ADD $0x50, OUT, OUT // addi 5,5,0x50 - MOVD $10, ROUNDS // li 8,10 - JMP done // b .Ldone + MOVD $10, ROUNDS // li 8,10 + JMP done // b .Ldone l192: - LVX (INP)(R0), TMP // lvx 6,0,3 - MOVD $4, CNT // li 7,4 - VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 - VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 - VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 - STVX STAGE, (OUT+R0) // stvx 7,0,5 - ADD $16, OUT, OUT // addi 5,5,16 - VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3 - VSPLTISB $8, KEY // vspltisb 3,8 - MOVD CNT, CTR // mtctr 7 - VSUBUBM MASK, KEY, MASK // vsububm 5,5,3 + LVX (INP)(R0), TMP // lvx 6,0,3 + MOVD $4, CNT // li 7,4 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 + VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3 + VSPLTISB $8, KEY // vspltisb 3,8 + MOVD CNT, CTR // mtctr 7 + VSUBUBM MASK, KEY, MASK // vsububm 5,5,3 loop192: - VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 - VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 - VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 - VXOR IN0, TMP, IN0 // vxor 1,1,6 - VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 - VXOR IN0, TMP, IN0 // vxor 1,1,6 - VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 - VXOR IN0, TMP, IN0 // vxor 1,1,6 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 - VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8 - VSPLTW $3, IN0, TMP // vspltw 6,1,3 - VXOR TMP, IN1, TMP // vxor 6,6,2 - VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 - VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 - VXOR IN1, TMP, IN1 // vxor 2,2,6 - VXOR IN0, KEY, IN0 // vxor 1,1,3 - VXOR IN1, KEY, IN1 // vxor 2,2,3 - VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8 + VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8 + VSPLTW $3, IN0, TMP // vspltw 6,1,3 + VXOR TMP, IN1, TMP // vxor 6,6,2 + VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 + VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 + VXOR IN1, TMP, IN1 // vxor 2,2,6 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + VXOR IN1, KEY, IN1 // vxor 2,2,3 + VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8 - VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 - VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 - VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8 - VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 - VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 - VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 - STVX STAGE, (OUT+R0) // stvx 7,0,5 - ADD $16, OUT, OUT // addi 5,5,16 + VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 - VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8 - VXOR IN0, TMP, IN0 // vxor 1,1,6 - VSLDOI 
$12, ZERO, TMP, TMP // vsldoi 6,0,6,12 - VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8 - VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 - VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 - VXOR IN0, TMP, IN0 // vxor 1,1,6 - VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 - VXOR IN0, TMP, IN0 // vxor 1,1,6 - STVX STAGE, (OUT+R0) // stvx 7,0,5 - ADD $16, OUT, OUT // addi 5,5,16 + VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 - VSPLTW $3, IN0, TMP // vspltw 6,1,3 - VXOR TMP, IN1, TMP // vxor 6,6,2 - VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 - VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 - VXOR IN1, TMP, IN1 // vxor 2,2,6 - VXOR IN0, KEY, IN0 // vxor 1,1,3 - VXOR IN1, KEY, IN1 // vxor 2,2,3 - VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 - VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 - VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 - STVX STAGE, (OUT+R0) // stvx 7,0,5 - ADD $15, OUT, INP // addi 3,5,15 - ADD $16, OUT, OUT // addi 5,5,16 - BC 0x10, 0, loop192 // bdnz .Loop192 + VSPLTW $3, IN0, TMP // vspltw 6,1,3 + VXOR TMP, IN1, TMP // vxor 6,6,2 + VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 + VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 + VXOR IN1, TMP, IN1 // vxor 2,2,6 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + VXOR IN1, KEY, IN1 // vxor 2,2,3 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $15, OUT, INP // addi 3,5,15 + ADD $16, OUT, OUT // addi 5,5,16 + BC 0x10, 0, loop192 // bdnz .Loop192 - MOVD $12, ROUNDS // li 8,12 - ADD $0x20, OUT, OUT // addi 5,5,0x20 - JMP done // b .Ldone + MOVD $12, ROUNDS // li 8,12 + ADD $0x20, OUT, OUT // addi 5,5,0x20 + BR done // b .Ldone l256: - LVX (INP)(R0), TMP // lvx 6,0,3 - MOVD $7, CNT // li 7,7 - MOVD $14, ROUNDS // li 8,14 - VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 - VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 - VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 - STVX STAGE, (OUT+R0) // stvx 7,0,5 - ADD $16, OUT, OUT // addi 5,5,16 - VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3 - MOVD CNT, CTR // mtctr 7 + LVX (INP)(R0), TMP // lvx 6,0,3 + MOVD $7, CNT // li 7,7 + MOVD $14, ROUNDS // li 8,14 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 + VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3 + MOVD CNT, CTR // mtctr 7 loop256: - VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 - VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 - VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8 - VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 - VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 - VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 - STVX STAGE, (OUT+R0) // stvx 7,0,5 - ADD $16, OUT, OUT // addi 5,5,16 + VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8 + VSEL OUTHEAD, OUTTAIL, 
OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 - VXOR IN0, TMP, IN0 // vxor 1,1,6 - VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 - VXOR IN0, TMP, IN0 // vxor 1,1,6 - VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 - VXOR IN0, TMP, IN0 // vxor 1,1,6 - VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 - VXOR IN0, KEY, IN0 // vxor 1,1,3 - VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 - VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 - VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 - STVX STAGE, (OUT+R0) // stvx 7,0,5 - ADD $15, OUT, INP // addi 3,5,15 - ADD $16, OUT, OUT // addi 5,5,16 - BC 0x12, 0, done // bdz .Ldone + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $15, OUT, INP // addi 3,5,15 + ADD $16, OUT, OUT // addi 5,5,16 + BC 0x12, 0, done // bdz .Ldone - VSPLTW $3, IN0, KEY // vspltw 3,1,3 - VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12 - VSBOX KEY, KEY // vsbox 3,3 + VSPLTW $3, IN0, KEY // vspltw 3,1,3 + VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12 + VSBOX KEY, KEY // vsbox 3,3 - VXOR IN1, TMP, IN1 // vxor 2,2,6 - VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 - VXOR IN1, TMP, IN1 // vxor 2,2,6 - VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 - VXOR IN1, TMP, IN1 // vxor 2,2,6 + VXOR IN1, TMP, IN1 // vxor 2,2,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN1, TMP, IN1 // vxor 2,2,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN1, TMP, IN1 // vxor 2,2,6 - VXOR IN1, KEY, IN1 // vxor 2,2,3 - JMP loop256 // b .Loop256 + VXOR IN1, KEY, IN1 // vxor 2,2,3 + JMP loop256 // b .Loop256 done: - LVX (INP)(R0), IN1 // lvx 2,0,3 - VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9 - STVX IN1, (INP+R0) // stvx 2,0,3 - MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0) - MOVW ROUNDS, 0(OUT) // stw 8,0(5) + LVX (INP)(R0), IN1 // lvx 2,0,3 + VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9 + STVX IN1, (INP+R0) // stvx 2,0,3 + MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0) + MOVW ROUNDS, 0(OUT) // stw 8,0(5) enc_key_abort: - MOVD PTR, INP // mr 3,6 set exit code with PTR value - MOVD INP, ret+24(FP) // Put return value into the FP - RET // blr + MOVD PTR, INP // mr 3,6 set exit code with PTR value + MOVD INP, ret+24(FP) // Put return value into the FP + RET // blr // func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int -TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0 +TEXT ·setDecryptKeyAsm(SB), NOSPLIT|NOFRAME, $0 // Load the arguments inside the registers - MOVD key+0(FP), INP - MOVD keylen+8(FP), BITS - MOVD dec+16(FP), OUT + MOVD key+0(FP), INP + MOVD keylen+8(FP), BITS + MOVD dec+16(FP), OUT - MOVD LR, R10 // mflr 10 - CALL ·doEncryptKeyAsm(SB) - MOVD R10, LR // mtlr 10 + MOVD LR, R10 // mflr 10 + CALL ·doEncryptKeyAsm(SB) + MOVD R10, LR // mtlr 10 - CMPW INP, $0 // cmpwi 3,0 exit 0 = ok - BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort + CMPW INP, $0 // cmpwi 3,0 exit 0 = ok + BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort // doEncryptKeyAsm set ROUNDS (R8) with the proper value for 
each mode - SLW $4, ROUNDS, CNT // slwi 7,8,4 - SUB $240, OUT, INP // subi 3,5,240 - SRW $1, ROUNDS, ROUNDS // srwi 8,8,1 - ADD R7, INP, OUT // add 5,3,7 - MOVD ROUNDS, CTR // mtctr 8 + SLW $4, ROUNDS, CNT // slwi 7,8,4 + SUB $240, OUT, INP // subi 3,5,240 + SRW $1, ROUNDS, ROUNDS // srwi 8,8,1 + ADD R7, INP, OUT // add 5,3,7 + MOVD ROUNDS, CTR // mtctr 8 -// dec_key will invert the key sequence in order to be used for decrypt + // dec_key will invert the key sequence in order to be used for decrypt dec_key: - MOVWZ 0(INP), TEMP // lwz 0, 0(3) - MOVWZ 4(INP), R6 // lwz 6, 4(3) - MOVWZ 8(INP), R7 // lwz 7, 8(3) - MOVWZ 12(INP), R8 // lwz 8, 12(3) - ADD $16, INP, INP // addi 3,3,16 - MOVWZ 0(OUT), R9 // lwz 9, 0(5) - MOVWZ 4(OUT), R10 // lwz 10,4(5) - MOVWZ 8(OUT), R11 // lwz 11,8(5) - MOVWZ 12(OUT), R12 // lwz 12,12(5) - MOVW TEMP, 0(OUT) // stw 0, 0(5) - MOVW R6, 4(OUT) // stw 6, 4(5) - MOVW R7, 8(OUT) // stw 7, 8(5) - MOVW R8, 12(OUT) // stw 8, 12(5) - SUB $16, OUT, OUT // subi 5,5,16 - MOVW R9, -16(INP) // stw 9, -16(3) - MOVW R10, -12(INP) // stw 10,-12(3) - MOVW R11, -8(INP) // stw 11,-8(3) - MOVW R12, -4(INP) // stw 12,-4(3) - BC 0x10, 0, dec_key // bdnz .Ldeckey + MOVWZ 0(INP), TEMP // lwz 0, 0(3) + MOVWZ 4(INP), R6 // lwz 6, 4(3) + MOVWZ 8(INP), R7 // lwz 7, 8(3) + MOVWZ 12(INP), R8 // lwz 8, 12(3) + ADD $16, INP, INP // addi 3,3,16 + MOVWZ 0(OUT), R9 // lwz 9, 0(5) + MOVWZ 4(OUT), R10 // lwz 10,4(5) + MOVWZ 8(OUT), R11 // lwz 11,8(5) + MOVWZ 12(OUT), R12 // lwz 12,12(5) + MOVW TEMP, 0(OUT) // stw 0, 0(5) + MOVW R6, 4(OUT) // stw 6, 4(5) + MOVW R7, 8(OUT) // stw 7, 8(5) + MOVW R8, 12(OUT) // stw 8, 12(5) + SUB $16, OUT, OUT // subi 5,5,16 + MOVW R9, -16(INP) // stw 9, -16(3) + MOVW R10, -12(INP) // stw 10,-12(3) + MOVW R11, -8(INP) // stw 11,-8(3) + MOVW R12, -4(INP) // stw 12,-4(3) + BC 0x10, 0, dec_key // bdnz .Ldeckey - XOR R3, R3, R3 // xor 3,3,3 Clean R3 + XOR R3, R3, R3 // xor 3,3,3 Clean R3 dec_key_abort: - MOVD R3, ret+24(FP) // Put return value into the FP - RET // blr - + MOVD R3, ret+24(FP) // Put return value into the FP + RET // blr // func encryptBlockAsm(dst, src *byte, enc *uint32) -TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0 +TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 // Load the arguments inside the registers - MOVD dst+0(FP), BLK_OUT - MOVD src+8(FP), BLK_INP - MOVD enc+16(FP), BLK_KEY + MOVD dst+0(FP), BLK_OUT + MOVD src+8(FP), BLK_INP + MOVD enc+16(FP), BLK_KEY - MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5) - MOVD $15, BLK_IDX // li 7,15 + MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5) + MOVD $15, BLK_IDX // li 7,15 - LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 - NEG BLK_OUT, R11 // neg 11,4 - LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 - LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3 - VSPLTISB $0x0f, RCON // vspltisb 4,0x0f - LVSR (R11)(R0), KEY // lvsr 3,0,11 - VXOR IN1, RCON, IN1 // vxor 2,2,4 - MOVD $16, BLK_IDX // li 7,16 - VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 - LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5 - LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 - SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1 - LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 - ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 - SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 - VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 + NEG BLK_OUT, R11 // neg 11,4 + LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 + LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3 + VSPLTISB $0x0f, RCON // vspltisb 4,0x0f + LVSR (R11)(R0), KEY // lvsr 3,0,11 + VXOR IN1, RCON, IN1 // vxor 2,2,4 + MOVD $16, BLK_IDX // li 7,16 + VPERM 
ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 + LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5 + LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 + SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 - VXOR ZERO, IN0, ZERO // vxor 0,0,1 - LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 - ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 - MOVD BLK_ROUNDS, CTR // mtctr 6 + VXOR ZERO, IN0, ZERO // vxor 0,0,1 + LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + MOVD BLK_ROUNDS, CTR // mtctr 6 loop_enc: - VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 - VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2 - LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 - ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 - VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 - VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1 - LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 - ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 - BC 0x10, 0, loop_enc // bdnz .Loop_enc + VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 + VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1 + LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + BC 0x10, 0, loop_enc // bdnz .Loop_enc - VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 - VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2 - LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 - VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 - VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1 + VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 + VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1 - VSPLTISB $-1, IN1 // vspltisb 2,-1 - VXOR IN0, IN0, IN0 // vxor 1,1,1 - MOVD $15, BLK_IDX // li 7,15 - VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 - VXOR KEY, RCON, KEY // vxor 3,3,4 - LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 - VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3 - VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 - LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 - STVX IN0, (BLK_OUT+R0) // stvx 1,0,4 - VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 - STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4 - - RET // blr + VSPLTISB $-1, IN1 // vspltisb 2,-1 + VXOR IN0, IN0, IN0 // vxor 1,1,1 + MOVD $15, BLK_IDX // li 7,15 + VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 + VXOR KEY, RCON, KEY // vxor 3,3,4 + LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 + VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3 + VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 + LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 + STVX IN0, (BLK_OUT+R0) // stvx 1,0,4 + VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 + STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4 + RET // blr // func decryptBlockAsm(dst, src *byte, dec *uint32) -TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0 +TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 // Load the arguments inside the registers - MOVD dst+0(FP), BLK_OUT - MOVD src+8(FP), BLK_INP - MOVD dec+16(FP), BLK_KEY + MOVD dst+0(FP), BLK_OUT + MOVD src+8(FP), BLK_INP + MOVD dec+16(FP), BLK_KEY - MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5) - MOVD $15, BLK_IDX // li 7,15 + MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5) + MOVD $15, BLK_IDX // li 7,15 - LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 - NEG BLK_OUT, R11 // neg 11,4 - LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 - LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3 
- VSPLTISB $0x0f, RCON // vspltisb 4,0x0f - LVSR (R11)(R0), KEY // lvsr 3,0,11 - VXOR IN1, RCON, IN1 // vxor 2,2,4 - MOVD $16, BLK_IDX // li 7,16 - VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 - LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5 - LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 - SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1 - LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 - ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 - SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 - VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 + NEG BLK_OUT, R11 // neg 11,4 + LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 + LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3 + VSPLTISB $0x0f, RCON // vspltisb 4,0x0f + LVSR (R11)(R0), KEY // lvsr 3,0,11 + VXOR IN1, RCON, IN1 // vxor 2,2,4 + MOVD $16, BLK_IDX // li 7,16 + VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 + LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5 + LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 + SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 - VXOR ZERO, IN0, ZERO // vxor 0,0,1 - LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 - ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 - MOVD BLK_ROUNDS, CTR // mtctr 6 + VXOR ZERO, IN0, ZERO // vxor 0,0,1 + LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + MOVD BLK_ROUNDS, CTR // mtctr 6 loop_dec: - VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 - VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2 - LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 - ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 - VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 - VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1 - LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 - ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 - BC 0x10, 0, loop_dec // bdnz .Loop_dec + VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 + VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1 + LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + BC 0x10, 0, loop_dec // bdnz .Loop_dec - VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 - VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2 - LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 - VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 - VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1 + VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 + VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1 - VSPLTISB $-1, IN1 // vspltisb 2,-1 - VXOR IN0, IN0, IN0 // vxor 1,1,1 - MOVD $15, BLK_IDX // li 7,15 - VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 - VXOR KEY, RCON, KEY // vxor 3,3,4 - LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 - VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3 - VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 - LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 - STVX IN0, (BLK_OUT+R0) // stvx 1,0,4 - VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 - STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4 + VSPLTISB $-1, IN1 // vspltisb 2,-1 + VXOR IN0, IN0, IN0 // vxor 1,1,1 + MOVD $15, BLK_IDX // li 7,15 + VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 + VXOR KEY, RCON, KEY // vxor 3,3,4 + LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 + VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3 + VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 + LVX (BLK_OUT)(BLK_IDX), RCON // lvx 
4,7,4
+	STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
+	VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
+	STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
+
+	RET // blr
-	RET // blr
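
A note on the contract these routines implement: doEncryptKeyAsm validates its arguments before touching the key. An unset key or schedule pointer makes it return -1, a key length (in bits) below 128, above 256, or not a multiple of 64 returns -2, and on success it returns 0 after storing the round count (10, 12 or 14) in the last word of the schedule, which encryptBlockAsm and decryptBlockAsm later read back from offset 240 (MOVWZ 240(BLK_KEY), BLK_ROUNDS). The sketch below restates that contract in pure Go; the helper names checkKeyArgs and numRounds and the 61-word schedule layout are illustrative assumptions, not code from this patch.

package main

import "fmt"

// checkKeyArgs mirrors the argument checks at the top of doEncryptKeyAsm:
// -1 for an unset pointer, -2 for an unsupported key length, 0 on success.
func checkKeyArgs(key []byte, keylenBits int, enc []uint32) int {
	if key == nil || enc == nil {
		return -1 // cmpldi r3,0 / cmpldi r5,0: pointer not set
	}
	if keylenBits < 128 || keylenBits > 256 || keylenBits%64 != 0 {
		return -2 // cmpwi 4,128 / cmpwi 4,256 / andi. 0,4,0x3f
	}
	return 0
}

// numRounds computes the value the assembly leaves in ROUNDS (R8):
// 10 rounds for AES-128, 12 for AES-192, 14 for AES-256.
func numRounds(keylenBits int) int {
	return keylenBits/32 + 6
}

func main() {
	key := make([]byte, 32)   // a 256-bit key
	enc := make([]uint32, 61) // 15 round keys (240 bytes) plus the rounds word
	fmt.Println(checkKeyArgs(key, len(key)*8, enc)) // 0
	fmt.Println(numRounds(len(key) * 8))            // 14
}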
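Similarly, setDecryptKeyAsm does not expand the key a second time: it calls doEncryptKeyAsm, and the dec_key loop then swaps the 16-byte round keys pairwise from the two ends of the schedule (rounds/2 iterations, CTR loaded from ROUNDS>>1 via srwi 8,8,1), so the decrypt schedule is the encrypt schedule with its round keys in reverse order. Below is a pure-Go sketch of that inversion, assuming the schedule is held as 4*(rounds+1) uint32 words; invertKeySchedule is a hypothetical name, not a function in this file.

package main

import "fmt"

// invertKeySchedule does in Go what the dec_key loop does in assembly:
// reverse the order of the 4-word (16-byte) round keys. The middle round
// key stays in place, matching the rounds/2 iteration count.
func invertKeySchedule(enc []uint32, rounds int) []uint32 {
	dec := make([]uint32, len(enc))
	copy(dec, enc)
	for i, j := 0, rounds*4; i < j; i, j = i+4, j-4 {
		for k := 0; k < 4; k++ {
			dec[i+k], dec[j+k] = dec[j+k], dec[i+k]
		}
	}
	return dec
}

func main() {
	const rounds = 10 // AES-128
	enc := make([]uint32, 4*(rounds+1))
	for i := range enc {
		enc[i] = uint32(i / 4) // tag each word with its round-key index
	}
	dec := invertKeySchedule(enc, rounds)
	fmt.Println(dec[0], dec[len(dec)-1]) // 10 0: first and last keys swapped
}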