2009-08-14 12:53:27 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
2009-08-18 11:41:26 -06:00
|
|
|
// This file provides fast assembly versions for the elementary
|
|
|
|
// arithmetic operations on vectors implemented in arith.go.
|
2009-08-15 12:43:54 -06:00
|
|
|
|
2009-08-14 12:53:27 -06:00
|
|
|
// TODO(gri) - experiment with unrolled loops for faster execution
|
|
|
|
|
2010-05-19 10:36:50 -06:00
|
|
|
// func mulWW(x, y Word) (z1, z0 Word)
//
// mulWW forms the full two-word unsigned product of x and y.
// The high word of the product is returned in z1, the low word in z0.
TEXT ·mulWW(SB),7,$0
	MOVQ y+8(FP), AX
	MULQ x+0(FP)		// DX:AX = y * x
	MOVQ AX, z0+24(FP)	// z0 = low word
	MOVQ DX, z1+16(FP)	// z1 = high word
	RET
|
|
|
|
|
|
|
|
|
|
|
|
// func divWW(x1, x0, y Word) (q, r Word)
//
// divWW divides the two-word value x1:x0 by y with a single DIVQ and
// returns the quotient in q and the remainder in r.
// NOTE(review): DIVQ raises a hardware divide error when y is 0 or the
// quotient does not fit in one word (x1 >= y); callers presumably
// guarantee neither happens — confirm against the Go callers.
TEXT ·divWW(SB),7,$0
	MOVQ x0+8(FP), AX	// low word of the dividend
	MOVQ x1+0(FP), DX	// high word of the dividend
	DIVQ y+16(FP)		// AX = quotient, DX = remainder
	MOVQ DX, r+32(FP)
	MOVQ AX, q+24(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func addVV(z, x, y []Word) (c Word)
//
// addVV stores x[i] + y[i] + carry into z[i] for i = 0 .. n-1 and returns
// the carry out of the last addition in c (0 when the loop does not run).
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
//
// Register use: R8 = &x[0], R9 = &y[0], R10 = &z[0], R11 = n,
// BX = i, DX = carry (0 or 1), AX = scratch.
TEXT ·addVV(SB),7,$0
	MOVL n+8(FP), R11
	MOVQ x+16(FP), R8
	MOVQ y+32(FP), R9
	MOVQ z+0(FP), R10
	XORQ DX, DX		// c = 0
	XORQ BX, BX		// i = 0
	JMP addVV_test

addVV_loop:
	RCRQ $1, DX		// move saved carry from DX bit 0 into CF
	MOVQ (R8)(BX*8), AX	// MOVQ leaves the flags alone
	ADCQ (R9)(BX*8), AX	// AX = x[i] + y[i] + CF
	MOVQ AX, (R10)(BX*8)	// z[i] = AX
	RCLQ $1, DX		// save the new carry back into DX bit 0
	ADDL $1, BX		// i++ (clobbers CF, hence the save above)

addVV_test:
	CMPQ BX, R11
	JL addVV_loop		// while i < n

	MOVQ DX, c+48(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func subVV(z, x, y []Word) (c Word)
//
// subVV stores x[i] - y[i] - borrow into z[i] for i = 0 .. n-1 and returns
// the borrow out of the last subtraction in c (0 when the loop does not run).
// It mirrors addVV, with SBBQ in place of ADCQ and its own label names.
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
//
// Register use: R8 = &x[0], R9 = &y[0], R10 = &z[0], R11 = n,
// BX = i, DX = borrow (0 or 1), AX = scratch.
TEXT ·subVV(SB),7,$0
	MOVL n+8(FP), R11
	MOVQ x+16(FP), R8
	MOVQ y+32(FP), R9
	MOVQ z+0(FP), R10
	XORQ DX, DX		// c = 0
	XORQ BX, BX		// i = 0
	JMP subVV_test

subVV_loop:
	RCRQ $1, DX		// move saved borrow from DX bit 0 into CF
	MOVQ (R8)(BX*8), AX	// MOVQ leaves the flags alone
	SBBQ (R9)(BX*8), AX	// AX = x[i] - y[i] - CF
	MOVQ AX, (R10)(BX*8)	// z[i] = AX
	RCLQ $1, DX		// save the new borrow back into DX bit 0
	ADDL $1, BX		// i++ (clobbers CF, hence the save above)

subVV_test:
	CMPQ BX, R11
	JL subVV_loop		// while i < n

	MOVQ DX, c+48(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func addVW(z, x []Word, y Word) (c Word)
//
// addVW adds the single word y to the vector x: the running carry starts
// out as y, each step stores x[i] + carry into z[i], and the carry out of
// that addition (0 or 1) becomes the carry for the next step. The final
// carry is returned in c; if the loop does not run, c is y itself.
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
//
// Register use: R8 = &x[0], R10 = &z[0], R11 = n, BX = i, AX = carry.
TEXT ·addVW(SB),7,$0
	MOVL n+8(FP), R11
	MOVQ x+16(FP), R8
	MOVQ z+0(FP), R10
	MOVQ y+32(FP), AX	// c = y
	XORQ BX, BX		// i = 0
	JMP addVW_test

addVW_loop:
	ADDQ (R8)(BX*8), AX	// AX = x[i] + c, CF = carry out
	MOVQ AX, (R10)(BX*8)	// z[i] = AX (flags untouched)
	RCLQ $1, AX		// rotate CF into bit 0 of AX ...
	ANDQ $1, AX		// ... and keep only that bit: c = carry
	ADDL $1, BX		// i++

addVW_test:
	CMPQ BX, R11
	JL addVW_loop		// while i < n

	MOVQ AX, c+40(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func subVW(z, x []Word, y Word) (c Word)
//
// subVW subtracts the single word y from the vector x: the running borrow
// starts out as y, each step stores x[i] - borrow into z[i], and the borrow
// out of that subtraction (0 or 1) becomes the borrow for the next step.
// The final borrow is returned in c; if the loop does not run, c is y itself.
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
//
// Register use: R8 = &x[0], R10 = &z[0], R11 = n, BX = i, AX = borrow,
// DX = scratch.
TEXT ·subVW(SB),7,$0
	MOVL n+8(FP), R11
	MOVQ x+16(FP), R8
	MOVQ z+0(FP), R10
	MOVQ y+32(FP), AX	// c = y
	XORQ BX, BX		// i = 0
	JMP subVW_test

subVW_loop:
	MOVQ (R8)(BX*8), DX	// TODO(gri) is there a reverse SUBQ?
	SUBQ AX, DX		// DX = x[i] - c, CF = borrow out
	MOVQ DX, (R10)(BX*8)	// z[i] = DX (flags untouched)
	RCLQ $1, AX		// rotate CF into bit 0 of AX ...
	ANDQ $1, AX		// ... and keep only that bit: c = borrow
	ADDL $1, BX		// i++

subVW_test:
	CMPQ BX, R11
	JL subVW_loop		// while i < n

	MOVQ AX, c+40(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func shlVW(z, x []Word, s Word) (c Word)
//
// shlVW shifts the vector x left by s bits into z, working from the most
// significant word down to word 0. The bits shifted out of the top word
// x[n-1] are returned in c. For n <= 0 nothing is stored and c is 0.
// In the comments below ŝ denotes the complementary shift count (64 - s).
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
//
// Register use: R8 = &x[0], R10 = &z[0], CX = s, BX = i,
// AX = w1 (lower neighbour word), DX = w (word being produced).
TEXT ·shlVW(SB),7,$0
	MOVL n+8(FP), BX	// i = n
	SUBL $1, BX		// i = n-1
	JL shlVW_empty		// n <= 0: nothing to shift

	// n > 0: compute the bits shifted out of the top word
	MOVQ s+32(FP), CX
	MOVQ x+16(FP), R8
	MOVQ z+0(FP), R10
	MOVQ (R8)(BX*8), AX	// w1 = x[n-1]
	XORQ DX, DX
	SHLQ CX, DX:AX		// DX = w1>>ŝ
	MOVQ DX, c+40(FP)

	TESTL BX, BX
	JLE shlVW_last		// i <= 0: only z[0] remains

	// i > 0
shlVW_loop:
	MOVQ AX, DX		// w = w1
	MOVQ -8(R8)(BX*8), AX	// w1 = x[i-1]
	SHLQ CX, DX:AX		// DX = w<<s | w1>>ŝ
	MOVQ DX, (R10)(BX*8)	// z[i] = w<<s | w1>>ŝ
	SUBL $1, BX		// i--
	JG shlVW_loop		// while i > 0

	// i <= 0
shlVW_last:
	SHLQ CX, AX		// w1<<s
	MOVQ AX, (R10)		// z[0] = w1<<s
	RET

shlVW_empty:
	MOVQ $0, c+40(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func shrVW(z, x []Word, s Word) (c Word)
//
// shrVW shifts the vector x right by s bits into z, working from word 0
// up to the most significant word. The bits shifted out of x[0] are
// returned in c. For n <= 0 nothing is stored and c is 0.
// In the comments below ŝ denotes the complementary shift count (64 - s).
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
//
// Register use: R8 = &x[0], R10 = &z[0], R11 = n-1, CX = s, BX = i,
// AX = w1 (upper neighbour word), DX = w (word being produced).
TEXT ·shrVW(SB),7,$0
	MOVL n+8(FP), R11
	SUBL $1, R11		// R11 = n-1
	JL shrVW_empty		// n <= 0: nothing to shift

	// n > 0: compute the bits shifted out of the bottom word
	MOVQ s+32(FP), CX
	MOVQ x+16(FP), R8
	MOVQ z+0(FP), R10
	MOVQ (R8), AX		// w1 = x[0]
	XORQ DX, DX
	SHRQ CX, DX:AX		// DX = w1<<ŝ
	MOVQ DX, c+40(FP)

	XORQ BX, BX		// i = 0
	JMP shrVW_test

	// i < n-1
shrVW_loop:
	MOVQ AX, DX		// w = w1
	MOVQ 8(R8)(BX*8), AX	// w1 = x[i+1]
	SHRQ CX, DX:AX		// DX = w>>s | w1<<ŝ
	MOVQ DX, (R10)(BX*8)	// z[i] = w>>s | w1<<ŝ
	ADDL $1, BX		// i++

shrVW_test:
	CMPQ BX, R11
	JL shrVW_loop		// while i < n-1

	// i >= n-1: the top word has no upper neighbour
	SHRQ CX, AX		// w1>>s
	MOVQ AX, (R10)(R11*8)	// z[n-1] = w1>>s
	RET

shrVW_empty:
	MOVQ $0, c+40(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// mulAddVWW multiplies the vector x by the word y and adds a running carry
// that starts out as r: each step computes x[i]*y + carry as a two-word
// value, stores the low word in z[i] and carries the high word into the
// next step. The final carry is returned in c; if the loop does not run,
// c is r itself.
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
//
// Register use: R8 = &x[0], R9 = y, R10 = &z[0], R11 = n, BX = i,
// CX = carry, DX:AX = product.
TEXT ·mulAddVWW(SB),7,$0
	MOVL n+8(FP), R11
	MOVQ x+16(FP), R8
	MOVQ z+0(FP), R10
	MOVQ y+32(FP), R9
	MOVQ r+40(FP), CX	// c = r
	XORQ BX, BX		// i = 0
	JMP mulAddVWW_test

mulAddVWW_loop:
	MOVQ (R8)(BX*8), AX
	MULQ R9			// DX:AX = x[i] * y
	ADDQ CX, AX		// low word += c
	MOVQ AX, (R10)(BX*8)	// z[i] = low word (flags untouched)
	ADCQ $0, DX		// propagate the carry into the high word
	MOVQ DX, CX		// c = high word
	ADDL $1, BX		// i++

mulAddVWW_test:
	CMPQ BX, R11
	JL mulAddVWW_loop	// while i < n

	MOVQ CX, c+48(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func addMulVVW(z, x []Word, y Word) (c Word)
//
// addMulVVW adds y times the vector x to the vector z in place: each step
// computes x[i]*y + carry as a two-word value, adds its low word to z[i],
// and carries the high word (plus the carry out of that addition) into the
// next step. The carry starts at 0 and its final value is returned in c.
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
//
// Register use: R8 = &x[0], R9 = y, R10 = &z[0], R11 = n, BX = i,
// CX = carry, DX:AX = product.
TEXT ·addMulVVW(SB),7,$0
	MOVL n+8(FP), R11
	MOVQ x+16(FP), R8
	MOVQ z+0(FP), R10
	MOVQ y+32(FP), R9
	XORQ CX, CX		// c = 0
	XORQ BX, BX		// i = 0
	JMP addMulVVW_test

addMulVVW_loop:
	MOVQ (R8)(BX*8), AX
	MULQ R9			// DX:AX = x[i] * y
	ADDQ CX, AX		// low word += c
	ADCQ $0, DX		// propagate the carry into the high word
	ADDQ AX, (R10)(BX*8)	// z[i] += low word
	ADCQ $0, DX		// propagate the carry into the high word
	MOVQ DX, CX		// c = high word
	ADDL $1, BX		// i++

addMulVVW_test:
	CMPQ BX, R11
	JL addMulVVW_loop	// while i < n

	MOVQ CX, c+40(FP)
	RET
|
|
|
|
|
|
|
|
|
2010-05-08 14:52:36 -06:00
|
|
|
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
//
// divWVW divides the vector x by the word y, working from the most
// significant word down to word 0. The running remainder starts out as xn;
// each step divides remainder:x[i] by y, stores the quotient in z[i] and
// keeps the new remainder for the next step. The final remainder is
// returned in r; if the loop does not run, r is xn itself.
// n is the 32-bit value at offset 8 of the argument frame (presumably
// len(z) — confirm against the Go declaration).
// NOTE(review): DIVQ raises a hardware divide error when y is 0 or a
// quotient does not fit in one word (remainder >= y); callers presumably
// guarantee xn < y — confirm against the Go callers.
//
// Register use: R8 = &x[0], R9 = y, R10 = &z[0], BX = i,
// DX = remainder, AX = x[i] / quotient.
TEXT ·divWVW(SB),7,$0
	MOVL n+8(FP), BX	// i = n
	MOVQ x+24(FP), R8
	MOVQ z+0(FP), R10
	MOVQ y+40(FP), R9
	MOVQ xn+16(FP), DX	// r = xn
	JMP divWVW_next

divWVW_loop:
	MOVQ (R8)(BX*8), AX	// dividend = r:x[i]
	DIVQ R9			// AX = quotient, DX = remainder
	MOVQ AX, (R10)(BX*8)	// z[i] = quotient

divWVW_next:
	SUBL $1, BX		// i--
	JGE divWVW_loop		// while i >= 0

	MOVQ DX, r+48(FP)
	RET
|