mirror of
https://github.com/golang/go
synced 2024-11-20 03:54:40 -07:00
big: more cleanup
- pass []Word instead of *Word to core arithmetic functions - remove dead code R=rsc CC=golang-dev https://golang.org/cl/1154042
This commit is contained in:
parent
56ca697269
commit
61eb0e71f2
@ -8,19 +8,23 @@
|
||||
|
||||
package big
|
||||
|
||||
import "unsafe"
|
||||
// TODO(gri) Decide if Word needs to remain exported.
|
||||
|
||||
type Word uintptr
|
||||
|
||||
const (
|
||||
_S = uintptr(unsafe.Sizeof(Word(0))) // TODO(gri) should Sizeof return a uintptr?
|
||||
_logW = (0x650 >> _S) & 7
|
||||
_W = 1 << _logW
|
||||
_B = 1 << _W
|
||||
_M = _B - 1
|
||||
_W2 = _W / 2
|
||||
_B2 = 1 << _W2
|
||||
_M2 = _B2 - 1
|
||||
// Compute the size _S of a Word in bytes.
|
||||
_m = ^Word(0)
|
||||
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
|
||||
_S = 1 << _logS
|
||||
|
||||
_W = _S << 3 // word size in bits
|
||||
_B = 1 << _W // digit base
|
||||
_M = _B - 1 // digit mask
|
||||
|
||||
_W2 = _W / 2 // half word size in bits
|
||||
_B2 = 1 << _W2 // half digit base
|
||||
_M2 = _B2 - 1 // half digit mask
|
||||
)
|
||||
|
||||
|
||||
@ -280,109 +284,104 @@ func divWW_g(x1, x0, y Word) (q, r Word) {
|
||||
}
|
||||
|
||||
|
||||
func (p *Word) at(i int) *Word {
|
||||
return (*Word)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + uintptr(i)*_S))
|
||||
}
|
||||
|
||||
|
||||
func addVV(z, x, y *Word, n int) (c Word)
|
||||
func addVV_g(z, x, y *Word, n int) (c Word) {
|
||||
func addVV(z, x, y []Word, n int) (c Word)
|
||||
func addVV_g(z, x, y []Word, n int) (c Word) {
|
||||
for i := 0; i < n; i++ {
|
||||
c, *z.at(i) = addWW_g(*x.at(i), *y.at(i), c)
|
||||
c, z[i] = addWW_g(x[i], y[i], c)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
func subVV(z, x, y *Word, n int) (c Word)
|
||||
func subVV_g(z, x, y *Word, n int) (c Word) {
|
||||
func subVV(z, x, y []Word, n int) (c Word)
|
||||
func subVV_g(z, x, y []Word, n int) (c Word) {
|
||||
for i := 0; i < n; i++ {
|
||||
c, *z.at(i) = subWW_g(*x.at(i), *y.at(i), c)
|
||||
c, z[i] = subWW_g(x[i], y[i], c)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
func addVW(z, x *Word, y Word, n int) (c Word)
|
||||
func addVW_g(z, x *Word, y Word, n int) (c Word) {
|
||||
func addVW(z, x []Word, y Word, n int) (c Word)
|
||||
func addVW_g(z, x []Word, y Word, n int) (c Word) {
|
||||
c = y
|
||||
for i := 0; i < n; i++ {
|
||||
c, *z.at(i) = addWW_g(*x.at(i), c, 0)
|
||||
c, z[i] = addWW_g(x[i], c, 0)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
func subVW(z, x *Word, y Word, n int) (c Word)
|
||||
func subVW_g(z, x *Word, y Word, n int) (c Word) {
|
||||
func subVW(z, x []Word, y Word, n int) (c Word)
|
||||
func subVW_g(z, x []Word, y Word, n int) (c Word) {
|
||||
c = y
|
||||
for i := 0; i < n; i++ {
|
||||
c, *z.at(i) = subWW_g(*x.at(i), c, 0)
|
||||
c, z[i] = subWW_g(x[i], c, 0)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
func shlVW(z, x *Word, s Word, n int) (c Word)
|
||||
func shlVW_g(z, x *Word, s Word, n int) (c Word) {
|
||||
func shlVW(z, x []Word, s Word, n int) (c Word)
|
||||
func shlVW_g(z, x []Word, s Word, n int) (c Word) {
|
||||
if n > 0 {
|
||||
ŝ := _W - s
|
||||
w1 := *x.at(n - 1)
|
||||
w1 := x[n-1]
|
||||
c = w1 >> ŝ
|
||||
for i := n - 1; i > 0; i-- {
|
||||
w := w1
|
||||
w1 = *x.at(i - 1)
|
||||
*z.at(i) = w<<s | w1>>ŝ
|
||||
w1 = x[i-1]
|
||||
z[i] = w<<s | w1>>ŝ
|
||||
}
|
||||
*z.at(0) = w1 << s
|
||||
z[0] = w1 << s
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
func shrVW(z, x *Word, s Word, n int) (c Word)
|
||||
func shrVW_g(z, x *Word, s Word, n int) (c Word) {
|
||||
func shrVW(z, x []Word, s Word, n int) (c Word)
|
||||
func shrVW_g(z, x []Word, s Word, n int) (c Word) {
|
||||
if n > 0 {
|
||||
ŝ := _W - s
|
||||
w1 := *x.at(0)
|
||||
w1 := x[0]
|
||||
c = w1 << ŝ
|
||||
for i := 0; i < n-1; i++ {
|
||||
w := w1
|
||||
w1 = *x.at(i + 1)
|
||||
*z.at(i) = w>>s | w1<<ŝ
|
||||
w1 = x[i+1]
|
||||
z[i] = w>>s | w1<<ŝ
|
||||
}
|
||||
*z.at(n - 1) = w1 >> s
|
||||
z[n-1] = w1 >> s
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
|
||||
func mulAddVWW_g(z, x *Word, y, r Word, n int) (c Word) {
|
||||
func mulAddVWW(z, x []Word, y, r Word, n int) (c Word)
|
||||
func mulAddVWW_g(z, x []Word, y, r Word, n int) (c Word) {
|
||||
c = r
|
||||
for i := 0; i < n; i++ {
|
||||
c, *z.at(i) = mulAddWWW_g(*x.at(i), y, c)
|
||||
c, z[i] = mulAddWWW_g(x[i], y, c)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
func addMulVVW(z, x *Word, y Word, n int) (c Word)
|
||||
func addMulVVW_g(z, x *Word, y Word, n int) (c Word) {
|
||||
func addMulVVW(z, x []Word, y Word, n int) (c Word)
|
||||
func addMulVVW_g(z, x []Word, y Word, n int) (c Word) {
|
||||
for i := 0; i < n; i++ {
|
||||
z1, z0 := mulAddWWW_g(*x.at(i), y, *z.at(i))
|
||||
c, *z.at(i) = addWW_g(z0, c, 0)
|
||||
z1, z0 := mulAddWWW_g(x[i], y, z[i])
|
||||
c, z[i] = addWW_g(z0, c, 0)
|
||||
c += z1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
func divWVW(z *Word, xn Word, x *Word, y Word, n int) (r Word)
|
||||
func divWVW_g(z *Word, xn Word, x *Word, y Word, n int) (r Word) {
|
||||
func divWVW(z []Word, xn Word, x []Word, y Word, n int) (r Word)
|
||||
func divWVW_g(z []Word, xn Word, x []Word, y Word, n int) (r Word) {
|
||||
r = xn
|
||||
for i := n - 1; i >= 0; i-- {
|
||||
*z.at(i), r = divWW_g(r, *x.at(i), y)
|
||||
z[i], r = divWW_g(r, x[i], y)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
@ -5,12 +5,12 @@
|
||||
// This file provides fast assembly versions for the elementary
|
||||
// arithmetic operations on vectors implemented in arith.go.
|
||||
|
||||
// func addVV(z, x, y *Word, n int) (c Word)
|
||||
// func addVV(z, x, y []Word, n int) (c Word)
|
||||
TEXT ·addVV(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL x+4(FP), SI
|
||||
MOVL y+8(FP), CX
|
||||
MOVL n+12(FP), BP
|
||||
MOVL x+12(FP), SI
|
||||
MOVL y+24(FP), CX
|
||||
MOVL n+36(FP), BP
|
||||
MOVL $0, BX // i = 0
|
||||
MOVL $0, DX // c = 0
|
||||
JMP E1
|
||||
@ -25,17 +25,17 @@ L1: MOVL (SI)(BX*4), AX
|
||||
E1: CMPL BX, BP // i < n
|
||||
JL L1
|
||||
|
||||
MOVL DX, c+16(FP)
|
||||
MOVL DX, c+40(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func subVV(z, x, y *Word, n int) (c Word)
|
||||
// func subVV(z, x, y []Word, n int) (c Word)
|
||||
// (same as addVV except for SBBL instead of ADCL and label names)
|
||||
TEXT ·subVV(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL x+4(FP), SI
|
||||
MOVL y+8(FP), CX
|
||||
MOVL n+12(FP), BP
|
||||
MOVL x+12(FP), SI
|
||||
MOVL y+24(FP), CX
|
||||
MOVL n+36(FP), BP
|
||||
MOVL $0, BX // i = 0
|
||||
MOVL $0, DX // c = 0
|
||||
JMP E2
|
||||
@ -50,16 +50,16 @@ L2: MOVL (SI)(BX*4), AX
|
||||
E2: CMPL BX, BP // i < n
|
||||
JL L2
|
||||
|
||||
MOVL DX, c+16(FP)
|
||||
MOVL DX, c+40(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func addVW(z, x *Word, y Word, n int) (c Word)
|
||||
// func addVW(z, x []Word, y Word, n int) (c Word)
|
||||
TEXT ·addVW(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL x+4(FP), SI
|
||||
MOVL y+8(FP), AX // c = y
|
||||
MOVL n+12(FP), BP
|
||||
MOVL x+12(FP), SI
|
||||
MOVL y+24(FP), AX // c = y
|
||||
MOVL n+28(FP), BP
|
||||
MOVL $0, BX // i = 0
|
||||
JMP E3
|
||||
|
||||
@ -72,16 +72,16 @@ L3: ADDL (SI)(BX*4), AX
|
||||
E3: CMPL BX, BP // i < n
|
||||
JL L3
|
||||
|
||||
MOVL AX, c+16(FP)
|
||||
MOVL AX, c+32(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func subVW(z, x *Word, y Word, n int) (c Word)
|
||||
// func subVW(z, x []Word, y Word, n int) (c Word)
|
||||
TEXT ·subVW(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL x+4(FP), SI
|
||||
MOVL y+8(FP), AX // c = y
|
||||
MOVL n+12(FP), BP
|
||||
MOVL x+12(FP), SI
|
||||
MOVL y+24(FP), AX // c = y
|
||||
MOVL n+28(FP), BP
|
||||
MOVL $0, BX // i = 0
|
||||
JMP E4
|
||||
|
||||
@ -95,24 +95,24 @@ L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL?
|
||||
E4: CMPL BX, BP // i < n
|
||||
JL L4
|
||||
|
||||
MOVL AX, c+16(FP)
|
||||
MOVL AX, c+32(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shlVW(z, x *Word, s Word, n int) (c Word)
|
||||
// func shlVW(z, x []Word, s Word, n int) (c Word)
|
||||
TEXT ·shlVW(SB),7,$0
|
||||
MOVL n+12(FP), BX // i = n
|
||||
MOVL n+28(FP), BX // i = n
|
||||
SUBL $1, BX // i--
|
||||
JL X8b // i < 0 (n <= 0)
|
||||
|
||||
// n > 0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL x+4(FP), SI
|
||||
MOVL s+8(FP), CX
|
||||
MOVL x+12(FP), SI
|
||||
MOVL s+24(FP), CX
|
||||
MOVL (SI)(BX*4), AX // w1 = x[n-1]
|
||||
MOVL $0, DX
|
||||
SHLL CX, DX:AX // w1>>ŝ
|
||||
MOVL DX, c+16(FP)
|
||||
MOVL DX, c+32(FP)
|
||||
|
||||
CMPL BX, $0
|
||||
JLE X8a // i <= 0
|
||||
@ -130,24 +130,24 @@ X8a: SHLL CX, AX // w1<<s
|
||||
MOVL AX, (DI) // z[0] = w1<<s
|
||||
RET
|
||||
|
||||
X8b: MOVL $0, c+16(FP)
|
||||
X8b: MOVL $0, c+32(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shrVW(z, x *Word, s Word, n int) (c Word)
|
||||
// func shrVW(z, x []Word, s Word, n int) (c Word)
|
||||
TEXT ·shrVW(SB),7,$0
|
||||
MOVL n+12(FP), BP
|
||||
MOVL n+28(FP), BP
|
||||
SUBL $1, BP // n--
|
||||
JL X9b // n < 0 (n <= 0)
|
||||
|
||||
// n > 0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL x+4(FP), SI
|
||||
MOVL s+8(FP), CX
|
||||
MOVL x+12(FP), SI
|
||||
MOVL s+24(FP), CX
|
||||
MOVL (SI), AX // w1 = x[0]
|
||||
MOVL $0, DX
|
||||
SHRL CX, DX:AX // w1<<ŝ
|
||||
MOVL DX, c+16(FP)
|
||||
MOVL DX, c+32(FP)
|
||||
|
||||
MOVL $0, BX // i = 0
|
||||
JMP E9
|
||||
@ -167,17 +167,17 @@ X9a: SHRL CX, AX // w1>>s
|
||||
MOVL AX, (DI)(BP*4) // z[n-1] = w1>>s
|
||||
RET
|
||||
|
||||
X9b: MOVL $0, c+16(FP)
|
||||
X9b: MOVL $0, c+32(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
|
||||
// func mulAddVWW(z, x []Word, y, r Word, n int) (c Word)
|
||||
TEXT ·mulAddVWW(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL x+4(FP), SI
|
||||
MOVL y+8(FP), BP
|
||||
MOVL r+12(FP), CX // c = r
|
||||
MOVL n+16(FP), BX
|
||||
MOVL x+12(FP), SI
|
||||
MOVL y+24(FP), BP
|
||||
MOVL r+28(FP), CX // c = r
|
||||
MOVL n+32(FP), BX
|
||||
LEAL (DI)(BX*4), DI
|
||||
LEAL (SI)(BX*4), SI
|
||||
NEGL BX // i = -n
|
||||
@ -194,16 +194,16 @@ L5: MOVL (SI)(BX*4), AX
|
||||
E5: CMPL BX, $0 // i < 0
|
||||
JL L5
|
||||
|
||||
MOVL CX, c+20(FP)
|
||||
MOVL CX, c+36(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func addMulVVW(z, x *Word, y Word, n int) (c Word)
|
||||
// func addMulVVW(z, x []Word, y Word, n int) (c Word)
|
||||
TEXT ·addMulVVW(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL x+4(FP), SI
|
||||
MOVL y+8(FP), BP
|
||||
MOVL n+12(FP), BX
|
||||
MOVL x+12(FP), SI
|
||||
MOVL y+24(FP), BP
|
||||
MOVL n+28(FP), BX
|
||||
LEAL (DI)(BX*4), DI
|
||||
LEAL (SI)(BX*4), SI
|
||||
NEGL BX // i = -n
|
||||
@ -223,17 +223,17 @@ L6: MOVL (SI)(BX*4), AX
|
||||
E6: CMPL BX, $0 // i < 0
|
||||
JL L6
|
||||
|
||||
MOVL CX, c+16(FP)
|
||||
MOVL CX, c+32(FP)
|
||||
RET
|
||||
|
||||
|
||||
// divWVW(z* Word, xn Word, x *Word, y Word, n int) (r Word)
|
||||
// divWVW(z* Word, xn Word, x []Word, y Word, n int) (r Word)
|
||||
TEXT ·divWVW(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
MOVL xn+4(FP), DX // r = xn
|
||||
MOVL x+8(FP), SI
|
||||
MOVL y+12(FP), CX
|
||||
MOVL n+16(FP), BX // i = n
|
||||
MOVL xn+12(FP), DX // r = xn
|
||||
MOVL x+16(FP), SI
|
||||
MOVL y+28(FP), CX
|
||||
MOVL n+32(FP), BX // i = n
|
||||
JMP E7
|
||||
|
||||
L7: MOVL (SI)(BX*4), AX
|
||||
@ -243,5 +243,5 @@ L7: MOVL (SI)(BX*4), AX
|
||||
E7: SUBL $1, BX // i--
|
||||
JGE L7 // i >= 0
|
||||
|
||||
MOVL DX, r+20(FP)
|
||||
MOVL DX, r+36(FP)
|
||||
RET
|
||||
|
@ -7,12 +7,12 @@
|
||||
|
||||
// TODO(gri) - experiment with unrolled loops for faster execution
|
||||
|
||||
// func addVV(z, x, y *Word, n int) (c Word)
|
||||
// func addVV(z, x, y []Word, n int) (c Word)
|
||||
TEXT ·addVV(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ y+16(FP), R9
|
||||
MOVL n+24(FP), R11
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ y+32(FP), R9
|
||||
MOVL n+48(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
MOVQ $0, DX // c = 0
|
||||
JMP E1
|
||||
@ -27,17 +27,17 @@ L1: MOVQ (R8)(BX*8), AX
|
||||
E1: CMPQ BX, R11 // i < n
|
||||
JL L1
|
||||
|
||||
MOVQ DX, c+32(FP)
|
||||
MOVQ DX, c+56(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func subVV(z, x, y *Word, n int) (c Word)
|
||||
// func subVV(z, x, y []Word, n int) (c Word)
|
||||
// (same as addVV_s except for SBBQ instead of ADCQ and label names)
|
||||
TEXT ·subVV(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ y+16(FP), R9
|
||||
MOVL n+24(FP), R11
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ y+32(FP), R9
|
||||
MOVL n+48(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
MOVQ $0, DX // c = 0
|
||||
JMP E2
|
||||
@ -52,16 +52,16 @@ L2: MOVQ (R8)(BX*8), AX
|
||||
E2: CMPQ BX, R11 // i < n
|
||||
JL L2
|
||||
|
||||
MOVQ DX, c+32(FP)
|
||||
MOVQ DX, c+56(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func addVW(z, x *Word, y Word, n int) (c Word)
|
||||
// func addVW(z, x []Word, y Word, n int) (c Word)
|
||||
TEXT ·addVW(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ y+16(FP), AX // c = y
|
||||
MOVL n+24(FP), R11
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ y+32(FP), AX // c = y
|
||||
MOVL n+40(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
JMP E3
|
||||
|
||||
@ -74,16 +74,16 @@ L3: ADDQ (R8)(BX*8), AX
|
||||
E3: CMPQ BX, R11 // i < n
|
||||
JL L3
|
||||
|
||||
MOVQ AX, c+32(FP)
|
||||
MOVQ AX, c+48(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func subVW(z, x *Word, y Word, n int) (c Word)
|
||||
// func subVW(z, x []Word, y Word, n int) (c Word)
|
||||
TEXT ·subVW(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ y+16(FP), AX // c = y
|
||||
MOVL n+24(FP), R11
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ y+32(FP), AX // c = y
|
||||
MOVL n+40(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
JMP E4
|
||||
|
||||
@ -97,24 +97,24 @@ L4: MOVQ (R8)(BX*8), DX // TODO(gri) is there a reverse SUBQ?
|
||||
E4: CMPQ BX, R11 // i < n
|
||||
JL L4
|
||||
|
||||
MOVQ AX, c+32(FP)
|
||||
MOVQ AX, c+48(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shlVW(z, x *Word, s Word, n int) (c Word)
|
||||
// func shlVW(z, x []Word, s Word, n int) (c Word)
|
||||
TEXT ·shlVW(SB),7,$0
|
||||
MOVL n+24(FP), BX // i = n
|
||||
MOVL n+40(FP), BX // i = n
|
||||
SUBL $1, BX // i--
|
||||
JL X8b // i < 0 (n <= 0)
|
||||
|
||||
// n > 0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ s+16(FP), CX
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ s+32(FP), CX
|
||||
MOVQ (R8)(BX*8), AX // w1 = x[n-1]
|
||||
MOVQ $0, DX
|
||||
SHLQ CX, DX:AX // w1>>ŝ
|
||||
MOVQ DX, c+32(FP)
|
||||
MOVQ DX, c+48(FP)
|
||||
|
||||
CMPL BX, $0
|
||||
JLE X8a // i <= 0
|
||||
@ -132,24 +132,24 @@ X8a: SHLQ CX, AX // w1<<s
|
||||
MOVQ AX, (R10) // z[0] = w1<<s
|
||||
RET
|
||||
|
||||
X8b: MOVQ $0, c+32(FP)
|
||||
X8b: MOVQ $0, c+48(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shrVW(z, x *Word, s Word, n int) (c Word)
|
||||
// func shrVW(z, x []Word, s Word, n int) (c Word)
|
||||
TEXT ·shrVW(SB),7,$0
|
||||
MOVL n+24(FP), R11
|
||||
MOVL n+40(FP), R11
|
||||
SUBL $1, R11 // n--
|
||||
JL X9b // n < 0 (n <= 0)
|
||||
|
||||
// n > 0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ s+16(FP), CX
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ s+32(FP), CX
|
||||
MOVQ (R8), AX // w1 = x[0]
|
||||
MOVQ $0, DX
|
||||
SHRQ CX, DX:AX // w1<<ŝ
|
||||
MOVQ DX, c+32(FP)
|
||||
MOVQ DX, c+48(FP)
|
||||
|
||||
MOVQ $0, BX // i = 0
|
||||
JMP E9
|
||||
@ -169,41 +169,17 @@ X9a: SHRQ CX, AX // w1>>s
|
||||
MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s
|
||||
RET
|
||||
|
||||
X9b: MOVQ $0, c+32(FP)
|
||||
X9b: MOVQ $0, c+48(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shrVW(z, x *Word, s Word, n int) (c Word)
|
||||
TEXT ·shrVW_(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ s+16(FP), CX
|
||||
MOVL n+24(FP), BX // i = n
|
||||
MOVQ $0, AX // c = 0
|
||||
JMP E9_
|
||||
|
||||
L9_: MOVQ (R8)(BX*8), DX
|
||||
MOVQ DX, R12
|
||||
SHRQ CX, DX:AX
|
||||
MOVQ DX, (R10)(BX*8)
|
||||
MOVQ R12, AX
|
||||
|
||||
E9_: SUBL $1, BX // i--
|
||||
JGE L9_
|
||||
|
||||
MOVQ $0, DX
|
||||
SHRQ CX, DX:AX
|
||||
MOVQ DX, c+32(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
|
||||
// func mulAddVWW(z, x []Word, y, r Word, n int) (c Word)
|
||||
TEXT ·mulAddVWW(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ y+16(FP), R9
|
||||
MOVQ r+24(FP), CX // c = r
|
||||
MOVL n+32(FP), R11
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ y+32(FP), R9
|
||||
MOVQ r+40(FP), CX // c = r
|
||||
MOVL n+48(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
JMP E5
|
||||
|
||||
@ -218,16 +194,16 @@ L5: MOVQ (R8)(BX*8), AX
|
||||
E5: CMPQ BX, R11 // i < n
|
||||
JL L5
|
||||
|
||||
MOVQ CX, c+40(FP)
|
||||
MOVQ CX, c+56(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func addMulVVW(z, x *Word, y Word, n int) (c Word)
|
||||
// func addMulVVW(z, x []Word, y Word, n int) (c Word)
|
||||
TEXT ·addMulVVW(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ x+8(FP), R8
|
||||
MOVQ y+16(FP), R9
|
||||
MOVL n+24(FP), R11
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ y+32(FP), R9
|
||||
MOVL n+40(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
MOVQ $0, CX // c = 0
|
||||
JMP E6
|
||||
@ -245,17 +221,17 @@ L6: MOVQ (R8)(BX*8), AX
|
||||
E6: CMPQ BX, R11 // i < n
|
||||
JL L6
|
||||
|
||||
MOVQ CX, c+32(FP)
|
||||
MOVQ CX, c+48(FP)
|
||||
RET
|
||||
|
||||
|
||||
// divWVW(z* Word, xn Word, x *Word, y Word, n int) (r Word)
|
||||
// divWVW(z []Word, xn Word, x []Word, y Word, n int) (r Word)
|
||||
TEXT ·divWVW(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
MOVQ xn+8(FP), DX // r = xn
|
||||
MOVQ x+16(FP), R8
|
||||
MOVQ y+24(FP), R9
|
||||
MOVL n+32(FP), BX // i = n
|
||||
MOVQ xn+16(FP), DX // r = xn
|
||||
MOVQ x+24(FP), R8
|
||||
MOVQ y+40(FP), R9
|
||||
MOVL n+48(FP), BX // i = n
|
||||
JMP E7
|
||||
|
||||
L7: MOVQ (R8)(BX*8), AX
|
||||
@ -265,5 +241,5 @@ L7: MOVQ (R8)(BX*8), AX
|
||||
E7: SUBL $1, BX // i--
|
||||
JGE L7 // i >= 0
|
||||
|
||||
MOVQ DX, r+40(FP)
|
||||
MOVQ DX, r+56(FP)
|
||||
RET
|
||||
|
@ -52,15 +52,7 @@ func TestFunWW(t *testing.T) {
|
||||
}
|
||||
|
||||
|
||||
func addr(x nat) *Word {
|
||||
if len(x) == 0 {
|
||||
return nil
|
||||
}
|
||||
return &x[0]
|
||||
}
|
||||
|
||||
|
||||
type funVV func(z, x, y *Word, n int) (c Word)
|
||||
type funVV func(z, x, y []Word, n int) (c Word)
|
||||
type argVV struct {
|
||||
z, x, y nat
|
||||
c Word
|
||||
@ -82,7 +74,7 @@ var sumVV = []argVV{
|
||||
func testFunVV(t *testing.T, msg string, f funVV, a argVV) {
|
||||
n := len(a.z)
|
||||
z := make(nat, n)
|
||||
c := f(addr(z), addr(a.x), addr(a.y), n)
|
||||
c := f(z, a.x, a.y, n)
|
||||
for i, zi := range z {
|
||||
if zi != a.z[i] {
|
||||
t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i])
|
||||
@ -116,7 +108,7 @@ func TestFunVV(t *testing.T) {
|
||||
}
|
||||
|
||||
|
||||
type funVW func(z, x *Word, y Word, n int) (c Word)
|
||||
type funVW func(z, x []Word, y Word, n int) (c Word)
|
||||
type argVW struct {
|
||||
z, x nat
|
||||
y Word
|
||||
@ -181,7 +173,7 @@ var rshVW = []argVW{
|
||||
func testFunVW(t *testing.T, msg string, f funVW, a argVW) {
|
||||
n := len(a.z)
|
||||
z := make(nat, n)
|
||||
c := f(addr(z), addr(a.x), a.y, n)
|
||||
c := f(z, a.x, a.y, n)
|
||||
for i, zi := range z {
|
||||
if zi != a.z[i] {
|
||||
t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i])
|
||||
@ -219,7 +211,7 @@ func TestFunVW(t *testing.T) {
|
||||
}
|
||||
|
||||
|
||||
type funVWW func(z, x *Word, y, r Word, n int) (c Word)
|
||||
type funVWW func(z, x []Word, y, r Word, n int) (c Word)
|
||||
type argVWW struct {
|
||||
z, x nat
|
||||
y, r Word
|
||||
@ -256,7 +248,7 @@ var prodVWW = []argVWW{
|
||||
func testFunVWW(t *testing.T, msg string, f funVWW, a argVWW) {
|
||||
n := len(a.z)
|
||||
z := make(nat, n)
|
||||
c := f(addr(z), addr(a.x), a.y, a.r, n)
|
||||
c := f(z, a.x, a.y, a.r, n)
|
||||
for i, zi := range z {
|
||||
if zi != a.z[i] {
|
||||
t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i])
|
||||
@ -272,7 +264,7 @@ func testFunVWW(t *testing.T, msg string, f funVWW, a argVWW) {
|
||||
// TODO(gri) mulAddVWW and divWVW are symmetric operations but
|
||||
// their signature is not symmetric. Try to unify.
|
||||
|
||||
type funWVW func(z *Word, xn Word, x *Word, y Word, n int) (r Word)
|
||||
type funWVW func(z []Word, xn Word, x []Word, y Word, n int) (r Word)
|
||||
type argWVW struct {
|
||||
z nat
|
||||
xn Word
|
||||
@ -284,7 +276,7 @@ type argWVW struct {
|
||||
func testFunWVW(t *testing.T, msg string, f funWVW, a argWVW) {
|
||||
n := len(a.z)
|
||||
z := make(nat, n)
|
||||
r := f(addr(z), a.xn, addr(a.x), a.y, n)
|
||||
r := f(z, a.xn, a.x, a.y, n)
|
||||
for i, zi := range z {
|
||||
if zi != a.z[i] {
|
||||
t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i])
|
||||
|
@ -307,7 +307,7 @@ Error:
|
||||
// SetBytes interprets b as the bytes of a big-endian, unsigned integer and
|
||||
// sets z to that value.
|
||||
func (z *Int) SetBytes(b []byte) *Int {
|
||||
s := int(_S)
|
||||
const s = _S
|
||||
z.abs = z.abs.make((len(b) + s - 1) / s)
|
||||
|
||||
j := 0
|
||||
@ -343,7 +343,7 @@ func (z *Int) SetBytes(b []byte) *Int {
|
||||
|
||||
// Bytes returns the absolute value of x as a big-endian byte array.
|
||||
func (z *Int) Bytes() []byte {
|
||||
s := int(_S)
|
||||
const s = _S
|
||||
b := make([]byte, len(z.abs)*s)
|
||||
|
||||
for i, w := range z.abs {
|
||||
|
@ -133,9 +133,9 @@ func (z nat) add(x, y nat) nat {
|
||||
// m > 0
|
||||
|
||||
z = z.make(m)
|
||||
c := addVV(&z[0], &x[0], &y[0], n)
|
||||
c := addVV(z, x, y, n)
|
||||
if m > n {
|
||||
c = addVW(&z[n], &x[n], c, m-n)
|
||||
c = addVW(z[n:], x[n:], c, m-n)
|
||||
}
|
||||
if c > 0 {
|
||||
z = z[0 : m+1]
|
||||
@ -163,9 +163,9 @@ func (z nat) sub(x, y nat) nat {
|
||||
// m > 0
|
||||
|
||||
z = z.make(m)
|
||||
c := subVV(&z[0], &x[0], &y[0], n)
|
||||
c := subVV(z, x, y, n)
|
||||
if m > n {
|
||||
c = subVW(&z[n], &x[n], c, m-n)
|
||||
c = subVW(z[n:], x[n:], c, m-n)
|
||||
}
|
||||
if c != 0 {
|
||||
panic("underflow")
|
||||
@ -211,7 +211,7 @@ func (z nat) mulAddWW(x nat, y, r Word) nat {
|
||||
// m > 0
|
||||
|
||||
z = z.make(m)
|
||||
c := mulAddVWW(&z[0], &x[0], y, r, m)
|
||||
c := mulAddVWW(z, x, y, r, m)
|
||||
if c > 0 {
|
||||
z = z[0 : m+1]
|
||||
z[m] = c
|
||||
@ -227,7 +227,7 @@ func basicMul(z, x, y nat) {
|
||||
z[0 : len(x)+len(y)].clear() // initialize z
|
||||
for i, d := range y {
|
||||
if d != 0 {
|
||||
z[len(x)+i] = addMulVVW(&z[i], &x[0], d, len(x))
|
||||
z[len(x)+i] = addMulVVW(z[i:], x, d, len(x))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -236,16 +236,16 @@ func basicMul(z, x, y nat) {
|
||||
// Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
|
||||
// Factored out for readability - do not use outside karatsuba.
|
||||
func karatsubaAdd(z, x nat, n int) {
|
||||
if c := addVV(&z[0], &z[0], &x[0], n); c != 0 {
|
||||
addVW(&z[n], &z[n], c, n>>1)
|
||||
if c := addVV(z, z, x, n); c != 0 {
|
||||
addVW(z[n:], z[n:], c, n>>1)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Like karatsubaAdd, but does subtract.
|
||||
func karatsubaSub(z, x nat, n int) {
|
||||
if c := subVV(&z[0], &z[0], &x[0], n); c != 0 {
|
||||
subVW(&z[n], &z[n], c, n>>1)
|
||||
if c := subVV(z, z, x, n); c != 0 {
|
||||
subVW(z[n:], z[n:], c, n>>1)
|
||||
}
|
||||
}
|
||||
|
||||
@ -315,16 +315,16 @@ func karatsuba(z, x, y nat) {
|
||||
// compute xd (or the negative value if underflow occurs)
|
||||
s := 1 // sign of product xd*yd
|
||||
xd := z[2*n : 2*n+n2]
|
||||
if subVV(&xd[0], &x1[0], &x0[0], n2) != 0 { // x1-x0
|
||||
if subVV(xd, x1, x0, n2) != 0 { // x1-x0
|
||||
s = -s
|
||||
subVV(&xd[0], &x0[0], &x1[0], n2) // x0-x1
|
||||
subVV(xd, x0, x1, n2) // x0-x1
|
||||
}
|
||||
|
||||
// compute yd (or the negative value if underflow occurs)
|
||||
yd := z[2*n+n2 : 3*n]
|
||||
if subVV(&yd[0], &y0[0], &y1[0], n2) != 0 { // y0-y1
|
||||
if subVV(yd, y0, y1, n2) != 0 { // y0-y1
|
||||
s = -s
|
||||
subVV(&yd[0], &y1[0], &y0[0], n2) // y1-y0
|
||||
subVV(yd, y1, y0, n2) // y1-y0
|
||||
}
|
||||
|
||||
// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
|
||||
@ -366,10 +366,10 @@ func alias(x, y nat) bool {
|
||||
// slice, and we don't need to normalize z after each addition)
|
||||
func addAt(z, x nat, i int) {
|
||||
if n := len(x); n > 0 {
|
||||
if c := addVV(&z[i], &z[i], &x[0], n); c != 0 {
|
||||
if c := addVV(z[i:], z[i:], x, n); c != 0 {
|
||||
j := i + n
|
||||
if j < len(z) {
|
||||
addVW(&z[j], &z[j], c, len(z)-j)
|
||||
addVW(z[j:], z[j:], c, len(z)-j)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -500,7 +500,7 @@ func (z nat) divW(x nat, y Word) (q nat, r Word) {
|
||||
}
|
||||
// m > 0
|
||||
z = z.make(m)
|
||||
r = divWVW(&z[0], 0, &x[0], y, m)
|
||||
r = divWVW(z, 0, x, y, m)
|
||||
q = z.norm()
|
||||
return
|
||||
}
|
||||
@ -553,8 +553,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
||||
|
||||
// D1.
|
||||
shift := Word(leadingZeros(v[n-1]))
|
||||
shlVW(&v[0], &v[0], shift, n)
|
||||
u[len(uIn)] = shlVW(&u[0], &uIn[0], shift, len(uIn))
|
||||
shlVW(v, v, shift, n)
|
||||
u[len(uIn)] = shlVW(u, uIn, shift, len(uIn))
|
||||
|
||||
// D2.
|
||||
for j := m; j >= 0; j-- {
|
||||
@ -582,11 +582,11 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
||||
}
|
||||
|
||||
// D4.
|
||||
qhatv[n] = mulAddVWW(&qhatv[0], &v[0], qhat, 0, n)
|
||||
qhatv[n] = mulAddVWW(qhatv, v, qhat, 0, n)
|
||||
|
||||
c := subVV(&u[j], &u[j], &qhatv[0], len(qhatv))
|
||||
c := subVV(u[j:], u[j:], qhatv, len(qhatv))
|
||||
if c != 0 {
|
||||
c := addVV(&u[j], &u[j], &v[0], n)
|
||||
c := addVV(u[j:], u[j:], v, n)
|
||||
u[j+n] += c
|
||||
qhat--
|
||||
}
|
||||
@ -595,8 +595,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
||||
}
|
||||
|
||||
q = q.norm()
|
||||
shrVW(&u[0], &u[0], shift, len(u))
|
||||
shrVW(&v[0], &v[0], shift, n)
|
||||
shrVW(u, u, shift, len(u))
|
||||
shrVW(v, v, shift, n)
|
||||
r = u.norm()
|
||||
|
||||
return q, r
|
||||
@ -756,7 +756,7 @@ func (z nat) shl(x nat, s uint) nat {
|
||||
|
||||
n := m + int(s/_W)
|
||||
z = z.make(n + 1)
|
||||
z[n] = shlVW(&z[n-m], &x[0], Word(s%_W), m)
|
||||
z[n] = shlVW(z[n-m:], x, Word(s%_W), m)
|
||||
z[0 : n-m].clear()
|
||||
|
||||
return z.norm()
|
||||
@ -773,7 +773,7 @@ func (z nat) shr(x nat, s uint) nat {
|
||||
// n > 0
|
||||
|
||||
z = z.make(n)
|
||||
shrVW(&z[0], &x[m-n], Word(s%_W), n)
|
||||
shrVW(z, x[m-n:], Word(s%_W), n)
|
||||
|
||||
return z.norm()
|
||||
}
|
||||
@ -863,7 +863,7 @@ func (x nat) modW(d Word) (r Word) {
|
||||
// TODO(agl): we don't actually need to store the q value.
|
||||
var q nat
|
||||
q = q.make(len(x))
|
||||
return divWVW(&q[0], 0, &x[0], d, len(x))
|
||||
return divWVW(q, 0, x, d, len(x))
|
||||
}
|
||||
|
||||
|
||||
@ -882,7 +882,7 @@ func (n nat) powersOfTwoDecompose() (q nat, k Word) {
|
||||
x := trailingZeroBits(n[zeroWords])
|
||||
|
||||
q = q.make(len(n) - zeroWords)
|
||||
shrVW(&q[0], &n[zeroWords], Word(x), len(q))
|
||||
shrVW(q, n[zeroWords:], Word(x), len(q))
|
||||
q = q.norm()
|
||||
|
||||
k = Word(_W*zeroWords + x)
|
||||
|
Loading…
Reference in New Issue
Block a user