mirror of
https://github.com/golang/go
synced 2024-11-22 06:54:39 -07:00
big: potential bug fix, cleanups
- implemented setWord, use it where setUint64 is wrong - divLarge: use fast mulWW, divWW; implemented mulWW, divWW - better assembly code for addMulVVW R=rsc CC=golang-dev https://golang.org/cl/1258042
This commit is contained in:
parent
5b8117b98a
commit
a688eb6ee4
@ -56,6 +56,7 @@ func subWW_g(x, y, c Word) (z1, z0 Word) {
|
|||||||
|
|
||||||
|
|
||||||
// z1<<_W + z0 = x*y
|
// z1<<_W + z0 = x*y
|
||||||
|
func mulWW(x, y Word) (z1, z0 Word)
|
||||||
func mulWW_g(x, y Word) (z1, z0 Word) {
|
func mulWW_g(x, y Word) (z1, z0 Word) {
|
||||||
// Split x and y into 2 halfWords each, multiply
|
// Split x and y into 2 halfWords each, multiply
|
||||||
// the halfWords separately while avoiding overflow,
|
// the halfWords separately while avoiding overflow,
|
||||||
@ -242,6 +243,7 @@ func leadingZeros(x Word) uint {
|
|||||||
|
|
||||||
|
|
||||||
// q = (x1<<_W + x0 - r)/y
|
// q = (x1<<_W + x0 - r)/y
|
||||||
|
func divWW(x1, x0, y Word) (q, r Word)
|
||||||
func divWW_g(x1, x0, y Word) (q, r Word) {
|
func divWW_g(x1, x0, y Word) (q, r Word) {
|
||||||
if x1 == 0 {
|
if x1 == 0 {
|
||||||
q, r = x0/y, x0%y
|
q, r = x0/y, x0%y
|
||||||
|
@ -5,6 +5,25 @@
|
|||||||
// This file provides fast assembly versions for the elementary
|
// This file provides fast assembly versions for the elementary
|
||||||
// arithmetic operations on vectors implemented in arith.go.
|
// arithmetic operations on vectors implemented in arith.go.
|
||||||
|
|
||||||
|
// func mulWW(x, y Word) (z1, z0 Word)
|
||||||
|
TEXT ·mulWW(SB),7,$0
|
||||||
|
MOVL x+0(FP), AX
|
||||||
|
MULL y+4(FP)
|
||||||
|
MOVL DX, z1+8(FP)
|
||||||
|
MOVL AX, z0+12(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
|
||||||
|
// func divWW(x1, x0, y Word) (q, r Word)
|
||||||
|
TEXT ·divWW(SB),7,$0
|
||||||
|
MOVL x1+0(FP), DX
|
||||||
|
MOVL x0+4(FP), AX
|
||||||
|
DIVL y+8(FP)
|
||||||
|
MOVL AX, q+12(FP)
|
||||||
|
MOVL DX, r+16(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
|
||||||
// func addVV(z, x, y []Word) (c Word)
|
// func addVV(z, x, y []Word) (c Word)
|
||||||
TEXT ·addVV(SB),7,$0
|
TEXT ·addVV(SB),7,$0
|
||||||
MOVL z+0(FP), DI
|
MOVL z+0(FP), DI
|
||||||
@ -212,11 +231,10 @@ TEXT ·addMulVVW(SB),7,$0
|
|||||||
|
|
||||||
L6: MOVL (SI)(BX*4), AX
|
L6: MOVL (SI)(BX*4), AX
|
||||||
MULL BP
|
MULL BP
|
||||||
ADDL (DI)(BX*4), AX
|
|
||||||
ADCL $0, DX
|
|
||||||
ADDL CX, AX
|
ADDL CX, AX
|
||||||
ADCL $0, DX
|
ADCL $0, DX
|
||||||
MOVL AX, (DI)(BX*4)
|
ADDL AX, (DI)(BX*4)
|
||||||
|
ADCL $0, DX
|
||||||
MOVL DX, CX
|
MOVL DX, CX
|
||||||
ADDL $1, BX // i++
|
ADDL $1, BX // i++
|
||||||
|
|
||||||
|
@ -7,6 +7,25 @@
|
|||||||
|
|
||||||
// TODO(gri) - experiment with unrolled loops for faster execution
|
// TODO(gri) - experiment with unrolled loops for faster execution
|
||||||
|
|
||||||
|
// func mulWW(x, y Word) (z1, z0 Word)
|
||||||
|
TEXT ·mulWW(SB),7,$0
|
||||||
|
MOVQ x+0(FP), AX
|
||||||
|
MULQ y+8(FP)
|
||||||
|
MOVQ DX, z1+16(FP)
|
||||||
|
MOVQ AX, z0+24(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
|
||||||
|
// func divWW(x1, x0, y Word) (q, r Word)
|
||||||
|
TEXT ·divWW(SB),7,$0
|
||||||
|
MOVQ x1+0(FP), DX
|
||||||
|
MOVQ x0+8(FP), AX
|
||||||
|
DIVQ y+16(FP)
|
||||||
|
MOVQ AX, q+24(FP)
|
||||||
|
MOVQ DX, r+32(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
|
||||||
// func addVV(z, x, y []Word) (c Word)
|
// func addVV(z, x, y []Word) (c Word)
|
||||||
TEXT ·addVV(SB),7,$0
|
TEXT ·addVV(SB),7,$0
|
||||||
MOVQ z+0(FP), R10
|
MOVQ z+0(FP), R10
|
||||||
@ -210,11 +229,10 @@ TEXT ·addMulVVW(SB),7,$0
|
|||||||
|
|
||||||
L6: MOVQ (R8)(BX*8), AX
|
L6: MOVQ (R8)(BX*8), AX
|
||||||
MULQ R9
|
MULQ R9
|
||||||
ADDQ (R10)(BX*8), AX
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ CX, AX
|
ADDQ CX, AX
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
MOVQ AX, (R10)(BX*8)
|
ADDQ AX, (R10)(BX*8)
|
||||||
|
ADCQ $0, DX
|
||||||
MOVQ DX, CX
|
MOVQ DX, CX
|
||||||
ADDL $1, BX // i++
|
ADDL $1, BX // i++
|
||||||
|
|
||||||
|
@ -69,16 +69,20 @@ func (z nat) make(n int) nat {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func (z nat) setUint64(x uint64) nat {
|
func (z nat) setWord(x Word) nat {
|
||||||
if x == 0 {
|
if x == 0 {
|
||||||
return z.make(0)
|
return z.make(0)
|
||||||
}
|
}
|
||||||
|
z = z.make(1)
|
||||||
|
z[0] = x
|
||||||
|
return z
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func (z nat) setUint64(x uint64) nat {
|
||||||
// single-digit values
|
// single-digit values
|
||||||
if x == uint64(Word(x)) {
|
if w := Word(x); uint64(w) == x {
|
||||||
z = z.make(1)
|
return z.setWord(w)
|
||||||
z[0] = Word(x)
|
|
||||||
return z
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute number of words n required to represent x
|
// compute number of words n required to represent x
|
||||||
@ -194,7 +198,7 @@ func (x nat) cmp(y nat) (r int) {
|
|||||||
func (z nat) mulAddWW(x nat, y, r Word) nat {
|
func (z nat) mulAddWW(x nat, y, r Word) nat {
|
||||||
m := len(x)
|
m := len(x)
|
||||||
if m == 0 || y == 0 {
|
if m == 0 || y == 0 {
|
||||||
return z.setUint64(uint64(r)) // result is r
|
return z.setWord(r) // result is r
|
||||||
}
|
}
|
||||||
// m > 0
|
// m > 0
|
||||||
|
|
||||||
@ -529,6 +533,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
|||||||
m := len(uIn) - n
|
m := len(uIn) - n
|
||||||
|
|
||||||
// determine if z can be reused
|
// determine if z can be reused
|
||||||
|
// TODO(gri) should find a better solution - this if statement
|
||||||
|
// is very costly (see e.g. time pidigits -s -n 10000)
|
||||||
if alias(z, uIn) || alias(z, v) {
|
if alias(z, uIn) || alias(z, v) {
|
||||||
z = nil // z is an alias for uIn or v - cannot reuse
|
z = nil // z is an alias for uIn or v - cannot reuse
|
||||||
}
|
}
|
||||||
@ -549,15 +555,13 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
|||||||
// D2.
|
// D2.
|
||||||
for j := m; j >= 0; j-- {
|
for j := m; j >= 0; j-- {
|
||||||
// D3.
|
// D3.
|
||||||
var qhat Word
|
qhat := Word(_M)
|
||||||
if u[j+n] == v[n-1] {
|
if u[j+n] != v[n-1] {
|
||||||
qhat = _B - 1
|
|
||||||
} else {
|
|
||||||
var rhat Word
|
var rhat Word
|
||||||
qhat, rhat = divWW_g(u[j+n], u[j+n-1], v[n-1])
|
qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
|
||||||
|
|
||||||
// x1 | x2 = q̂v_{n-2}
|
// x1 | x2 = q̂v_{n-2}
|
||||||
x1, x2 := mulWW_g(qhat, v[n-2])
|
x1, x2 := mulWW(qhat, v[n-2])
|
||||||
// test if q̂v_{n-2} > br̂ + u_{j+n-2}
|
// test if q̂v_{n-2} > br̂ + u_{j+n-2}
|
||||||
for greaterThan(x1, x2, rhat, u[j+n-2]) {
|
for greaterThan(x1, x2, rhat, u[j+n-2]) {
|
||||||
qhat--
|
qhat--
|
||||||
@ -567,7 +571,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
|||||||
if rhat < prevRhat {
|
if rhat < prevRhat {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
x1, x2 = mulWW_g(qhat, v[n-2])
|
x1, x2 = mulWW(qhat, v[n-2])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user