From a688eb6ee462e1b1d223426d76c404f6522502b6 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Wed, 19 May 2010 09:36:50 -0700 Subject: [PATCH] big: potential bug fix, cleanups - implemented setWord, use it where setUint64 is wrong - divLarge: use fast mulWW, divWW; implemented mulWW, divWW - better assembly code for addMulVVW R=rsc CC=golang-dev https://golang.org/cl/1258042 --- src/pkg/big/arith.go | 2 ++ src/pkg/big/arith_386.s | 24 +++++++++++++++++++++--- src/pkg/big/arith_amd64.s | 24 +++++++++++++++++++++--- src/pkg/big/nat.go | 30 +++++++++++++++++------------- 4 files changed, 61 insertions(+), 19 deletions(-) diff --git a/src/pkg/big/arith.go b/src/pkg/big/arith.go index 52bb3e165d5..a5e0dec68ec 100644 --- a/src/pkg/big/arith.go +++ b/src/pkg/big/arith.go @@ -56,6 +56,7 @@ func subWW_g(x, y, c Word) (z1, z0 Word) { // z1<<_W + z0 = x*y +func mulWW(x, y Word) (z1, z0 Word) func mulWW_g(x, y Word) (z1, z0 Word) { // Split x and y into 2 halfWords each, multiply // the halfWords separately while avoiding overflow, @@ -242,6 +243,7 @@ func leadingZeros(x Word) uint { // q = (x1<<_W + x0 - r)/y +func divWW(x1, x0, y Word) (q, r Word) func divWW_g(x1, x0, y Word) (q, r Word) { if x1 == 0 { q, r = x0/y, x0%y diff --git a/src/pkg/big/arith_386.s b/src/pkg/big/arith_386.s index 08eb5d4d50f..21521635b26 100644 --- a/src/pkg/big/arith_386.s +++ b/src/pkg/big/arith_386.s @@ -5,6 +5,25 @@ // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),7,$0 + MOVL x+0(FP), AX + MULL y+4(FP) + MOVL DX, z1+8(FP) + MOVL AX, z0+12(FP) + RET + + +// func divWW(x1, x0, y Word) (q, r Word) +TEXT ·divWW(SB),7,$0 + MOVL x1+0(FP), DX + MOVL x0+4(FP), AX + DIVL y+8(FP) + MOVL AX, q+12(FP) + MOVL DX, r+16(FP) + RET + + // func addVV(z, x, y []Word) (c Word) TEXT ·addVV(SB),7,$0 MOVL z+0(FP), DI @@ -212,11 +231,10 @@ TEXT ·addMulVVW(SB),7,$0 L6: MOVL (SI)(BX*4), AX MULL BP - ADDL (DI)(BX*4), AX - ADCL $0, DX ADDL CX, AX ADCL $0, DX - MOVL AX, (DI)(BX*4) + ADDL AX, (DI)(BX*4) + ADCL $0, DX MOVL DX, CX ADDL $1, BX // i++ diff --git a/src/pkg/big/arith_amd64.s b/src/pkg/big/arith_amd64.s index 1dd95ec5341..c740565a726 100644 --- a/src/pkg/big/arith_amd64.s +++ b/src/pkg/big/arith_amd64.s @@ -7,6 +7,25 @@ // TODO(gri) - experiment with unrolled loops for faster execution +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),7,$0 + MOVQ x+0(FP), AX + MULQ y+8(FP) + MOVQ DX, z1+16(FP) + MOVQ AX, z0+24(FP) + RET + + +// func divWW(x1, x0, y Word) (q, r Word) +TEXT ·divWW(SB),7,$0 + MOVQ x1+0(FP), DX + MOVQ x0+8(FP), AX + DIVQ y+16(FP) + MOVQ AX, q+24(FP) + MOVQ DX, r+32(FP) + RET + + // func addVV(z, x, y []Word) (c Word) TEXT ·addVV(SB),7,$0 MOVQ z+0(FP), R10 @@ -210,11 +229,10 @@ TEXT ·addMulVVW(SB),7,$0 L6: MOVQ (R8)(BX*8), AX MULQ R9 - ADDQ (R10)(BX*8), AX - ADCQ $0, DX ADDQ CX, AX ADCQ $0, DX - MOVQ AX, (R10)(BX*8) + ADDQ AX, (R10)(BX*8) + ADCQ $0, DX MOVQ DX, CX ADDL $1, BX // i++ diff --git a/src/pkg/big/nat.go b/src/pkg/big/nat.go index 668a626891d..b098937307b 100755 --- a/src/pkg/big/nat.go +++ b/src/pkg/big/nat.go @@ -69,16 +69,20 @@ func (z nat) make(n int) nat { } -func (z nat) setUint64(x uint64) nat { +func (z nat) setWord(x Word) nat { if x == 0 { return z.make(0) } + z = z.make(1) + z[0] = x + return z +} + +func (z nat) setUint64(x uint64) nat { // single-digit values - if x == uint64(Word(x)) { - z = z.make(1) - z[0] = Word(x) - return z + if w := Word(x); uint64(w) == x { + return z.setWord(w) } // compute number of words n required to represent x @@ -194,7 +198,7 @@ func (x nat) cmp(y nat) (r int) { func (z nat) mulAddWW(x nat, y, r Word) nat { m := len(x) if m == 0 || y == 0 { - return z.setUint64(uint64(r)) // result is r + return z.setWord(r) // result is r } // m > 0 @@ -529,6 +533,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { m := len(uIn) - n // determine if z can be reused + // TODO(gri) should find a better solution - this if statement + // is very costly (see e.g. time pidigits -s -n 10000) if alias(z, uIn) || alias(z, v) { z = nil // z is an alias for uIn or v - cannot reuse } @@ -549,15 +555,13 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { // D2. for j := m; j >= 0; j-- { // D3. - var qhat Word - if u[j+n] == v[n-1] { - qhat = _B - 1 - } else { + qhat := Word(_M) + if u[j+n] != v[n-1] { var rhat Word - qhat, rhat = divWW_g(u[j+n], u[j+n-1], v[n-1]) + qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1]) // x1 | x2 = q̂v_{n-2} - x1, x2 := mulWW_g(qhat, v[n-2]) + x1, x2 := mulWW(qhat, v[n-2]) // test if q̂v_{n-2} > br̂ + u_{j+n-2} for greaterThan(x1, x2, rhat, u[j+n-2]) { qhat-- @@ -567,7 +571,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) { if rhat < prevRhat { break } - x1, x2 = mulWW_g(qhat, v[n-2]) + x1, x2 = mulWW(qhat, v[n-2]) } }