mirror of
https://github.com/golang/go
synced 2024-11-24 21:00:09 -07:00
math/big: simplify fast string conversion
- use slice ops for convertWords instead of lo/hi boundaries
- always compute leading zeroes (simplifies logic significantly),
  but remove them once, at the end (since leafSize is small, the
  worst-case scenario is not adding significant overhead)
- various comment cleanups (specifically, replaced direct -> iterative,
  and indirect -> recursive)
- slightly faster overall for -bench=String

(This CL incorporates the changes re: my comments to CL 5418047
https://golang.org/cl/5418047/ )

benchmark                         old ns/op   new ns/op    delta
big.BenchmarkString10Base2              519         527   +1.54%
big.BenchmarkString100Base2            2279        2158   -5.31%
big.BenchmarkString1000Base2          18475       17323   -6.24%
big.BenchmarkString10000Base2        178248      166219   -6.75%
big.BenchmarkString100000Base2      1548494     1431587   -7.55%
big.BenchmarkString10Base8              415         422   +1.69%
big.BenchmarkString100Base8            1025         978   -4.59%
big.BenchmarkString1000Base8           6822        6428   -5.78%
big.BenchmarkString10000Base8         64598       61065   -5.47%
big.BenchmarkString100000Base8       593788      549150   -7.52%
big.BenchmarkString10Base10             654         645   -1.38%
big.BenchmarkString100Base10           1863        1835   -1.50%
big.BenchmarkString1000Base10         12099       11981   -0.98%
big.BenchmarkString10000Base10        57601       56888   -1.24%
big.BenchmarkString100000Base10    20123120    19827890   -1.47%
big.BenchmarkString10Base16             358         362   +1.12%
big.BenchmarkString100Base16            815         776   -4.79%
big.BenchmarkString1000Base16          4710        4421   -6.14%
big.BenchmarkString10000Base16        43938       40968   -6.76%
big.BenchmarkString100000Base16      406307      373930   -7.97%

R=michael.jones, mtj
CC=golang-dev
https://golang.org/cl/5432090
This commit is contained in:
parent
834830d2bb
commit
b4be65bc7f
@ -715,13 +715,13 @@ func (x nat) decimalString() string {
|
||||
|
||||
// string converts x to a string using digits from a charset; a digit with
|
||||
// value d is represented by charset[d]. The conversion base is determined
|
||||
// by len(charset), which must be >= 2.
|
||||
// by len(charset), which must be >= 2 and <= 256.
|
||||
func (x nat) string(charset string) string {
|
||||
b := Word(len(charset))
|
||||
|
||||
// special cases
|
||||
switch {
|
||||
case b < 2 || MaxBase < b:
|
||||
case b < 2 || MaxBase > 256:
|
||||
panic("illegal base")
|
||||
case len(x) == 0:
|
||||
return string(charset[0])
|
||||
@ -773,49 +773,59 @@ func (x nat) string(charset string) string {
|
||||
w >>= shift
|
||||
nbits -= shift
|
||||
}
|
||||
|
||||
} else {
|
||||
// determine "big base" as in 10^19 for 19 decimal digits in a 64 bit Word
|
||||
bb := Word(1) // big base is b**ndigits
|
||||
ndigits := 0 // number of base b digits
|
||||
// determine "big base"; i.e., the largest possible value bb
|
||||
// that is a power of base b and still fits into a Word
|
||||
// (as in 10^19 for 19 decimal digits in a 64bit Word)
|
||||
bb := b // big base is b**ndigits
|
||||
ndigits := 1 // number of base b digits
|
||||
for max := Word(_M / b); bb <= max; bb *= b {
|
||||
ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M
|
||||
}
|
||||
|
||||
// construct table of successive squares of bb**leafSize to use in subdivisions
|
||||
// result (table != nil) <=> (len(x) > leafSize > 0)
|
||||
table := divisors(len(x), b, ndigits, bb)
|
||||
|
||||
// preserve x, create local copy for use in divisions
|
||||
// preserve x, create local copy for use by convertWords
|
||||
q := nat(nil).set(x)
|
||||
|
||||
// convert q to string s in base b with index of MSD indicated by return value
|
||||
i = q.convertWords(0, i, s, charset, b, ndigits, bb, table)
|
||||
// convert q to string s in base b
|
||||
q.convertWords(s, charset, b, ndigits, bb, table)
|
||||
|
||||
// strip leading zeros
|
||||
// (x != 0; thus s must contain at least one non-zero digit
|
||||
// and the loop will terminate)
|
||||
i = 0
|
||||
for zero := charset[0]; s[i] == zero; {
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
return string(s[i:])
|
||||
}
|
||||
|
||||
// Convert words of q to base b digits in s directly using iterated nat/Word division to extract
|
||||
// low-order Words and indirectly by recursive subdivision and nat/nat division by tabulated
|
||||
// divisors.
|
||||
// Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
|
||||
// by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
|
||||
// repeated nat/Word division.
|
||||
//
|
||||
// The direct method processes n Words by n divW() calls, each of which visits every Word in the
|
||||
// The iterative method processes n Words by n divW() calls, each of which visits every Word in the
|
||||
// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
|
||||
// Indirect conversion divides q by its approximate square root, yielding two parts, each half
|
||||
// the size of q. Using the direct method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s plus
|
||||
// the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and is
|
||||
// made better by splitting the subblocks recursively. Best is to split blocks until one more
|
||||
// Recursive conversion divides q by its approximate square root, yielding two parts, each half
|
||||
// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
|
||||
// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
|
||||
// is made better by splitting the subblocks recursively. Best is to split blocks until one more
|
||||
// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
|
||||
// direct approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
|
||||
// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
|
||||
// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
|
||||
// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
|
||||
// specific hardware.
|
||||
//
|
||||
// lo and hi index character array s. conversion starts with the LSD at hi and moves down toward
|
||||
// the MSD, which will be at s[0] or s[1]. lo == 0 signals span includes the most significant word.
|
||||
//
|
||||
func (q nat) convertWords(lo, hi int, s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) int {
|
||||
// indirect conversion: split larger blocks to reduce quadratic expense of iterated nat/W division
|
||||
if leafSize > 0 && len(q) > leafSize && table != nil {
|
||||
func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) {
|
||||
// split larger blocks recursively
|
||||
if table != nil {
|
||||
// len(q) > leafSize > 0
|
||||
var r nat
|
||||
index := len(table) - 1
|
||||
for len(q) > leafSize {
|
||||
@ -835,72 +845,52 @@ func (q nat) convertWords(lo, hi int, s []byte, charset string, b Word, ndigits
|
||||
// split q into the two digit number (q'*bbb + r) to form independent subblocks
|
||||
q, r = q.div(r, q, table[index].bbb)
|
||||
|
||||
// convert subblocks and collect results in s[lo:partition] and s[partition:hi]
|
||||
partition := hi - table[index].ndigits
|
||||
r.convertWords(partition, hi, s, charset, b, ndigits, bb, table[0:index])
|
||||
hi = partition // i.e., q.convertWords(lo, partition, s, charset, b, ndigits, bb, table[0:index+1])
|
||||
// convert subblocks and collect results in s[:h] and s[h:]
|
||||
h := len(s) - table[index].ndigits
|
||||
r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index])
|
||||
s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1])
|
||||
}
|
||||
} // having split any large blocks now process the remaining small block
|
||||
}
|
||||
|
||||
// direct conversion: process smaller blocks monolithically to avoid overhead of nat/nat division
|
||||
// having split any large blocks now process the remaining (small) block iteratively
|
||||
i := len(s)
|
||||
var r Word
|
||||
if b == 10 { // hard-coding for 10 here speeds this up by 1.25x (allows mod as mul vs div)
|
||||
if b == 10 {
|
||||
// hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
|
||||
for len(q) > 0 {
|
||||
// extract least significant, base bb "digit"
|
||||
q, r = q.divW(q, bb)
|
||||
if lo == 0 && len(q) == 0 {
|
||||
// skip leading zeros in most-significant group of digits
|
||||
for j := 0; j < ndigits && r != 0; j++ {
|
||||
hi--
|
||||
t := r / 10
|
||||
s[hi] = charset[r-(t<<3+t<<1)] // 8*t + 2*t = 10*t; r - 10*int(r/10) = r mod 10
|
||||
r = t
|
||||
}
|
||||
} else {
|
||||
for j := 0; j < ndigits && hi > lo; j++ {
|
||||
hi--
|
||||
t := r / 10
|
||||
s[hi] = charset[r-(t<<3+t<<1)] // 8*t + 2*t = 10*t; r - 10*int(r/10) = r mod 10
|
||||
r = t
|
||||
}
|
||||
for j := 0; j < ndigits && i > 0; j++ {
|
||||
i--
|
||||
// avoid % computation since r%10 == r - int(r/10)*10;
|
||||
// this appears to be faster for BenchmarkString10000Base10
|
||||
// and smaller strings (but a bit slower for larger ones)
|
||||
t := r / 10
|
||||
s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code
|
||||
r = t
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for len(q) > 0 {
|
||||
// extract least significant group of digits
|
||||
// extract least significant, base bb "digit"
|
||||
q, r = q.divW(q, bb)
|
||||
if lo == 0 && len(q) == 0 {
|
||||
// skip leading zeros in most-significant group of digits
|
||||
for j := 0; j < ndigits && r != 0; j++ {
|
||||
hi--
|
||||
s[hi] = charset[r%b]
|
||||
r = r / b
|
||||
}
|
||||
} else {
|
||||
for j := 0; j < ndigits && hi > lo; j++ {
|
||||
hi--
|
||||
s[hi] = charset[r%b]
|
||||
r = r / b
|
||||
}
|
||||
for j := 0; j < ndigits && i > 0; j++ {
|
||||
i--
|
||||
s[i] = charset[r%b]
|
||||
r /= b
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// prepend high-order zeroes when q has been normalized to a short number of Words.
|
||||
// however, do not prepend zeroes when converting the most significant digits.
|
||||
if lo != 0 { // if not MSD
|
||||
zero := charset[0]
|
||||
for hi > lo { // while need more leading zeroes
|
||||
hi--
|
||||
s[hi] = zero
|
||||
}
|
||||
// prepend high-order zeroes
|
||||
zero := charset[0]
|
||||
for i > 0 { // while need more leading zeroes
|
||||
i--
|
||||
s[i] = zero
|
||||
}
|
||||
|
||||
// return index of most significant output digit in s[] (stored in lowest index)
|
||||
return hi
|
||||
}
|
||||
|
||||
// Split blocks greater than leafSize Words (or set to 0 to disable indirect conversion)
|
||||
// Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion)
|
||||
// Benchmark and configure leafSize using: gotest -test.bench="Leaf"
|
||||
// 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
|
||||
// 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
|
||||
@ -912,26 +902,30 @@ type divisor struct {
|
||||
ndigits int // digit length of divisor in terms of output base digits
|
||||
}
|
||||
|
||||
const maxCache = 64 // maximum number of divisors in a single table
|
||||
var cacheBase10 [maxCache]divisor // cached divisors for base 10
|
||||
var cacheLock sync.Mutex // defense against concurrent table extensions
|
||||
var cacheBase10 [64]divisor // cached divisors for base 10
|
||||
var cacheLock sync.Mutex // protects cacheBase10
|
||||
|
||||
// expWW computes x**y
|
||||
func (z nat) expWW(x, y Word) nat {
|
||||
return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil)
|
||||
}
|
||||
|
||||
// construct table of powers of bb**leafSize to use in subdivisions
|
||||
func divisors(m int, b Word, ndigits int, bb Word) []divisor {
|
||||
// only build table when indirect conversion is enabled and x is large
|
||||
// only compute table when recursive conversion is enabled and x is large
|
||||
if leafSize == 0 || m <= leafSize {
|
||||
return nil
|
||||
}
|
||||
|
||||
// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
|
||||
k := 1
|
||||
for words := leafSize; words < m>>1 && k < maxCache; words <<= 1 {
|
||||
for words := leafSize; words < m>>1 && k < len(cacheBase10); words <<= 1 {
|
||||
k++
|
||||
}
|
||||
|
||||
// create new table of divisors or extend and reuse existing table as appropriate
|
||||
var cached bool
|
||||
var table []divisor
|
||||
var cached bool
|
||||
switch b {
|
||||
case 10:
|
||||
table = cacheBase10[0:k] // reuse old table for this conversion
|
||||
@ -946,28 +940,27 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor {
|
||||
cacheLock.Lock() // begin critical section
|
||||
}
|
||||
|
||||
var i int
|
||||
// add new entries as needed
|
||||
var larger nat
|
||||
for i < k && table[i].ndigits != 0 { // skip existing entries
|
||||
i++
|
||||
}
|
||||
for ; i < k; i++ { // add new entries
|
||||
if i == 0 {
|
||||
table[i].bbb = nat(nil).expWW(bb, Word(leafSize))
|
||||
table[i].ndigits = ndigits * leafSize
|
||||
} else {
|
||||
table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
|
||||
table[i].ndigits = 2 * table[i-1].ndigits
|
||||
}
|
||||
for i := 0; i < k; i++ {
|
||||
if table[i].ndigits == 0 {
|
||||
if i == 0 {
|
||||
table[i].bbb = nat(nil).expWW(bb, Word(leafSize))
|
||||
table[i].ndigits = ndigits * leafSize
|
||||
} else {
|
||||
table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
|
||||
table[i].ndigits = 2 * table[i-1].ndigits
|
||||
}
|
||||
|
||||
// optimization: exploit aggregated extra bits in macro blocks
|
||||
larger = nat(nil).set(table[i].bbb)
|
||||
for mulAddVWW(larger, larger, b, 0) == 0 {
|
||||
table[i].bbb = table[i].bbb.set(larger)
|
||||
table[i].ndigits++
|
||||
}
|
||||
// optimization: exploit aggregated extra bits in macro blocks
|
||||
larger = nat(nil).set(table[i].bbb)
|
||||
for mulAddVWW(larger, larger, b, 0) == 0 {
|
||||
table[i].bbb = table[i].bbb.set(larger)
|
||||
table[i].ndigits++
|
||||
}
|
||||
|
||||
table[i].nbits = table[i].bbb.bitLen()
|
||||
table[i].nbits = table[i].bbb.bitLen()
|
||||
}
|
||||
}
|
||||
|
||||
if cached {
|
||||
@ -1295,11 +1288,6 @@ func (z nat) expNN(x, y, m nat) nat {
|
||||
return z.norm()
|
||||
}
|
||||
|
||||
// calculate x**y for Word arguments x and y
|
||||
func (z nat) expWW(x, y Word) nat {
|
||||
return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil)
|
||||
}
|
||||
|
||||
// probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
|
||||
// If it returns true, n is prime with probability 1 - 1/4^reps.
|
||||
// If it returns false, n is not prime.
|
||||
|
Loading…
Reference in New Issue
Block a user