From 4c8023bfed57276498427ce17235c1bfe210d611 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 16 Mar 2017 19:19:29 +0200 Subject: [PATCH] strconv: optimize decimal ints formatting with smallsString MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark results for GOARCH=amd64: name old time/op new time/op delta FormatInt-4 2.51µs ± 2% 2.40µs ± 2% -4.51% (p=0.000 n=9+10) AppendInt-4 1.67µs ± 2% 1.61µs ± 3% -3.74% (p=0.000 n=9+9) FormatUint-4 698ns ± 2% 643ns ± 3% -7.95% (p=0.000 n=10+8) AppendUint-4 478ns ± 1% 418ns ± 2% -12.61% (p=0.000 n=8+10) AppendUintVarlen/1-4 9.30ns ± 6% 9.15ns ± 1% ~ (p=0.199 n=9+10) AppendUintVarlen/12-4 9.12ns ± 0% 9.16ns ± 2% ~ (p=0.307 n=9+9) AppendUintVarlen/123-4 18.6ns ± 2% 18.7ns ± 0% ~ (p=0.091 n=10+6) AppendUintVarlen/1234-4 19.1ns ± 4% 17.7ns ± 1% -7.35% (p=0.000 n=10+9) AppendUintVarlen/12345-4 21.5ns ± 3% 20.7ns ± 3% -3.78% (p=0.002 n=9+10) AppendUintVarlen/123456-4 23.5ns ± 3% 20.9ns ± 1% -11.14% (p=0.000 n=10+9) AppendUintVarlen/1234567-4 25.0ns ± 2% 23.6ns ± 7% -5.48% (p=0.004 n=9+10) AppendUintVarlen/12345678-4 26.8ns ± 2% 23.4ns ± 2% -12.79% (p=0.000 n=9+10) AppendUintVarlen/123456789-4 29.8ns ± 3% 26.5ns ± 5% -11.03% (p=0.000 n=10+10) AppendUintVarlen/1234567890-4 31.6ns ± 3% 26.9ns ± 3% -14.95% (p=0.000 n=10+9) AppendUintVarlen/12345678901-4 33.8ns ± 3% 29.3ns ± 5% -13.21% (p=0.000 n=10+10) AppendUintVarlen/123456789012-4 35.5ns ± 4% 29.2ns ± 4% -17.82% (p=0.000 n=10+10) AppendUintVarlen/1234567890123-4 37.6ns ± 4% 31.4ns ± 3% -16.48% (p=0.000 n=10+10) AppendUintVarlen/12345678901234-4 39.8ns ± 6% 32.0ns ± 7% -19.60% (p=0.000 n=10+10) AppendUintVarlen/123456789012345-4 40.7ns ± 0% 34.4ns ± 4% -15.55% (p=0.000 n=6+10) AppendUintVarlen/1234567890123456-4 45.4ns ± 6% 35.1ns ± 4% -22.66% (p=0.000 n=10+10) AppendUintVarlen/12345678901234567-4 45.1ns ± 1% 36.7ns ± 4% -18.77% (p=0.000 n=9+10) AppendUintVarlen/123456789012345678-4 46.9ns ± 0% 36.4ns ± 3% -22.49% (p=0.000 n=9+10) AppendUintVarlen/1234567890123456789-4 50.6ns ± 6% 38.8ns ± 3% -23.28% (p=0.000 n=10+10) AppendUintVarlen/12345678901234567890-4 51.3ns ± 2% 38.4ns ± 0% -25.00% (p=0.000 n=9+8) Benchmark results for GOARCH=386: name old time/op new time/op delta FormatInt-4 6.21µs ± 0% 6.14µs ± 0% -1.11% (p=0.008 n=5+5) AppendInt-4 4.95µs ± 0% 4.85µs ± 0% -1.99% (p=0.016 n=5+4) FormatUint-4 1.89µs ± 1% 1.83µs ± 1% -2.94% (p=0.008 n=5+5) AppendUint-4 1.59µs ± 0% 1.57µs ± 2% -1.72% (p=0.040 n=5+5) FormatIntSmall-4 8.48ns ± 0% 8.48ns ± 0% ~ (p=0.905 n=5+5) AppendIntSmall-4 12.2ns ± 0% 12.2ns ± 0% ~ (all equal) AppendUintVarlen/1-4 10.6ns ± 1% 10.7ns ± 0% ~ (p=0.238 n=5+4) AppendUintVarlen/12-4 10.7ns ± 0% 10.7ns ± 1% ~ (p=0.333 n=4+5) AppendUintVarlen/123-4 29.9ns ± 1% 30.2ns ± 0% +1.07% (p=0.016 n=5+4) AppendUintVarlen/1234-4 32.4ns ± 1% 30.4ns ± 0% -6.30% (p=0.008 n=5+5) AppendUintVarlen/12345-4 35.1ns ± 2% 34.9ns ± 0% ~ (p=0.238 n=5+5) AppendUintVarlen/123456-4 36.6ns ± 0% 35.3ns ± 0% -3.55% (p=0.029 n=4+4) AppendUintVarlen/1234567-4 38.9ns ± 0% 39.6ns ± 0% +1.80% (p=0.029 n=4+4) AppendUintVarlen/12345678-4 41.3ns ± 0% 40.1ns ± 0% -2.91% (p=0.000 n=5+4) AppendUintVarlen/123456789-4 44.9ns ± 1% 44.8ns ± 0% ~ (p=0.667 n=5+5) AppendUintVarlen/1234567890-4 65.6ns ± 0% 66.2ns ± 1% +0.88% (p=0.016 n=4+5) AppendUintVarlen/12345678901-4 77.9ns ± 0% 76.3ns ± 0% -2.00% (p=0.000 n=4+5) AppendUintVarlen/123456789012-4 80.7ns ± 0% 79.1ns ± 1% -2.01% (p=0.008 n=5+5) AppendUintVarlen/1234567890123-4 83.6ns ± 0% 80.2ns ± 1% -4.07% (p=0.008 n=5+5) AppendUintVarlen/12345678901234-4 86.2ns ± 1% 83.3ns ± 0% -3.39% (p=0.008 n=5+5) AppendUintVarlen/123456789012345-4 88.5ns ± 0% 83.7ns ± 0% -5.42% (p=0.008 n=5+5) AppendUintVarlen/1234567890123456-4 90.6ns ± 0% 88.3ns ± 0% -2.54% (p=0.008 n=5+5) AppendUintVarlen/12345678901234567-4 92.7ns ± 0% 89.0ns ± 1% -4.01% (p=0.008 n=5+5) AppendUintVarlen/123456789012345678-4 95.6ns ± 1% 92.6ns ± 0% -3.18% (p=0.016 n=5+4) AppendUintVarlen/1234567890123456789-4 118ns ± 0% 114ns ± 0% ~ (p=0.079 n=4+5) AppendUintVarlen/12345678901234567890-4 138ns ± 0% 136ns ± 0% -1.45% (p=0.008 n=5+5) Updates #19445 Change-Id: Iafbe5c074898187c150dc3854e5b9fc19c10be05 Reviewed-on: https://go-review.googlesource.com/38255 Run-TryBot: Robert Griesemer TryBot-Result: Gobot Gobot Reviewed-by: Robert Griesemer --- src/strconv/itoa.go | 36 +++++++++++++++++++++--------- src/strconv/itoa_test.go | 47 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/src/strconv/itoa.go b/src/strconv/itoa.go index 76ca676c5f..78527c8ae6 100644 --- a/src/strconv/itoa.go +++ b/src/strconv/itoa.go @@ -121,11 +121,19 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s // are calculated by runtime functions on 32bit machines. q := u / 1e9 us := uint(u - q*1e9) // u % 1e9 fits into a uint - for j := 9; j > 0; j-- { - i-- - a[i] = byte(us%10 + '0') - us /= 10 + for j := 4; j > 0; j-- { + is := us % 100 * 2 + us /= 100 + i -= 2 + a[i+1] = smallsString[is+1] + a[i+0] = smallsString[is+0] } + + // us < 10, since it contains the last digit + // from the initial 9-digit us. + i-- + a[i] = smallsString[us*2+1] + u = q } // u < 1e9 @@ -133,14 +141,22 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s // u guaranteed to fit into a uint us := uint(u) - for us >= 10 { - i-- - a[i] = byte(us%10 + '0') - us /= 10 + for us >= 100 { + is := us % 100 * 2 + us /= 100 + i -= 2 + a[i+1] = smallsString[is+1] + a[i+0] = smallsString[is+0] } - // us < 10 + + // us < 100 + is := us * 2 i-- - a[i] = byte(us + '0') + a[i] = smallsString[is+1] + if us >= 10 { + i-- + a[i] = smallsString[is] + } } else if s := shifts[base]; s > 0 { // base is power of 2: use shifts and masks instead of / and % diff --git a/src/strconv/itoa_test.go b/src/strconv/itoa_test.go index 7823cf4673..89c2de6941 100644 --- a/src/strconv/itoa_test.go +++ b/src/strconv/itoa_test.go @@ -126,6 +126,41 @@ func TestUitoa(t *testing.T) { } } +var varlenUints = []struct { + in uint64 + out string +}{ + {1, "1"}, + {12, "12"}, + {123, "123"}, + {1234, "1234"}, + {12345, "12345"}, + {123456, "123456"}, + {1234567, "1234567"}, + {12345678, "12345678"}, + {123456789, "123456789"}, + {1234567890, "1234567890"}, + {12345678901, "12345678901"}, + {123456789012, "123456789012"}, + {1234567890123, "1234567890123"}, + {12345678901234, "12345678901234"}, + {123456789012345, "123456789012345"}, + {1234567890123456, "1234567890123456"}, + {12345678901234567, "12345678901234567"}, + {123456789012345678, "123456789012345678"}, + {1234567890123456789, "1234567890123456789"}, + {12345678901234567890, "12345678901234567890"}, +} + +func TestFormatUintVarlen(t *testing.T) { + for _, test := range varlenUints { + s := FormatUint(test.in, 10) + if s != test.out { + t.Errorf("FormatUint(%v, 10) = %v want %v", test.in, s, test.out) + } + } +} + func BenchmarkFormatInt(b *testing.B) { for i := 0; i < b.N; i++ { for _, test := range itob64tests { @@ -181,4 +216,16 @@ func BenchmarkAppendIntSmall(b *testing.B) { } } +func BenchmarkAppendUintVarlen(b *testing.B) { + for _, test := range varlenUints { + b.Run(test.out, func(b *testing.B) { + dst := make([]byte, 0, 30) + for j := 0; j < b.N; j++ { + dst = AppendUint(dst[:0], test.in, 10) + BenchSink += len(dst) + } + }) + } +} + var BenchSink int // make sure compiler cannot optimize away benchmarks