1
0
mirror of https://github.com/golang/go synced 2024-11-18 13:44:48 -07:00

encoding/base64: Optimize DecodeString

Optimize DecodeString for the common case where most of the input isn't
a newline or a padding character.
Also add some test cases found when fuzzing this implementation against
upstream.
Change the DecodeString benchmark to run with different input sizes.

name                 old time/op    new time/op    delta
DecodeString/2-4       71.5ns ± 4%    70.0ns ± 6%     ~     (p=0.246 n=5+5)
DecodeString/4-4        112ns ±25%      91ns ± 2%     ~     (p=0.056 n=5+5)
DecodeString/8-4        136ns ± 5%     126ns ± 5%   -7.33%  (p=0.016 n=5+5)
DecodeString/64-4       872ns ±29%     652ns ±21%  -25.23%  (p=0.032 n=5+5)
DecodeString/8192-4    90.9µs ±21%    61.0µs ±13%  -32.87%  (p=0.008 n=5+5)

name                 old speed      new speed      delta
DecodeString/2-4     56.0MB/s ± 4%  57.2MB/s ± 6%     ~     (p=0.310 n=5+5)
DecodeString/4-4     73.4MB/s ±23%  87.7MB/s ± 2%     ~     (p=0.056 n=5+5)
DecodeString/8-4     87.8MB/s ± 5%  94.8MB/s ± 5%   +7.98%  (p=0.016 n=5+5)
DecodeString/64-4     103MB/s ±24%   136MB/s ±19%  +32.63%  (p=0.032 n=5+5)
DecodeString/8192-4   122MB/s ±19%   180MB/s ±11%  +47.75%  (p=0.008 n=5+5)

Improves #19636

Change-Id: I39667f4fb682a12b3137946d017ad999553c5780
Reviewed-on: https://go-review.googlesource.com/34950
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Josselin Costanzi 2017-01-07 14:12:57 +01:00 committed by Brad Fitzpatrick
parent d9b1f9e85e
commit 31c96fc227
2 changed files with 41 additions and 25 deletions

View File

@ -273,44 +273,50 @@ func (e CorruptInputError) Error() string {
// indicates if end-of-message padding or a partial quantum was encountered
// and thus any additional data is an error.
func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
var inIdx int
si := 0
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
for si < len(src) && !end {
// Decode quantum using the base64 alphabet
var dbuf [4]byte
dinc, dlen := 3, 4
for j := range dbuf {
for j := 0; j < len(dbuf); j++ {
if len(src) == si {
if enc.padChar != NoPadding || j < 2 {
switch {
case j == 0:
return n, false, nil
case j == 1, enc.padChar != NoPadding:
return n, false, CorruptInputError(si - j)
}
dinc, dlen, end = j-1, j, true
break
}
in := src[si]
inIdx = si
si++
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
out := enc.decodeMap[in]
if out != 0xFF {
dbuf[j] = out
continue
}
if in == '\n' || in == '\r' {
j--
continue
}
if rune(in) == enc.padChar {
// We've reached the end and there's padding
switch j {
case 0, 1:
// incorrect padding
return n, false, CorruptInputError(inIdx)
return n, false, CorruptInputError(si - 1)
case 2:
// "==" is expected, the first "=" is already consumed.
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
if si == len(src) {
// not enough padding
return n, false, CorruptInputError(len(src))
@ -321,10 +327,10 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
}
si++
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
}
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
if si < len(src) {
// trailing garbage
@ -333,10 +339,7 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
dinc, dlen, end = 3, j, true
break
}
dbuf[j] = enc.decodeMap[in]
if dbuf[j] == 0xFF {
return n, false, CorruptInputError(inIdx)
}
return n, false, CorruptInputError(si - 1)
}
// Convert 4x 6bit source bytes into 3 bytes

View File

@ -7,6 +7,7 @@ package base64
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"reflect"
@ -202,6 +203,9 @@ func TestDecodeCorrupt(t *testing.T) {
offset int // -1 means no corruption.
}{
{"", -1},
{"\n", -1},
{"AAA=\n", -1},
{"AAAA\n", -1},
{"!!!!", 0},
{"====", 0},
{"x===", 1},
@ -468,10 +472,19 @@ func BenchmarkEncodeToString(b *testing.B) {
}
func BenchmarkDecodeString(b *testing.B) {
data := StdEncoding.EncodeToString(make([]byte, 8192))
b.SetBytes(int64(len(data)))
for i := 0; i < b.N; i++ {
StdEncoding.DecodeString(data)
sizes := []int{2, 4, 8, 64, 8192}
benchFunc := func(b *testing.B, benchSize int) {
data := StdEncoding.EncodeToString(make([]byte, benchSize))
b.SetBytes(int64(len(data)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
StdEncoding.DecodeString(data)
}
}
for _, size := range sizes {
b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
benchFunc(b, size)
})
}
}