1
0
mirror of https://github.com/golang/go synced 2024-11-23 18:10:04 -07:00

compress/bzip2: make decoding faster

Issue 6754 reports that Go bzip2 Decode function is much slower
(about 2.5x in go1.5) than the Python equivalent (which is
actually just a wrapper around the usual C library) on random data.

Profiling the code shows that half a dozen of CMP instructions in a
tight loop are responsibile for most of the execution time.

This patch reduces the number of branches of the loop, greatly
improving performance on random data and speeding up decoding of
real data.

name            old time/op    new time/op    delta
DecodeDigits-4    9.28ms ± 1%    8.05ms ± 1%  -13.18%  (p=0.000 n=15+14)
DecodeTwain-4     28.9ms ± 2%    26.4ms ± 1%   -8.57%  (p=0.000 n=15+14)
DecodeRand-4      3.94ms ± 1%    3.06ms ± 1%  -22.45%  (p=0.000 n=15+14)

name            old speed      new speed      delta
DecodeDigits-4  4.65MB/s ± 1%  5.36MB/s ± 1%  +15.21%  (p=0.000 n=13+14)
DecodeTwain-4   4.32MB/s ± 2%  4.72MB/s ± 1%   +9.36%  (p=0.000 n=15+14)
DecodeRand-4    4.27MB/s ± 1%  5.51MB/s ± 1%  +28.86%  (p=0.000 n=15+14)

I've run some benchmark comparing Go bzip2 implementation with the
usual Linux bzip2 command (which is written in C). On my machine
this patch brings go1.5
  from ~2.26x to ~1.50x of bzip2 time (on 64MB  random data)
  from ~1.70x to ~1.50x of bzip2 time (on 100MB english text)
  from ~2.00x to ~1.88x of bzip2 time (on 64MB  /dev/zero data)

Fixes #6754

Change-Id: I3cb12d2c0c2243c1617edef1edc88f05f91d26d1
Reviewed-on: https://go-review.googlesource.com/13853
Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
Alberto Donizetti 2015-08-23 17:15:07 +02:00 committed by Nigel Tao
parent 499845bfe0
commit 6403c957e0
4 changed files with 32 additions and 24 deletions

View File

@ -77,14 +77,6 @@ func (br *bitReader) ReadBit() bool {
return n != 0
}
func (br *bitReader) TryReadBit() (bit byte, ok bool) {
if br.bits > 0 {
br.bits--
return byte(br.n>>br.bits) & 1, true
}
return 0, false
}
func (br *bitReader) Err() error {
return br.err
}

View File

@ -173,6 +173,7 @@ const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c59
const (
digits = iota
twain
random
)
var testfiles = []string{
@ -182,6 +183,8 @@ var testfiles = []string{
digits: "testdata/e.txt.bz2",
// Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
twain: "testdata/Mark.Twain-Tom.Sawyer.txt.bz2",
// 16KB of random data from /dev/urandom
random: "testdata/random.data.bz2",
}
func benchmarkDecode(b *testing.B, testfile int) {
@ -198,6 +201,7 @@ func benchmarkDecode(b *testing.B, testfile int) {
func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) }
func BenchmarkDecodeTwain(b *testing.B) { benchmarkDecode(b, twain) }
func BenchmarkDecodeRand(b *testing.B) { benchmarkDecode(b, random) }
func TestBufferOverrun(t *testing.T) {
// Tests https://golang.org/issue/5747.

View File

@ -38,23 +38,35 @@ func (t *huffmanTree) Decode(br *bitReader) (v uint16) {
for {
node := &t.nodes[nodeIndex]
bit, ok := br.TryReadBit()
if !ok && br.ReadBit() {
bit = 1
}
// bzip2 encodes left as a true bit.
if bit != 0 {
// left
if node.left == invalidNodeValue {
return node.leftValue
}
nodeIndex = node.left
var bit uint16
if br.bits > 0 {
// Get next bit - fast path.
br.bits--
bit = 0 - (uint16(br.n>>br.bits) & 1)
} else {
// right
if node.right == invalidNodeValue {
return node.rightValue
}
nodeIndex = node.right
// Get next bit - slow path.
// Use ReadBits to retrieve a single bit
// from the underling io.ByteReader.
bit = 0 - uint16(br.ReadBits(1))
}
// now
// bit = 0xffff if the next bit was 1
// bit = 0x0000 if the next bit was 0
// 1 means left, 0 means right.
//
// if bit == 0xffff {
// nodeIndex = node.left
// } else {
// nodeIndex = node.right
// }
nodeIndex = (bit & node.left) | (^bit & node.right)
if nodeIndex == invalidNodeValue {
// We found a leaf. Use the value of bit to decide
// whether is a left or a right value.
return (bit & node.leftValue) | (^bit & node.rightValue)
}
}
}

Binary file not shown.