mirror of
https://github.com/golang/go
synced 2024-11-23 18:10:04 -07:00
compress/bzip2: make decoding faster
Issue 6754 reports that Go bzip2 Decode function is much slower (about 2.5x in go1.5) than the Python equivalent (which is actually just a wrapper around the usual C library) on random data. Profiling the code shows that half a dozen of CMP instructions in a tight loop are responsibile for most of the execution time. This patch reduces the number of branches of the loop, greatly improving performance on random data and speeding up decoding of real data. name old time/op new time/op delta DecodeDigits-4 9.28ms ± 1% 8.05ms ± 1% -13.18% (p=0.000 n=15+14) DecodeTwain-4 28.9ms ± 2% 26.4ms ± 1% -8.57% (p=0.000 n=15+14) DecodeRand-4 3.94ms ± 1% 3.06ms ± 1% -22.45% (p=0.000 n=15+14) name old speed new speed delta DecodeDigits-4 4.65MB/s ± 1% 5.36MB/s ± 1% +15.21% (p=0.000 n=13+14) DecodeTwain-4 4.32MB/s ± 2% 4.72MB/s ± 1% +9.36% (p=0.000 n=15+14) DecodeRand-4 4.27MB/s ± 1% 5.51MB/s ± 1% +28.86% (p=0.000 n=15+14) I've run some benchmark comparing Go bzip2 implementation with the usual Linux bzip2 command (which is written in C). On my machine this patch brings go1.5 from ~2.26x to ~1.50x of bzip2 time (on 64MB random data) from ~1.70x to ~1.50x of bzip2 time (on 100MB english text) from ~2.00x to ~1.88x of bzip2 time (on 64MB /dev/zero data) Fixes #6754 Change-Id: I3cb12d2c0c2243c1617edef1edc88f05f91d26d1 Reviewed-on: https://go-review.googlesource.com/13853 Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
parent
499845bfe0
commit
6403c957e0
@ -77,14 +77,6 @@ func (br *bitReader) ReadBit() bool {
|
||||
return n != 0
|
||||
}
|
||||
|
||||
func (br *bitReader) TryReadBit() (bit byte, ok bool) {
|
||||
if br.bits > 0 {
|
||||
br.bits--
|
||||
return byte(br.n>>br.bits) & 1, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func (br *bitReader) Err() error {
|
||||
return br.err
|
||||
}
|
||||
|
@ -173,6 +173,7 @@ const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c59
|
||||
const (
|
||||
digits = iota
|
||||
twain
|
||||
random
|
||||
)
|
||||
|
||||
var testfiles = []string{
|
||||
@ -182,6 +183,8 @@ var testfiles = []string{
|
||||
digits: "testdata/e.txt.bz2",
|
||||
// Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
|
||||
twain: "testdata/Mark.Twain-Tom.Sawyer.txt.bz2",
|
||||
// 16KB of random data from /dev/urandom
|
||||
random: "testdata/random.data.bz2",
|
||||
}
|
||||
|
||||
func benchmarkDecode(b *testing.B, testfile int) {
|
||||
@ -198,6 +201,7 @@ func benchmarkDecode(b *testing.B, testfile int) {
|
||||
|
||||
func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) }
|
||||
func BenchmarkDecodeTwain(b *testing.B) { benchmarkDecode(b, twain) }
|
||||
func BenchmarkDecodeRand(b *testing.B) { benchmarkDecode(b, random) }
|
||||
|
||||
func TestBufferOverrun(t *testing.T) {
|
||||
// Tests https://golang.org/issue/5747.
|
||||
|
@ -38,23 +38,35 @@ func (t *huffmanTree) Decode(br *bitReader) (v uint16) {
|
||||
|
||||
for {
|
||||
node := &t.nodes[nodeIndex]
|
||||
bit, ok := br.TryReadBit()
|
||||
if !ok && br.ReadBit() {
|
||||
bit = 1
|
||||
}
|
||||
// bzip2 encodes left as a true bit.
|
||||
if bit != 0 {
|
||||
// left
|
||||
if node.left == invalidNodeValue {
|
||||
return node.leftValue
|
||||
}
|
||||
nodeIndex = node.left
|
||||
|
||||
var bit uint16
|
||||
if br.bits > 0 {
|
||||
// Get next bit - fast path.
|
||||
br.bits--
|
||||
bit = 0 - (uint16(br.n>>br.bits) & 1)
|
||||
} else {
|
||||
// right
|
||||
if node.right == invalidNodeValue {
|
||||
return node.rightValue
|
||||
}
|
||||
nodeIndex = node.right
|
||||
// Get next bit - slow path.
|
||||
// Use ReadBits to retrieve a single bit
|
||||
// from the underling io.ByteReader.
|
||||
bit = 0 - uint16(br.ReadBits(1))
|
||||
}
|
||||
// now
|
||||
// bit = 0xffff if the next bit was 1
|
||||
// bit = 0x0000 if the next bit was 0
|
||||
|
||||
// 1 means left, 0 means right.
|
||||
//
|
||||
// if bit == 0xffff {
|
||||
// nodeIndex = node.left
|
||||
// } else {
|
||||
// nodeIndex = node.right
|
||||
// }
|
||||
nodeIndex = (bit & node.left) | (^bit & node.right)
|
||||
|
||||
if nodeIndex == invalidNodeValue {
|
||||
// We found a leaf. Use the value of bit to decide
|
||||
// whether is a left or a right value.
|
||||
return (bit & node.leftValue) | (^bit & node.rightValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
BIN
src/compress/bzip2/testdata/random.data.bz2
vendored
Normal file
BIN
src/compress/bzip2/testdata/random.data.bz2
vendored
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user