mirror of
https://github.com/golang/go
synced 2024-11-19 05:04:43 -07:00
bzip2: improve performance
Improve performance of move-to-front by using cache-friendly copies instead of doubly-linked list. Simplify so that the underlying slice is the object. Remove the n=0 special case, which was actually slower with the copy approach. benchmark old ns/op new ns/op delta BenchmarkDecodeDigits 26429714 23859699 -9.72% BenchmarkDecodeTwain 76684510 67591946 -11.86% benchmark old MB/s new MB/s speedup BenchmarkDecodeDigits 1.63 1.81 1.11x BenchmarkDecodeTwain 1.63 1.85 1.13x Updates #6754. LGTM=adg, agl, josharian R=adg, agl, josharian CC=golang-codereviews https://golang.org/cl/131840043
This commit is contained in:
parent
523aa93288
commit
6d248cec56
@ -216,6 +216,44 @@ func TestOutOfRangeSelector(t *testing.T) {
|
|||||||
ioutil.ReadAll(decompressor)
|
ioutil.ReadAll(decompressor)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMTF(t *testing.T) {
|
||||||
|
mtf := newMTFDecoderWithRange(5)
|
||||||
|
|
||||||
|
// 0 1 2 3 4
|
||||||
|
expect := byte(1)
|
||||||
|
x := mtf.Decode(1)
|
||||||
|
if x != expect {
|
||||||
|
t.Errorf("expected %v, got %v", expect, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1 0 2 3 4
|
||||||
|
x = mtf.Decode(0)
|
||||||
|
if x != expect {
|
||||||
|
t.Errorf("expected %v, got %v", expect, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1 0 2 3 4
|
||||||
|
expect = byte(0)
|
||||||
|
x = mtf.Decode(1)
|
||||||
|
if x != expect {
|
||||||
|
t.Errorf("expected %v, got %v", expect, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 0 1 2 3 4
|
||||||
|
expect = byte(4)
|
||||||
|
x = mtf.Decode(4)
|
||||||
|
if x != expect {
|
||||||
|
t.Errorf("expected %v, got %v", expect, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4 0 1 2 3
|
||||||
|
expect = byte(0)
|
||||||
|
x = mtf.Decode(1)
|
||||||
|
if x != expect {
|
||||||
|
t.Errorf("expected %v, got %v", expect, x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var bufferOverrunBase64 string = `
|
var bufferOverrunBase64 string = `
|
||||||
QlpoNTFBWSZTWTzyiGcACMP/////////////////////////////////3/7f3///
|
QlpoNTFBWSZTWTzyiGcACMP/////////////////////////////////3/7f3///
|
||||||
////4N/fCZODak2Xo44GIHZgkGzDRbFAuwAAKoFV7T6AO6qwA6APb6s2rOoAkAAD
|
////4N/fCZODak2Xo44GIHZgkGzDRbFAuwAAKoFV7T6AO6qwA6APb6s2rOoAkAAD
|
||||||
|
@ -11,88 +11,43 @@ package bzip2
|
|||||||
// index into that list. When a symbol is referenced, it's moved to the front
|
// index into that list. When a symbol is referenced, it's moved to the front
|
||||||
// of the list. Thus, a repeated symbol ends up being encoded with many zeros,
|
// of the list. Thus, a repeated symbol ends up being encoded with many zeros,
|
||||||
// as the symbol will be at the front of the list after the first access.
|
// as the symbol will be at the front of the list after the first access.
|
||||||
type moveToFrontDecoder struct {
|
type moveToFrontDecoder []byte
|
||||||
// Rather than actually keep the list in memory, the symbols are stored
|
|
||||||
// as a circular, double linked list with the symbol indexed by head
|
|
||||||
// at the front of the list.
|
|
||||||
symbols [256]byte
|
|
||||||
next [256]uint8
|
|
||||||
prev [256]uint8
|
|
||||||
head uint8
|
|
||||||
len int
|
|
||||||
}
|
|
||||||
|
|
||||||
// newMTFDecoder creates a move-to-front decoder with an explicit initial list
|
// newMTFDecoder creates a move-to-front decoder with an explicit initial list
|
||||||
// of symbols.
|
// of symbols.
|
||||||
func newMTFDecoder(symbols []byte) *moveToFrontDecoder {
|
func newMTFDecoder(symbols []byte) moveToFrontDecoder {
|
||||||
if len(symbols) > 256 {
|
if len(symbols) > 256 {
|
||||||
panic("too many symbols")
|
panic("too many symbols")
|
||||||
}
|
}
|
||||||
|
return moveToFrontDecoder(symbols)
|
||||||
m := new(moveToFrontDecoder)
|
|
||||||
copy(m.symbols[:], symbols)
|
|
||||||
m.len = len(symbols)
|
|
||||||
m.threadLinkedList()
|
|
||||||
return m
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// newMTFDecoderWithRange creates a move-to-front decoder with an initial
|
// newMTFDecoderWithRange creates a move-to-front decoder with an initial
|
||||||
// symbol list of 0...n-1.
|
// symbol list of 0...n-1.
|
||||||
func newMTFDecoderWithRange(n int) *moveToFrontDecoder {
|
func newMTFDecoderWithRange(n int) moveToFrontDecoder {
|
||||||
if n > 256 {
|
if n > 256 {
|
||||||
panic("newMTFDecoderWithRange: cannot have > 256 symbols")
|
panic("newMTFDecoderWithRange: cannot have > 256 symbols")
|
||||||
}
|
}
|
||||||
|
|
||||||
m := new(moveToFrontDecoder)
|
m := make([]byte, n)
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
m.symbols[byte(i)] = byte(i)
|
m[i] = byte(i)
|
||||||
}
|
}
|
||||||
m.len = n
|
return moveToFrontDecoder(m)
|
||||||
m.threadLinkedList()
|
|
||||||
return m
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// threadLinkedList creates the initial linked-list pointers.
|
func (m moveToFrontDecoder) Decode(n int) (b byte) {
|
||||||
func (m *moveToFrontDecoder) threadLinkedList() {
|
// Implement move-to-front with a simple copy. This approach
|
||||||
if m.len == 0 {
|
// beats more sophisticated approaches in benchmarking, probably
|
||||||
return
|
// because it has high locality of reference inside of a
|
||||||
}
|
// single cache line (most move-to-front operations have n < 64).
|
||||||
|
b = m[n]
|
||||||
m.prev[0] = uint8(m.len - 1)
|
copy(m[1:], m[:n])
|
||||||
|
m[0] = b
|
||||||
for i := byte(0); int(i) < m.len-1; i++ {
|
|
||||||
m.next[i] = uint8(i + 1)
|
|
||||||
m.prev[i+1] = uint8(i)
|
|
||||||
}
|
|
||||||
|
|
||||||
m.next[m.len-1] = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *moveToFrontDecoder) Decode(n int) (b byte) {
|
|
||||||
// Most of the time, n will be zero so it's worth dealing with this
|
|
||||||
// simple case.
|
|
||||||
if n == 0 {
|
|
||||||
return m.symbols[m.head]
|
|
||||||
}
|
|
||||||
|
|
||||||
i := m.head
|
|
||||||
for j := 0; j < n; j++ {
|
|
||||||
i = m.next[i]
|
|
||||||
}
|
|
||||||
b = m.symbols[i]
|
|
||||||
|
|
||||||
m.next[m.prev[i]] = m.next[i]
|
|
||||||
m.prev[m.next[i]] = m.prev[i]
|
|
||||||
m.next[i] = m.head
|
|
||||||
m.prev[i] = m.prev[m.head]
|
|
||||||
m.next[m.prev[m.head]] = i
|
|
||||||
m.prev[m.head] = i
|
|
||||||
m.head = i
|
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// First returns the symbol at the front of the list.
|
// First returns the symbol at the front of the list.
|
||||||
func (m *moveToFrontDecoder) First() byte {
|
func (m moveToFrontDecoder) First() byte {
|
||||||
return m.symbols[m.head]
|
return m[0]
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user