diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go index 6b8711b8116..fb79d089eb3 100644 --- a/src/pkg/compress/bzip2/bzip2_test.go +++ b/src/pkg/compress/bzip2/bzip2_test.go @@ -216,6 +216,44 @@ func TestOutOfRangeSelector(t *testing.T) { ioutil.ReadAll(decompressor) } +func TestMTF(t *testing.T) { + mtf := newMTFDecoderWithRange(5) + + // 0 1 2 3 4 + expect := byte(1) + x := mtf.Decode(1) + if x != expect { + t.Errorf("expected %v, got %v", expect, x) + } + + // 1 0 2 3 4 + x = mtf.Decode(0) + if x != expect { + t.Errorf("expected %v, got %v", expect, x) + } + + // 1 0 2 3 4 + expect = byte(0) + x = mtf.Decode(1) + if x != expect { + t.Errorf("expected %v, got %v", expect, x) + } + + // 0 1 2 3 4 + expect = byte(4) + x = mtf.Decode(4) + if x != expect { + t.Errorf("expected %v, got %v", expect, x) + } + + // 4 0 1 2 3 + expect = byte(0) + x = mtf.Decode(1) + if x != expect { + t.Errorf("expected %v, got %v", expect, x) + } +} + var bufferOverrunBase64 string = ` QlpoNTFBWSZTWTzyiGcACMP/////////////////////////////////3/7f3/// ////4N/fCZODak2Xo44GIHZgkGzDRbFAuwAAKoFV7T6AO6qwA6APb6s2rOoAkAAD diff --git a/src/pkg/compress/bzip2/move_to_front.go b/src/pkg/compress/bzip2/move_to_front.go index b7e75a700a1..526dfb34cc0 100644 --- a/src/pkg/compress/bzip2/move_to_front.go +++ b/src/pkg/compress/bzip2/move_to_front.go @@ -11,88 +11,43 @@ package bzip2 // index into that list. When a symbol is referenced, it's moved to the front // of the list. Thus, a repeated symbol ends up being encoded with many zeros, // as the symbol will be at the front of the list after the first access. -type moveToFrontDecoder struct { - // Rather than actually keep the list in memory, the symbols are stored - // as a circular, double linked list with the symbol indexed by head - // at the front of the list. - symbols [256]byte - next [256]uint8 - prev [256]uint8 - head uint8 - len int -} +type moveToFrontDecoder []byte // newMTFDecoder creates a move-to-front decoder with an explicit initial list // of symbols. -func newMTFDecoder(symbols []byte) *moveToFrontDecoder { +func newMTFDecoder(symbols []byte) moveToFrontDecoder { if len(symbols) > 256 { panic("too many symbols") } - - m := new(moveToFrontDecoder) - copy(m.symbols[:], symbols) - m.len = len(symbols) - m.threadLinkedList() - return m + return moveToFrontDecoder(symbols) } // newMTFDecoderWithRange creates a move-to-front decoder with an initial // symbol list of 0...n-1. -func newMTFDecoderWithRange(n int) *moveToFrontDecoder { +func newMTFDecoderWithRange(n int) moveToFrontDecoder { if n > 256 { panic("newMTFDecoderWithRange: cannot have > 256 symbols") } - m := new(moveToFrontDecoder) + m := make([]byte, n) for i := 0; i < n; i++ { - m.symbols[byte(i)] = byte(i) + m[i] = byte(i) } - m.len = n - m.threadLinkedList() - return m + return moveToFrontDecoder(m) } -// threadLinkedList creates the initial linked-list pointers. -func (m *moveToFrontDecoder) threadLinkedList() { - if m.len == 0 { - return - } - - m.prev[0] = uint8(m.len - 1) - - for i := byte(0); int(i) < m.len-1; i++ { - m.next[i] = uint8(i + 1) - m.prev[i+1] = uint8(i) - } - - m.next[m.len-1] = 0 -} - -func (m *moveToFrontDecoder) Decode(n int) (b byte) { - // Most of the time, n will be zero so it's worth dealing with this - // simple case. - if n == 0 { - return m.symbols[m.head] - } - - i := m.head - for j := 0; j < n; j++ { - i = m.next[i] - } - b = m.symbols[i] - - m.next[m.prev[i]] = m.next[i] - m.prev[m.next[i]] = m.prev[i] - m.next[i] = m.head - m.prev[i] = m.prev[m.head] - m.next[m.prev[m.head]] = i - m.prev[m.head] = i - m.head = i - +func (m moveToFrontDecoder) Decode(n int) (b byte) { + // Implement move-to-front with a simple copy. This approach + // beats more sophisticated approaches in benchmarking, probably + // because it has high locality of reference inside of a + // single cache line (most move-to-front operations have n < 64). + b = m[n] + copy(m[1:], m[:n]) + m[0] = b return } // First returns the symbol at the front of the list. -func (m *moveToFrontDecoder) First() byte { - return m.symbols[m.head] +func (m moveToFrontDecoder) First() byte { + return m[0] }