mirror of
https://github.com/golang/go
synced 2024-11-21 22:34:48 -07:00
bzip2: speed up decompression.
This borrows a trick from the bzip2 source and effects a decent speed up when decompressing highly compressed sources. Rather than unshuffle the BTW block when performing the IBTW, a linked-list is threaded through the array, in place. This improves cache hit rates. R=bradfitzgo, bradfitzwork, cw CC=golang-dev https://golang.org/cl/4247047
This commit is contained in:
parent
7483c6ee3c
commit
e7fa307902
@ -30,14 +30,15 @@ type reader struct {
|
|||||||
blockSize int // blockSize in bytes, i.e. 900 * 1024.
|
blockSize int // blockSize in bytes, i.e. 900 * 1024.
|
||||||
eof bool
|
eof bool
|
||||||
buf []byte // stores Burrows-Wheeler transformed data.
|
buf []byte // stores Burrows-Wheeler transformed data.
|
||||||
rle []byte // stores the RLE compressed data.
|
c [256]uint // the `C' array for the inverse BWT.
|
||||||
c [256]uint // the `C' and `P' arrays for the inverse BWT.
|
tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits.
|
||||||
p []uint
|
tPos uint32 // Index of the next output byte in tt.
|
||||||
|
|
||||||
preRLE []byte // contains the RLE data still to be processed.
|
preRLE []uint32 // contains the RLE data still to be processed.
|
||||||
lastByte int // the last byte value seen.
|
preRLEUsed int // number of entries of preRLE used.
|
||||||
byteRepeats uint // the number of repeats of lastByte seen.
|
lastByte int // the last byte value seen.
|
||||||
repeats uint // the number of copies of lastByte to output.
|
byteRepeats uint // the number of repeats of lastByte seen.
|
||||||
|
repeats uint // the number of copies of lastByte to output.
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewReader returns an io.Reader which decompresses bzip2 data from r.
|
// NewReader returns an io.Reader which decompresses bzip2 data from r.
|
||||||
@ -71,9 +72,7 @@ func (bz2 *reader) setup() os.Error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bz2.blockSize = 100 * 1024 * (int(level) - '0')
|
bz2.blockSize = 100 * 1024 * (int(level) - '0')
|
||||||
bz2.buf = make([]byte, bz2.blockSize)
|
bz2.tt = make([]uint32, bz2.blockSize)
|
||||||
bz2.rle = make([]byte, bz2.blockSize)
|
|
||||||
bz2.p = make([]uint, bz2.blockSize)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,7 +109,7 @@ func (bz2 *reader) read(buf []byte) (n int, err os.Error) {
|
|||||||
// maximum expansion. Thus we process blocks all at once, except for
|
// maximum expansion. Thus we process blocks all at once, except for
|
||||||
// the RLE which we decompress as required.
|
// the RLE which we decompress as required.
|
||||||
|
|
||||||
for (bz2.repeats > 0 || len(bz2.preRLE) > 0) && n < len(buf) {
|
for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) {
|
||||||
// We have RLE data pending.
|
// We have RLE data pending.
|
||||||
|
|
||||||
// The run-length encoding works like this:
|
// The run-length encoding works like this:
|
||||||
@ -130,8 +129,10 @@ func (bz2 *reader) read(buf []byte) (n int, err os.Error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
b := bz2.preRLE[0]
|
bz2.tPos = bz2.preRLE[bz2.tPos]
|
||||||
bz2.preRLE = bz2.preRLE[1:]
|
b := byte(bz2.tPos)
|
||||||
|
bz2.tPos >>= 8
|
||||||
|
bz2.preRLEUsed++
|
||||||
|
|
||||||
if bz2.byteRepeats == 3 {
|
if bz2.byteRepeats == 3 {
|
||||||
bz2.repeats = uint(b)
|
bz2.repeats = uint(b)
|
||||||
@ -306,6 +307,12 @@ func (bz2 *reader) readBlock() (err os.Error) {
|
|||||||
}
|
}
|
||||||
repeat += repeat_power << v
|
repeat += repeat_power << v
|
||||||
repeat_power <<= 1
|
repeat_power <<= 1
|
||||||
|
|
||||||
|
// This limit of 2 million comes from the bzip2 source
|
||||||
|
// code. It prevents repeat from overflowing.
|
||||||
|
if repeat > 2*1024*1024 {
|
||||||
|
return StructuralError("repeat count too large")
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -314,8 +321,7 @@ func (bz2 *reader) readBlock() (err os.Error) {
|
|||||||
// replicate the last output symbol.
|
// replicate the last output symbol.
|
||||||
for i := 0; i < repeat; i++ {
|
for i := 0; i < repeat; i++ {
|
||||||
b := byte(mtf.First())
|
b := byte(mtf.First())
|
||||||
bz2.buf[bufIndex] = b
|
bz2.tt[bufIndex] = uint32(b)
|
||||||
bz2.p[bufIndex] = bz2.c[b]
|
|
||||||
bz2.c[b]++
|
bz2.c[b]++
|
||||||
bufIndex++
|
bufIndex++
|
||||||
}
|
}
|
||||||
@ -336,16 +342,20 @@ func (bz2 *reader) readBlock() (err os.Error) {
|
|||||||
// doesn't need to be encoded and we have |v-1| in the next
|
// doesn't need to be encoded and we have |v-1| in the next
|
||||||
// line.
|
// line.
|
||||||
b := byte(mtf.Decode(int(v - 1)))
|
b := byte(mtf.Decode(int(v - 1)))
|
||||||
bz2.buf[bufIndex] = b
|
bz2.tt[bufIndex] = uint32(b)
|
||||||
bz2.p[bufIndex] = bz2.c[b]
|
|
||||||
bz2.c[b]++
|
bz2.c[b]++
|
||||||
bufIndex++
|
bufIndex++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if origPtr >= uint(bufIndex) {
|
||||||
|
return StructuralError("origPtr out of bounds")
|
||||||
|
}
|
||||||
|
|
||||||
// We have completed the entropy decoding. Now we can perform the
|
// We have completed the entropy decoding. Now we can perform the
|
||||||
// inverse BWT and setup the RLE buffer.
|
// inverse BWT and setup the RLE buffer.
|
||||||
inverseBWT(bz2.rle, bz2.buf[:bufIndex], origPtr, bz2.c[:], bz2.p[:bufIndex])
|
bz2.preRLE = bz2.tt[:bufIndex]
|
||||||
bz2.preRLE = bz2.rle[:bufIndex]
|
bz2.preRLEUsed = 0
|
||||||
|
bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:])
|
||||||
bz2.lastByte = -1
|
bz2.lastByte = -1
|
||||||
bz2.byteRepeats = 0
|
bz2.byteRepeats = 0
|
||||||
bz2.repeats = 0
|
bz2.repeats = 0
|
||||||
@ -355,19 +365,26 @@ func (bz2 *reader) readBlock() (err os.Error) {
|
|||||||
|
|
||||||
// inverseBWT implements the inverse Burrows-Wheeler transform as described in
|
// inverseBWT implements the inverse Burrows-Wheeler transform as described in
|
||||||
// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2.
|
// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2.
|
||||||
// In that document, origPtr is called `I' and c and p are the `C' and `P'
|
// In that document, origPtr is called `I' and c is the `C' array after the
|
||||||
// arrays after the first pass over the data. They are arguments here because
|
// first pass over the data. It's an argument here because we merge the first
|
||||||
// we merge the first pass with the Huffman decoding.
|
// pass with the Huffman decoding.
|
||||||
func inverseBWT(out, in []byte, origPtr uint, c, p []uint) {
|
//
|
||||||
|
// This also implements the `single array' method from the bzip2 source code
|
||||||
|
// which leaves the output, still shuffled, in the bottom 8 bits of tt with the
|
||||||
|
// index of the next byte in the top 24-bits. The index of the first byte is
|
||||||
|
// returned.
|
||||||
|
func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 {
|
||||||
sum := uint(0)
|
sum := uint(0)
|
||||||
for i := 0; i < 256; i++ {
|
for i := 0; i < 256; i++ {
|
||||||
sum += c[i]
|
sum += c[i]
|
||||||
c[i] = sum - c[i]
|
c[i] = sum - c[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
i := origPtr
|
for i := range tt {
|
||||||
for j := len(in) - 1; j >= 0; j-- {
|
b := tt[i] & 0xff
|
||||||
out[j] = in[i]
|
tt[c[b]] |= uint32(i) << 8
|
||||||
i = p[i] + c[in[i]]
|
c[b]++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return tt[origPtr] >> 8
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user