mirror of
https://github.com/golang/go
synced 2024-11-24 05:40:15 -07:00
compress/flate: distinguish between base and min match length.
Change-Id: I93db5cd86e3fb568e4444cad95268ba4a02ce8a0 Reviewed-on: https://go-review.googlesource.com/22787 Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
parent
1f4f2d0d39
commit
27d0d849fe
@ -19,12 +19,20 @@ const (
|
|||||||
logWindowSize = 15
|
logWindowSize = 15
|
||||||
windowSize = 1 << logWindowSize
|
windowSize = 1 << logWindowSize
|
||||||
windowMask = windowSize - 1
|
windowMask = windowSize - 1
|
||||||
logMaxOffsetSize = 15 // Standard DEFLATE
|
|
||||||
minMatchLength = 4 // The smallest match that the compressor looks for
|
|
||||||
maxMatchLength = 258 // The longest match for the compressor
|
|
||||||
minOffsetSize = 1 // The shortest offset that makes any sense
|
|
||||||
|
|
||||||
// The maximum number of tokens we put into a single flat block, just to
|
// The LZ77 step produces a sequence of literal tokens and <length, offset>
|
||||||
|
// pair tokens. The offset is also known as distance. The underlying wire
|
||||||
|
// format limits the range of lengths and offsets. For example, there are
|
||||||
|
// 256 legitimate lengths: those in the range [3, 258]. This package's
|
||||||
|
// compressor uses a higher minimum match length, enabling optimizations
|
||||||
|
// such as finding matches via 32-bit loads and compares.
|
||||||
|
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
|
||||||
|
minMatchLength = 4 // The smallest match length that the compressor actually emits
|
||||||
|
maxMatchLength = 258 // The largest match length
|
||||||
|
baseMatchOffset = 1 // The smallest match offset
|
||||||
|
maxMatchOffset = 1 << 15 // The largest match offset
|
||||||
|
|
||||||
|
// The maximum number of tokens we put into a single flate block, just to
|
||||||
// stop things from getting too large.
|
// stop things from getting too large.
|
||||||
maxFlateBlockTokens = 1 << 14
|
maxFlateBlockTokens = 1 << 14
|
||||||
maxStoreBlockSize = 65535
|
maxStoreBlockSize = 65535
|
||||||
@ -424,9 +432,9 @@ Loop:
|
|||||||
// There was a match at the previous step, and the current match is
|
// There was a match at the previous step, and the current match is
|
||||||
// not better. Output the previous match.
|
// not better. Output the previous match.
|
||||||
if d.fastSkipHashing != skipNever {
|
if d.fastSkipHashing != skipNever {
|
||||||
d.tokens = append(d.tokens, matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize)))
|
d.tokens = append(d.tokens, matchToken(uint32(d.length-baseMatchLength), uint32(d.offset-baseMatchOffset)))
|
||||||
} else {
|
} else {
|
||||||
d.tokens = append(d.tokens, matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)))
|
d.tokens = append(d.tokens, matchToken(uint32(prevLength-baseMatchLength), uint32(prevOffset-baseMatchOffset)))
|
||||||
}
|
}
|
||||||
// Insert in the hash table all strings up to the end of the match.
|
// Insert in the hash table all strings up to the end of the match.
|
||||||
// index and index-1 are already inserted. If there is not enough
|
// index and index-1 are already inserted. If there is not enough
|
||||||
|
@ -8,8 +8,6 @@ package flate
|
|||||||
// based on Snappy's LZ77-style encoder: github.com/golang/snappy
|
// based on Snappy's LZ77-style encoder: github.com/golang/snappy
|
||||||
|
|
||||||
const (
|
const (
|
||||||
maxOffset = 1 << logMaxOffsetSize // Maximum deflate offset.
|
|
||||||
|
|
||||||
tableBits = 14 // Bits used in the table.
|
tableBits = 14 // Bits used in the table.
|
||||||
tableSize = 1 << tableBits // Size of the table.
|
tableSize = 1 << tableBits // Size of the table.
|
||||||
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
|
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
|
||||||
@ -97,7 +95,8 @@ func encodeBestSpeed(dst []token, src []byte) []token {
|
|||||||
candidate = int(table[nextHash&tableMask])
|
candidate = int(table[nextHash&tableMask])
|
||||||
table[nextHash&tableMask] = uint16(s)
|
table[nextHash&tableMask] = uint16(s)
|
||||||
nextHash = hash(load32(src, nextS))
|
nextHash = hash(load32(src, nextS))
|
||||||
if s-candidate < maxOffset && load32(src, s) == load32(src, candidate) {
|
// TODO: < should be <=, and add a test for that.
|
||||||
|
if s-candidate < maxMatchOffset && load32(src, s) == load32(src, candidate) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -133,7 +132,7 @@ func encodeBestSpeed(dst []token, src []byte) []token {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// matchToken is flate's equivalent of Snappy's emitCopy.
|
// matchToken is flate's equivalent of Snappy's emitCopy.
|
||||||
dst = append(dst, matchToken(uint32(s-base-3), uint32(base-candidate-minOffsetSize)))
|
dst = append(dst, matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset)))
|
||||||
nextEmit = s
|
nextEmit = s
|
||||||
if s >= sLimit {
|
if s >= sLimit {
|
||||||
goto emitRemainder
|
goto emitRemainder
|
||||||
@ -151,7 +150,8 @@ func encodeBestSpeed(dst []token, src []byte) []token {
|
|||||||
currHash := hash(uint32(x >> 8))
|
currHash := hash(uint32(x >> 8))
|
||||||
candidate = int(table[currHash&tableMask])
|
candidate = int(table[currHash&tableMask])
|
||||||
table[currHash&tableMask] = uint16(s)
|
table[currHash&tableMask] = uint16(s)
|
||||||
if s-candidate >= maxOffset || uint32(x>>8) != load32(src, candidate) {
|
// TODO: >= should be >, and add a test for that.
|
||||||
|
if s-candidate >= maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
|
||||||
nextHash = hash(uint32(x >> 16))
|
nextHash = hash(uint32(x >> 16))
|
||||||
s++
|
s++
|
||||||
break
|
break
|
||||||
|
@ -16,7 +16,6 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
maxCodeLen = 16 // max length of Huffman code
|
maxCodeLen = 16 // max length of Huffman code
|
||||||
maxHist = 32768 // max history required
|
|
||||||
// The next three numbers come from the RFC section 3.2.7, with the
|
// The next three numbers come from the RFC section 3.2.7, with the
|
||||||
// additional proviso in section 3.2.5 which implies that distance codes
|
// additional proviso in section 3.2.5 which implies that distance codes
|
||||||
// 30 and 31 should never occur in compressed data.
|
// 30 and 31 should never occur in compressed data.
|
||||||
@ -767,7 +766,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
|
|||||||
dict: f.dict,
|
dict: f.dict,
|
||||||
step: (*decompressor).nextBlock,
|
step: (*decompressor).nextBlock,
|
||||||
}
|
}
|
||||||
f.dict.init(maxHist, nil)
|
f.dict.init(maxMatchOffset, nil)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -787,7 +786,7 @@ func NewReader(r io.Reader) io.ReadCloser {
|
|||||||
f.bits = new([maxNumLit + maxNumDist]int)
|
f.bits = new([maxNumLit + maxNumDist]int)
|
||||||
f.codebits = new([numCodes]int)
|
f.codebits = new([numCodes]int)
|
||||||
f.step = (*decompressor).nextBlock
|
f.step = (*decompressor).nextBlock
|
||||||
f.dict.init(maxHist, nil)
|
f.dict.init(maxMatchOffset, nil)
|
||||||
return &f
|
return &f
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -806,6 +805,6 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
|
|||||||
f.bits = new([maxNumLit + maxNumDist]int)
|
f.bits = new([maxNumLit + maxNumDist]int)
|
||||||
f.codebits = new([numCodes]int)
|
f.codebits = new([numCodes]int)
|
||||||
f.step = (*decompressor).nextBlock
|
f.step = (*decompressor).nextBlock
|
||||||
f.dict.init(maxHist, dict)
|
f.dict.init(maxMatchOffset, dict)
|
||||||
return &f
|
return &f
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user