From 27d0d849fe7802794c74d049f5e8e1b0be018f9a Mon Sep 17 00:00:00 2001
From: Nigel Tao <nigeltao@golang.org>
Date: Thu, 5 May 2016 06:43:13 +1000
Subject: [PATCH] compress/flate: distinguish between base and min match
 length.

Change-Id: I93db5cd86e3fb568e4444cad95268ba4a02ce8a0
Reviewed-on: https://go-review.googlesource.com/22787
Reviewed-by: Nigel Tao <nigeltao@golang.org>
---
 src/compress/flate/deflate.go     | 22 +++++++++++++++-------
 src/compress/flate/deflatefast.go | 10 +++++-----
 src/compress/flate/inflate.go     |  9 ++++-----
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/compress/flate/deflate.go b/src/compress/flate/deflate.go
index f7ba02fe4e..8467d7749d 100644
--- a/src/compress/flate/deflate.go
+++ b/src/compress/flate/deflate.go
@@ -19,12 +19,20 @@ const (
 	logWindowSize      = 15
 	windowSize         = 1 << logWindowSize
 	windowMask         = windowSize - 1
-	logMaxOffsetSize   = 15  // Standard DEFLATE
-	minMatchLength     = 4   // The smallest match that the compressor looks for
-	maxMatchLength     = 258 // The longest match for the compressor
-	minOffsetSize      = 1   // The shortest offset that makes any sense
 
-	// The maximum number of tokens we put into a single flat block, just to
+	// The LZ77 step produces a sequence of literal tokens and <length, offset>
+	// pair tokens. The offset is also known as distance. The underlying wire
+	// format limits the range of lengths and offsets. For example, there are
+	// 256 legitimate lengths: those in the range [3, 258]. This package's
+	// compressor uses a higher minimum match length, enabling optimizations
+	// such as finding matches via 32-bit loads and compares.
+	baseMatchLength = 3       // The smallest match length per the RFC section 3.2.5
+	minMatchLength  = 4       // The smallest match length that the compressor actually emits
+	maxMatchLength  = 258     // The largest match length
+	baseMatchOffset = 1       // The smallest match offset
+	maxMatchOffset  = 1 << 15 // The largest match offset
+
+	// The maximum number of tokens we put into a single flate block, just to
 	// stop things from getting too large.
 	maxFlateBlockTokens = 1 << 14
 	maxStoreBlockSize   = 65535
@@ -424,9 +432,9 @@ Loop:
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
 			if d.fastSkipHashing != skipNever {
-				d.tokens = append(d.tokens, matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize)))
+				d.tokens = append(d.tokens, matchToken(uint32(d.length-baseMatchLength), uint32(d.offset-baseMatchOffset)))
 			} else {
-				d.tokens = append(d.tokens, matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)))
+				d.tokens = append(d.tokens, matchToken(uint32(prevLength-baseMatchLength), uint32(prevOffset-baseMatchOffset)))
 			}
 			// Insert in the hash table all strings up to the end of the match.
 			// index and index-1 are already inserted. If there is not enough
diff --git a/src/compress/flate/deflatefast.go b/src/compress/flate/deflatefast.go
index 6cff27c00a..6b881a477c 100644
--- a/src/compress/flate/deflatefast.go
+++ b/src/compress/flate/deflatefast.go
@@ -8,8 +8,6 @@ package flate
 // based on Snappy's LZ77-style encoder: github.com/golang/snappy
 
 const (
-	maxOffset = 1 << logMaxOffsetSize // Maximum deflate offset.
-
 	tableBits  = 14             // Bits used in the table.
 	tableSize  = 1 << tableBits // Size of the table.
 	tableMask  = tableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
@@ -97,7 +95,8 @@ func encodeBestSpeed(dst []token, src []byte) []token {
 			candidate = int(table[nextHash&tableMask])
 			table[nextHash&tableMask] = uint16(s)
 			nextHash = hash(load32(src, nextS))
-			if s-candidate < maxOffset && load32(src, s) == load32(src, candidate) {
+			// TODO: < should be <=, and add a test for that.
+			if s-candidate < maxMatchOffset && load32(src, s) == load32(src, candidate) {
 				break
 			}
 		}
@@ -133,7 +132,7 @@ func encodeBestSpeed(dst []token, src []byte) []token {
 			}
 
 			// matchToken is flate's equivalent of Snappy's emitCopy.
-			dst = append(dst, matchToken(uint32(s-base-3), uint32(base-candidate-minOffsetSize)))
+			dst = append(dst, matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset)))
 			nextEmit = s
 			if s >= sLimit {
 				goto emitRemainder
@@ -151,7 +150,8 @@ func encodeBestSpeed(dst []token, src []byte) []token {
 			currHash := hash(uint32(x >> 8))
 			candidate = int(table[currHash&tableMask])
 			table[currHash&tableMask] = uint16(s)
-			if s-candidate >= maxOffset || uint32(x>>8) != load32(src, candidate) {
+			// TODO: >= should be >, and add a test for that.
+			if s-candidate >= maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
 				nextHash = hash(uint32(x >> 16))
 				s++
 				break
diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go
index d5f55eab34..c1a4b60cd7 100644
--- a/src/compress/flate/inflate.go
+++ b/src/compress/flate/inflate.go
@@ -15,8 +15,7 @@ import (
 )
 
 const (
-	maxCodeLen = 16    // max length of Huffman code
-	maxHist    = 32768 // max history required
+	maxCodeLen = 16 // max length of Huffman code
 	// The next three numbers come from the RFC section 3.2.7, with the
 	// additional proviso in section 3.2.5 which implies that distance codes
 	// 30 and 31 should never occur in compressed data.
@@ -767,7 +766,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
 		dict:     f.dict,
 		step:     (*decompressor).nextBlock,
 	}
-	f.dict.init(maxHist, nil)
+	f.dict.init(maxMatchOffset, nil)
 	return nil
 }
 
@@ -787,7 +786,7 @@ func NewReader(r io.Reader) io.ReadCloser {
 	f.bits = new([maxNumLit + maxNumDist]int)
 	f.codebits = new([numCodes]int)
 	f.step = (*decompressor).nextBlock
-	f.dict.init(maxHist, nil)
+	f.dict.init(maxMatchOffset, nil)
 	return &f
 }
 
@@ -806,6 +805,6 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
 	f.bits = new([maxNumLit + maxNumDist]int)
 	f.codebits = new([numCodes]int)
 	f.step = (*decompressor).nextBlock
-	f.dict.init(maxHist, dict)
+	f.dict.init(maxMatchOffset, dict)
 	return &f
 }