compress/flate: make lazy matching work.

R=rsc, imkrasin
CC=golang-dev
https://golang.org/cl/5554066
2024-11-24 21:00:09 -07:00 · 2012-01-20 23:35:18 -05:00 · 2012-01-20 23:35:18 -05:00 · c4b16a3864
commit c4b16a3864
parent fb3b27329e
3 changed files with 8916 additions and 24 deletions
--- a/src/pkg/compress/flate/deflate.go
+++ b/src/pkg/compress/flate/deflate.go
@@ -31,6 +31,8 @@ const (
 	hashSize            = 1 << hashBits
 	hashMask            = (1 << hashBits) - 1
 	hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
+
+	skipNever = math.MaxInt32
 )

 type compressionLevel struct {
@@ -45,12 +47,12 @@ var levels = []compressionLevel{
 	{3, 0, 32, 32, 6},
 	// Levels 4-9 use increasingly more lazy matching
 	// and increasingly stringent conditions for "good enough".
-	{4, 4, 16, 16, math.MaxInt32},
-	{8, 16, 32, 32, math.MaxInt32},
-	{8, 16, 128, 128, math.MaxInt32},
-	{8, 32, 128, 256, math.MaxInt32},
-	{32, 128, 258, 1024, math.MaxInt32},
-	{32, 258, 258, 4096, math.MaxInt32},
+	{4, 4, 16, 16, skipNever},
+	{8, 16, 32, 32, skipNever},
+	{8, 16, 128, 128, skipNever},
+	{8, 32, 128, 256, skipNever},
+	{32, 128, 258, 1024, skipNever},
+	{32, 258, 258, 4096, skipNever},
 }

 type compressor struct {
@@ -100,7 +102,7 @@ func (d *compressor) fillDeflate(b []byte) int {
 		if d.blockStart >= windowSize {
 			d.blockStart -= windowSize
 		} else {
-			d.blockStart = math.MaxInt32
+			d.blockStart = skipNever
 		}
 		for i, h := range d.hashHead {
 			v := h - windowSize
@@ -273,18 +275,18 @@ Loop:
 		}

 		if d.chainHead >= minIndex &&
-			(d.fastSkipHashing != 0 && lookahead > minMatchLength-1 ||
-				d.fastSkipHashing == 0 && lookahead > prevLength && prevLength < d.lazy) {
+			(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
+				d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
 			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead, minMatchLength-1, lookahead); ok {
 				d.length = newLength
 				d.offset = newOffset
 			}
 		}
-		if d.fastSkipHashing != 0 && d.length >= minMatchLength ||
-			d.fastSkipHashing == 0 && prevLength >= minMatchLength && d.length <= prevLength {
+		if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
+			d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
-			if d.fastSkipHashing != 0 {
+			if d.fastSkipHashing != skipNever {
 				d.tokens[d.ti] = matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize))
 			} else {
 				d.tokens[d.ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))
@@ -296,10 +298,10 @@ Loop:
 			// table.
 			if d.length <= d.fastSkipHashing {
 				var newIndex int
-				if d.fastSkipHashing != 0 {
+				if d.fastSkipHashing != skipNever {
 					newIndex = d.index + d.length
 				} else {
-					newIndex = prevLength - 1
+					newIndex = d.index + prevLength - 1
 				}
 				for d.index++; d.index < newIndex; d.index++ {
 					if d.index < d.maxInsertIndex {
@@ -311,7 +313,7 @@ Loop:
 						d.hashHead[d.hash] = d.index
 					}
 				}
-				if d.fastSkipHashing == 0 {
+				if d.fastSkipHashing == skipNever {
 					d.byteAvailable = false
 					d.length = minMatchLength - 1
 				}
@@ -331,9 +333,9 @@ Loop:
 				d.ti = 0
 			}
 		} else {
-			if d.fastSkipHashing != 0 || d.byteAvailable {
+			if d.fastSkipHashing != skipNever || d.byteAvailable {
 				i := d.index - 1
-				if d.fastSkipHashing != 0 {
+				if d.fastSkipHashing != skipNever {
 					i = d.index
 				}
 				d.tokens[d.ti] = literalToken(uint32(d.window[i]))
@@ -346,7 +348,7 @@ Loop:
 				}
 			}
 			d.index++
-			if d.fastSkipHashing == 0 {
+			if d.fastSkipHashing == skipNever {
 				d.byteAvailable = true
 			}
 		}
--- a/src/pkg/compress/flate/deflate_test.go
+++ b/src/pkg/compress/flate/deflate_test.go
@@ -225,10 +225,17 @@ func testSync(t *testing.T, level int, input []byte, name string) {
 }

 func testToFromWithLevel(t *testing.T, level int, input []byte, name string) error {
+	return testToFromWithLevelAndLimit(t, level, input, name, -1)
+}
+
+func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name string, limit int) error {
 	buffer := bytes.NewBuffer(nil)
 	w := NewWriter(buffer, level)
 	w.Write(input)
 	w.Close()
+	if limit > 0 && buffer.Len() > limit {
+		t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit)
+	}
 	r := NewReader(buffer)
 	out, err := ioutil.ReadAll(r)
 	if err != nil {
@@ -244,12 +251,16 @@ func testToFromWithLevel(t *testing.T, level int, input []byte, name string) err
 	return nil
 }

-func testToFrom(t *testing.T, input []byte, name string) {
+func testToFromWithLimit(t *testing.T, input []byte, name string, limit [10]int) {
 	for i := 0; i < 10; i++ {
-		testToFromWithLevel(t, i, input, name)
+		testToFromWithLevelAndLimit(t, i, input, name, limit[i])
 	}
 }

+func testToFrom(t *testing.T, input []byte, name string) {
+	testToFromWithLimit(t, input, name, [10]int{})
+}
+
 func TestDeflateInflate(t *testing.T) {
 	for i, h := range deflateInflateTests {
 		testToFrom(t, h.in, fmt.Sprintf("#%d", i))
@@ -265,12 +276,33 @@ func TestReverseBits(t *testing.T) {
 	}
 }

+type deflateInflateStringTest struct {
+	filename string
+	label    string
+	limit    [10]int
+}
+
+var deflateInflateStringTests = []deflateInflateStringTest{
+	{
+		"../testdata/e.txt",
+		"2.718281828...",
+		[...]int{10013, 5065, 5096, 5115, 5093, 5079, 5079, 5079, 5079, 5079},
+	},
+	{
+		"../testdata/Mark.Twain-Tom.Sawyer.txt",
+		"Mark.Twain-Tom.Sawyer",
+		[...]int{416188, 191483, 185232, 179560, 175233, 171263, 169908, 169758, 169712, 169712},
+	},
+}
+
 func TestDeflateInflateString(t *testing.T) {
-	gold, err := ioutil.ReadFile("../testdata/e.txt")
-	if err != nil {
-		t.Error(err)
+	for _, test := range deflateInflateStringTests {
+		gold, err := ioutil.ReadFile(test.filename)
+		if err != nil {
+			t.Error(err)
+		}
+		testToFromWithLimit(t, gold, test.label, test.limit)
 	}
-	testToFromWithLevel(t, 1, gold, "2.718281828...")
 }

 func TestReaderDict(t *testing.T) {
--- a/src/pkg/compress/testdata/Mark.Twain-Tom.Sawyer.txt
+++ b/src/pkg/compress/testdata/Mark.Twain-Tom.Sawyer.txt