compress/lzw: reduce decoder buffer size from 3*4096 to 2*4096.

This happens to speed up the decoder benchmarks by 50% on my computer (GOARCH=amd64 GOOS=linux), but I don't have a good intuition as to why. For example, just adding an unused [4096]byte field to the decoder struct doesn't significantly change the numbers. Before: lzw.BenchmarkDecoder1e4 5000 488057 ns/op 20.49 MB/s lzw.BenchmarkDecoder1e5 500 4613638 ns/op 21.67 MB/s lzw.BenchmarkDecoder1e6 50 45672260 ns/op 21.90 MB/s lzw.BenchmarkEncoder1e4 5000 353563 ns/op 28.28 MB/s lzw.BenchmarkEncoder1e5 500 3431618 ns/op 29.14 MB/s lzw.BenchmarkEncoder1e6 50 34009640 ns/op 29.40 MB/s After: lzw.BenchmarkDecoder1e4 5000 339725 ns/op 29.44 MB/s lzw.BenchmarkDecoder1e5 500 3166894 ns/op 31.58 MB/s lzw.BenchmarkDecoder1e6 50 31317260 ns/op 31.93 MB/s lzw.BenchmarkEncoder1e4 5000 354909 ns/op 28.18 MB/s lzw.BenchmarkEncoder1e5 500 3432710 ns/op 29.13 MB/s lzw.BenchmarkEncoder1e6 50 34010500 ns/op 29.40 MB/s R=rsc, r CC=golang-dev https://golang.org/cl/4535123
2024-11-19 14:54:43 -07:00 · 2011-06-09 09:50:38 +10:00 · 2011-06-09 09:50:38 +10:00 · 833529fd6f
commit 833529fd6f
parent 9c436ab7dc
1 changed files with 11 additions and 10 deletions
--- a/src/pkg/compress/lzw/reader.go
+++ b/src/pkg/compress/lzw/reader.go
@ -64,13 +64,14 @@ type decoder struct {
 	// The c == hi case is a special case.
 	suffix [1 << maxWidth]uint8
 	prefix [1 << maxWidth]uint16
-	// buf is a scratch buffer for reconstituting the bytes that a code expands to.
-	// Code suffixes are written right-to-left from the end of the buffer.
-	buf [1 << maxWidth]byte

 	// output is the temporary output buffer.
+	// Literal codes are accumulated from the start of the buffer.
+	// Non-literal codes decode to a sequence of suffixes that are first
+	// written right-to-left from the end of the buffer before being copied
+	// to the start of the buffer.
 	// It is flushed when it contains >= 1<<maxWidth bytes,
-	// so that there is always room to copy buf into it while decoding.
+	// so that there is always room to decode an entire code.
 	output [2 * 1 << maxWidth]byte
 	o      int    // write index into output
 	toRead []byte // bytes to return from Read
@ -158,7 +159,7 @@ func (d *decoder) decode() {
 			d.err = os.EOF
 			return
 		case code <= d.hi:
-			c, i := code, len(d.buf)-1
+			c, i := code, len(d.output)-1
 			if code == d.hi {
 				// code == hi is a special case which expands to the last expansion
 				// followed by the head of the last expansion. To find the head, we walk
@ -167,18 +168,18 @@ func (d *decoder) decode() {
 				for c >= d.clear {
 					c = d.prefix[c]
 				}
-				d.buf[i] = uint8(c)
+				d.output[i] = uint8(c)
 				i--
 				c = d.last
 			}
-			// Copy the suffix chain into buf and then write that to w.
+			// Copy the suffix chain into output and then write that to w.
 			for c >= d.clear {
-				d.buf[i] = d.suffix[c]
+				d.output[i] = d.suffix[c]
 				i--
 				c = d.prefix[c]
 			}
-			d.buf[i] = uint8(c)
-			d.o += copy(d.output[d.o:], d.buf[i:])
+			d.output[i] = uint8(c)
+			d.o += copy(d.output[d.o:], d.output[i:])
 			if d.last != decoderInvalidCode {
 				// Save what the hi code expands to.
 				d.suffix[d.hi] = uint8(c)