From 833529fd6f5b1cc469a080980275ace3d43ade49 Mon Sep 17 00:00:00 2001
From: Nigel Tao <nigeltao@golang.org>
Date: Thu, 9 Jun 2011 09:50:38 +1000
Subject: [PATCH] compress/lzw: reduce decoder buffer size from 3*4096 to
 2*4096.

This happens to speed up the decoder benchmarks by 50% on my computer
(GOARCH=amd64 GOOS=linux), but I don't have a good intuition as to why.
For example, just adding an unused [4096]byte field to the decoder
struct doesn't significantly change the numbers.

Before:

lzw.BenchmarkDecoder1e4	    5000	    488057 ns/op	  20.49 MB/s
lzw.BenchmarkDecoder1e5	     500	   4613638 ns/op	  21.67 MB/s
lzw.BenchmarkDecoder1e6	      50	  45672260 ns/op	  21.90 MB/s
lzw.BenchmarkEncoder1e4	    5000	    353563 ns/op	  28.28 MB/s
lzw.BenchmarkEncoder1e5	     500	   3431618 ns/op	  29.14 MB/s
lzw.BenchmarkEncoder1e6	      50	  34009640 ns/op	  29.40 MB/s

After:

lzw.BenchmarkDecoder1e4	    5000	    339725 ns/op	  29.44 MB/s
lzw.BenchmarkDecoder1e5	     500	   3166894 ns/op	  31.58 MB/s
lzw.BenchmarkDecoder1e6	      50	  31317260 ns/op	  31.93 MB/s
lzw.BenchmarkEncoder1e4	    5000	    354909 ns/op	  28.18 MB/s
lzw.BenchmarkEncoder1e5	     500	   3432710 ns/op	  29.13 MB/s
lzw.BenchmarkEncoder1e6	      50	  34010500 ns/op	  29.40 MB/s

R=rsc, r
CC=golang-dev
https://golang.org/cl/4535123
---
 src/pkg/compress/lzw/reader.go | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/pkg/compress/lzw/reader.go b/src/pkg/compress/lzw/reader.go
index ccd882f88b..21231c8e51 100644
--- a/src/pkg/compress/lzw/reader.go
+++ b/src/pkg/compress/lzw/reader.go
@@ -64,13 +64,14 @@ type decoder struct {
 	// The c == hi case is a special case.
 	suffix [1 << maxWidth]uint8
 	prefix [1 << maxWidth]uint16
-	// buf is a scratch buffer for reconstituting the bytes that a code expands to.
-	// Code suffixes are written right-to-left from the end of the buffer.
-	buf [1 << maxWidth]byte
 
 	// output is the temporary output buffer.
+	// Literal codes are accumulated from the start of the buffer.
+	// Non-literal codes decode to a sequence of suffixes that are first
+	// written right-to-left from the end of the buffer before being copied
+	// to the start of the buffer.
 	// It is flushed when it contains >= 1<<maxWidth bytes,
-	// so that there is always room to copy buf into it while decoding.
+	// so that there is always room to decode an entire code.
 	output [2 * 1 << maxWidth]byte
 	o      int    // write index into output
 	toRead []byte // bytes to return from Read
@@ -158,7 +159,7 @@ func (d *decoder) decode() {
 			d.err = os.EOF
 			return
 		case code <= d.hi:
-			c, i := code, len(d.buf)-1
+			c, i := code, len(d.output)-1
 			if code == d.hi {
 				// code == hi is a special case which expands to the last expansion
 				// followed by the head of the last expansion. To find the head, we walk
@@ -167,18 +168,18 @@ func (d *decoder) decode() {
 				for c >= d.clear {
 					c = d.prefix[c]
 				}
-				d.buf[i] = uint8(c)
+				d.output[i] = uint8(c)
 				i--
 				c = d.last
 			}
-			// Copy the suffix chain into buf and then write that to w.
+			// Copy the suffix chain into output and then write that to w.
 			for c >= d.clear {
-				d.buf[i] = d.suffix[c]
+				d.output[i] = d.suffix[c]
 				i--
 				c = d.prefix[c]
 			}
-			d.buf[i] = uint8(c)
-			d.o += copy(d.output[d.o:], d.buf[i:])
+			d.output[i] = uint8(c)
+			d.o += copy(d.output[d.o:], d.output[i:])
 			if d.last != decoderInvalidCode {
 				// Save what the hi code expands to.
 				d.suffix[d.hi] = uint8(c)