From 833529fd6f5b1cc469a080980275ace3d43ade49 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Thu, 9 Jun 2011 09:50:38 +1000 Subject: [PATCH] compress/lzw: reduce decoder buffer size from 3*4096 to 2*4096. This happens to speed up the decoder benchmarks by 50% on my computer (GOARCH=amd64 GOOS=linux), but I don't have a good intuition as to why. For example, just adding an unused [4096]byte field to the decoder struct doesn't significantly change the numbers. Before: lzw.BenchmarkDecoder1e4 5000 488057 ns/op 20.49 MB/s lzw.BenchmarkDecoder1e5 500 4613638 ns/op 21.67 MB/s lzw.BenchmarkDecoder1e6 50 45672260 ns/op 21.90 MB/s lzw.BenchmarkEncoder1e4 5000 353563 ns/op 28.28 MB/s lzw.BenchmarkEncoder1e5 500 3431618 ns/op 29.14 MB/s lzw.BenchmarkEncoder1e6 50 34009640 ns/op 29.40 MB/s After: lzw.BenchmarkDecoder1e4 5000 339725 ns/op 29.44 MB/s lzw.BenchmarkDecoder1e5 500 3166894 ns/op 31.58 MB/s lzw.BenchmarkDecoder1e6 50 31317260 ns/op 31.93 MB/s lzw.BenchmarkEncoder1e4 5000 354909 ns/op 28.18 MB/s lzw.BenchmarkEncoder1e5 500 3432710 ns/op 29.13 MB/s lzw.BenchmarkEncoder1e6 50 34010500 ns/op 29.40 MB/s R=rsc, r CC=golang-dev https://golang.org/cl/4535123 --- src/pkg/compress/lzw/reader.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/pkg/compress/lzw/reader.go b/src/pkg/compress/lzw/reader.go index ccd882f88b..21231c8e51 100644 --- a/src/pkg/compress/lzw/reader.go +++ b/src/pkg/compress/lzw/reader.go @@ -64,13 +64,14 @@ type decoder struct { // The c == hi case is a special case. suffix [1 << maxWidth]uint8 prefix [1 << maxWidth]uint16 - // buf is a scratch buffer for reconstituting the bytes that a code expands to. - // Code suffixes are written right-to-left from the end of the buffer. - buf [1 << maxWidth]byte // output is the temporary output buffer. + // Literal codes are accumulated from the start of the buffer. 
+ // Non-literal codes decode to a sequence of suffixes that are first + // written right-to-left from the end of the buffer before being copied + // to the start of the buffer. // It is flushed when it contains >= 1<<maxWidth bytes [... the remainder of this hunk and the beginning of the next hunk were lost in extraction ...] for c >= d.clear { c = d.prefix[c] } - d.buf[i] = uint8(c) + d.output[i] = uint8(c) i-- c = d.last } - // Copy the suffix chain into buf and then write that to w. + // Copy the suffix chain into output and then write that to w. for c >= d.clear { - d.buf[i] = d.suffix[c] + d.output[i] = d.suffix[c] i-- c = d.prefix[c] } - d.buf[i] = uint8(c) - d.o += copy(d.output[d.o:], d.buf[i:]) + d.output[i] = uint8(c) + d.o += copy(d.output[d.o:], d.output[i:]) if d.last != decoderInvalidCode { // Save what the hi code expands to. d.suffix[d.hi] = uint8(c)