From ec837ad73c20d9b33d8aea9d79ce68bf95598544 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick
Date: Wed, 14 Aug 2013 23:21:57 -0700
Subject: [PATCH] archive/zip: speed up Zip64 test

Took 76 seconds or so before. By avoiding flate and crc32 on 4GB of
data, it's now only 12 seconds. Still a slow test, but not painful to
run anymore when you forget -short.

R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/12950043
---
 src/pkg/archive/zip/zip_test.go | 162 +++++++++++++++++++++++++++++---
 1 file changed, 151 insertions(+), 11 deletions(-)

diff --git a/src/pkg/archive/zip/zip_test.go b/src/pkg/archive/zip/zip_test.go
index 870f0431442..32a16a79efb 100644
--- a/src/pkg/archive/zip/zip_test.go
+++ b/src/pkg/archive/zip/zip_test.go
@@ -9,8 +9,10 @@ package zip
 import (
 	"bytes"
 	"fmt"
+	"hash"
 	"io"
 	"io/ioutil"
+	"sort"
 	"strings"
 	"testing"
 	"time"
@@ -105,29 +107,156 @@ func TestFileHeaderRoundTrip64(t *testing.T) {
 	testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t)
 }
 
+type repeatedByte struct {
+	off int64
+	b   byte
+	n   int64
+}
+
+// rleBuffer is a run-length-encoded byte buffer.
+// It's an io.Writer (like a bytes.Buffer) and also an io.ReaderAt,
+// allowing random-access reads.
+type rleBuffer struct {
+	buf []repeatedByte
+}
+
+func (r *rleBuffer) Size() int64 {
+	if len(r.buf) == 0 {
+		return 0
+	}
+	last := &r.buf[len(r.buf)-1]
+	return last.off + last.n
+}
+
+func (r *rleBuffer) Write(p []byte) (n int, err error) {
+	var rp *repeatedByte
+	if len(r.buf) > 0 {
+		rp = &r.buf[len(r.buf)-1]
+		// Fast path, if p is entirely the same byte repeated.
+		if lastByte := rp.b; len(p) > 0 && p[0] == lastByte {
+			all := true
+			for _, b := range p {
+				if b != lastByte {
+					all = false
+					break
+				}
+			}
+			if all {
+				rp.n += int64(len(p))
+				return len(p), nil
+			}
+		}
+	}
+
+	for _, b := range p {
+		if rp == nil || rp.b != b {
+			r.buf = append(r.buf, repeatedByte{r.Size(), b, 1})
+			rp = &r.buf[len(r.buf)-1]
+		} else {
+			rp.n++
+		}
+	}
+	return len(p), nil
+}
+
+func (r *rleBuffer) ReadAt(p []byte, off int64) (n int, err error) {
+	if len(p) == 0 {
+		return
+	}
+	skipParts := sort.Search(len(r.buf), func(i int) bool {
+		part := &r.buf[i]
+		return part.off+part.n > off
+	})
+	parts := r.buf[skipParts:]
+	if len(parts) > 0 {
+		skipBytes := off - parts[0].off
+		for len(parts) > 0 {
+			part := parts[0]
+			for i := skipBytes; i < part.n; i++ {
+				if n == len(p) {
+					return
+				}
+				p[n] = part.b
+				n++
+			}
+			parts = parts[1:]
+			skipBytes = 0
+		}
+	}
+	if n != len(p) {
+		err = io.ErrUnexpectedEOF
+	}
+	return
+}
+
+// Just testing the rleBuffer used in the Zip64 test above. Not used by the zip code.
+func TestRLEBuffer(t *testing.T) {
+	b := new(rleBuffer)
+	var all []byte
+	writes := []string{"abcdeee", "eeeeeee", "eeeefghaaiii"}
+	for _, w := range writes {
+		b.Write([]byte(w))
+		all = append(all, w...)
+	}
+	if len(b.buf) != 10 {
+		t.Fatalf("len(b.buf) = %d; want 10", len(b.buf))
+	}
+
+	for i := 0; i < len(all); i++ {
+		for j := 0; j < len(all)-i; j++ {
+			buf := make([]byte, j)
+			n, err := b.ReadAt(buf, int64(i))
+			if err != nil || n != len(buf) {
+				t.Errorf("ReadAt(%d, %d) = %d, %v; want %d, nil", i, j, n, err, len(buf))
+			}
+			if !bytes.Equal(buf, all[i:i+j]) {
+				t.Errorf("ReadAt(%d, %d) = %q; want %q", i, j, buf, all[i:i+j])
+			}
+		}
+	}
+}
+
+// fakeHash32 is a dummy Hash32 that always returns 0.
+type fakeHash32 struct {
+	hash.Hash32
+}
+
+func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil }
+func (fakeHash32) Sum32() uint32               { return 0 }
+
 func TestZip64(t *testing.T) {
 	if testing.Short() {
 		t.Skip("slow test; skipping")
 	}
+	const size = 1 << 32 // before the "END\n" part
+	testZip64(t, size)
+}
+
+func testZip64(t testing.TB, size int64) {
+	const chunkSize = 1024
+	chunks := int(size / chunkSize)
 	// write 2^32 bytes plus "END\n" to a zip file
-	buf := new(bytes.Buffer)
+	buf := new(rleBuffer)
 	w := NewWriter(buf)
-	f, err := w.Create("huge.txt")
+	f, err := w.CreateHeader(&FileHeader{
+		Name:   "huge.txt",
+		Method: Store,
+	})
 	if err != nil {
 		t.Fatal(err)
 	}
-	chunk := make([]byte, 1024)
+	f.(*fileWriter).crc32 = fakeHash32{}
+	chunk := make([]byte, chunkSize)
 	for i := range chunk {
 		chunk[i] = '.'
 	}
-	chunk[len(chunk)-1] = '\n'
-	end := []byte("END\n")
-	for i := 0; i < (1<<32)/1024; i++ {
+	for i := 0; i < chunks; i++ {
 		_, err := f.Write(chunk)
 		if err != nil {
 			t.Fatal("write chunk:", err)
 		}
 	}
+	end := []byte("END\n")
 	_, err = f.Write(end)
 	if err != nil {
 		t.Fatal("write end:", err)
@@ -137,7 +266,7 @@ func TestZip64(t *testing.T) {
 	}
 
 	// read back zip file and check that we get to the end of it
-	r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
+	r, err := NewReader(buf, int64(buf.Size()))
 	if err != nil {
 		t.Fatal("reader:", err)
 	}
@@ -146,7 +275,8 @@ func TestZip64(t *testing.T) {
 	if err != nil {
 		t.Fatal("opening:", err)
 	}
-	for i := 0; i < (1<<32)/1024; i++ {
+	rc.(*checksumReader).hash = fakeHash32{}
+	for i := 0; i < chunks; i++ {
 		_, err := io.ReadFull(rc, chunk)
 		if err != nil {
 			t.Fatal("read:", err)
@@ -163,11 +293,13 @@ func TestZip64(t *testing.T) {
 	if err != nil {
 		t.Fatal("closing:", err)
 	}
-	if got, want := f0.UncompressedSize, uint32(uint32max); got != want {
-		t.Errorf("UncompressedSize %d, want %d", got, want)
+	if size == 1<<32 {
+		if got, want := f0.UncompressedSize, uint32(uint32max); got != want {
+			t.Errorf("UncompressedSize %d, want %d", got, want)
+		}
 	}
 
-	if got, want := f0.UncompressedSize64, (1<<32)+uint64(len(end)); got != want {
+	if got, want := f0.UncompressedSize64, uint64(size)+uint64(len(end)); got != want {
 		t.Errorf("UncompressedSize64 %d, want %d", got, want)
 	}
 }
@@ -253,3 +385,11 @@ func TestZeroLengthHeader(t *testing.T) {
 	}
 	testValidHeader(&h, t)
 }
+
+// Just benchmarking how fast the Zip64 test above is. Not related to
+// our zip performance, since the test above disabled CRC32 and flate.
+func BenchmarkZip64Test(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		testZip64(b, 1<<26)
+	}
+}
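
A note for readers of this change: the reason an rleBuffer makes the test cheap is that the ~4GB payload is almost entirely the single byte '.', so run-length encoding stores it as a handful of (byte, count) runs instead of 4GB of real memory. A minimal standalone sketch of the same idea, with illustrative names (run, rleAppend) that are not part of the patch:

package main

import "fmt"

// run is a simplified repeatedByte: one byte repeated n times.
type run struct {
	b byte
	n int64
}

// rleAppend coalesces p into runs, like the byte-at-a-time path of
// rleBuffer.Write (the patch also has a fast path for all-same chunks).
func rleAppend(runs []run, p []byte) []run {
	for _, b := range p {
		if len(runs) > 0 && runs[len(runs)-1].b == b {
			runs[len(runs)-1].n++
		} else {
			runs = append(runs, run{b, 1})
		}
	}
	return runs
}

func main() {
	var runs []run
	chunk := make([]byte, 1024)
	for i := range chunk {
		chunk[i] = '.'
	}
	// 64MB of dots (1<<26, the benchmark's size) collapses to a single
	// run; the full test's 4GB behaves the same way.
	for i := 0; i < (1<<26)/1024; i++ {
		runs = rleAppend(runs, chunk)
	}
	runs = rleAppend(runs, []byte("END\n"))
	fmt.Println(len(runs)) // 5 runs: '.', 'E', 'N', 'D', '\n'
}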
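
The fakeHash32 trick deserves a similar note: embedding the hash.Hash32 interface satisfies the interface at compile time, and overriding just Write and Sum32 makes checksumming a no-op. A rough sketch of the pattern, again with an illustrative name (nopHash32) rather than anything from the patch; any promoted method that is not overridden would panic on the nil embedded value, which is only acceptable because the test's code paths appear to call nothing but Write and Sum32:

package main

import (
	"fmt"
	"hash"
	"hash/crc32"
)

// nopHash32 embeds the hash.Hash32 interface to satisfy it, then overrides
// the two methods the zip test exercises so hashing costs nothing.
type nopHash32 struct {
	hash.Hash32 // nil; calling any other promoted method would panic
}

func (nopHash32) Write(p []byte) (int, error) { return len(p), nil }
func (nopHash32) Sum32() uint32               { return 0 }

func main() {
	var h hash.Hash32 = nopHash32{}
	h.Write(make([]byte, 1<<20)) // no checksumming work happens
	fmt.Println(h.Sum32())       // always 0

	// A real CRC32 actually processes its input.
	c := crc32.NewIEEE()
	c.Write([]byte("END\n"))
	fmt.Println(c.Sum32())
}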