mirror of
https://github.com/golang/go
synced 2024-11-22 09:34:54 -07:00
archive/zip: speed up Zip64 test
Took 76 seconds or so before. By avoiding flate and crc32 on 4GB of data, it's now only 12 seconds. Still a slow test, but not painful to run anymore when you forget -short. R=golang-dev, adg CC=golang-dev https://golang.org/cl/12950043
This commit is contained in:
parent
35d8bb39bd
commit
ec837ad73c
@ -9,8 +9,10 @@ package zip
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"hash"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
@ -105,29 +107,156 @@ func TestFileHeaderRoundTrip64(t *testing.T) {
|
|||||||
testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t)
|
testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type repeatedByte struct {
|
||||||
|
off int64
|
||||||
|
b byte
|
||||||
|
n int64
|
||||||
|
}
|
||||||
|
|
||||||
|
// rleBuffer is a run-length-encoded byte buffer.
|
||||||
|
// It's an io.Writer (like a bytes.Buffer) and also an io.ReaderAt,
|
||||||
|
// allowing random-access reads.
|
||||||
|
type rleBuffer struct {
|
||||||
|
buf []repeatedByte
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *rleBuffer) Size() int64 {
|
||||||
|
if len(r.buf) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
last := &r.buf[len(r.buf)-1]
|
||||||
|
return last.off + last.n
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *rleBuffer) Write(p []byte) (n int, err error) {
|
||||||
|
var rp *repeatedByte
|
||||||
|
if len(r.buf) > 0 {
|
||||||
|
rp = &r.buf[len(r.buf)-1]
|
||||||
|
// Fast path, if p is entirely the same byte repeated.
|
||||||
|
if lastByte := rp.b; len(p) > 0 && p[0] == lastByte {
|
||||||
|
all := true
|
||||||
|
for _, b := range p {
|
||||||
|
if b != lastByte {
|
||||||
|
all = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if all {
|
||||||
|
rp.n += int64(len(p))
|
||||||
|
return len(p), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, b := range p {
|
||||||
|
if rp == nil || rp.b != b {
|
||||||
|
r.buf = append(r.buf, repeatedByte{r.Size(), b, 1})
|
||||||
|
rp = &r.buf[len(r.buf)-1]
|
||||||
|
} else {
|
||||||
|
rp.n++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len(p), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *rleBuffer) ReadAt(p []byte, off int64) (n int, err error) {
|
||||||
|
if len(p) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
skipParts := sort.Search(len(r.buf), func(i int) bool {
|
||||||
|
part := &r.buf[i]
|
||||||
|
return part.off+part.n > off
|
||||||
|
})
|
||||||
|
parts := r.buf[skipParts:]
|
||||||
|
if len(parts) > 0 {
|
||||||
|
skipBytes := off - parts[0].off
|
||||||
|
for len(parts) > 0 {
|
||||||
|
part := parts[0]
|
||||||
|
for i := skipBytes; i < part.n; i++ {
|
||||||
|
if n == len(p) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
p[n] = part.b
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
parts = parts[1:]
|
||||||
|
skipBytes = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if n != len(p) {
|
||||||
|
err = io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Just testing the rleBuffer used in the Zip64 test above. Not used by the zip code.
|
||||||
|
func TestRLEBuffer(t *testing.T) {
|
||||||
|
b := new(rleBuffer)
|
||||||
|
var all []byte
|
||||||
|
writes := []string{"abcdeee", "eeeeeee", "eeeefghaaiii"}
|
||||||
|
for _, w := range writes {
|
||||||
|
b.Write([]byte(w))
|
||||||
|
all = append(all, w...)
|
||||||
|
}
|
||||||
|
if len(b.buf) != 10 {
|
||||||
|
t.Fatalf("len(b.buf) = %d; want 10", len(b.buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(all); i++ {
|
||||||
|
for j := 0; j < len(all)-i; j++ {
|
||||||
|
buf := make([]byte, j)
|
||||||
|
n, err := b.ReadAt(buf, int64(i))
|
||||||
|
if err != nil || n != len(buf) {
|
||||||
|
t.Errorf("ReadAt(%d, %d) = %d, %v; want %d, nil", i, j, n, err, len(buf))
|
||||||
|
}
|
||||||
|
if !bytes.Equal(buf, all[i:i+j]) {
|
||||||
|
t.Errorf("ReadAt(%d, %d) = %q; want %q", i, j, buf, all[i:i+j])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fakeHash32 is a dummy Hash32 that always returns 0.
|
||||||
|
type fakeHash32 struct {
|
||||||
|
hash.Hash32
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil }
|
||||||
|
func (fakeHash32) Sum32() uint32 { return 0 }
|
||||||
|
|
||||||
func TestZip64(t *testing.T) {
|
func TestZip64(t *testing.T) {
|
||||||
if testing.Short() {
|
if testing.Short() {
|
||||||
t.Skip("slow test; skipping")
|
t.Skip("slow test; skipping")
|
||||||
}
|
}
|
||||||
|
const size = 1 << 32 // before the "END\n" part
|
||||||
|
testZip64(t, size)
|
||||||
|
}
|
||||||
|
|
||||||
|
func testZip64(t testing.TB, size int64) {
|
||||||
|
const chunkSize = 1024
|
||||||
|
chunks := int(size / chunkSize)
|
||||||
// write 2^32 bytes plus "END\n" to a zip file
|
// write 2^32 bytes plus "END\n" to a zip file
|
||||||
buf := new(bytes.Buffer)
|
buf := new(rleBuffer)
|
||||||
w := NewWriter(buf)
|
w := NewWriter(buf)
|
||||||
f, err := w.Create("huge.txt")
|
f, err := w.CreateHeader(&FileHeader{
|
||||||
|
Name: "huge.txt",
|
||||||
|
Method: Store,
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
chunk := make([]byte, 1024)
|
f.(*fileWriter).crc32 = fakeHash32{}
|
||||||
|
chunk := make([]byte, chunkSize)
|
||||||
for i := range chunk {
|
for i := range chunk {
|
||||||
chunk[i] = '.'
|
chunk[i] = '.'
|
||||||
}
|
}
|
||||||
chunk[len(chunk)-1] = '\n'
|
for i := 0; i < chunks; i++ {
|
||||||
end := []byte("END\n")
|
|
||||||
for i := 0; i < (1<<32)/1024; i++ {
|
|
||||||
_, err := f.Write(chunk)
|
_, err := f.Write(chunk)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal("write chunk:", err)
|
t.Fatal("write chunk:", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
end := []byte("END\n")
|
||||||
_, err = f.Write(end)
|
_, err = f.Write(end)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal("write end:", err)
|
t.Fatal("write end:", err)
|
||||||
@ -137,7 +266,7 @@ func TestZip64(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// read back zip file and check that we get to the end of it
|
// read back zip file and check that we get to the end of it
|
||||||
r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
|
r, err := NewReader(buf, int64(buf.Size()))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal("reader:", err)
|
t.Fatal("reader:", err)
|
||||||
}
|
}
|
||||||
@ -146,7 +275,8 @@ func TestZip64(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal("opening:", err)
|
t.Fatal("opening:", err)
|
||||||
}
|
}
|
||||||
for i := 0; i < (1<<32)/1024; i++ {
|
rc.(*checksumReader).hash = fakeHash32{}
|
||||||
|
for i := 0; i < chunks; i++ {
|
||||||
_, err := io.ReadFull(rc, chunk)
|
_, err := io.ReadFull(rc, chunk)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal("read:", err)
|
t.Fatal("read:", err)
|
||||||
@ -163,11 +293,13 @@ func TestZip64(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal("closing:", err)
|
t.Fatal("closing:", err)
|
||||||
}
|
}
|
||||||
if got, want := f0.UncompressedSize, uint32(uint32max); got != want {
|
if size == 1<<32 {
|
||||||
t.Errorf("UncompressedSize %d, want %d", got, want)
|
if got, want := f0.UncompressedSize, uint32(uint32max); got != want {
|
||||||
|
t.Errorf("UncompressedSize %d, want %d", got, want)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if got, want := f0.UncompressedSize64, (1<<32)+uint64(len(end)); got != want {
|
if got, want := f0.UncompressedSize64, uint64(size)+uint64(len(end)); got != want {
|
||||||
t.Errorf("UncompressedSize64 %d, want %d", got, want)
|
t.Errorf("UncompressedSize64 %d, want %d", got, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -253,3 +385,11 @@ func TestZeroLengthHeader(t *testing.T) {
|
|||||||
}
|
}
|
||||||
testValidHeader(&h, t)
|
testValidHeader(&h, t)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Just benchmarking how fast the Zip64 test above is. Not related to
|
||||||
|
// our zip performance, since the test above disabled CRC32 and flate.
|
||||||
|
func BenchmarkZip64Test(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
testZip64(b, 1<<26)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user