From b6c5edae7c0e9dd6d12dbb8f1c9638dea45f9464 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 1 Feb 2016 22:02:52 -0500 Subject: [PATCH] =?UTF-8?q?archive/zip:=20handle=20pre-zip64=20zip=20files?= =?UTF-8?q?=20containing=202=C2=B3=C2=B2-1-byte=20content?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This corrects a regression from Go 1.5 introduced by CL 18317. Fixes #14185. Change-Id: Ic3215714846d9f28809cd04e3eb3664b599244f4 Reviewed-on: https://go-review.googlesource.com/19151 Reviewed-by: Brad Fitzpatrick --- src/archive/zip/reader.go | 12 +- src/archive/zip/reader_test.go | 235 +++++++++++++++++++++++++++++++-- 2 files changed, 236 insertions(+), 11 deletions(-) diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go index 84a9d41888..10e8172875 100644 --- a/src/archive/zip/reader.go +++ b/src/archive/zip/reader.go @@ -330,7 +330,17 @@ func readDirectoryHeader(f *File, r io.Reader) error { } } - if needUSize || needCSize || needHeaderOffset { + // Assume that uncompressed size 2³²-1 could plausibly happen in + // an old zip32 file that was sharding inputs into the largest chunks + // possible (or is just malicious; search the web for 42.zip). + // If needUSize is true still, it means we didn't see a zip64 extension. + // As long as the compressed size is not also 2³²-1 (implausible) + // and the header is not also 2³²-1 (equally implausible), + // accept the uncompressed size 2³²-1 as valid. + // If nothing else, this keeps archive/zip working with 42.zip. + _ = needUSize + + if needCSize || needHeaderOffset { return ErrFormat } diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go index 8f7e8bf555..72cf5d9cf4 100644 --- a/src/archive/zip/reader_test.go +++ b/src/archive/zip/reader_test.go @@ -27,12 +27,24 @@ type ZipTest struct { } type ZipTestFile struct { - Name string - Content []byte // if blank, will attempt to compare against File + Name string + Mode os.FileMode + Mtime string // optional, modified time in format "mm-dd-yy hh:mm:ss" + + // Information describing expected zip file content. + // First, reading the entire content should produce the error ContentErr. + // Second, if ContentErr==nil, the content should match Content. + // If content is large, an alternative to setting Content is to set File, + // which names a file in the testdata/ directory containing the + // uncompressed expected content. + // If content is very large, an alternative to setting Content or File + // is to set Size, which will then be checked against the header-reported size + // but will bypass the decompressing of the actual data. + // This last option is used for testing very large (multi-GB) compressed files. ContentErr error - File string // name of file to compare to (relative to testdata/) - Mtime string // modified time in format "mm-dd-yy hh:mm:ss" - Mode os.FileMode + Content []byte + File string + Size uint64 } // Caution: The Mtime values found for the test files should correspond to @@ -248,6 +260,19 @@ var tests = []ZipTest{ }, }, }, + // Largest possible non-zip64 file, with no zip64 header. + { + Name: "big.zip", + Source: returnBigZipBytes, + File: []ZipTestFile{ + { + Name: "big.file", + Content: nil, + Size: 1<<32 - 1, + Mode: 0666, + }, + }, + }, } var crossPlatform = []ZipTestFile{ @@ -356,13 +381,31 @@ func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File) { testFileMode(t, zt.Name, f, ft.Mode) - var b bytes.Buffer + size := uint64(f.UncompressedSize) + if size == uint32max { + size = f.UncompressedSize64 + } else if size != f.UncompressedSize64 { + t.Errorf("%v: UncompressedSize=%#x does not match UncompressedSize64=%#x", f.Name, size, f.UncompressedSize64) + } + r, err := f.Open() if err != nil { t.Errorf("%s: %v", zt.Name, err) return } + // For very large files, just check that the size is correct. + // The content is expected to be all zeros. + // Don't bother uncompressing: too big. + if ft.Content == nil && ft.File == "" && ft.Size > 0 { + if size != ft.Size { + t.Errorf("%v: uncompressed size %#x, want %#x", size, ft.Size) + } + r.Close() + return + } + + var b bytes.Buffer _, err = io.Copy(&b, r) if err != ft.ContentErr { t.Errorf("%s: copying contents: %v (want %v)", zt.Name, err, ft.ContentErr) @@ -372,10 +415,6 @@ func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File) { } r.Close() - size := uint64(f.UncompressedSize) - if size == uint32max { - size = f.UncompressedSize64 - } if g := uint64(b.Len()); g != size { t.Errorf("%v: read %v bytes but f.UncompressedSize == %v", f.Name, g, size) } @@ -510,6 +549,182 @@ func returnRecursiveZip() (r io.ReaderAt, size int64) { return bytes.NewReader(b), int64(len(b)) } +// biggestZipBytes returns the bytes of a zip file biggest.zip +// that contains a zip file bigger.zip that contains a zip file +// big.zip that contains big.file, which contains 2³²-1 zeros. +// The big.zip file is interesting because it has no zip64 header, +// much like the innermost zip files in the well-known 42.zip. +// +// biggest.zip was generated by changing isZip64 to use > uint32max +// instead of >= uint32max and then running this program: +// +// package main +// +// import ( +// "archive/zip" +// "bytes" +// "io" +// "io/ioutil" +// "log" +// ) +// +// type zeros struct{} +// +// func (zeros) Read(b []byte) (int, error) { +// for i := range b { +// b[i] = 0 +// } +// return len(b), nil +// } +// +// func main() { +// bigZip := makeZip("big.file", io.LimitReader(zeros{}, 1<<32-1)) +// if err := ioutil.WriteFile("/tmp/big.zip", bigZip, 0666); err != nil { +// log.Fatal(err) +// } +// +// biggerZip := makeZip("big.zip", bytes.NewReader(bigZip)) +// if err := ioutil.WriteFile("/tmp/bigger.zip", biggerZip, 0666); err != nil { +// log.Fatal(err) +// } +// +// biggestZip := makeZip("bigger.zip", bytes.NewReader(biggerZip)) +// if err := ioutil.WriteFile("/tmp/biggest.zip", biggestZip, 0666); err != nil { +// log.Fatal(err) +// } +// } +// +// func makeZip(name string, r io.Reader) []byte { +// var buf bytes.Buffer +// w := zip.NewWriter(&buf) +// wf, err := w.Create(name) +// if err != nil { +// log.Fatal(err) +// } +// if _, err = io.Copy(wf, r); err != nil { +// log.Fatal(err) +// } +// if err := w.Close(); err != nil { +// log.Fatal(err) +// } +// return buf.Bytes() +// } +// +// The 4 GB of zeros compresses to 4 MB, which compresses to 20 kB, +// which compresses to 1252 bytes (in the hex dump below). +// +// It's here in hex for the same reason as rZipBytes above: to avoid +// problems with on-disk virus scanners or other zip processors. +// +func biggestZipBytes() []byte { + s := ` +0000000 50 4b 03 04 14 00 08 00 08 00 00 00 00 00 00 00 +0000010 00 00 00 00 00 00 00 00 00 00 0a 00 00 00 62 69 +0000020 67 67 65 72 2e 7a 69 70 ec dc 6b 4c 53 67 18 07 +0000030 f0 16 c5 ca 65 2e cb b8 94 20 61 1f 44 33 c7 cd +0000040 c0 86 4a b5 c0 62 8a 61 05 c6 cd 91 b2 54 8c 1b +0000050 63 8b 03 9c 1b 95 52 5a e3 a0 19 6c b2 05 59 44 +0000060 64 9d 73 83 71 11 46 61 14 b9 1d 14 09 4a c3 60 +0000070 2e 4c 6e a5 60 45 02 62 81 95 b6 94 9e 9e 77 e7 +0000080 d0 43 b6 f8 71 df 96 3c e7 a4 69 ce bf cf e9 79 +0000090 ce ef 79 3f bf f1 31 db b6 bb 31 76 92 e7 f3 07 +00000a0 8b fc 9c ca cc 08 cc cb cc 5e d2 1c 88 d9 7e bb +00000b0 4f bb 3a 3f 75 f1 5d 7f 8f c2 68 67 77 8f 25 ff +00000c0 84 e2 93 2d ef a4 95 3d 71 4e 2c b9 b0 87 c3 be +00000d0 3d f8 a7 60 24 61 c5 ef ae 9e c8 6c 6d 4e 69 c8 +00000e0 67 65 34 f8 37 76 2d 76 5c 54 f3 95 65 49 c7 0f +00000f0 18 71 4b 7e 5b 6a d1 79 47 61 41 b0 4e 2a 74 45 +0000100 43 58 12 b2 5a a5 c6 7d 68 55 88 d4 98 75 18 6d +0000110 08 d1 1f 8f 5a 9e 96 ee 45 cf a4 84 4e 4b e8 50 +0000120 a7 13 d9 06 de 52 81 97 36 b2 d7 b8 fc 2b 5f 55 +0000130 23 1f 32 59 cf 30 27 fb e2 8a b9 de 45 dd 63 9c +0000140 4b b5 8b 96 4c 7a 62 62 cc a1 a7 cf fa f1 fe dd +0000150 54 62 11 bf 36 78 b3 c7 b1 b5 f2 61 4d 4e dd 66 +0000160 32 2e e6 70 34 5f f4 c9 e6 6c 43 6f da 6b c6 c3 +0000170 09 2c ce 09 57 7f d2 7e b4 23 ba 7c 1b 99 bc 22 +0000180 3e f1 de 91 2f e3 9c 1b 82 cc c2 84 39 aa e6 de +0000190 b4 69 fc cc cb 72 a6 61 45 f0 d3 1d 26 19 7c 8d +00001a0 29 c8 66 02 be 77 6a f9 3d 34 79 17 19 c8 96 24 +00001b0 a3 ac e4 dd 3b 1a 8e c6 fe 96 38 6b bf 67 5a 23 +00001c0 f4 16 f4 e6 8a b4 fc c2 cd bf 95 66 1d bb 35 aa +00001d0 92 7d 66 d8 08 8d a5 1f 54 2a af 09 cf 61 ff d2 +00001e0 85 9d 8f b6 d7 88 07 4a 86 03 db 64 f3 d9 92 73 +00001f0 df ec a7 fc 23 4c 8d 83 79 63 2a d9 fd 8d b3 c8 +0000200 8f 7e d4 19 85 e6 8d 1c 76 f0 8b 58 32 fd 9a d6 +0000210 85 e2 48 ad c3 d5 60 6f 7e 22 dd ef 09 49 7c 7f +0000220 3a 45 c3 71 b7 df f3 4c 63 fb b5 d9 31 5f 6e d6 +0000230 24 1d a4 4a fe 32 a7 5c 16 48 5c 3e 08 6b 8a d3 +0000240 25 1d a2 12 a5 59 24 ea 20 5f 52 6d ad 94 db 6b +0000250 94 b9 5d eb 4b a7 5c 44 bb 1e f2 3c 6b cf 52 c9 +0000260 e9 e5 ba 06 b9 c4 e5 0a d0 00 0d d0 00 0d d0 00 +0000270 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d +0000280 d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 +0000290 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 +00002a0 0d d0 00 cd ff 9e 46 86 fa a7 7d 3a 43 d7 8e 10 +00002b0 52 e9 be e6 6e cf eb 9e 85 4d 65 ce cc 30 c1 44 +00002c0 c0 4e af bc 9c 6c 4b a0 d7 54 ff 1d d5 5c 89 fb +00002d0 b5 34 7e c4 c2 9e f5 a0 f6 5b 7e 6e ca 73 c7 ef +00002e0 5d be de f9 e8 81 eb a5 0a a5 63 54 2c d7 1c d1 +00002f0 89 17 85 f8 16 94 f2 8a b2 a3 f5 b6 6d df 75 cd +0000300 90 dd 64 bd 5d 55 4e f2 55 19 1b b7 cc ef 1b ea +0000310 2e 05 9c f4 aa 1e a8 cd a6 82 c7 59 0f 5e 9d e0 +0000320 bb fc 6c d6 99 23 eb 36 ad c6 c5 e1 d8 e1 e2 3e +0000330 d9 90 5a f7 91 5d 6f bc 33 6d 98 47 d2 7c 2e 2f +0000340 99 a4 25 72 85 49 2c be 0b 5b af 8f e5 6e 81 a6 +0000350 a3 5a 6f 39 53 3a ab 7a 8b 1e 26 f7 46 6c 7d 26 +0000360 53 b3 22 31 94 d3 83 f2 18 4d f5 92 33 27 53 97 +0000370 0f d3 e6 55 9c a6 c5 31 87 6f d3 f3 ae 39 6f 56 +0000380 10 7b ab 7e d0 b4 ca f2 b8 05 be 3f 0e 6e 5a 75 +0000390 ab 0c f5 37 0e ba 8e 75 71 7a aa ed 7a dd 6a 63 +00003a0 be 9b a0 97 27 6a 6f e7 d3 8b c4 7c ec d3 91 56 +00003b0 d9 ac 5e bf 16 42 2f 00 1f 93 a2 23 87 bd e2 59 +00003c0 a0 de 1a 66 c8 62 eb 55 8f 91 17 b4 61 42 7a 50 +00003d0 40 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 +00003e0 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 03 +00003f0 34 40 03 34 40 03 34 ff 85 86 90 8b ea 67 90 0d +0000400 e1 42 1b d2 61 d6 79 ec fd 3e 44 28 a4 51 6c 5c +0000410 fc d2 72 ca ba 82 18 46 16 61 cd 93 a9 0f d1 24 +0000420 17 99 e2 2c 71 16 84 0c c8 7a 13 0f 9a 5e c5 f0 +0000430 79 64 e2 12 4d c8 82 a1 81 19 2d aa 44 6d 87 54 +0000440 84 71 c1 f6 d4 ca 25 8c 77 b9 08 c7 c8 5e 10 8a +0000450 8f 61 ed 8c ba 30 1f 79 9a c7 60 34 2b b9 8c f8 +0000460 18 a6 83 1b e3 9f ad 79 fe fd 1b 8b f1 fc 41 6f +0000470 d4 13 1f e3 b8 83 ba 64 92 e7 eb e4 77 05 8f ba +0000480 fa 3b 00 00 ff ff 50 4b 07 08 a6 18 b1 91 5e 04 +0000490 00 00 e4 47 00 00 50 4b 01 02 14 00 14 00 08 00 +00004a0 08 00 00 00 00 00 a6 18 b1 91 5e 04 00 00 e4 47 +00004b0 00 00 0a 00 00 00 00 00 00 00 00 00 00 00 00 00 +00004c0 00 00 00 00 62 69 67 67 65 72 2e 7a 69 70 50 4b +00004d0 05 06 00 00 00 00 01 00 01 00 38 00 00 00 96 04 +00004e0 00 00 00 00` + s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "") + s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "") + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +func returnBigZipBytes() (r io.ReaderAt, size int64) { + b := biggestZipBytes() + for i := 0; i < 2; i++ { + r, err := NewReader(bytes.NewReader(b), int64(len(b))) + if err != nil { + panic(err) + } + f, err := r.File[0].Open() + if err != nil { + panic(err) + } + b, err = ioutil.ReadAll(f) + if err != nil { + panic(err) + } + } + return bytes.NewReader(b), int64(len(b)) +} + func TestIssue8186(t *testing.T) { // Directory headers & data found in the TOC of a JAR file. dirEnts := []string{