mirror of
https://github.com/golang/go
synced 2024-11-22 01:54:42 -07:00
archive/zip: more efficient reader and bug fix
Fixes #2090.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/4815068
This commit is contained in:
parent
60dac9b3db
commit
e0b6f4721f
@@ -6,7 +6,6 @@ package zip
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
|
||||||
"compress/flate"
|
"compress/flate"
|
||||||
"hash"
|
"hash"
|
||||||
"hash/crc32"
|
"hash/crc32"
|
||||||
@@ -37,8 +36,7 @@ type File struct {
|
|||||||
FileHeader
|
FileHeader
|
||||||
zipr io.ReaderAt
|
zipr io.ReaderAt
|
||||||
zipsize int64
|
zipsize int64
|
||||||
headerOffset uint32
|
headerOffset int64
|
||||||
bodyOffset int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *File) hasDataDescriptor() bool {
|
func (f *File) hasDataDescriptor() bool {
|
||||||
@@ -90,12 +88,12 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
|
|||||||
|
|
||||||
// The count of files inside a zip is truncated to fit in a uint16.
|
// The count of files inside a zip is truncated to fit in a uint16.
|
||||||
// Gloss over this by reading headers until we encounter
|
// Gloss over this by reading headers until we encounter
|
||||||
// a bad one, and then only report a FormatError if
|
// a bad one, and then only report a FormatError or UnexpectedEOF if
|
||||||
// the file count modulo 65536 is incorrect.
|
// the file count modulo 65536 is incorrect.
|
||||||
for {
|
for {
|
||||||
f := &File{zipr: r, zipsize: size}
|
f := &File{zipr: r, zipsize: size}
|
||||||
err := readDirectoryHeader(f, buf)
|
err = readDirectoryHeader(f, buf)
|
||||||
if err == FormatError {
|
if err == FormatError || err == io.ErrUnexpectedEOF {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -104,9 +102,10 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
|
|||||||
z.File = append(z.File, f)
|
z.File = append(z.File, f)
|
||||||
}
|
}
|
||||||
if uint16(len(z.File)) != end.directoryRecords {
|
if uint16(len(z.File)) != end.directoryRecords {
|
||||||
return FormatError
|
// Return the readDirectoryHeader error if we read
|
||||||
|
// the wrong number of directory entries.
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,26 +115,18 @@ func (rc *ReadCloser) Close() os.Error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Open returns a ReadCloser that provides access to the File's contents.
|
// Open returns a ReadCloser that provides access to the File's contents.
|
||||||
|
// It is safe to Open and Read from files concurrently.
|
||||||
func (f *File) Open() (rc io.ReadCloser, err os.Error) {
|
func (f *File) Open() (rc io.ReadCloser, err os.Error) {
|
||||||
off := int64(f.headerOffset)
|
bodyOffset, err := f.findBodyOffset()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
size := int64(f.CompressedSize)
|
size := int64(f.CompressedSize)
|
||||||
if f.bodyOffset == 0 {
|
if size == 0 && f.hasDataDescriptor() {
|
||||||
r := io.NewSectionReader(f.zipr, off, f.zipsize-off)
|
|
||||||
if err = readFileHeader(f, r); err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if f.bodyOffset, err = r.Seek(0, os.SEEK_CUR); err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if size == 0 {
|
|
||||||
size = int64(f.CompressedSize)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if f.hasDataDescriptor() && size == 0 {
|
|
||||||
// permit SectionReader to see the rest of the file
|
// permit SectionReader to see the rest of the file
|
||||||
size = f.zipsize - (off + f.bodyOffset)
|
size = f.zipsize - (f.headerOffset + bodyOffset)
|
||||||
}
|
}
|
||||||
r := io.NewSectionReader(f.zipr, off+f.bodyOffset, size)
|
r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
|
||||||
switch f.Method {
|
switch f.Method {
|
||||||
case Store: // (no compression)
|
case Store: // (no compression)
|
||||||
rc = ioutil.NopCloser(r)
|
rc = ioutil.NopCloser(r)
|
||||||
@@ -176,75 +167,99 @@ func (r *checksumReader) Read(b []byte) (n int, err os.Error) {
|
|||||||
|
|
||||||
func (r *checksumReader) Close() os.Error { return r.rc.Close() }
|
func (r *checksumReader) Close() os.Error { return r.rc.Close() }
|
||||||
|
|
||||||
func readFileHeader(f *File, r io.Reader) (err os.Error) {
|
func readFileHeader(f *File, r io.Reader) os.Error {
|
||||||
defer recoverError(&err)
|
var b [fileHeaderLen]byte
|
||||||
var (
|
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||||
signature uint32
|
return err
|
||||||
filenameLength uint16
|
}
|
||||||
extraLength uint16
|
c := binary.LittleEndian
|
||||||
)
|
if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
|
||||||
read(r, &signature)
|
|
||||||
if signature != fileHeaderSignature {
|
|
||||||
return FormatError
|
return FormatError
|
||||||
}
|
}
|
||||||
read(r, &f.ReaderVersion)
|
f.ReaderVersion = c.Uint16(b[4:6])
|
||||||
read(r, &f.Flags)
|
f.Flags = c.Uint16(b[6:8])
|
||||||
read(r, &f.Method)
|
f.Method = c.Uint16(b[8:10])
|
||||||
read(r, &f.ModifiedTime)
|
f.ModifiedTime = c.Uint16(b[10:12])
|
||||||
read(r, &f.ModifiedDate)
|
f.ModifiedDate = c.Uint16(b[12:14])
|
||||||
read(r, &f.CRC32)
|
f.CRC32 = c.Uint32(b[14:18])
|
||||||
read(r, &f.CompressedSize)
|
f.CompressedSize = c.Uint32(b[18:22])
|
||||||
read(r, &f.UncompressedSize)
|
f.UncompressedSize = c.Uint32(b[22:26])
|
||||||
read(r, &filenameLength)
|
filenameLen := int(c.Uint16(b[26:28]))
|
||||||
read(r, &extraLength)
|
extraLen := int(c.Uint16(b[28:30]))
|
||||||
f.Name = string(readByteSlice(r, filenameLength))
|
d := make([]byte, filenameLen+extraLen)
|
||||||
f.Extra = readByteSlice(r, extraLength)
|
if _, err := io.ReadFull(r, d); err != nil {
|
||||||
return
|
return err
|
||||||
|
}
|
||||||
|
f.Name = string(d[:filenameLen])
|
||||||
|
f.Extra = d[filenameLen:]
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readDirectoryHeader(f *File, r io.Reader) (err os.Error) {
|
// findBodyOffset does the minimum work to verify the file has a header
|
||||||
defer recoverError(&err)
|
// and returns the file body offset.
|
||||||
var (
|
func (f *File) findBodyOffset() (int64, os.Error) {
|
||||||
signature uint32
|
r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset)
|
||||||
filenameLength uint16
|
var b [fileHeaderLen]byte
|
||||||
extraLength uint16
|
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||||
commentLength uint16
|
return 0, err
|
||||||
startDiskNumber uint16 // unused
|
}
|
||||||
internalAttributes uint16 // unused
|
c := binary.LittleEndian
|
||||||
externalAttributes uint32 // unused
|
if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
|
||||||
)
|
return 0, FormatError
|
||||||
read(r, &signature)
|
}
|
||||||
if signature != directoryHeaderSignature {
|
filenameLen := int(c.Uint16(b[26:28]))
|
||||||
|
extraLen := int(c.Uint16(b[28:30]))
|
||||||
|
return int64(fileHeaderLen + filenameLen + extraLen), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readDirectoryHeader attempts to read a directory header from r.
|
||||||
|
// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
|
||||||
|
// and FormatError if it doesn't find a valid header signature.
|
||||||
|
func readDirectoryHeader(f *File, r io.Reader) os.Error {
|
||||||
|
var b [directoryHeaderLen]byte
|
||||||
|
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
c := binary.LittleEndian
|
||||||
|
if sig := c.Uint32(b[:4]); sig != directoryHeaderSignature {
|
||||||
return FormatError
|
return FormatError
|
||||||
}
|
}
|
||||||
read(r, &f.CreatorVersion)
|
f.CreatorVersion = c.Uint16(b[4:6])
|
||||||
read(r, &f.ReaderVersion)
|
f.ReaderVersion = c.Uint16(b[6:8])
|
||||||
read(r, &f.Flags)
|
f.Flags = c.Uint16(b[8:10])
|
||||||
read(r, &f.Method)
|
f.Method = c.Uint16(b[10:12])
|
||||||
read(r, &f.ModifiedTime)
|
f.ModifiedTime = c.Uint16(b[12:14])
|
||||||
read(r, &f.ModifiedDate)
|
f.ModifiedDate = c.Uint16(b[14:16])
|
||||||
read(r, &f.CRC32)
|
f.CRC32 = c.Uint32(b[16:20])
|
||||||
read(r, &f.CompressedSize)
|
f.CompressedSize = c.Uint32(b[20:24])
|
||||||
read(r, &f.UncompressedSize)
|
f.UncompressedSize = c.Uint32(b[24:28])
|
||||||
read(r, &filenameLength)
|
filenameLen := int(c.Uint16(b[28:30]))
|
||||||
read(r, &extraLength)
|
extraLen := int(c.Uint16(b[30:32]))
|
||||||
read(r, &commentLength)
|
commentLen := int(c.Uint16(b[32:34]))
|
||||||
read(r, &startDiskNumber)
|
// startDiskNumber := c.Uint16(b[34:36]) // Unused
|
||||||
read(r, &internalAttributes)
|
// internalAttributes := c.Uint16(b[36:38]) // Unused
|
||||||
read(r, &externalAttributes)
|
// externalAttributes := c.Uint32(b[38:42]) // Unused
|
||||||
read(r, &f.headerOffset)
|
f.headerOffset = int64(c.Uint32(b[42:46]))
|
||||||
f.Name = string(readByteSlice(r, filenameLength))
|
d := make([]byte, filenameLen+extraLen+commentLen)
|
||||||
f.Extra = readByteSlice(r, extraLength)
|
if _, err := io.ReadFull(r, d); err != nil {
|
||||||
f.Comment = string(readByteSlice(r, commentLength))
|
return err
|
||||||
return
|
}
|
||||||
|
f.Name = string(d[:filenameLen])
|
||||||
|
f.Extra = d[filenameLen : filenameLen+extraLen]
|
||||||
|
f.Comment = string(d[filenameLen+extraLen:])
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readDataDescriptor(r io.Reader, f *File) (err os.Error) {
|
func readDataDescriptor(r io.Reader, f *File) os.Error {
|
||||||
defer recoverError(&err)
|
var b [dataDescriptorLen]byte
|
||||||
read(r, &f.CRC32)
|
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||||
read(r, &f.CompressedSize)
|
return err
|
||||||
read(r, &f.UncompressedSize)
|
}
|
||||||
return
|
c := binary.LittleEndian
|
||||||
|
f.CRC32 = c.Uint32(b[:4])
|
||||||
|
f.CompressedSize = c.Uint32(b[4:8])
|
||||||
|
f.UncompressedSize = c.Uint32(b[8:12])
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) {
|
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) {
|
||||||
@@ -268,48 +283,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
// read header into struct
|
// read header into struct
|
||||||
defer recoverError(&err)
|
c := binary.LittleEndian
|
||||||
br := bytes.NewBuffer(b[4:]) // skip over signature
|
|
||||||
d := new(directoryEnd)
|
d := new(directoryEnd)
|
||||||
read(br, &d.diskNbr)
|
d.diskNbr = c.Uint16(b[4:6])
|
||||||
read(br, &d.dirDiskNbr)
|
d.dirDiskNbr = c.Uint16(b[6:8])
|
||||||
read(br, &d.dirRecordsThisDisk)
|
d.dirRecordsThisDisk = c.Uint16(b[8:10])
|
||||||
read(br, &d.directoryRecords)
|
d.directoryRecords = c.Uint16(b[10:12])
|
||||||
read(br, &d.directorySize)
|
d.directorySize = c.Uint32(b[12:16])
|
||||||
read(br, &d.directoryOffset)
|
d.directoryOffset = c.Uint32(b[16:20])
|
||||||
read(br, &d.commentLen)
|
d.commentLen = c.Uint16(b[20:22])
|
||||||
d.comment = string(readByteSlice(br, d.commentLen))
|
d.comment = string(b[22 : 22+int(d.commentLen)])
|
||||||
return d, nil
|
return d, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func findSignatureInBlock(b []byte) int {
|
func findSignatureInBlock(b []byte) int {
|
||||||
const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header
|
for i := len(b) - directoryEndLen; i >= 0; i-- {
|
||||||
for i := len(b) - minSize; i >= 0; i-- {
|
|
||||||
// defined from directoryEndSignature in struct.go
|
// defined from directoryEndSignature in struct.go
|
||||||
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
|
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
|
||||||
// n is length of comment
|
// n is length of comment
|
||||||
n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8
|
n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
|
||||||
if n+minSize+i == len(b) {
|
if n+directoryEndLen+i == len(b) {
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
func read(r io.Reader, data interface{}) {
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, data); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func readByteSlice(r io.Reader, l uint16) []byte {
|
|
||||||
b := make([]byte, l)
|
|
||||||
if l == 0 {
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
if _, err := io.ReadFull(r, b); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
@@ -162,6 +162,8 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
|
|||||||
t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want)
|
t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size0 := f.UncompressedSize
|
||||||
|
|
||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
r, err := f.Open()
|
r, err := f.Open()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -169,6 +171,10 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if size1 := f.UncompressedSize; size0 != size1 {
|
||||||
|
t.Errorf("file %q changed f.UncompressedSize from %d to %d", f.Name, size0, size1)
|
||||||
|
}
|
||||||
|
|
||||||
_, err = io.Copy(&b, r)
|
_, err = io.Copy(&b, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
|
@@ -24,6 +24,9 @@ const (
|
|||||||
fileHeaderSignature = 0x04034b50
|
fileHeaderSignature = 0x04034b50
|
||||||
directoryHeaderSignature = 0x02014b50
|
directoryHeaderSignature = 0x02014b50
|
||||||
directoryEndSignature = 0x06054b50
|
directoryEndSignature = 0x06054b50
|
||||||
|
fileHeaderLen = 30 // + filename + extra
|
||||||
|
directoryHeaderLen = 46 // + filename + extra + comment
|
||||||
|
directoryEndLen = 22 // + comment
|
||||||
dataDescriptorLen = 12
|
dataDescriptorLen = 12
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user