mirror of
https://github.com/golang/go
synced 2024-11-21 13:44:45 -07:00
archive/zip: more efficient reader and bug fix
Fixes #2090. R=golang-dev, bradfitz CC=golang-dev https://golang.org/cl/4815068
This commit is contained in:
parent
60dac9b3db
commit
e0b6f4721f
@ -6,7 +6,6 @@ package zip
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"compress/flate"
|
||||
"hash"
|
||||
"hash/crc32"
|
||||
@ -37,8 +36,7 @@ type File struct {
|
||||
FileHeader
|
||||
zipr io.ReaderAt
|
||||
zipsize int64
|
||||
headerOffset uint32
|
||||
bodyOffset int64
|
||||
headerOffset int64
|
||||
}
|
||||
|
||||
func (f *File) hasDataDescriptor() bool {
|
||||
@ -90,12 +88,12 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
|
||||
|
||||
// The count of files inside a zip is truncated to fit in a uint16.
|
||||
// Gloss over this by reading headers until we encounter
|
||||
// a bad one, and then only report a FormatError if
|
||||
// a bad one, and then only report a FormatError or UnexpectedEOF if
|
||||
// the file count modulo 65536 is incorrect.
|
||||
for {
|
||||
f := &File{zipr: r, zipsize: size}
|
||||
err := readDirectoryHeader(f, buf)
|
||||
if err == FormatError {
|
||||
err = readDirectoryHeader(f, buf)
|
||||
if err == FormatError || err == io.ErrUnexpectedEOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
@ -104,9 +102,10 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
|
||||
z.File = append(z.File, f)
|
||||
}
|
||||
if uint16(len(z.File)) != end.directoryRecords {
|
||||
return FormatError
|
||||
// Return the readDirectoryHeader error if we read
|
||||
// the wrong number of directory entries.
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -116,26 +115,18 @@ func (rc *ReadCloser) Close() os.Error {
|
||||
}
|
||||
|
||||
// Open returns a ReadCloser that provides access to the File's contents.
|
||||
// It is safe to Open and Read from files concurrently.
|
||||
func (f *File) Open() (rc io.ReadCloser, err os.Error) {
|
||||
off := int64(f.headerOffset)
|
||||
bodyOffset, err := f.findBodyOffset()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
size := int64(f.CompressedSize)
|
||||
if f.bodyOffset == 0 {
|
||||
r := io.NewSectionReader(f.zipr, off, f.zipsize-off)
|
||||
if err = readFileHeader(f, r); err != nil {
|
||||
return
|
||||
}
|
||||
if f.bodyOffset, err = r.Seek(0, os.SEEK_CUR); err != nil {
|
||||
return
|
||||
}
|
||||
if size == 0 {
|
||||
size = int64(f.CompressedSize)
|
||||
}
|
||||
}
|
||||
if f.hasDataDescriptor() && size == 0 {
|
||||
if size == 0 && f.hasDataDescriptor() {
|
||||
// permit SectionReader to see the rest of the file
|
||||
size = f.zipsize - (off + f.bodyOffset)
|
||||
size = f.zipsize - (f.headerOffset + bodyOffset)
|
||||
}
|
||||
r := io.NewSectionReader(f.zipr, off+f.bodyOffset, size)
|
||||
r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
|
||||
switch f.Method {
|
||||
case Store: // (no compression)
|
||||
rc = ioutil.NopCloser(r)
|
||||
@ -176,75 +167,99 @@ func (r *checksumReader) Read(b []byte) (n int, err os.Error) {
|
||||
|
||||
func (r *checksumReader) Close() os.Error { return r.rc.Close() }
|
||||
|
||||
func readFileHeader(f *File, r io.Reader) (err os.Error) {
|
||||
defer recoverError(&err)
|
||||
var (
|
||||
signature uint32
|
||||
filenameLength uint16
|
||||
extraLength uint16
|
||||
)
|
||||
read(r, &signature)
|
||||
if signature != fileHeaderSignature {
|
||||
func readFileHeader(f *File, r io.Reader) os.Error {
|
||||
var b [fileHeaderLen]byte
|
||||
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
c := binary.LittleEndian
|
||||
if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
|
||||
return FormatError
|
||||
}
|
||||
read(r, &f.ReaderVersion)
|
||||
read(r, &f.Flags)
|
||||
read(r, &f.Method)
|
||||
read(r, &f.ModifiedTime)
|
||||
read(r, &f.ModifiedDate)
|
||||
read(r, &f.CRC32)
|
||||
read(r, &f.CompressedSize)
|
||||
read(r, &f.UncompressedSize)
|
||||
read(r, &filenameLength)
|
||||
read(r, &extraLength)
|
||||
f.Name = string(readByteSlice(r, filenameLength))
|
||||
f.Extra = readByteSlice(r, extraLength)
|
||||
return
|
||||
f.ReaderVersion = c.Uint16(b[4:6])
|
||||
f.Flags = c.Uint16(b[6:8])
|
||||
f.Method = c.Uint16(b[8:10])
|
||||
f.ModifiedTime = c.Uint16(b[10:12])
|
||||
f.ModifiedDate = c.Uint16(b[12:14])
|
||||
f.CRC32 = c.Uint32(b[14:18])
|
||||
f.CompressedSize = c.Uint32(b[18:22])
|
||||
f.UncompressedSize = c.Uint32(b[22:26])
|
||||
filenameLen := int(c.Uint16(b[26:28]))
|
||||
extraLen := int(c.Uint16(b[28:30]))
|
||||
d := make([]byte, filenameLen+extraLen)
|
||||
if _, err := io.ReadFull(r, d); err != nil {
|
||||
return err
|
||||
}
|
||||
f.Name = string(d[:filenameLen])
|
||||
f.Extra = d[filenameLen:]
|
||||
return nil
|
||||
}
|
||||
|
||||
func readDirectoryHeader(f *File, r io.Reader) (err os.Error) {
|
||||
defer recoverError(&err)
|
||||
var (
|
||||
signature uint32
|
||||
filenameLength uint16
|
||||
extraLength uint16
|
||||
commentLength uint16
|
||||
startDiskNumber uint16 // unused
|
||||
internalAttributes uint16 // unused
|
||||
externalAttributes uint32 // unused
|
||||
)
|
||||
read(r, &signature)
|
||||
if signature != directoryHeaderSignature {
|
||||
// findBodyOffset does the minimum work to verify the file has a header
|
||||
// and returns the file body offset.
|
||||
func (f *File) findBodyOffset() (int64, os.Error) {
|
||||
r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset)
|
||||
var b [fileHeaderLen]byte
|
||||
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
c := binary.LittleEndian
|
||||
if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
|
||||
return 0, FormatError
|
||||
}
|
||||
filenameLen := int(c.Uint16(b[26:28]))
|
||||
extraLen := int(c.Uint16(b[28:30]))
|
||||
return int64(fileHeaderLen + filenameLen + extraLen), nil
|
||||
}
|
||||
|
||||
// readDirectoryHeader attempts to read a directory header from r.
|
||||
// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
|
||||
// and FormatError if it doesn't find a valid header signature.
|
||||
func readDirectoryHeader(f *File, r io.Reader) os.Error {
|
||||
var b [directoryHeaderLen]byte
|
||||
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
c := binary.LittleEndian
|
||||
if sig := c.Uint32(b[:4]); sig != directoryHeaderSignature {
|
||||
return FormatError
|
||||
}
|
||||
read(r, &f.CreatorVersion)
|
||||
read(r, &f.ReaderVersion)
|
||||
read(r, &f.Flags)
|
||||
read(r, &f.Method)
|
||||
read(r, &f.ModifiedTime)
|
||||
read(r, &f.ModifiedDate)
|
||||
read(r, &f.CRC32)
|
||||
read(r, &f.CompressedSize)
|
||||
read(r, &f.UncompressedSize)
|
||||
read(r, &filenameLength)
|
||||
read(r, &extraLength)
|
||||
read(r, &commentLength)
|
||||
read(r, &startDiskNumber)
|
||||
read(r, &internalAttributes)
|
||||
read(r, &externalAttributes)
|
||||
read(r, &f.headerOffset)
|
||||
f.Name = string(readByteSlice(r, filenameLength))
|
||||
f.Extra = readByteSlice(r, extraLength)
|
||||
f.Comment = string(readByteSlice(r, commentLength))
|
||||
return
|
||||
f.CreatorVersion = c.Uint16(b[4:6])
|
||||
f.ReaderVersion = c.Uint16(b[6:8])
|
||||
f.Flags = c.Uint16(b[8:10])
|
||||
f.Method = c.Uint16(b[10:12])
|
||||
f.ModifiedTime = c.Uint16(b[12:14])
|
||||
f.ModifiedDate = c.Uint16(b[14:16])
|
||||
f.CRC32 = c.Uint32(b[16:20])
|
||||
f.CompressedSize = c.Uint32(b[20:24])
|
||||
f.UncompressedSize = c.Uint32(b[24:28])
|
||||
filenameLen := int(c.Uint16(b[28:30]))
|
||||
extraLen := int(c.Uint16(b[30:32]))
|
||||
commentLen := int(c.Uint16(b[32:34]))
|
||||
// startDiskNumber := c.Uint16(b[34:36]) // Unused
|
||||
// internalAttributes := c.Uint16(b[36:38]) // Unused
|
||||
// externalAttributes := c.Uint32(b[38:42]) // Unused
|
||||
f.headerOffset = int64(c.Uint32(b[42:46]))
|
||||
d := make([]byte, filenameLen+extraLen+commentLen)
|
||||
if _, err := io.ReadFull(r, d); err != nil {
|
||||
return err
|
||||
}
|
||||
f.Name = string(d[:filenameLen])
|
||||
f.Extra = d[filenameLen : filenameLen+extraLen]
|
||||
f.Comment = string(d[filenameLen+extraLen:])
|
||||
return nil
|
||||
}
|
||||
|
||||
func readDataDescriptor(r io.Reader, f *File) (err os.Error) {
|
||||
defer recoverError(&err)
|
||||
read(r, &f.CRC32)
|
||||
read(r, &f.CompressedSize)
|
||||
read(r, &f.UncompressedSize)
|
||||
return
|
||||
func readDataDescriptor(r io.Reader, f *File) os.Error {
|
||||
var b [dataDescriptorLen]byte
|
||||
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
c := binary.LittleEndian
|
||||
f.CRC32 = c.Uint32(b[:4])
|
||||
f.CompressedSize = c.Uint32(b[4:8])
|
||||
f.UncompressedSize = c.Uint32(b[8:12])
|
||||
return nil
|
||||
}
|
||||
|
||||
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) {
|
||||
@ -268,48 +283,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Erro
|
||||
}
|
||||
|
||||
// read header into struct
|
||||
defer recoverError(&err)
|
||||
br := bytes.NewBuffer(b[4:]) // skip over signature
|
||||
c := binary.LittleEndian
|
||||
d := new(directoryEnd)
|
||||
read(br, &d.diskNbr)
|
||||
read(br, &d.dirDiskNbr)
|
||||
read(br, &d.dirRecordsThisDisk)
|
||||
read(br, &d.directoryRecords)
|
||||
read(br, &d.directorySize)
|
||||
read(br, &d.directoryOffset)
|
||||
read(br, &d.commentLen)
|
||||
d.comment = string(readByteSlice(br, d.commentLen))
|
||||
d.diskNbr = c.Uint16(b[4:6])
|
||||
d.dirDiskNbr = c.Uint16(b[6:8])
|
||||
d.dirRecordsThisDisk = c.Uint16(b[8:10])
|
||||
d.directoryRecords = c.Uint16(b[10:12])
|
||||
d.directorySize = c.Uint32(b[12:16])
|
||||
d.directoryOffset = c.Uint32(b[16:20])
|
||||
d.commentLen = c.Uint16(b[20:22])
|
||||
d.comment = string(b[22 : 22+int(d.commentLen)])
|
||||
return d, nil
|
||||
}
|
||||
|
||||
func findSignatureInBlock(b []byte) int {
|
||||
const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header
|
||||
for i := len(b) - minSize; i >= 0; i-- {
|
||||
for i := len(b) - directoryEndLen; i >= 0; i-- {
|
||||
// defined from directoryEndSignature in struct.go
|
||||
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
|
||||
// n is length of comment
|
||||
n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8
|
||||
if n+minSize+i == len(b) {
|
||||
n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
|
||||
if n+directoryEndLen+i == len(b) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func read(r io.Reader, data interface{}) {
|
||||
if err := binary.Read(r, binary.LittleEndian, data); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func readByteSlice(r io.Reader, l uint16) []byte {
|
||||
b := make([]byte, l)
|
||||
if l == 0 {
|
||||
return b
|
||||
}
|
||||
if _, err := io.ReadFull(r, b); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
@ -162,6 +162,8 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
|
||||
t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want)
|
||||
}
|
||||
|
||||
size0 := f.UncompressedSize
|
||||
|
||||
var b bytes.Buffer
|
||||
r, err := f.Open()
|
||||
if err != nil {
|
||||
@ -169,6 +171,10 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
|
||||
return
|
||||
}
|
||||
|
||||
if size1 := f.UncompressedSize; size0 != size1 {
|
||||
t.Errorf("file %q changed f.UncompressedSize from %d to %d", f.Name, size0, size1)
|
||||
}
|
||||
|
||||
_, err = io.Copy(&b, r)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
@ -24,6 +24,9 @@ const (
|
||||
fileHeaderSignature = 0x04034b50
|
||||
directoryHeaderSignature = 0x02014b50
|
||||
directoryEndSignature = 0x06054b50
|
||||
fileHeaderLen = 30 // + filename + extra
|
||||
directoryHeaderLen = 46 // + filename + extra + comment
|
||||
directoryEndLen = 22 // + comment
|
||||
dataDescriptorLen = 12
|
||||
)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user