2011-07-09 19:30:16 -06:00
|
|
|
// Copyright 2011 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package zip
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
2012-02-27 15:41:30 -07:00
|
|
|
"encoding/binary"
|
2011-11-01 20:04:37 -06:00
|
|
|
"errors"
|
2011-07-09 19:30:16 -06:00
|
|
|
"hash"
|
|
|
|
"hash/crc32"
|
|
|
|
"io"
|
2018-04-21 02:55:50 -06:00
|
|
|
"strings"
|
2017-04-05 02:32:09 -06:00
|
|
|
"unicode/utf8"
|
2011-07-09 19:30:16 -06:00
|
|
|
)
|
|
|
|
|
2017-07-15 12:33:06 -06:00
|
|
|
// Sentinel errors returned by writeHeader when a FileHeader field does not
// fit in the 16-bit length field of the local file header.
var (
	errLongName  = errors.New("zip: FileHeader.Name too long")
	errLongExtra = errors.New("zip: FileHeader.Extra too long")
)
|
|
|
|
|
2011-07-09 19:30:16 -06:00
|
|
|
// Writer implements a zip file writer.
|
|
|
|
type Writer struct {
|
2015-11-05 16:47:20 -07:00
|
|
|
cw *countWriter
|
|
|
|
dir []*header
|
|
|
|
last *fileWriter
|
|
|
|
closed bool
|
|
|
|
compressors map[uint16]Compressor
|
2017-11-22 09:10:47 -07:00
|
|
|
comment string
|
2016-11-15 12:33:10 -07:00
|
|
|
|
|
|
|
// testHookCloseSizeOffset if non-nil is called with the size
|
|
|
|
// of offset of the central directory at Close.
|
|
|
|
testHookCloseSizeOffset func(size, offset uint64)
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
type header struct {
|
|
|
|
*FileHeader
|
2012-08-21 19:05:24 -06:00
|
|
|
offset uint64
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewWriter returns a new Writer writing a zip file to w.
|
|
|
|
func NewWriter(w io.Writer) *Writer {
|
2015-03-11 18:54:11 -06:00
|
|
|
return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}}
|
2015-01-19 06:39:33 -07:00
|
|
|
}
|
|
|
|
|
2015-03-11 18:54:11 -06:00
|
|
|
// SetOffset sets the offset of the beginning of the zip data within the
|
|
|
|
// underlying writer. It should be used when the zip data is appended to an
|
|
|
|
// existing file, such as a binary executable.
|
|
|
|
// It must be called before any data is written.
|
|
|
|
func (w *Writer) SetOffset(n int64) {
|
|
|
|
if w.cw.count != 0 {
|
|
|
|
panic("zip: SetOffset called after data was written")
|
2015-01-19 06:39:33 -07:00
|
|
|
}
|
2015-03-11 18:54:11 -06:00
|
|
|
w.cw.count = n
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
2014-08-31 22:32:13 -06:00
|
|
|
// Flush flushes any buffered data to the underlying writer.
|
|
|
|
// Calling Flush is not normally necessary; calling Close is sufficient.
|
|
|
|
func (w *Writer) Flush() error {
|
|
|
|
return w.cw.w.(*bufio.Writer).Flush()
|
|
|
|
}
|
|
|
|
|
2017-11-22 09:10:47 -07:00
|
|
|
// SetComment sets the end-of-central-directory comment field.
|
|
|
|
// It can only be called before Close.
|
|
|
|
func (w *Writer) SetComment(comment string) error {
|
|
|
|
if len(comment) > uint16max {
|
2017-08-26 03:44:27 -06:00
|
|
|
return errors.New("zip: Writer.Comment too long")
|
|
|
|
}
|
2017-11-22 09:10:47 -07:00
|
|
|
w.comment = comment
|
|
|
|
return nil
|
|
|
|
}
|
2017-08-26 03:44:27 -06:00
|
|
|
|
2017-11-22 09:10:47 -07:00
|
|
|
// Close finishes writing the zip file by writing the central directory.
|
2018-05-31 16:07:02 -06:00
|
|
|
// It does not close the underlying writer.
|
2017-11-22 09:10:47 -07:00
|
|
|
func (w *Writer) Close() error {
|
2011-07-09 19:30:16 -06:00
|
|
|
if w.last != nil && !w.last.closed {
|
2012-02-26 22:29:22 -07:00
|
|
|
if err := w.last.close(); err != nil {
|
|
|
|
return err
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
w.last = nil
|
|
|
|
}
|
|
|
|
if w.closed {
|
2011-11-01 20:04:37 -06:00
|
|
|
return errors.New("zip: writer closed twice")
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
w.closed = true
|
|
|
|
|
|
|
|
// write central directory
|
2012-02-13 16:47:48 -07:00
|
|
|
start := w.cw.count
|
2011-07-09 19:30:16 -06:00
|
|
|
for _, h := range w.dir {
|
2012-02-26 23:37:59 -07:00
|
|
|
var buf [directoryHeaderLen]byte
|
|
|
|
b := writeBuf(buf[:])
|
|
|
|
b.uint32(uint32(directoryHeaderSignature))
|
|
|
|
b.uint16(h.CreatorVersion)
|
|
|
|
b.uint16(h.ReaderVersion)
|
|
|
|
b.uint16(h.Flags)
|
|
|
|
b.uint16(h.Method)
|
|
|
|
b.uint16(h.ModifiedTime)
|
|
|
|
b.uint16(h.ModifiedDate)
|
|
|
|
b.uint32(h.CRC32)
|
2016-01-06 10:22:16 -07:00
|
|
|
if h.isZip64() || h.offset >= uint32max {
|
2012-08-21 19:05:24 -06:00
|
|
|
// the file needs a zip64 header. store maxint in both
|
|
|
|
// 32 bit size fields (and offset later) to signal that the
|
|
|
|
// zip64 extra header should be used.
|
|
|
|
b.uint32(uint32max) // compressed size
|
|
|
|
b.uint32(uint32max) // uncompressed size
|
|
|
|
|
|
|
|
// append a zip64 extra block to Extra
|
|
|
|
var buf [28]byte // 2x uint16 + 3x uint64
|
|
|
|
eb := writeBuf(buf[:])
|
archive/zip: add FileHeader.Modified field
The ModifiedTime and ModifiedDate fields are not expressive enough
for many of the time extensions that have since been added to ZIP,
nor are they easy to access since they in a legacy MS-DOS format,
and must be set and retrieved via the SetModTime and ModTime methods.
Instead, we add new field Modified of time.Time type that contains
all of the previous information and more.
Support for extended timestamps have been attempted before, but the
change was reverted because it provided no ability for the user to
specify the timezone of the legacy MS-DOS fields.
Technically the old API did not either, but users were manually offsetting
the timestamp to achieve the same effect.
The Writer now writes the legacy timestamps according to the timezone
of the FileHeader.Modified field. When the Modified field is set via
the SetModTime method, it is in UTC, which preserves the old behavior.
The Reader attempts to determine the timezone if both the legacy
and extended timestamps are present since it can compute the delta
between the two values.
Since Modified is a superset of the information in ModifiedTime and ModifiedDate,
we mark ModifiedTime, ModifiedDate, ModTime, and SetModTime as deprecated.
Fixes #18359
Change-Id: I29c6bc0a62908095d02740df3e6902f50d3152f1
Reviewed-on: https://go-review.googlesource.com/74970
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-28 13:07:58 -06:00
|
|
|
eb.uint16(zip64ExtraID)
|
2012-08-21 19:05:24 -06:00
|
|
|
eb.uint16(24) // size = 3x uint64
|
|
|
|
eb.uint64(h.UncompressedSize64)
|
|
|
|
eb.uint64(h.CompressedSize64)
|
|
|
|
eb.uint64(h.offset)
|
|
|
|
h.Extra = append(h.Extra, buf[:]...)
|
|
|
|
} else {
|
|
|
|
b.uint32(h.CompressedSize)
|
|
|
|
b.uint32(h.UncompressedSize)
|
|
|
|
}
|
2016-01-06 05:36:31 -07:00
|
|
|
|
2012-02-26 23:37:59 -07:00
|
|
|
b.uint16(uint16(len(h.Name)))
|
|
|
|
b.uint16(uint16(len(h.Extra)))
|
|
|
|
b.uint16(uint16(len(h.Comment)))
|
|
|
|
b = b[4:] // skip disk number start and internal file attr (2x uint16)
|
|
|
|
b.uint32(h.ExternalAttrs)
|
2012-08-21 19:05:24 -06:00
|
|
|
if h.offset > uint32max {
|
|
|
|
b.uint32(uint32max)
|
|
|
|
} else {
|
|
|
|
b.uint32(uint32(h.offset))
|
|
|
|
}
|
2012-02-26 23:37:59 -07:00
|
|
|
if _, err := w.cw.Write(buf[:]); err != nil {
|
2012-02-26 22:29:22 -07:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
if _, err := io.WriteString(w.cw, h.Name); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if _, err := w.cw.Write(h.Extra); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if _, err := io.WriteString(w.cw, h.Comment); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
2012-02-13 16:47:48 -07:00
|
|
|
end := w.cw.count
|
2011-07-09 19:30:16 -06:00
|
|
|
|
2012-08-21 19:05:24 -06:00
|
|
|
records := uint64(len(w.dir))
|
|
|
|
size := uint64(end - start)
|
|
|
|
offset := uint64(start)
|
|
|
|
|
2016-11-15 12:33:10 -07:00
|
|
|
if f := w.testHookCloseSizeOffset; f != nil {
|
|
|
|
f(size, offset)
|
|
|
|
}
|
|
|
|
|
|
|
|
if records >= uint16max || size >= uint32max || offset >= uint32max {
|
2012-08-21 19:05:24 -06:00
|
|
|
var buf [directory64EndLen + directory64LocLen]byte
|
|
|
|
b := writeBuf(buf[:])
|
|
|
|
|
|
|
|
// zip64 end of central directory record
|
|
|
|
b.uint32(directory64EndSignature)
|
2015-02-12 15:21:01 -07:00
|
|
|
b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64)
|
|
|
|
b.uint16(zipVersion45) // version made by
|
|
|
|
b.uint16(zipVersion45) // version needed to extract
|
|
|
|
b.uint32(0) // number of this disk
|
|
|
|
b.uint32(0) // number of the disk with the start of the central directory
|
|
|
|
b.uint64(records) // total number of entries in the central directory on this disk
|
|
|
|
b.uint64(records) // total number of entries in the central directory
|
|
|
|
b.uint64(size) // size of the central directory
|
|
|
|
b.uint64(offset) // offset of start of central directory with respect to the starting disk number
|
2012-08-21 19:05:24 -06:00
|
|
|
|
|
|
|
// zip64 end of central directory locator
|
|
|
|
b.uint32(directory64LocSignature)
|
|
|
|
b.uint32(0) // number of the disk with the start of the zip64 end of central directory
|
|
|
|
b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record
|
|
|
|
b.uint32(1) // total number of disks
|
|
|
|
|
|
|
|
if _, err := w.cw.Write(buf[:]); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// store max values in the regular end record to signal that
|
|
|
|
// that the zip64 values should be used instead
|
|
|
|
records = uint16max
|
|
|
|
size = uint32max
|
|
|
|
offset = uint32max
|
|
|
|
}
|
|
|
|
|
2011-07-09 19:30:16 -06:00
|
|
|
// write end record
|
2012-02-26 23:37:59 -07:00
|
|
|
var buf [directoryEndLen]byte
|
|
|
|
b := writeBuf(buf[:])
|
|
|
|
b.uint32(uint32(directoryEndSignature))
|
2017-08-26 03:44:27 -06:00
|
|
|
b = b[4:] // skip over disk number and first disk number (2x uint16)
|
|
|
|
b.uint16(uint16(records)) // number of entries this disk
|
|
|
|
b.uint16(uint16(records)) // number of entries total
|
|
|
|
b.uint32(uint32(size)) // size of directory
|
|
|
|
b.uint32(uint32(offset)) // start of directory
|
2017-11-22 09:10:47 -07:00
|
|
|
b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
|
2012-02-26 23:37:59 -07:00
|
|
|
if _, err := w.cw.Write(buf[:]); err != nil {
|
2012-02-26 22:29:22 -07:00
|
|
|
return err
|
|
|
|
}
|
2017-11-22 09:10:47 -07:00
|
|
|
if _, err := io.WriteString(w.cw, w.comment); err != nil {
|
2017-08-26 03:44:27 -06:00
|
|
|
return err
|
|
|
|
}
|
2012-02-13 16:47:48 -07:00
|
|
|
|
|
|
|
return w.cw.w.(*bufio.Writer).Flush()
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create adds a file to the zip file using the provided name.
|
|
|
|
// It returns a Writer to which the file contents should be written.
|
2017-11-15 03:24:21 -07:00
|
|
|
// The file contents will be compressed using the Deflate method.
|
2013-04-17 14:25:12 -06:00
|
|
|
// The name must be a relative path: it must not start with a drive
|
|
|
|
// letter (e.g. C:) or leading slash, and only forward slashes are
|
2018-02-25 07:34:35 -07:00
|
|
|
// allowed. To create a directory instead of a file, add a trailing
|
|
|
|
// slash to the name.
|
2011-07-09 19:30:16 -06:00
|
|
|
// The file's contents must be written to the io.Writer before the next
|
|
|
|
// call to Create, CreateHeader, or Close.
|
2011-11-01 20:04:37 -06:00
|
|
|
func (w *Writer) Create(name string) (io.Writer, error) {
|
2011-07-09 19:30:16 -06:00
|
|
|
header := &FileHeader{
|
|
|
|
Name: name,
|
|
|
|
Method: Deflate,
|
|
|
|
}
|
|
|
|
return w.CreateHeader(header)
|
|
|
|
}
|
|
|
|
|
2017-10-23 14:47:15 -06:00
|
|
|
// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
// or any other common encoding).
//
// When s is not valid UTF-8 both results are false.
func detectUTF8(s string) (valid, require bool) {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		i += size
		// Officially, ZIP uses CP-437, but many readers use the system's
		// local character encoding. Most encodings are compatible with a
		// large subset of CP-437, which itself is ASCII-like.
		//
		// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
		// characters with localized currency and overline characters.
		if r < 0x20 || r > 0x7d || r == 0x5c {
			// A RuneError of width 1 marks an undecodable byte, as
			// opposed to a correctly encoded U+FFFD.
			if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
				return false, false
			}
			require = true
		}
	}
	return true, require
}
|
|
|
|
|
archive/zip: add FileHeader.Modified field
The ModifiedTime and ModifiedDate fields are not expressive enough
for many of the time extensions that have since been added to ZIP,
nor are they easy to access since they in a legacy MS-DOS format,
and must be set and retrieved via the SetModTime and ModTime methods.
Instead, we add new field Modified of time.Time type that contains
all of the previous information and more.
Support for extended timestamps have been attempted before, but the
change was reverted because it provided no ability for the user to
specify the timezone of the legacy MS-DOS fields.
Technically the old API did not either, but users were manually offsetting
the timestamp to achieve the same effect.
The Writer now writes the legacy timestamps according to the timezone
of the FileHeader.Modified field. When the Modified field is set via
the SetModTime method, it is in UTC, which preserves the old behavior.
The Reader attempts to determine the timezone if both the legacy
and extended timestamps are present since it can compute the delta
between the two values.
Since Modified is a superset of the information in ModifiedTime and ModifiedDate,
we mark ModifiedTime, ModifiedDate, ModTime, and SetModTime as deprecated.
Fixes #18359
Change-Id: I29c6bc0a62908095d02740df3e6902f50d3152f1
Reviewed-on: https://go-review.googlesource.com/74970
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-28 13:07:58 -06:00
|
|
|
// CreateHeader adds a file to the zip archive using the provided FileHeader
|
|
|
|
// for the file metadata. Writer takes ownership of fh and may mutate
|
|
|
|
// its fields. The caller must not modify fh after calling CreateHeader.
|
2015-06-10 12:19:14 -06:00
|
|
|
//
|
archive/zip: add FileHeader.Modified field
The ModifiedTime and ModifiedDate fields are not expressive enough
for many of the time extensions that have since been added to ZIP,
nor are they easy to access since they in a legacy MS-DOS format,
and must be set and retrieved via the SetModTime and ModTime methods.
Instead, we add new field Modified of time.Time type that contains
all of the previous information and more.
Support for extended timestamps have been attempted before, but the
change was reverted because it provided no ability for the user to
specify the timezone of the legacy MS-DOS fields.
Technically the old API did not either, but users were manually offsetting
the timestamp to achieve the same effect.
The Writer now writes the legacy timestamps according to the timezone
of the FileHeader.Modified field. When the Modified field is set via
the SetModTime method, it is in UTC, which preserves the old behavior.
The Reader attempts to determine the timezone if both the legacy
and extended timestamps are present since it can compute the delta
between the two values.
Since Modified is a superset of the information in ModifiedTime and ModifiedDate,
we mark ModifiedTime, ModifiedDate, ModTime, and SetModTime as deprecated.
Fixes #18359
Change-Id: I29c6bc0a62908095d02740df3e6902f50d3152f1
Reviewed-on: https://go-review.googlesource.com/74970
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-28 13:07:58 -06:00
|
|
|
// This returns a Writer to which the file contents should be written.
|
2011-07-09 19:30:16 -06:00
|
|
|
// The file's contents must be written to the io.Writer before the next
|
archive/zip: add FileHeader.Modified field
The ModifiedTime and ModifiedDate fields are not expressive enough
for many of the time extensions that have since been added to ZIP,
nor are they easy to access since they in a legacy MS-DOS format,
and must be set and retrieved via the SetModTime and ModTime methods.
Instead, we add new field Modified of time.Time type that contains
all of the previous information and more.
Support for extended timestamps have been attempted before, but the
change was reverted because it provided no ability for the user to
specify the timezone of the legacy MS-DOS fields.
Technically the old API did not either, but users were manually offsetting
the timestamp to achieve the same effect.
The Writer now writes the legacy timestamps according to the timezone
of the FileHeader.Modified field. When the Modified field is set via
the SetModTime method, it is in UTC, which preserves the old behavior.
The Reader attempts to determine the timezone if both the legacy
and extended timestamps are present since it can compute the delta
between the two values.
Since Modified is a superset of the information in ModifiedTime and ModifiedDate,
we mark ModifiedTime, ModifiedDate, ModTime, and SetModTime as deprecated.
Fixes #18359
Change-Id: I29c6bc0a62908095d02740df3e6902f50d3152f1
Reviewed-on: https://go-review.googlesource.com/74970
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-28 13:07:58 -06:00
|
|
|
// call to Create, CreateHeader, or Close.
|
2011-11-01 20:04:37 -06:00
|
|
|
func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
|
2011-07-09 19:30:16 -06:00
|
|
|
if w.last != nil && !w.last.closed {
|
|
|
|
if err := w.last.close(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
2015-06-10 12:19:14 -06:00
|
|
|
if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
|
|
|
|
// See https://golang.org/issue/11144 confusion.
|
|
|
|
return nil, errors.New("archive/zip: invalid duplicate FileHeader")
|
|
|
|
}
|
2011-07-09 19:30:16 -06:00
|
|
|
|
2017-10-23 14:47:15 -06:00
|
|
|
// The ZIP format has a sad state of affairs regarding character encoding.
|
|
|
|
// Officially, the name and comment fields are supposed to be encoded
|
|
|
|
// in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
|
|
|
|
// flag bit is set. However, there are several problems:
|
|
|
|
//
|
|
|
|
// * Many ZIP readers still do not support UTF-8.
|
|
|
|
// * If the UTF-8 flag is cleared, several readers simply interpret the
|
|
|
|
// name and comment fields as whatever the local system encoding is.
|
|
|
|
//
|
|
|
|
// In order to avoid breaking readers without UTF-8 support,
|
|
|
|
// we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
|
|
|
|
// However, if the strings require multibyte UTF-8 encoding and is a
|
|
|
|
// valid UTF-8 string, then we set the UTF-8 bit.
|
|
|
|
//
|
|
|
|
// For the case, where the user explicitly wants to specify the encoding
|
|
|
|
// as UTF-8, they will need to set the flag bit themselves.
|
|
|
|
utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
|
|
|
|
utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
|
archive/zip: add FileHeader.NonUTF8 field
The NonUTF8 field provides users with a way to explictly tell the
ZIP writer to avoid setting the UTF-8 flag.
This is necessary because many readers:
1) (Still) do not support UTF-8
2) And use the local system encoding instead
Thus, even though character encodings other than CP-437 and UTF-8
are not officially supported by the ZIP specification, pragmatically
the world has permitted use of them.
When a non-standard encoding is used, it is the user's responsibility
to ensure that the target system is expecting the encoding used
(e.g., producing a ZIP file you know is used on a Chinese version of Windows).
We adjust the detectUTF8 function to account for Shift-JIS and EUC-KR
not being identical to ASCII for two characters.
We don't need an API for users to explicitly specify that they are encoding
with UTF-8 since all single byte characters are compatible with all other
common encodings (Windows-1256, Windows-1252, Windows-1251, Windows-1250,
IEC-8859, EUC-KR, KOI8-R, Latin-1, Shift-JIS, GB-2312, GBK) except for
the non-printable characters and the backslash character (all of which
are invalid characters in a path name anyways).
Fixes #10741
Change-Id: I9004542d1d522c9137973f1b6e2b623fa54dfd66
Reviewed-on: https://go-review.googlesource.com/75592
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-11-02 14:53:16 -06:00
|
|
|
switch {
|
|
|
|
case fh.NonUTF8:
|
|
|
|
fh.Flags &^= 0x800
|
|
|
|
case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
|
2017-10-23 14:47:15 -06:00
|
|
|
fh.Flags |= 0x800
|
2017-04-05 02:32:09 -06:00
|
|
|
}
|
|
|
|
|
2012-08-21 19:05:24 -06:00
|
|
|
fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
|
|
|
|
fh.ReaderVersion = zipVersion20
|
2011-07-09 19:30:16 -06:00
|
|
|
|
archive/zip: add FileHeader.Modified field
The ModifiedTime and ModifiedDate fields are not expressive enough
for many of the time extensions that have since been added to ZIP,
nor are they easy to access since they in a legacy MS-DOS format,
and must be set and retrieved via the SetModTime and ModTime methods.
Instead, we add new field Modified of time.Time type that contains
all of the previous information and more.
Support for extended timestamps have been attempted before, but the
change was reverted because it provided no ability for the user to
specify the timezone of the legacy MS-DOS fields.
Technically the old API did not either, but users were manually offsetting
the timestamp to achieve the same effect.
The Writer now writes the legacy timestamps according to the timezone
of the FileHeader.Modified field. When the Modified field is set via
the SetModTime method, it is in UTC, which preserves the old behavior.
The Reader attempts to determine the timezone if both the legacy
and extended timestamps are present since it can compute the delta
between the two values.
Since Modified is a superset of the information in ModifiedTime and ModifiedDate,
we mark ModifiedTime, ModifiedDate, ModTime, and SetModTime as deprecated.
Fixes #18359
Change-Id: I29c6bc0a62908095d02740df3e6902f50d3152f1
Reviewed-on: https://go-review.googlesource.com/74970
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-28 13:07:58 -06:00
|
|
|
// If Modified is set, this takes precedence over MS-DOS timestamp fields.
|
|
|
|
if !fh.Modified.IsZero() {
|
|
|
|
// Contrary to the FileHeader.SetModTime method, we intentionally
|
|
|
|
// do not convert to UTC, because we assume the user intends to encode
|
|
|
|
// the date using the specified timezone. A user may want this control
|
|
|
|
// because many legacy ZIP readers interpret the timestamp according
|
|
|
|
// to the local timezone.
|
|
|
|
//
|
|
|
|
// The timezone is only non-UTC if a user directly sets the Modified
|
|
|
|
// field directly themselves. All other approaches sets UTC.
|
|
|
|
fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
|
|
|
|
|
|
|
|
// Use "extended timestamp" format since this is what Info-ZIP uses.
|
|
|
|
// Nearly every major ZIP implementation uses a different format,
|
|
|
|
// but at least most seem to be able to understand the other formats.
|
|
|
|
//
|
|
|
|
// This format happens to be identical for both local and central header
|
|
|
|
// if modification time is the only timestamp being encoded.
|
|
|
|
var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
|
2017-11-15 13:38:26 -07:00
|
|
|
mt := uint32(fh.Modified.Unix())
|
archive/zip: add FileHeader.Modified field
The ModifiedTime and ModifiedDate fields are not expressive enough
for many of the time extensions that have since been added to ZIP,
nor are they easy to access since they in a legacy MS-DOS format,
and must be set and retrieved via the SetModTime and ModTime methods.
Instead, we add new field Modified of time.Time type that contains
all of the previous information and more.
Support for extended timestamps have been attempted before, but the
change was reverted because it provided no ability for the user to
specify the timezone of the legacy MS-DOS fields.
Technically the old API did not either, but users were manually offsetting
the timestamp to achieve the same effect.
The Writer now writes the legacy timestamps according to the timezone
of the FileHeader.Modified field. When the Modified field is set via
the SetModTime method, it is in UTC, which preserves the old behavior.
The Reader attempts to determine the timezone if both the legacy
and extended timestamps are present since it can compute the delta
between the two values.
Since Modified is a superset of the information in ModifiedTime and ModifiedDate,
we mark ModifiedTime, ModifiedDate, ModTime, and SetModTime as deprecated.
Fixes #18359
Change-Id: I29c6bc0a62908095d02740df3e6902f50d3152f1
Reviewed-on: https://go-review.googlesource.com/74970
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-28 13:07:58 -06:00
|
|
|
eb := writeBuf(mbuf[:])
|
|
|
|
eb.uint16(extTimeExtraID)
|
|
|
|
eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32)
|
|
|
|
eb.uint8(1) // Flags: ModTime
|
|
|
|
eb.uint32(mt) // ModTime
|
|
|
|
fh.Extra = append(fh.Extra, mbuf[:]...)
|
|
|
|
}
|
|
|
|
|
2018-04-21 02:55:50 -06:00
|
|
|
var (
|
|
|
|
ow io.Writer
|
|
|
|
fw *fileWriter
|
|
|
|
)
|
2011-07-09 19:30:16 -06:00
|
|
|
h := &header{
|
|
|
|
FileHeader: fh,
|
2012-08-21 19:05:24 -06:00
|
|
|
offset: uint64(w.cw.count),
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
2018-04-21 02:55:50 -06:00
|
|
|
if strings.HasSuffix(fh.Name, "/") {
|
2018-05-02 04:07:56 -06:00
|
|
|
// Set the compression method to Store to ensure data length is truly zero,
|
|
|
|
// which the writeHeader method always encodes for the size fields.
|
|
|
|
// This is necessary as most compression formats have non-zero lengths
|
|
|
|
// even when compressing an empty string.
|
|
|
|
fh.Method = Store
|
|
|
|
fh.Flags &^= 0x8 // we will not write a data descriptor
|
|
|
|
|
2018-04-21 02:55:50 -06:00
|
|
|
ow = dirWriter{}
|
|
|
|
} else {
|
2018-05-02 04:07:56 -06:00
|
|
|
fh.Flags |= 0x8 // we will write a data descriptor
|
|
|
|
|
2018-04-21 02:55:50 -06:00
|
|
|
fw = &fileWriter{
|
|
|
|
zipw: w.cw,
|
|
|
|
compCount: &countWriter{w: w.cw},
|
|
|
|
crc32: crc32.NewIEEE(),
|
|
|
|
}
|
|
|
|
comp := w.compressor(fh.Method)
|
|
|
|
if comp == nil {
|
|
|
|
return nil, ErrAlgorithm
|
|
|
|
}
|
|
|
|
var err error
|
|
|
|
fw.comp, err = comp(fw.compCount)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
fw.rawCount = &countWriter{w: fw.comp}
|
|
|
|
fw.header = h
|
|
|
|
ow = fw
|
|
|
|
}
|
|
|
|
w.dir = append(w.dir, h)
|
2012-02-13 16:47:48 -07:00
|
|
|
if err := writeHeader(w.cw, fh); err != nil {
|
2011-07-09 19:30:16 -06:00
|
|
|
return nil, err
|
|
|
|
}
|
2018-04-21 02:55:50 -06:00
|
|
|
// If we're creating a directory, fw is nil.
|
2011-07-09 19:30:16 -06:00
|
|
|
w.last = fw
|
2018-04-21 02:55:50 -06:00
|
|
|
return ow, nil
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
2012-02-26 22:29:22 -07:00
|
|
|
func writeHeader(w io.Writer, h *FileHeader) error {
|
2017-07-15 12:33:06 -06:00
|
|
|
const maxUint16 = 1<<16 - 1
|
|
|
|
if len(h.Name) > maxUint16 {
|
|
|
|
return errLongName
|
|
|
|
}
|
|
|
|
if len(h.Extra) > maxUint16 {
|
|
|
|
return errLongExtra
|
|
|
|
}
|
|
|
|
|
2012-02-26 23:37:59 -07:00
|
|
|
var buf [fileHeaderLen]byte
|
|
|
|
b := writeBuf(buf[:])
|
|
|
|
b.uint32(uint32(fileHeaderSignature))
|
|
|
|
b.uint16(h.ReaderVersion)
|
|
|
|
b.uint16(h.Flags)
|
|
|
|
b.uint16(h.Method)
|
|
|
|
b.uint16(h.ModifiedTime)
|
|
|
|
b.uint16(h.ModifiedDate)
|
2012-08-21 19:05:24 -06:00
|
|
|
b.uint32(0) // since we are writing a data descriptor crc32,
|
|
|
|
b.uint32(0) // compressed size,
|
|
|
|
b.uint32(0) // and uncompressed size should be zero
|
2012-02-26 23:37:59 -07:00
|
|
|
b.uint16(uint16(len(h.Name)))
|
|
|
|
b.uint16(uint16(len(h.Extra)))
|
|
|
|
if _, err := w.Write(buf[:]); err != nil {
|
2012-02-26 22:29:22 -07:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
if _, err := io.WriteString(w, h.Name); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
_, err := w.Write(h.Extra)
|
|
|
|
return err
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
2015-11-05 16:47:20 -07:00
|
|
|
// RegisterCompressor registers or overrides a custom compressor for a specific
|
|
|
|
// method ID. If a compressor for a given method is not found, Writer will
|
|
|
|
// default to looking up the compressor at the package level.
|
|
|
|
func (w *Writer) RegisterCompressor(method uint16, comp Compressor) {
|
|
|
|
if w.compressors == nil {
|
|
|
|
w.compressors = make(map[uint16]Compressor)
|
|
|
|
}
|
|
|
|
w.compressors[method] = comp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w *Writer) compressor(method uint16) Compressor {
|
|
|
|
comp := w.compressors[method]
|
|
|
|
if comp == nil {
|
|
|
|
comp = compressor(method)
|
|
|
|
}
|
|
|
|
return comp
|
|
|
|
}
|
|
|
|
|
2018-04-21 02:55:50 -06:00
|
|
|
// dirWriter is a writer that rejects every write.
// NOTE(review): presumably handed out for directory entries, which carry
// no data — confirm against the caller.
type dirWriter struct{}

// Write always fails: it reports zero bytes written and a
// "zip: write to directory" error, regardless of the input.
func (dirWriter) Write(_ []byte) (int, error) {
	err := errors.New("zip: write to directory")
	return 0, err
}
|
|
|
|
|
2011-07-09 19:30:16 -06:00
|
|
|
type fileWriter struct {
|
|
|
|
*header
|
|
|
|
zipw io.Writer
|
|
|
|
rawCount *countWriter
|
|
|
|
comp io.WriteCloser
|
|
|
|
compCount *countWriter
|
|
|
|
crc32 hash.Hash32
|
|
|
|
closed bool
|
|
|
|
}
|
|
|
|
|
2011-11-01 20:04:37 -06:00
|
|
|
func (w *fileWriter) Write(p []byte) (int, error) {
|
2011-07-09 19:30:16 -06:00
|
|
|
if w.closed {
|
2011-11-01 20:04:37 -06:00
|
|
|
return 0, errors.New("zip: write to closed file")
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
w.crc32.Write(p)
|
|
|
|
return w.rawCount.Write(p)
|
|
|
|
}
|
|
|
|
|
2012-02-26 22:29:22 -07:00
|
|
|
func (w *fileWriter) close() error {
|
2011-07-09 19:30:16 -06:00
|
|
|
if w.closed {
|
2011-11-01 20:04:37 -06:00
|
|
|
return errors.New("zip: file closed twice")
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
w.closed = true
|
2012-02-26 22:29:22 -07:00
|
|
|
if err := w.comp.Close(); err != nil {
|
|
|
|
return err
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// update FileHeader
|
|
|
|
fh := w.header.FileHeader
|
|
|
|
fh.CRC32 = w.crc32.Sum32()
|
2012-08-21 19:05:24 -06:00
|
|
|
fh.CompressedSize64 = uint64(w.compCount.count)
|
|
|
|
fh.UncompressedSize64 = uint64(w.rawCount.count)
|
2011-07-09 19:30:16 -06:00
|
|
|
|
2012-08-21 19:05:24 -06:00
|
|
|
if fh.isZip64() {
|
|
|
|
fh.CompressedSize = uint32max
|
|
|
|
fh.UncompressedSize = uint32max
|
|
|
|
fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
|
|
|
|
} else {
|
|
|
|
fh.CompressedSize = uint32(fh.CompressedSize64)
|
|
|
|
fh.UncompressedSize = uint32(fh.UncompressedSize64)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write data descriptor. This is more complicated than one would
|
|
|
|
// think, see e.g. comments in zipfile.c:putextended() and
|
|
|
|
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
|
|
|
|
// The approach here is to write 8 byte sizes if needed without
|
|
|
|
// adding a zip64 extra in the local header (too late anyway).
|
|
|
|
var buf []byte
|
|
|
|
if fh.isZip64() {
|
|
|
|
buf = make([]byte, dataDescriptor64Len)
|
|
|
|
} else {
|
|
|
|
buf = make([]byte, dataDescriptorLen)
|
|
|
|
}
|
|
|
|
b := writeBuf(buf)
|
2012-03-09 15:12:02 -07:00
|
|
|
b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
|
2012-02-26 23:37:59 -07:00
|
|
|
b.uint32(fh.CRC32)
|
2012-08-21 19:05:24 -06:00
|
|
|
if fh.isZip64() {
|
|
|
|
b.uint64(fh.CompressedSize64)
|
|
|
|
b.uint64(fh.UncompressedSize64)
|
|
|
|
} else {
|
|
|
|
b.uint32(fh.CompressedSize)
|
|
|
|
b.uint32(fh.UncompressedSize)
|
|
|
|
}
|
|
|
|
_, err := w.zipw.Write(buf)
|
2012-02-26 22:29:22 -07:00
|
|
|
return err
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// countWriter forwards writes to w and keeps a running total of the
// number of bytes w reported as written.
type countWriter struct {
	w     io.Writer // destination of every write
	count int64     // total bytes accepted by w so far
}

// Write passes p to the underlying writer, adds the number of bytes that
// writer reports to the running count (even when it also reports an
// error), and returns the underlying result unchanged.
func (cw *countWriter) Write(p []byte) (int, error) {
	written, err := cw.w.Write(p)
	cw.count += int64(written)
	return written, err
}
|
|
|
|
|
|
|
|
// nopCloser pairs an io.Writer with a Close method that does nothing.
type nopCloser struct {
	io.Writer
}

// Close does nothing and always returns nil.
func (nopCloser) Close() error { return nil }
|
|
|
|
|
2012-02-26 23:37:59 -07:00
|
|
|
// writeBuf is a byte slice used as a write cursor: each of its uintN
// methods stores a value at the front of the slice and then re-slices
// past the bytes just written.
type writeBuf []byte
|
|
|
|
|
archive/zip: add FileHeader.Modified field
The ModifiedTime and ModifiedDate fields are not expressive enough
for many of the time extensions that have since been added to ZIP,
nor are they easy to access since they are in a legacy MS-DOS format,
and must be set and retrieved via the SetModTime and ModTime methods.
Instead, we add new field Modified of time.Time type that contains
all of the previous information and more.
Support for extended timestamps has been attempted before, but the
change was reverted because it provided no ability for the user to
specify the timezone of the legacy MS-DOS fields.
Technically the old API did not either, but users were manually offsetting
the timestamp to achieve the same effect.
The Writer now writes the legacy timestamps according to the timezone
of the FileHeader.Modified field. When the Modified field is set via
the SetModTime method, it is in UTC, which preserves the old behavior.
The Reader attempts to determine the timezone if both the legacy
and extended timestamps are present since it can compute the delta
between the two values.
Since Modified is a superset of the information in ModifiedTime and ModifiedDate,
we mark ModifiedTime, ModifiedDate, ModTime, and SetModTime as deprecated.
Fixes #18359
Change-Id: I29c6bc0a62908095d02740df3e6902f50d3152f1
Reviewed-on: https://go-review.googlesource.com/74970
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-28 13:07:58 -06:00
|
|
|
func (b *writeBuf) uint8(v uint8) {
|
|
|
|
(*b)[0] = v
|
|
|
|
*b = (*b)[1:]
|
|
|
|
}
|
|
|
|
|
2012-02-26 23:37:59 -07:00
|
|
|
func (b *writeBuf) uint16(v uint16) {
|
2012-02-27 15:41:30 -07:00
|
|
|
binary.LittleEndian.PutUint16(*b, v)
|
2012-02-26 23:37:59 -07:00
|
|
|
*b = (*b)[2:]
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
|
|
|
|
2012-02-26 23:37:59 -07:00
|
|
|
func (b *writeBuf) uint32(v uint32) {
|
2012-02-27 15:41:30 -07:00
|
|
|
binary.LittleEndian.PutUint32(*b, v)
|
2012-02-26 23:37:59 -07:00
|
|
|
*b = (*b)[4:]
|
2011-07-09 19:30:16 -06:00
|
|
|
}
|
2012-08-21 19:05:24 -06:00
|
|
|
|
|
|
|
func (b *writeBuf) uint64(v uint64) {
|
|
|
|
binary.LittleEndian.PutUint64(*b, v)
|
|
|
|
*b = (*b)[8:]
|
|
|
|
}
|