2009-06-09 00:22:56 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package tar
|
|
|
|
|
|
|
|
// TODO(dsymonds):
|
2009-07-06 23:59:31 -06:00
|
|
|
// - pax extensions
|
2009-06-09 00:22:56 -06:00
|
|
|
|
|
|
|
import (
|
2009-12-15 16:33:31 -07:00
|
|
|
"bytes"
|
2011-11-01 20:04:37 -06:00
|
|
|
"errors"
|
2009-12-15 16:33:31 -07:00
|
|
|
"io"
|
2011-04-27 16:57:22 -06:00
|
|
|
"io/ioutil"
|
2015-09-28 17:38:16 -06:00
|
|
|
"math"
|
2009-12-15 16:33:31 -07:00
|
|
|
"strconv"
|
2013-02-10 17:36:29 -07:00
|
|
|
"strings"
|
2011-11-30 10:01:46 -07:00
|
|
|
"time"
|
2009-06-09 00:22:56 -06:00
|
|
|
)
|
|
|
|
|
|
|
|
var (
	// ErrHeader is the error reported when a tar header (or one of the
	// extended header records attached to it) is malformed.
	ErrHeader = errors.New("archive/tar: invalid tar header")
)
|
|
|
|
|
2013-02-10 17:36:29 -07:00
|
|
|
// maxNanoSecondIntSize is the number of decimal digits the fractional part of
// a PAX timestamp is padded or truncated to before it is read as nanoseconds.
const maxNanoSecondIntSize = 9
|
|
|
|
|
2009-06-09 00:22:56 -06:00
|
|
|
// A Reader provides sequential access to the contents of a tar archive.
|
2009-07-08 19:31:14 -06:00
|
|
|
// A tar archive consists of a sequence of files.
|
2009-06-09 00:22:56 -06:00
|
|
|
// The Next method advances to the next file in the archive (including the first),
|
|
|
|
// and then it can be treated as an io.Reader to access the file's data.
|
|
|
|
type Reader struct {
|
2015-09-17 01:22:56 -06:00
|
|
|
r io.Reader
|
|
|
|
pad int64 // amount of padding (ignored) after current file entry
|
|
|
|
curr numBytesReader // reader for current file entry
|
|
|
|
blk block // buffer to use as temporary local storage
|
2016-08-29 17:10:32 -06:00
|
|
|
|
|
|
|
// err is a persistent error.
|
|
|
|
// It is only the responsibility of every exported method of Reader to
|
|
|
|
// ensure that this error is sticky.
|
|
|
|
err error
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2015-09-28 14:49:35 -06:00
|
|
|
// parser groups the field-decoding helpers and remembers whether one of them
// failed, so a caller can decode several fields and inspect err once.
type parser struct {
	err error // most recent parsing error, nil if none occurred
}
|
|
|
|
|
2014-04-03 14:01:04 -06:00
|
|
|
// numBytesReader is an io.Reader that can additionally report, through
// numBytes, how many bytes of the underlying encoded data are still unread.
type numBytesReader interface {
	io.Reader
	numBytes() int64
}
|
|
|
|
|
|
|
|
// regFileReader is the numBytesReader used for the data section of an
// ordinary file entry in a tar archive.
type regFileReader struct {
	r  io.Reader // reader the file data is taken from
	nb int64     // bytes of the current file entry not yet read
}
|
|
|
|
|
2015-09-28 17:38:16 -06:00
|
|
|
// A sparseFileReader is a numBytesReader for reading sparse file data from a
|
|
|
|
// tar archive.
|
2014-04-03 14:01:04 -06:00
|
|
|
type sparseFileReader struct {
|
2015-09-28 17:38:16 -06:00
|
|
|
rfr numBytesReader // Reads the sparse-encoded file data
|
|
|
|
sp []sparseEntry // The sparse map for the file
|
|
|
|
pos int64 // Keeps track of file position
|
|
|
|
total int64 // Total size of the file
|
|
|
|
}
|
|
|
|
|
|
|
|
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 8-byte data:
//	var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
//	var sp = []sparseEntry{
//		{offset: 2,  numBytes: 5}, // Data fragment for [2, 7)
//		{offset: 18, numBytes: 3}, // Data fragment for [18, 21)
//	}
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
	offset   int64 // Starting position of the fragment
	numBytes int64 // Length of the fragment
}
|
|
|
|
|
|
|
|
// PAX extended header keywords that describe GNU sparse files.
const (
	// Format version indicators.
	paxGNUSparseMajor = "GNU.sparse.major"
	paxGNUSparseMinor = "GNU.sparse.minor"

	// File name and size records.
	paxGNUSparseName     = "GNU.sparse.name"
	paxGNUSparseSize     = "GNU.sparse.size"
	paxGNUSparseRealSize = "GNU.sparse.realsize"

	// Sparse map records.
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
)
|
|
|
|
|
2009-07-08 19:31:14 -06:00
|
|
|
// NewReader creates a new Reader reading from r.
|
2009-12-15 16:33:31 -07:00
|
|
|
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
|
2009-06-09 00:22:56 -06:00
|
|
|
|
|
|
|
// Next advances to the next entry in the tar archive.
|
2014-12-29 23:28:02 -07:00
|
|
|
//
|
|
|
|
// io.EOF is returned at the end of the input.
|
2011-11-01 20:04:37 -06:00
|
|
|
func (tr *Reader) Next() (*Header, error) {
|
2013-02-10 17:36:29 -07:00
|
|
|
if tr.err != nil {
|
2015-09-16 01:58:56 -06:00
|
|
|
return nil, tr.err
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr, err := tr.next()
|
|
|
|
tr.err = err
|
|
|
|
return hdr, err
|
|
|
|
}
|
2015-09-16 01:58:56 -06:00
|
|
|
|
2016-08-29 17:10:32 -06:00
|
|
|
func (tr *Reader) next() (*Header, error) {
|
2015-09-16 01:58:56 -06:00
|
|
|
var extHdrs map[string]string
|
|
|
|
|
|
|
|
// Externally, Next iterates through the tar archive as if it is a series of
|
|
|
|
// files. Internally, the tar format often uses fake "files" to add meta
|
|
|
|
// data that describes the next file. These meta data "files" should not
|
|
|
|
// normally be visible to the outside. As such, this loop iterates through
|
|
|
|
// one or more "header files" until it finds a "normal file".
|
|
|
|
loop:
|
|
|
|
for {
|
2016-08-29 17:10:32 -06:00
|
|
|
if err := tr.skipUnread(); err != nil {
|
|
|
|
return nil, err
|
2015-06-13 02:53:06 -06:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr, rawHdr, err := tr.readHeader()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2016-08-20 02:46:32 -06:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
if err := tr.handleRegularFile(hdr); err != nil {
|
|
|
|
return nil, err
|
2015-06-13 02:53:06 -06:00
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
2015-09-16 01:58:56 -06:00
|
|
|
// Check for PAX/GNU special headers and files.
|
|
|
|
switch hdr.Typeflag {
|
|
|
|
case TypeXHeader:
|
2016-08-29 17:10:32 -06:00
|
|
|
extHdrs, err = parsePAX(tr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-03 19:12:31 -07:00
|
|
|
}
|
2015-09-16 01:58:56 -06:00
|
|
|
continue loop // This is a meta header affecting the next header
|
|
|
|
case TypeGNULongName, TypeGNULongLink:
|
2016-08-29 17:10:32 -06:00
|
|
|
realname, err := ioutil.ReadAll(tr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2015-09-28 17:38:16 -06:00
|
|
|
}
|
2015-09-16 01:58:56 -06:00
|
|
|
|
|
|
|
// Convert GNU extensions to use PAX headers.
|
|
|
|
if extHdrs == nil {
|
|
|
|
extHdrs = make(map[string]string)
|
|
|
|
}
|
|
|
|
var p parser
|
|
|
|
switch hdr.Typeflag {
|
|
|
|
case TypeGNULongName:
|
|
|
|
extHdrs[paxPath] = p.parseString(realname)
|
|
|
|
case TypeGNULongLink:
|
|
|
|
extHdrs[paxLinkpath] = p.parseString(realname)
|
|
|
|
}
|
|
|
|
if p.err != nil {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, p.err
|
2015-09-16 01:58:56 -06:00
|
|
|
}
|
|
|
|
continue loop // This is a meta header affecting the next header
|
|
|
|
default:
|
2016-08-20 02:46:32 -06:00
|
|
|
// The old GNU sparse format is handled here since it is technically
|
|
|
|
// just a regular file with additional attributes.
|
|
|
|
|
2016-08-29 17:10:32 -06:00
|
|
|
if err := mergePAX(hdr, extHdrs); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-09-16 01:58:56 -06:00
|
|
|
|
2016-08-20 02:46:32 -06:00
|
|
|
// TODO(dsnet): The extended headers may have updated the size.
|
|
|
|
// Thus, we must setup the regFileReader again here.
|
|
|
|
//
|
|
|
|
// See golang.org/issue/15573
|
|
|
|
|
2016-08-29 17:10:32 -06:00
|
|
|
if err := tr.handleSparseFile(hdr, rawHdr, extHdrs); err != nil {
|
|
|
|
return nil, err
|
2015-09-16 01:58:56 -06:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return hdr, nil // This is a file, so stop
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-20 02:46:32 -06:00
|
|
|
// handleRegularFile sets up the current file reader and padding such that it
|
|
|
|
// can only read the following logical data section. It will properly handle
|
|
|
|
// special headers that contain no data section.
|
|
|
|
func (tr *Reader) handleRegularFile(hdr *Header) error {
|
|
|
|
nb := hdr.Size
|
|
|
|
if isHeaderOnlyType(hdr.Typeflag) {
|
|
|
|
nb = 0
|
|
|
|
}
|
|
|
|
if nb < 0 {
|
|
|
|
return ErrHeader
|
|
|
|
}
|
|
|
|
|
|
|
|
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
|
|
|
|
tr.curr = ®FileReader{r: tr.r, nb: nb}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleSparseFile checks if the current file is a sparse format of any type
|
|
|
|
// and sets the curr reader appropriately.
|
|
|
|
func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error {
|
|
|
|
var sp []sparseEntry
|
|
|
|
var err error
|
|
|
|
if hdr.Typeflag == TypeGNUSparse {
|
|
|
|
var p parser
|
|
|
|
hdr.Size = p.parseNumeric(rawHdr.GNU().RealSize())
|
|
|
|
if p.err != nil {
|
|
|
|
return p.err
|
|
|
|
}
|
|
|
|
|
2016-08-29 17:10:32 -06:00
|
|
|
sp, err = tr.readOldGNUSparseMap(rawHdr)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2016-08-20 02:46:32 -06:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If sp is non-nil, then this is a sparse file.
|
|
|
|
// Note that it is possible for len(sp) to be zero.
|
|
|
|
if sp != nil {
|
|
|
|
tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size)
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2014-04-03 14:01:04 -06:00
|
|
|
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
|
|
|
|
// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
|
|
|
|
// be treated as a regular file.
|
|
|
|
func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
|
|
|
|
var sparseFormat string
|
|
|
|
|
|
|
|
// Check for sparse format indicators
|
|
|
|
major, majorOk := headers[paxGNUSparseMajor]
|
|
|
|
minor, minorOk := headers[paxGNUSparseMinor]
|
|
|
|
sparseName, sparseNameOk := headers[paxGNUSparseName]
|
|
|
|
_, sparseMapOk := headers[paxGNUSparseMap]
|
|
|
|
sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
|
|
|
|
sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
|
|
|
|
|
|
|
|
// Identify which, if any, sparse format applies from which PAX headers are set
|
|
|
|
if majorOk && minorOk {
|
|
|
|
sparseFormat = major + "." + minor
|
|
|
|
} else if sparseNameOk && sparseMapOk {
|
|
|
|
sparseFormat = "0.1"
|
|
|
|
} else if sparseSizeOk {
|
|
|
|
sparseFormat = "0.0"
|
|
|
|
} else {
|
|
|
|
// Not a PAX format GNU sparse file.
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check for unknown sparse format
|
|
|
|
if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update hdr from GNU sparse PAX headers
|
|
|
|
if sparseNameOk {
|
|
|
|
hdr.Name = sparseName
|
|
|
|
}
|
|
|
|
if sparseSizeOk {
|
|
|
|
realSize, err := strconv.ParseInt(sparseSize, 10, 0)
|
|
|
|
if err != nil {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
hdr.Size = realSize
|
|
|
|
} else if sparseRealSizeOk {
|
|
|
|
realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
|
|
|
|
if err != nil {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
hdr.Size = realSize
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set up the sparse map, according to the particular sparse format in use
|
|
|
|
var sp []sparseEntry
|
|
|
|
var err error
|
|
|
|
switch sparseFormat {
|
|
|
|
case "0.0", "0.1":
|
|
|
|
sp, err = readGNUSparseMap0x1(headers)
|
|
|
|
case "1.0":
|
|
|
|
sp, err = readGNUSparseMap1x0(tr.curr)
|
|
|
|
}
|
|
|
|
return sp, err
|
|
|
|
}
|
|
|
|
|
2013-02-10 17:36:29 -07:00
|
|
|
// mergePAX merges well known headers according to PAX standard.
|
|
|
|
// In general headers with the same name as those found
|
|
|
|
// in the header struct overwrite those found in the header
|
|
|
|
// struct with higher precision or longer values. Esp. useful
|
|
|
|
// for name and linkname fields.
|
2016-08-29 17:10:32 -06:00
|
|
|
func mergePAX(hdr *Header, headers map[string]string) (err error) {
|
|
|
|
var id64 int64
|
2013-02-10 17:36:29 -07:00
|
|
|
for k, v := range headers {
|
|
|
|
switch k {
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxPath:
|
2013-02-10 17:36:29 -07:00
|
|
|
hdr.Name = v
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxLinkpath:
|
2013-02-10 17:36:29 -07:00
|
|
|
hdr.Linkname = v
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxUname:
|
2013-02-10 17:36:29 -07:00
|
|
|
hdr.Uname = v
|
2016-08-29 17:10:32 -06:00
|
|
|
case paxGname:
|
|
|
|
hdr.Gname = v
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxUid:
|
2016-08-29 17:10:32 -06:00
|
|
|
id64, err = strconv.ParseInt(v, 10, 0)
|
|
|
|
hdr.Uid = int(id64)
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxGid:
|
2016-08-29 17:10:32 -06:00
|
|
|
id64, err = strconv.ParseInt(v, 10, 0)
|
|
|
|
hdr.Gid = int(id64)
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxAtime:
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr.AccessTime, err = parsePAXTime(v)
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxMtime:
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr.ModTime, err = parsePAXTime(v)
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxCtime:
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr.ChangeTime, err = parsePAXTime(v)
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxSize:
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr.Size, err = strconv.ParseInt(v, 10, 0)
|
2014-02-13 02:08:30 -07:00
|
|
|
default:
|
|
|
|
if strings.HasPrefix(k, paxXattr) {
|
|
|
|
if hdr.Xattrs == nil {
|
|
|
|
hdr.Xattrs = make(map[string]string)
|
|
|
|
}
|
|
|
|
hdr.Xattrs[k[len(paxXattr):]] = v
|
|
|
|
}
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
if err != nil {
|
|
|
|
return ErrHeader
|
|
|
|
}
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// parsePAXTime takes a string of the form %d.%d as described in
|
|
|
|
// the PAX specification.
|
|
|
|
func parsePAXTime(t string) (time.Time, error) {
|
|
|
|
buf := []byte(t)
|
|
|
|
pos := bytes.IndexByte(buf, '.')
|
|
|
|
var seconds, nanoseconds int64
|
|
|
|
var err error
|
|
|
|
if pos == -1 {
|
|
|
|
seconds, err = strconv.ParseInt(t, 10, 0)
|
|
|
|
if err != nil {
|
|
|
|
return time.Time{}, err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
|
|
|
|
if err != nil {
|
|
|
|
return time.Time{}, err
|
|
|
|
}
|
2016-04-15 14:39:47 -06:00
|
|
|
nanoBuf := string(buf[pos+1:])
|
2013-02-10 17:36:29 -07:00
|
|
|
// Pad as needed before converting to a decimal.
|
|
|
|
// For example .030 -> .030000000 -> 30000000 nanoseconds
|
2016-04-15 14:39:47 -06:00
|
|
|
if len(nanoBuf) < maxNanoSecondIntSize {
|
2013-02-10 17:36:29 -07:00
|
|
|
// Right pad
|
2016-04-15 14:39:47 -06:00
|
|
|
nanoBuf += strings.Repeat("0", maxNanoSecondIntSize-len(nanoBuf))
|
|
|
|
} else if len(nanoBuf) > maxNanoSecondIntSize {
|
2013-02-10 17:36:29 -07:00
|
|
|
// Right truncate
|
2016-04-15 14:39:47 -06:00
|
|
|
nanoBuf = nanoBuf[:maxNanoSecondIntSize]
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
2016-04-15 14:39:47 -06:00
|
|
|
nanoseconds, err = strconv.ParseInt(nanoBuf, 10, 0)
|
2013-02-10 17:36:29 -07:00
|
|
|
if err != nil {
|
|
|
|
return time.Time{}, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ts := time.Unix(seconds, nanoseconds)
|
|
|
|
return ts, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// parsePAX parses PAX headers.
|
|
|
|
// If an extended header (type 'x') is invalid, ErrHeader is returned
|
|
|
|
func parsePAX(r io.Reader) (map[string]string, error) {
|
|
|
|
buf, err := ioutil.ReadAll(r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-09-28 14:49:35 -06:00
|
|
|
sbuf := string(buf)
|
2014-04-03 14:01:04 -06:00
|
|
|
|
|
|
|
// For GNU PAX sparse format 0.0 support.
|
|
|
|
// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
|
|
|
|
var sparseMap bytes.Buffer
|
|
|
|
|
2013-02-10 17:36:29 -07:00
|
|
|
headers := make(map[string]string)
|
|
|
|
// Each record is constructed as
|
|
|
|
// "%d %s=%s\n", length, keyword, value
|
2015-09-28 14:49:35 -06:00
|
|
|
for len(sbuf) > 0 {
|
|
|
|
key, value, residual, err := parsePAXRecord(sbuf)
|
|
|
|
if err != nil {
|
2013-02-10 17:36:29 -07:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2015-09-28 14:49:35 -06:00
|
|
|
sbuf = residual
|
2014-04-03 14:01:04 -06:00
|
|
|
|
2016-04-14 20:09:36 -06:00
|
|
|
keyStr := key
|
2014-04-03 14:01:04 -06:00
|
|
|
if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
|
|
|
|
// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
|
2015-09-28 14:49:35 -06:00
|
|
|
sparseMap.WriteString(value)
|
2014-04-03 14:01:04 -06:00
|
|
|
sparseMap.Write([]byte{','})
|
|
|
|
} else {
|
|
|
|
// Normal key. Set the value in the headers map.
|
2016-04-14 20:09:36 -06:00
|
|
|
headers[keyStr] = value
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if sparseMap.Len() != 0 {
|
|
|
|
// Add sparse info to headers, chopping off the extra comma
|
|
|
|
sparseMap.Truncate(sparseMap.Len() - 1)
|
|
|
|
headers[paxGNUSparseMap] = sparseMap.String()
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
|
|
|
return headers, nil
|
|
|
|
}
|
|
|
|
|
2015-09-28 14:49:35 -06:00
|
|
|
// parsePAXRecord parses the input PAX record string into a key-value pair.
|
|
|
|
// If parsing is successful, it will slice off the currently read record and
|
|
|
|
// return the remainder as r.
|
|
|
|
//
|
|
|
|
// A PAX record is of the following form:
|
|
|
|
// "%d %s=%s\n" % (size, key, value)
|
|
|
|
func parsePAXRecord(s string) (k, v, r string, err error) {
|
|
|
|
// The size field ends at the first space.
|
|
|
|
sp := strings.IndexByte(s, ' ')
|
|
|
|
if sp == -1 {
|
|
|
|
return "", "", s, ErrHeader
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse the first token as a decimal integer.
|
|
|
|
n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
|
|
|
|
if perr != nil || n < 5 || int64(len(s)) < n {
|
|
|
|
return "", "", s, ErrHeader
|
|
|
|
}
|
|
|
|
|
|
|
|
// Extract everything between the space and the final newline.
|
|
|
|
rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
|
|
|
|
if nl != "\n" {
|
|
|
|
return "", "", s, ErrHeader
|
|
|
|
}
|
|
|
|
|
|
|
|
// The first equals separates the key from the value.
|
|
|
|
eq := strings.IndexByte(rec, '=')
|
|
|
|
if eq == -1 {
|
|
|
|
return "", "", s, ErrHeader
|
|
|
|
}
|
|
|
|
return rec[:eq], rec[eq+1:], rem, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// parseString parses bytes as a NUL-terminated C-style string.
|
2009-06-09 00:22:56 -06:00
|
|
|
// If a NUL byte is not found then the whole slice is returned as a string.
|
2015-09-28 14:49:35 -06:00
|
|
|
func (*parser) parseString(b []byte) string {
|
2009-12-15 16:33:31 -07:00
|
|
|
n := 0
|
2009-06-09 00:22:56 -06:00
|
|
|
for n < len(b) && b[n] != 0 {
|
2009-11-09 13:07:39 -07:00
|
|
|
n++
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
return string(b[0:n])
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2015-12-02 16:41:44 -07:00
|
|
|
// parseNumeric parses the input as being encoded in either base-256 or octal.
|
|
|
|
// This function may return negative numbers.
|
|
|
|
// If parsing fails or an integer overflow occurs, err will be set.
|
2015-09-28 14:49:35 -06:00
|
|
|
func (p *parser) parseNumeric(b []byte) int64 {
|
2015-12-02 16:41:44 -07:00
|
|
|
// Check for base-256 (binary) format first.
|
|
|
|
// If the first bit is set, then all following bits constitute a two's
|
|
|
|
// complement encoded number in big-endian byte order.
|
2012-11-08 14:50:10 -07:00
|
|
|
if len(b) > 0 && b[0]&0x80 != 0 {
|
2015-12-02 16:41:44 -07:00
|
|
|
// Handling negative numbers relies on the following identity:
|
|
|
|
// -a-1 == ^a
|
|
|
|
//
|
|
|
|
// If the number is negative, we use an inversion mask to invert the
|
|
|
|
// data bytes and treat the value as an unsigned number.
|
|
|
|
var inv byte // 0x00 if positive or zero, 0xff if negative
|
|
|
|
if b[0]&0x40 != 0 {
|
|
|
|
inv = 0xff
|
|
|
|
}
|
|
|
|
|
|
|
|
var x uint64
|
2012-11-08 14:50:10 -07:00
|
|
|
for i, c := range b {
|
2015-12-02 16:41:44 -07:00
|
|
|
c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
|
2012-11-08 14:50:10 -07:00
|
|
|
if i == 0 {
|
2015-12-02 16:41:44 -07:00
|
|
|
c &= 0x7f // Ignore signal bit in first byte
|
|
|
|
}
|
|
|
|
if (x >> 56) > 0 {
|
|
|
|
p.err = ErrHeader // Integer overflow
|
|
|
|
return 0
|
2012-11-08 14:50:10 -07:00
|
|
|
}
|
2015-12-02 16:41:44 -07:00
|
|
|
x = x<<8 | uint64(c)
|
|
|
|
}
|
|
|
|
if (x >> 63) > 0 {
|
|
|
|
p.err = ErrHeader // Integer overflow
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
if inv == 0xff {
|
|
|
|
return ^int64(x)
|
2012-11-08 14:50:10 -07:00
|
|
|
}
|
2015-12-02 16:41:44 -07:00
|
|
|
return int64(x)
|
2012-11-08 14:50:10 -07:00
|
|
|
}
|
|
|
|
|
2015-12-02 16:41:44 -07:00
|
|
|
// Normal case is base-8 (octal) format.
|
2015-09-28 14:49:35 -06:00
|
|
|
return p.parseOctal(b)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *parser) parseOctal(b []byte) int64 {
|
2013-05-14 14:40:42 -06:00
|
|
|
// Because unused fields are filled with NULs, we need
|
|
|
|
// to skip leading NULs. Fields may also be padded with
|
|
|
|
// spaces or NULs.
|
|
|
|
// So we remove leading and trailing NULs and spaces to
|
|
|
|
// be sure.
|
|
|
|
b = bytes.Trim(b, " \x00")
|
|
|
|
|
|
|
|
if len(b) == 0 {
|
|
|
|
return 0
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
2015-09-28 14:49:35 -06:00
|
|
|
x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
|
|
|
|
if perr != nil {
|
|
|
|
p.err = ErrHeader
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
return int64(x)
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2015-10-01 03:30:29 -06:00
|
|
|
// skipUnread skips any unread bytes in the existing file entry, as well as any
|
|
|
|
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
|
|
|
|
// encountered in the data portion; it is okay to hit io.EOF in the padding.
|
|
|
|
//
|
|
|
|
// Note that this function still works properly even when sparse files are being
|
|
|
|
// used since numBytes returns the bytes remaining in the underlying io.Reader.
|
|
|
|
func (tr *Reader) skipUnread() error {
|
|
|
|
dataSkip := tr.numBytes() // Number of data bytes to skip
|
|
|
|
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
|
2014-04-03 14:01:04 -06:00
|
|
|
tr.curr, tr.pad = nil, 0
|
2015-10-01 03:30:29 -06:00
|
|
|
|
|
|
|
// If possible, Seek to the last byte before the end of the data section.
|
|
|
|
// Do this because Seek is often lazy about reporting errors; this will mask
|
|
|
|
// the fact that the tar stream may be truncated. We can rely on the
|
|
|
|
// io.CopyN done shortly afterwards to trigger any IO errors.
|
|
|
|
var seekSkipped int64 // Number of bytes skipped via Seek
|
|
|
|
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
|
|
|
|
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
|
|
|
|
// io.Seeker, but calling Seek always returns an error and performs
|
|
|
|
// no action. Thus, we try an innocent seek to the current position
|
|
|
|
// to see if Seek is really supported.
|
2016-04-12 22:35:37 -06:00
|
|
|
pos1, err := sr.Seek(0, io.SeekCurrent)
|
2015-10-01 03:30:29 -06:00
|
|
|
if err == nil {
|
|
|
|
// Seek seems supported, so perform the real Seek.
|
2016-04-12 22:35:37 -06:00
|
|
|
pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
|
2015-10-01 03:30:29 -06:00
|
|
|
if err != nil {
|
2016-08-29 17:10:32 -06:00
|
|
|
return err
|
2015-10-01 03:30:29 -06:00
|
|
|
}
|
|
|
|
seekSkipped = pos2 - pos1
|
2009-12-14 12:35:02 -07:00
|
|
|
}
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2015-10-01 03:30:29 -06:00
|
|
|
|
2016-08-29 17:10:32 -06:00
|
|
|
copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
|
|
|
|
if err == io.EOF && seekSkipped+copySkipped < dataSkip {
|
|
|
|
err = io.ErrUnexpectedEOF
|
2015-10-01 03:30:29 -06:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return err
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2015-10-01 03:30:29 -06:00
|
|
|
// readHeader reads the next block header and assumes that the underlying reader
|
2016-08-20 02:46:32 -06:00
|
|
|
// is already aligned to a block boundary. It returns the raw block of the
|
|
|
|
// header in case further processing is required.
|
2015-10-01 03:30:29 -06:00
|
|
|
//
|
|
|
|
// The err will be set to io.EOF only when one of the following occurs:
|
|
|
|
// * Exactly 0 bytes are read and EOF is hit.
|
|
|
|
// * Exactly 1 block of zeros is read and EOF is hit.
|
|
|
|
// * At least 2 blocks of zeros are read.
|
2016-08-29 17:10:32 -06:00
|
|
|
func (tr *Reader) readHeader() (*Header, *block, error) {
|
2009-06-09 00:22:56 -06:00
|
|
|
// Two blocks of zero bytes marks the end of the archive.
|
2016-08-29 17:10:32 -06:00
|
|
|
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
|
|
|
|
return nil, nil, err // EOF is okay here; exactly 0 bytes read
|
|
|
|
}
|
2015-09-17 01:22:56 -06:00
|
|
|
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
2016-08-29 17:10:32 -06:00
|
|
|
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
|
|
|
|
return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2015-09-17 01:22:56 -06:00
|
|
|
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, nil, ErrHeader // Zero block and then non-zero block
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2015-09-17 01:22:56 -06:00
|
|
|
// Verify the header matches a known format.
|
|
|
|
format := tr.blk.GetFormat()
|
|
|
|
if format == formatUnknown {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, nil, ErrHeader
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2015-09-28 14:49:35 -06:00
|
|
|
var p parser
|
2009-12-15 16:33:31 -07:00
|
|
|
hdr := new(Header)
|
2009-06-10 22:32:36 -06:00
|
|
|
|
2015-09-17 01:22:56 -06:00
|
|
|
// Unpack the V7 header.
|
|
|
|
v7 := tr.blk.V7()
|
|
|
|
hdr.Name = p.parseString(v7.Name())
|
|
|
|
hdr.Mode = p.parseNumeric(v7.Mode())
|
|
|
|
hdr.Uid = int(p.parseNumeric(v7.UID()))
|
|
|
|
hdr.Gid = int(p.parseNumeric(v7.GID()))
|
|
|
|
hdr.Size = p.parseNumeric(v7.Size())
|
|
|
|
hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
|
|
|
|
hdr.Typeflag = v7.TypeFlag()[0]
|
|
|
|
hdr.Linkname = p.parseString(v7.LinkName())
|
|
|
|
|
|
|
|
// Unpack format specific fields.
|
|
|
|
if format > formatV7 {
|
|
|
|
ustar := tr.blk.USTAR()
|
|
|
|
hdr.Uname = p.parseString(ustar.UserName())
|
|
|
|
hdr.Gname = p.parseString(ustar.GroupName())
|
2009-06-10 22:32:36 -06:00
|
|
|
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
|
2015-09-17 01:22:56 -06:00
|
|
|
hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
|
|
|
|
hdr.Devminor = p.parseNumeric(ustar.DevMinor())
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
2015-09-17 01:22:56 -06:00
|
|
|
|
2009-12-15 16:33:31 -07:00
|
|
|
var prefix string
|
2009-06-10 22:32:36 -06:00
|
|
|
switch format {
|
2015-09-17 01:22:56 -06:00
|
|
|
case formatUSTAR, formatGNU:
|
|
|
|
// TODO(dsnet): Do not use the prefix field for the GNU format!
|
|
|
|
// See golang.org/issues/12594
|
|
|
|
ustar := tr.blk.USTAR()
|
|
|
|
prefix = p.parseString(ustar.Prefix())
|
|
|
|
case formatSTAR:
|
|
|
|
star := tr.blk.STAR()
|
|
|
|
prefix = p.parseString(star.Prefix())
|
|
|
|
hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
|
|
|
|
hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
|
|
|
if len(prefix) > 0 {
|
2009-11-09 13:07:39 -07:00
|
|
|
hdr.Name = prefix + "/" + hdr.Name
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return hdr, &tr.blk, p.err
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2014-04-03 14:01:04 -06:00
|
|
|
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
|
|
|
|
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
|
|
|
|
// then one or more extension headers are used to store the rest of the sparse map.
|
2016-08-29 17:10:32 -06:00
|
|
|
func (tr *Reader) readOldGNUSparseMap(blk *block) ([]sparseEntry, error) {
|
2015-09-28 14:49:35 -06:00
|
|
|
var p parser
|
2015-09-17 01:22:56 -06:00
|
|
|
var s sparseArray = blk.GNU().Sparse()
|
|
|
|
var sp = make([]sparseEntry, 0, s.MaxEntries())
|
|
|
|
for i := 0; i < s.MaxEntries(); i++ {
|
|
|
|
offset := p.parseOctal(s.Entry(i).Offset())
|
|
|
|
numBytes := p.parseOctal(s.Entry(i).NumBytes())
|
2015-09-28 14:49:35 -06:00
|
|
|
if p.err != nil {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, p.err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
if offset == 0 && numBytes == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
|
|
|
|
}
|
|
|
|
|
2015-09-17 01:22:56 -06:00
|
|
|
for s.IsExtended()[0] > 0 {
|
2014-04-03 14:01:04 -06:00
|
|
|
// There are more entries. Read an extension header and parse its entries.
|
2015-09-17 01:22:56 -06:00
|
|
|
var blk block
|
2016-08-29 17:10:32 -06:00
|
|
|
if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
|
|
|
|
return nil, err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2015-09-17 01:22:56 -06:00
|
|
|
s = blk.Sparse()
|
|
|
|
|
|
|
|
for i := 0; i < s.MaxEntries(); i++ {
|
|
|
|
offset := p.parseOctal(s.Entry(i).Offset())
|
|
|
|
numBytes := p.parseOctal(s.Entry(i).NumBytes())
|
2015-09-28 14:49:35 -06:00
|
|
|
if p.err != nil {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, p.err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
if offset == 0 && numBytes == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
|
|
|
|
}
|
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return sp, nil
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2015-10-01 02:35:15 -06:00
|
|
|
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
|
|
|
|
// version 1.0. The format of the sparse map consists of a series of
|
|
|
|
// newline-terminated numeric fields. The first field is the number of entries
|
|
|
|
// and is always present. Following this are the entries, consisting of two
|
|
|
|
// fields (offset, numBytes). This function must stop reading at the end
|
|
|
|
// boundary of the block containing the last newline.
|
|
|
|
//
|
|
|
|
// Note that the GNU manual says that numeric values should be encoded in octal
|
|
|
|
// format. However, the GNU tar utility itself outputs these values in decimal.
|
|
|
|
// As such, this library treats values as being encoded in decimal.
|
2014-04-03 14:01:04 -06:00
|
|
|
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
|
2015-10-01 02:35:15 -06:00
|
|
|
var cntNewline int64
|
|
|
|
var buf bytes.Buffer
|
|
|
|
var blk = make([]byte, blockSize)
|
|
|
|
|
|
|
|
// feedTokens copies data in numBlock chunks from r into buf until there are
|
|
|
|
// at least cnt newlines in buf. It will not read more blocks than needed.
|
|
|
|
var feedTokens = func(cnt int64) error {
|
|
|
|
for cntNewline < cnt {
|
|
|
|
if _, err := io.ReadFull(r, blk); err != nil {
|
|
|
|
if err == io.EOF {
|
|
|
|
err = io.ErrUnexpectedEOF
|
|
|
|
}
|
|
|
|
return err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2015-10-01 02:35:15 -06:00
|
|
|
buf.Write(blk)
|
|
|
|
for _, c := range blk {
|
|
|
|
if c == '\n' {
|
|
|
|
cntNewline++
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
}
|
2015-10-01 02:35:15 -06:00
|
|
|
return nil
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
2015-10-01 02:35:15 -06:00
|
|
|
// nextToken gets the next token delimited by a newline. This assumes that
|
|
|
|
// at least one newline exists in the buffer.
|
|
|
|
var nextToken = func() string {
|
|
|
|
cntNewline--
|
|
|
|
tok, _ := buf.ReadString('\n')
|
|
|
|
return tok[:len(tok)-1] // Cut off newline
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2015-10-01 02:35:15 -06:00
|
|
|
// Parse for the number of entries.
|
|
|
|
// Use integer overflow resistant math to check this.
|
|
|
|
if err := feedTokens(1); err != nil {
|
2014-04-03 14:01:04 -06:00
|
|
|
return nil, err
|
|
|
|
}
|
2015-10-01 02:35:15 -06:00
|
|
|
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
|
|
|
|
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
2015-10-01 02:35:15 -06:00
|
|
|
// Parse for all member entries.
|
|
|
|
// numEntries is trusted after this since a potential attacker must have
|
|
|
|
// committed resources proportional to what this library used.
|
|
|
|
if err := feedTokens(2 * numEntries); err != nil {
|
2014-04-03 14:01:04 -06:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
sp := make([]sparseEntry, 0, numEntries)
|
|
|
|
for i := int64(0); i < numEntries; i++ {
|
2015-10-01 02:35:15 -06:00
|
|
|
offset, err := strconv.ParseInt(nextToken(), 10, 64)
|
2014-04-03 14:01:04 -06:00
|
|
|
if err != nil {
|
2015-10-01 02:35:15 -06:00
|
|
|
return nil, ErrHeader
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2015-10-01 02:35:15 -06:00
|
|
|
numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
|
2014-04-03 14:01:04 -06:00
|
|
|
if err != nil {
|
2015-10-01 02:35:15 -06:00
|
|
|
return nil, ErrHeader
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
|
|
|
|
}
|
|
|
|
return sp, nil
|
|
|
|
}
|
|
|
|
|
2015-10-01 02:04:24 -06:00
|
|
|
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
|
|
|
|
// version 0.1. The sparse map is stored in the PAX headers.
|
|
|
|
func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
|
|
|
|
// Get number of entries.
|
|
|
|
// Use integer overflow resistant math to check this.
|
|
|
|
numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
|
|
|
|
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
|
|
|
|
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
|
2014-04-03 14:01:04 -06:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
|
2015-10-01 02:04:24 -06:00
|
|
|
// There should be two numbers in sparseMap for each entry.
|
|
|
|
sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
|
2014-04-03 14:01:04 -06:00
|
|
|
if int64(len(sparseMap)) != 2*numEntries {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
|
2015-10-01 02:04:24 -06:00
|
|
|
// Loop through the entries in the sparse map.
|
|
|
|
// numEntries is trusted now.
|
2014-04-03 14:01:04 -06:00
|
|
|
sp := make([]sparseEntry, 0, numEntries)
|
|
|
|
for i := int64(0); i < numEntries; i++ {
|
2015-10-01 02:04:24 -06:00
|
|
|
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
|
2014-04-03 14:01:04 -06:00
|
|
|
if err != nil {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2015-10-01 02:04:24 -06:00
|
|
|
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
|
2014-04-03 14:01:04 -06:00
|
|
|
if err != nil {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
|
|
|
|
}
|
|
|
|
return sp, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// numBytes returns the number of bytes left to read in the current file's entry
|
|
|
|
// in the tar archive, or 0 if there is no current file.
|
|
|
|
func (tr *Reader) numBytes() int64 {
|
|
|
|
if tr.curr == nil {
|
|
|
|
// No current file, so no bytes
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
return tr.curr.numBytes()
|
|
|
|
}
|
|
|
|
|
2009-06-09 00:22:56 -06:00
|
|
|
// Read reads from the current entry in the tar archive.
|
2011-11-03 15:01:30 -06:00
|
|
|
// It returns 0, io.EOF when it reaches the end of that entry,
|
2009-06-09 00:22:56 -06:00
|
|
|
// until Next is called to advance to the next entry.
|
2015-12-17 00:10:14 -07:00
|
|
|
//
|
|
|
|
// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
|
|
|
|
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
|
|
|
|
// the Header.Size claims.
|
2016-08-29 17:10:32 -06:00
|
|
|
func (tr *Reader) Read(b []byte) (int, error) {
|
2015-10-01 04:08:18 -06:00
|
|
|
if tr.err != nil {
|
|
|
|
return 0, tr.err
|
|
|
|
}
|
2014-05-15 16:18:05 -06:00
|
|
|
if tr.curr == nil {
|
|
|
|
return 0, io.EOF
|
|
|
|
}
|
2015-10-01 04:08:18 -06:00
|
|
|
|
2016-08-29 17:10:32 -06:00
|
|
|
n, err := tr.curr.Read(b)
|
2014-04-03 14:01:04 -06:00
|
|
|
if err != nil && err != io.EOF {
|
|
|
|
tr.err = err
|
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return n, err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
func (rfr *regFileReader) Read(b []byte) (n int, err error) {
|
|
|
|
if rfr.nb == 0 {
|
2009-12-14 12:35:02 -07:00
|
|
|
// file consumed
|
2011-11-01 20:04:37 -06:00
|
|
|
return 0, io.EOF
|
2009-12-14 12:35:02 -07:00
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
if int64(len(b)) > rfr.nb {
|
|
|
|
b = b[0:rfr.nb]
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
n, err = rfr.r.Read(b)
|
|
|
|
rfr.nb -= int64(n)
|
2009-12-14 12:35:02 -07:00
|
|
|
|
2014-04-03 14:01:04 -06:00
|
|
|
if err == io.EOF && rfr.nb > 0 {
|
2009-12-14 12:35:02 -07:00
|
|
|
err = io.ErrUnexpectedEOF
|
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
return
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
|
|
|
// numBytes returns the number of bytes left to read in the file's data in the tar archive.
|
|
|
|
func (rfr *regFileReader) numBytes() int64 {
|
|
|
|
return rfr.nb
|
|
|
|
}
|
|
|
|
|
2015-09-28 17:38:16 -06:00
|
|
|
// newSparseFileReader creates a new sparseFileReader, but validates all of the
|
|
|
|
// sparse entries before doing so.
|
|
|
|
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
|
|
|
|
if total < 0 {
|
|
|
|
return nil, ErrHeader // Total size cannot be negative
|
|
|
|
}
|
|
|
|
|
|
|
|
// Validate all sparse entries. These are the same checks as performed by
|
|
|
|
// the BSD tar utility.
|
|
|
|
for i, s := range sp {
|
|
|
|
switch {
|
|
|
|
case s.offset < 0 || s.numBytes < 0:
|
|
|
|
return nil, ErrHeader // Negative values are never okay
|
|
|
|
case s.offset > math.MaxInt64-s.numBytes:
|
|
|
|
return nil, ErrHeader // Integer overflow with large length
|
|
|
|
case s.offset+s.numBytes > total:
|
|
|
|
return nil, ErrHeader // Region extends beyond the "real" size
|
|
|
|
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
|
|
|
|
return nil, ErrHeader // Regions can't overlap and must be in order
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// readHole reads a sparse hole ending at endOffset.
|
|
|
|
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
|
|
|
|
n64 := endOffset - sfr.pos
|
2014-04-03 14:01:04 -06:00
|
|
|
if n64 > int64(len(b)) {
|
|
|
|
n64 = int64(len(b))
|
|
|
|
}
|
|
|
|
n := int(n64)
|
|
|
|
for i := 0; i < n; i++ {
|
|
|
|
b[i] = 0
|
|
|
|
}
|
|
|
|
sfr.pos += n64
|
|
|
|
return n
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read reads the sparse file data in expanded form.
|
|
|
|
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
|
2015-09-28 17:38:16 -06:00
|
|
|
// Skip past all empty fragments.
|
|
|
|
for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
|
|
|
|
sfr.sp = sfr.sp[1:]
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there are no more fragments, then it is possible that there
|
|
|
|
// is one last sparse hole.
|
2014-04-03 14:01:04 -06:00
|
|
|
if len(sfr.sp) == 0 {
|
2015-09-28 17:38:16 -06:00
|
|
|
// This behavior matches the BSD tar utility.
|
|
|
|
// However, GNU tar stops returning data even if sfr.total is unmet.
|
|
|
|
if sfr.pos < sfr.total {
|
|
|
|
return sfr.readHole(b, sfr.total), nil
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
return 0, io.EOF
|
|
|
|
}
|
2015-09-28 17:38:16 -06:00
|
|
|
|
|
|
|
// In front of a data fragment, so read a hole.
|
2014-04-03 14:01:04 -06:00
|
|
|
if sfr.pos < sfr.sp[0].offset {
|
2015-09-28 17:38:16 -06:00
|
|
|
return sfr.readHole(b, sfr.sp[0].offset), nil
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2015-09-28 17:38:16 -06:00
|
|
|
// In a data fragment, so read from it.
|
|
|
|
// This math is overflow free since we verify that offset and numBytes can
|
|
|
|
// be safely added when creating the sparseFileReader.
|
|
|
|
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
|
|
|
|
bytesLeft := endPos - sfr.pos // Bytes left in fragment
|
2014-04-03 14:01:04 -06:00
|
|
|
if int64(len(b)) > bytesLeft {
|
2015-09-28 17:38:16 -06:00
|
|
|
b = b[:bytesLeft]
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
n, err = sfr.rfr.Read(b)
|
|
|
|
sfr.pos += int64(n)
|
2015-09-28 17:38:16 -06:00
|
|
|
if err == io.EOF {
|
|
|
|
if sfr.pos < endPos {
|
|
|
|
err = io.ErrUnexpectedEOF // There was supposed to be more data
|
|
|
|
} else if sfr.pos < sfr.total {
|
|
|
|
err = nil // There is still an implicit sparse hole at the end
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2015-09-28 17:38:16 -06:00
|
|
|
if sfr.pos == endPos {
|
|
|
|
sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2015-09-28 17:38:16 -06:00
|
|
|
return n, err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// numBytes returns the number of bytes left to read in the sparse file's
|
|
|
|
// sparse-encoded data in the tar archive.
|
|
|
|
func (sfr *sparseFileReader) numBytes() int64 {
|
2015-09-28 17:38:16 -06:00
|
|
|
return sfr.rfr.numBytes()
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|