2009-06-09 00:22:56 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package tar
|
|
|
|
|
|
|
|
// TODO(dsymonds):
|
2009-07-06 23:59:31 -06:00
|
|
|
// - pax extensions
|
2009-06-09 00:22:56 -06:00
|
|
|
|
|
|
|
import (
|
2009-12-15 16:33:31 -07:00
|
|
|
"bytes"
|
2011-11-01 20:04:37 -06:00
|
|
|
"errors"
|
2009-12-15 16:33:31 -07:00
|
|
|
"io"
|
2011-04-27 16:57:22 -06:00
|
|
|
"io/ioutil"
|
2009-12-15 16:33:31 -07:00
|
|
|
"os"
|
|
|
|
"strconv"
|
2011-11-30 10:01:46 -07:00
|
|
|
"time"
|
2009-06-09 00:22:56 -06:00
|
|
|
)
|
|
|
|
|
|
|
|
// ErrHeader is the error recorded by the Reader, and returned from Next,
// when a header block in the archive is not valid.
var ErrHeader = errors.New("invalid tar header")
|
|
|
|
|
|
|
|
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
//
// Example:
//	tr := tar.NewReader(r)
//	for {
//		hdr, err := tr.Next()
//		if err == io.EOF {
//			// end of tar archive
//			break
//		}
//		if err != nil {
//			// handle error
//		}
//		io.Copy(data, tr)
//	}
type Reader struct {
	r   io.Reader // underlying stream the archive is read from
	err error     // last error recorded; Next does not advance while it is non-nil
	nb  int64     // number of unread bytes for current file entry
	pad int64     // amount of padding (ignored) after current file entry
}
|
|
|
|
|
2009-07-08 19:31:14 -06:00
|
|
|
// NewReader creates a new Reader reading from r.
|
2009-12-15 16:33:31 -07:00
|
|
|
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
|
2009-06-09 00:22:56 -06:00
|
|
|
|
|
|
|
// Next advances to the next entry in the tar archive.
|
2011-11-01 20:04:37 -06:00
|
|
|
func (tr *Reader) Next() (*Header, error) {
|
2009-12-15 16:33:31 -07:00
|
|
|
var hdr *Header
|
2009-06-09 00:22:56 -06:00
|
|
|
if tr.err == nil {
|
2009-11-09 13:07:39 -07:00
|
|
|
tr.skipUnread()
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
if tr.err == nil {
|
2009-11-09 13:07:39 -07:00
|
|
|
hdr = tr.readHeader()
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
return hdr, tr.err
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// cString interprets b as a NUL-terminated C-style string and returns the
// bytes before the first NUL. When b contains no NUL byte, the whole slice
// is returned as a string.
func cString(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		return string(b[:end])
	}
	return string(b)
}
|
|
|
|
|
2009-06-10 22:32:36 -06:00
|
|
|
func (tr *Reader) octal(b []byte) int64 {
|
2009-08-09 16:03:30 -06:00
|
|
|
// Removing leading spaces.
|
|
|
|
for len(b) > 0 && b[0] == ' ' {
|
2009-11-20 12:45:05 -07:00
|
|
|
b = b[1:]
|
2009-08-09 16:03:30 -06:00
|
|
|
}
|
|
|
|
// Removing trailing NULs and spaces.
|
|
|
|
for len(b) > 0 && (b[len(b)-1] == ' ' || b[len(b)-1] == '\x00') {
|
2009-11-09 13:07:39 -07:00
|
|
|
b = b[0 : len(b)-1]
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
2011-12-05 13:48:46 -07:00
|
|
|
x, err := strconv.ParseUint(cString(b), 8, 64)
|
2009-06-09 00:22:56 -06:00
|
|
|
if err != nil {
|
2009-11-09 13:07:39 -07:00
|
|
|
tr.err = err
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
return int64(x)
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Skip any unread bytes in the existing file entry, as well as any alignment padding.
|
|
|
|
func (tr *Reader) skipUnread() {
|
2009-12-15 16:33:31 -07:00
|
|
|
nr := tr.nb + tr.pad // number of bytes to skip
|
|
|
|
tr.nb, tr.pad = 0, 0
|
2009-08-18 18:47:03 -06:00
|
|
|
if sr, ok := tr.r.(io.Seeker); ok {
|
2011-04-04 14:53:52 -06:00
|
|
|
if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
|
2009-12-14 12:35:02 -07:00
|
|
|
return
|
|
|
|
}
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2011-09-30 14:13:39 -06:00
|
|
|
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
func (tr *Reader) verifyChecksum(header []byte) bool {
|
|
|
|
if tr.err != nil {
|
2009-11-09 13:07:39 -07:00
|
|
|
return false
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2009-12-15 16:33:31 -07:00
|
|
|
given := tr.octal(header[148:156])
|
|
|
|
unsigned, signed := checksum(header)
|
|
|
|
return given == unsigned || given == signed
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
func (tr *Reader) readHeader() *Header {
|
2009-12-15 16:33:31 -07:00
|
|
|
header := make([]byte, blockSize)
|
2009-09-14 18:20:29 -06:00
|
|
|
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
|
2009-11-09 13:07:39 -07:00
|
|
|
return nil
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Two blocks of zero bytes marks the end of the archive.
|
2009-11-09 22:09:34 -07:00
|
|
|
if bytes.Equal(header, zeroBlock[0:blockSize]) {
|
2009-09-14 18:20:29 -06:00
|
|
|
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
|
2009-11-09 13:07:39 -07:00
|
|
|
return nil
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2009-12-14 12:35:02 -07:00
|
|
|
if bytes.Equal(header, zeroBlock[0:blockSize]) {
|
2011-11-01 20:04:37 -06:00
|
|
|
tr.err = io.EOF
|
2009-12-14 12:35:02 -07:00
|
|
|
} else {
|
2012-01-24 12:48:48 -07:00
|
|
|
tr.err = ErrHeader // zero block and then non-zero block
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
return nil
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
if !tr.verifyChecksum(header) {
|
2012-01-24 12:48:48 -07:00
|
|
|
tr.err = ErrHeader
|
2009-12-15 16:33:31 -07:00
|
|
|
return nil
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Unpack
|
2009-12-15 16:33:31 -07:00
|
|
|
hdr := new(Header)
|
|
|
|
s := slicer(header)
|
|
|
|
|
|
|
|
hdr.Name = cString(s.next(100))
|
|
|
|
hdr.Mode = tr.octal(s.next(8))
|
2010-04-22 15:01:33 -06:00
|
|
|
hdr.Uid = int(tr.octal(s.next(8)))
|
|
|
|
hdr.Gid = int(tr.octal(s.next(8)))
|
2009-12-15 16:33:31 -07:00
|
|
|
hdr.Size = tr.octal(s.next(12))
|
2011-11-30 10:01:46 -07:00
|
|
|
hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
|
2009-12-15 16:33:31 -07:00
|
|
|
s.next(8) // chksum
|
|
|
|
hdr.Typeflag = s.next(1)[0]
|
|
|
|
hdr.Linkname = cString(s.next(100))
|
2009-06-10 22:32:36 -06:00
|
|
|
|
|
|
|
// The remainder of the header depends on the value of magic.
|
2009-07-06 23:59:31 -06:00
|
|
|
// The original (v7) version of tar had no explicit magic field,
|
|
|
|
// so its magic bytes, like the rest of the block, are NULs.
|
2009-12-15 16:33:31 -07:00
|
|
|
magic := string(s.next(8)) // contains version field as well.
|
|
|
|
var format string
|
2009-06-10 22:32:36 -06:00
|
|
|
switch magic {
|
2009-12-15 16:33:31 -07:00
|
|
|
case "ustar\x0000": // POSIX tar (1003.1-1988)
|
2009-06-10 22:32:36 -06:00
|
|
|
if string(header[508:512]) == "tar\x00" {
|
2009-11-09 13:07:39 -07:00
|
|
|
format = "star"
|
2009-06-10 22:32:36 -06:00
|
|
|
} else {
|
2009-11-09 13:07:39 -07:00
|
|
|
format = "posix"
|
2009-10-06 15:55:39 -06:00
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
case "ustar \x00": // old GNU tar
|
2009-11-09 13:07:39 -07:00
|
|
|
format = "gnu"
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
switch format {
|
|
|
|
case "posix", "gnu", "star":
|
2009-12-15 16:33:31 -07:00
|
|
|
hdr.Uname = cString(s.next(32))
|
|
|
|
hdr.Gname = cString(s.next(32))
|
|
|
|
devmajor := s.next(8)
|
|
|
|
devminor := s.next(8)
|
2009-06-10 22:32:36 -06:00
|
|
|
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
|
2009-12-15 16:33:31 -07:00
|
|
|
hdr.Devmajor = tr.octal(devmajor)
|
|
|
|
hdr.Devminor = tr.octal(devminor)
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
var prefix string
|
2009-06-10 22:32:36 -06:00
|
|
|
switch format {
|
|
|
|
case "posix", "gnu":
|
2009-11-09 13:07:39 -07:00
|
|
|
prefix = cString(s.next(155))
|
2009-06-10 22:32:36 -06:00
|
|
|
case "star":
|
2009-12-15 16:33:31 -07:00
|
|
|
prefix = cString(s.next(131))
|
2011-11-30 10:01:46 -07:00
|
|
|
hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
|
|
|
|
hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
|
|
|
if len(prefix) > 0 {
|
2009-11-09 13:07:39 -07:00
|
|
|
hdr.Name = prefix + "/" + hdr.Name
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
|
|
|
}
|
2009-06-09 00:22:56 -06:00
|
|
|
|
|
|
|
if tr.err != nil {
|
2012-01-24 12:48:48 -07:00
|
|
|
tr.err = ErrHeader
|
2009-12-15 16:33:31 -07:00
|
|
|
return nil
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Maximum value of hdr.Size is 64 GB (12 octal digits),
|
|
|
|
// so there's no risk of int64 overflowing.
|
2009-12-15 16:33:31 -07:00
|
|
|
tr.nb = int64(hdr.Size)
|
|
|
|
tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two
|
2009-06-09 00:22:56 -06:00
|
|
|
|
2009-12-15 16:33:31 -07:00
|
|
|
return hdr
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Read reads from the current entry in the tar archive.
|
2011-11-03 15:01:30 -06:00
|
|
|
// It returns 0, io.EOF when it reaches the end of that entry,
|
2009-06-09 00:22:56 -06:00
|
|
|
// until Next is called to advance to the next entry.
|
2011-11-01 20:04:37 -06:00
|
|
|
func (tr *Reader) Read(b []byte) (n int, err error) {
|
2009-12-14 12:35:02 -07:00
|
|
|
if tr.nb == 0 {
|
|
|
|
// file consumed
|
2011-11-01 20:04:37 -06:00
|
|
|
return 0, io.EOF
|
2009-12-14 12:35:02 -07:00
|
|
|
}
|
|
|
|
|
2009-06-09 00:22:56 -06:00
|
|
|
if int64(len(b)) > tr.nb {
|
2009-11-09 22:09:34 -07:00
|
|
|
b = b[0:tr.nb]
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
n, err = tr.r.Read(b)
|
|
|
|
tr.nb -= int64(n)
|
2009-12-14 12:35:02 -07:00
|
|
|
|
2011-11-01 20:04:37 -06:00
|
|
|
if err == io.EOF && tr.nb > 0 {
|
2009-12-14 12:35:02 -07:00
|
|
|
err = io.ErrUnexpectedEOF
|
|
|
|
}
|
2009-12-15 16:33:31 -07:00
|
|
|
tr.err = err
|
|
|
|
return
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|