2009-06-09 00:22:56 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package tar
|
|
|
|
|
|
|
|
import (
|
2009-12-15 16:33:31 -07:00
|
|
|
"bytes"
|
|
|
|
"io"
|
2011-04-27 16:57:22 -06:00
|
|
|
"io/ioutil"
|
2009-12-15 16:33:31 -07:00
|
|
|
"strconv"
|
2013-02-10 17:36:29 -07:00
|
|
|
"strings"
|
2011-11-30 10:01:46 -07:00
|
|
|
"time"
|
2009-06-09 00:22:56 -06:00
|
|
|
)
|
|
|
|
|
2017-08-25 16:34:35 -06:00
|
|
|
// Reader provides sequential access to the contents of a tar archive.
|
|
|
|
// Reader.Next advances to the next file in the archive (including the first),
|
|
|
|
// and then Reader can be treated as an io.Reader to access the file's data.
|
2009-06-09 00:22:56 -06:00
|
|
|
type Reader struct {
|
2015-09-17 01:22:56 -06:00
|
|
|
r io.Reader
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
pad int64 // Amount of padding (ignored) after current file entry
|
|
|
|
curr fileReader // Reader for current file entry
|
|
|
|
blk block // Buffer to use as temporary local storage
|
2016-08-29 17:10:32 -06:00
|
|
|
|
|
|
|
// err is a persistent error.
|
|
|
|
// It is only the responsibility of every exported method of Reader to
|
|
|
|
// ensure that this error is sticky.
|
|
|
|
err error
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
type fileReader interface {
|
2014-04-03 14:01:04 -06:00
|
|
|
io.Reader
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
fileState
|
2014-04-03 14:01:04 -06:00
|
|
|
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
WriteTo(io.Writer) (int64, error)
|
2015-09-28 17:38:16 -06:00
|
|
|
}
|
|
|
|
|
2009-07-08 19:31:14 -06:00
|
|
|
// NewReader creates a new Reader reading from r.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
func NewReader(r io.Reader) *Reader {
|
|
|
|
return &Reader{r: r, curr: ®FileReader{r, 0}}
|
|
|
|
}
|
2009-06-09 00:22:56 -06:00
|
|
|
|
|
|
|
// Next advances to the next entry in the tar archive.
|
2017-08-25 16:34:35 -06:00
|
|
|
// The Header.Size determines how many bytes can be read for the next file.
|
|
|
|
// Any remaining data in the current file is automatically discarded.
|
2014-12-29 23:28:02 -07:00
|
|
|
//
|
|
|
|
// io.EOF is returned at the end of the input.
|
2011-11-01 20:04:37 -06:00
|
|
|
func (tr *Reader) Next() (*Header, error) {
|
2013-02-10 17:36:29 -07:00
|
|
|
if tr.err != nil {
|
2015-09-16 01:58:56 -06:00
|
|
|
return nil, tr.err
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr, err := tr.next()
|
|
|
|
tr.err = err
|
|
|
|
return hdr, err
|
|
|
|
}
|
2015-09-16 01:58:56 -06:00
|
|
|
|
2016-08-29 17:10:32 -06:00
|
|
|
func (tr *Reader) next() (*Header, error) {
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
var paxHdrs map[string]string
|
|
|
|
var gnuLongName, gnuLongLink string
|
2015-09-16 01:58:56 -06:00
|
|
|
|
|
|
|
// Externally, Next iterates through the tar archive as if it is a series of
|
|
|
|
// files. Internally, the tar format often uses fake "files" to add meta
|
|
|
|
// data that describes the next file. These meta data "files" should not
|
|
|
|
// normally be visible to the outside. As such, this loop iterates through
|
|
|
|
// one or more "header files" until it finds a "normal file".
|
2017-08-23 16:56:24 -06:00
|
|
|
format := FormatUSTAR | FormatPAX | FormatGNU
|
2015-09-16 01:58:56 -06:00
|
|
|
for {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// Discard the remainder of the file and any padding.
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, err
|
2015-06-13 02:53:06 -06:00
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
tr.pad = 0
|
|
|
|
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr, rawHdr, err := tr.readHeader()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2016-08-20 02:46:32 -06:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
if err := tr.handleRegularFile(hdr); err != nil {
|
|
|
|
return nil, err
|
2015-06-13 02:53:06 -06:00
|
|
|
}
|
2017-08-23 16:56:24 -06:00
|
|
|
format.mayOnlyBe(hdr.Format)
|
2014-04-03 14:01:04 -06:00
|
|
|
|
2015-09-16 01:58:56 -06:00
|
|
|
// Check for PAX/GNU special headers and files.
|
|
|
|
switch hdr.Typeflag {
|
2017-08-25 16:15:41 -06:00
|
|
|
case TypeXHeader, TypeXGlobalHeader:
|
2017-08-23 16:56:24 -06:00
|
|
|
format.mayOnlyBe(FormatPAX)
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
paxHdrs, err = parsePAX(tr)
|
2016-08-29 17:10:32 -06:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-03 19:12:31 -07:00
|
|
|
}
|
2017-08-25 16:15:41 -06:00
|
|
|
if hdr.Typeflag == TypeXGlobalHeader {
|
|
|
|
mergePAX(hdr, paxHdrs)
|
|
|
|
return &Header{
|
archive/tar: use placeholder name for global PAX records
Several usages of tar (reasonably) just use the Header.FileInfo
to determine the type of the header. However, the os.FileMode type
is not expressive enough to represent "files" that are not files
at all, but some form of metadata.
Thus, Header{Typeflag: TypeXGlobalHeader}.FileInfo().Mode().IsRegular()
reports true, even though the expected result may have been false.
To reduce (not eliminate) the possibility of failure for such usages,
use the placeholder filename from the global PAX headers.
Thus, in the event the user did not handle special "meta" headers
specifically, they will just be written to disk as a regular file.
As an example use case, the "git archive --format=tgz" command produces
an archive where the first "file" is a global PAX header with the
name "global_pax_header". For users that do not explicitly check
the Header.Typeflag field to ignore such headers, they may end up
extracting a file named "global_pax_header". While it is a bogus file,
it at least does not stop the extraction process.
Updates #22748
Change-Id: I28448b528dcfacb4e92311824c33c71b482f49c9
Reviewed-on: https://go-review.googlesource.com/78355
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-11-16 11:15:34 -07:00
|
|
|
Name: hdr.Name,
|
2017-08-25 16:15:41 -06:00
|
|
|
Typeflag: hdr.Typeflag,
|
|
|
|
Xattrs: hdr.Xattrs,
|
|
|
|
PAXRecords: hdr.PAXRecords,
|
|
|
|
Format: format,
|
|
|
|
}, nil
|
|
|
|
}
|
2018-02-23 22:22:04 -07:00
|
|
|
continue // This is a meta header affecting the next header
|
2015-09-16 01:58:56 -06:00
|
|
|
case TypeGNULongName, TypeGNULongLink:
|
2017-08-23 16:56:24 -06:00
|
|
|
format.mayOnlyBe(FormatGNU)
|
2016-08-29 17:10:32 -06:00
|
|
|
realname, err := ioutil.ReadAll(tr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2015-09-28 17:38:16 -06:00
|
|
|
}
|
2015-09-16 01:58:56 -06:00
|
|
|
|
|
|
|
var p parser
|
|
|
|
switch hdr.Typeflag {
|
|
|
|
case TypeGNULongName:
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
gnuLongName = p.parseString(realname)
|
2015-09-16 01:58:56 -06:00
|
|
|
case TypeGNULongLink:
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
gnuLongLink = p.parseString(realname)
|
2015-09-16 01:58:56 -06:00
|
|
|
}
|
2018-02-23 22:22:04 -07:00
|
|
|
continue // This is a meta header affecting the next header
|
2015-09-16 01:58:56 -06:00
|
|
|
default:
|
2016-08-20 02:46:32 -06:00
|
|
|
// The old GNU sparse format is handled here since it is technically
|
|
|
|
// just a regular file with additional attributes.
|
|
|
|
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
if err := mergePAX(hdr, paxHdrs); err != nil {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, err
|
|
|
|
}
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
if gnuLongName != "" {
|
|
|
|
hdr.Name = gnuLongName
|
|
|
|
}
|
|
|
|
if gnuLongLink != "" {
|
|
|
|
hdr.Linkname = gnuLongLink
|
|
|
|
}
|
2018-01-01 17:16:43 -07:00
|
|
|
if hdr.Typeflag == TypeRegA {
|
|
|
|
if strings.HasSuffix(hdr.Name, "/") {
|
|
|
|
hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories
|
|
|
|
} else {
|
|
|
|
hdr.Typeflag = TypeReg
|
|
|
|
}
|
2017-10-06 02:40:58 -06:00
|
|
|
}
|
2015-09-16 01:58:56 -06:00
|
|
|
|
2016-09-02 15:37:35 -06:00
|
|
|
// The extended headers may have updated the size.
|
|
|
|
// Thus, setup the regFileReader again after merging PAX headers.
|
|
|
|
if err := tr.handleRegularFile(hdr); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-08-20 02:46:32 -06:00
|
|
|
|
2016-09-02 15:37:35 -06:00
|
|
|
// Sparse formats rely on being able to read from the logical data
|
|
|
|
// section; there must be a preceding call to handleRegularFile.
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
if err := tr.handleSparseFile(hdr, rawHdr); err != nil {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, err
|
2015-09-16 01:58:56 -06:00
|
|
|
}
|
2017-08-23 16:56:24 -06:00
|
|
|
|
|
|
|
// Set the final guess at the format.
|
|
|
|
if format.has(FormatUSTAR) && format.has(FormatPAX) {
|
|
|
|
format.mayOnlyBe(FormatUSTAR)
|
|
|
|
}
|
|
|
|
hdr.Format = format
|
2016-08-29 17:10:32 -06:00
|
|
|
return hdr, nil // This is a file, so stop
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-20 02:46:32 -06:00
|
|
|
// handleRegularFile sets up the current file reader and padding such that it
|
|
|
|
// can only read the following logical data section. It will properly handle
|
|
|
|
// special headers that contain no data section.
|
|
|
|
func (tr *Reader) handleRegularFile(hdr *Header) error {
|
|
|
|
nb := hdr.Size
|
|
|
|
if isHeaderOnlyType(hdr.Typeflag) {
|
|
|
|
nb = 0
|
|
|
|
}
|
|
|
|
if nb < 0 {
|
|
|
|
return ErrHeader
|
|
|
|
}
|
|
|
|
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
tr.pad = blockPadding(nb)
|
2016-08-20 02:46:32 -06:00
|
|
|
tr.curr = ®FileReader{r: tr.r, nb: nb}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleSparseFile checks if the current file is a sparse format of any type
|
|
|
|
// and sets the curr reader appropriately.
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
var spd sparseDatas
|
2016-08-20 02:46:32 -06:00
|
|
|
var err error
|
|
|
|
if hdr.Typeflag == TypeGNUSparse {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
|
2016-08-20 02:46:32 -06:00
|
|
|
} else {
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
spd, err = tr.readGNUSparsePAXHeaders(hdr)
|
2016-08-20 02:46:32 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// If sp is non-nil, then this is a sparse file.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// Note that it is possible for len(sp) == 0.
|
|
|
|
if err == nil && spd != nil {
|
|
|
|
if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
|
|
|
|
return ErrHeader
|
|
|
|
}
|
|
|
|
sph := invertSparseEntries(spd, hdr.Size)
|
|
|
|
tr.curr = &sparseFileReader{tr.curr, sph, 0}
|
2016-08-20 02:46:32 -06:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.
|
|
|
|
// If they are found, then this function reads the sparse map and returns it.
|
|
|
|
// This assumes that 0.0 headers have already been converted to 0.1 headers
|
2018-02-18 10:20:57 -07:00
|
|
|
// by the PAX header parsing logic.
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// Identify the version of GNU headers.
|
|
|
|
var is1x0 bool
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor]
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
switch {
|
|
|
|
case major == "0" && (minor == "0" || minor == "1"):
|
|
|
|
is1x0 = false
|
|
|
|
case major == "1" && minor == "0":
|
|
|
|
is1x0 = true
|
|
|
|
case major != "" || minor != "":
|
|
|
|
return nil, nil // Unknown GNU sparse PAX version
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
case hdr.PAXRecords[paxGNUSparseMap] != "":
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess
|
|
|
|
default:
|
|
|
|
return nil, nil // Not a PAX format GNU sparse file.
|
|
|
|
}
|
2017-08-23 16:56:24 -06:00
|
|
|
hdr.Format.mayOnlyBe(FormatPAX)
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
|
|
|
|
// Update hdr from GNU sparse PAX headers.
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
if name := hdr.PAXRecords[paxGNUSparseName]; name != "" {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
hdr.Name = name
|
|
|
|
}
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
size := hdr.PAXRecords[paxGNUSparseSize]
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
if size == "" {
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
size = hdr.PAXRecords[paxGNUSparseRealSize]
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
}
|
|
|
|
if size != "" {
|
|
|
|
n, err := strconv.ParseInt(size, 10, 64)
|
2014-04-03 14:01:04 -06:00
|
|
|
if err != nil {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
hdr.Size = n
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// Read the sparse map according to the appropriate format.
|
|
|
|
if is1x0 {
|
|
|
|
return readGNUSparseMap1x0(tr.curr)
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2017-11-07 12:43:44 -07:00
|
|
|
return readGNUSparseMap0x1(hdr.PAXRecords)
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2017-08-25 16:15:41 -06:00
|
|
|
// mergePAX merges paxHdrs into hdr for all relevant fields of Header.
|
|
|
|
func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
|
|
|
|
for k, v := range paxHdrs {
|
|
|
|
if v == "" {
|
|
|
|
continue // Keep the original USTAR value
|
|
|
|
}
|
|
|
|
var id64 int64
|
2013-02-10 17:36:29 -07:00
|
|
|
switch k {
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxPath:
|
2013-02-10 17:36:29 -07:00
|
|
|
hdr.Name = v
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxLinkpath:
|
2013-02-10 17:36:29 -07:00
|
|
|
hdr.Linkname = v
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxUname:
|
2013-02-10 17:36:29 -07:00
|
|
|
hdr.Uname = v
|
2016-08-29 17:10:32 -06:00
|
|
|
case paxGname:
|
|
|
|
hdr.Gname = v
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxUid:
|
2016-09-02 22:03:57 -06:00
|
|
|
id64, err = strconv.ParseInt(v, 10, 64)
|
|
|
|
hdr.Uid = int(id64) // Integer overflow possible
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxGid:
|
2016-09-02 22:03:57 -06:00
|
|
|
id64, err = strconv.ParseInt(v, 10, 64)
|
|
|
|
hdr.Gid = int(id64) // Integer overflow possible
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxAtime:
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr.AccessTime, err = parsePAXTime(v)
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxMtime:
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr.ModTime, err = parsePAXTime(v)
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxCtime:
|
2016-08-29 17:10:32 -06:00
|
|
|
hdr.ChangeTime, err = parsePAXTime(v)
|
2013-08-18 18:45:44 -06:00
|
|
|
case paxSize:
|
2016-09-02 22:03:57 -06:00
|
|
|
hdr.Size, err = strconv.ParseInt(v, 10, 64)
|
2014-02-13 02:08:30 -07:00
|
|
|
default:
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
if strings.HasPrefix(k, paxSchilyXattr) {
|
2014-02-13 02:08:30 -07:00
|
|
|
if hdr.Xattrs == nil {
|
|
|
|
hdr.Xattrs = make(map[string]string)
|
|
|
|
}
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
hdr.Xattrs[k[len(paxSchilyXattr):]] = v
|
2014-02-13 02:08:30 -07:00
|
|
|
}
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
if err != nil {
|
|
|
|
return ErrHeader
|
|
|
|
}
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
2017-08-25 16:15:41 -06:00
|
|
|
hdr.PAXRecords = paxHdrs
|
2013-02-10 17:36:29 -07:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// parsePAX parses PAX headers.
|
|
|
|
// If an extended header (type 'x') is invalid, ErrHeader is returned
|
|
|
|
func parsePAX(r io.Reader) (map[string]string, error) {
|
|
|
|
buf, err := ioutil.ReadAll(r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-09-28 14:49:35 -06:00
|
|
|
sbuf := string(buf)
|
2014-04-03 14:01:04 -06:00
|
|
|
|
|
|
|
// For GNU PAX sparse format 0.0 support.
|
2016-10-18 18:22:25 -06:00
|
|
|
// This function transforms the sparse format 0.0 headers into format 0.1
|
|
|
|
// headers since 0.0 headers were not PAX compliant.
|
|
|
|
var sparseMap []string
|
2014-04-03 14:01:04 -06:00
|
|
|
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
paxHdrs := make(map[string]string)
|
2015-09-28 14:49:35 -06:00
|
|
|
for len(sbuf) > 0 {
|
|
|
|
key, value, residual, err := parsePAXRecord(sbuf)
|
|
|
|
if err != nil {
|
2013-02-10 17:36:29 -07:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2015-09-28 14:49:35 -06:00
|
|
|
sbuf = residual
|
2014-04-03 14:01:04 -06:00
|
|
|
|
2016-10-18 18:22:25 -06:00
|
|
|
switch key {
|
|
|
|
case paxGNUSparseOffset, paxGNUSparseNumBytes:
|
|
|
|
// Validate sparse header order and value.
|
|
|
|
if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
|
|
|
|
(len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
|
|
|
|
strings.Contains(value, ",") {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
sparseMap = append(sparseMap, value)
|
|
|
|
default:
|
2017-08-25 16:15:41 -06:00
|
|
|
paxHdrs[key] = value
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
}
|
2016-10-18 18:22:25 -06:00
|
|
|
if len(sparseMap) > 0 {
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
return paxHdrs, nil
|
2013-02-10 17:36:29 -07:00
|
|
|
}
|
|
|
|
|
2015-10-01 03:30:29 -06:00
|
|
|
// readHeader reads the next block header and assumes that the underlying reader
|
2016-08-20 02:46:32 -06:00
|
|
|
// is already aligned to a block boundary. It returns the raw block of the
|
|
|
|
// header in case further processing is required.
|
2015-10-01 03:30:29 -06:00
|
|
|
//
|
|
|
|
// The err will be set to io.EOF only when one of the following occurs:
|
|
|
|
// * Exactly 0 bytes are read and EOF is hit.
|
|
|
|
// * Exactly 1 block of zeros is read and EOF is hit.
|
|
|
|
// * At least 2 blocks of zeros are read.
|
2016-08-29 17:10:32 -06:00
|
|
|
func (tr *Reader) readHeader() (*Header, *block, error) {
|
2009-06-09 00:22:56 -06:00
|
|
|
// Two blocks of zero bytes marks the end of the archive.
|
2016-08-29 17:10:32 -06:00
|
|
|
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
|
|
|
|
return nil, nil, err // EOF is okay here; exactly 0 bytes read
|
|
|
|
}
|
2015-09-17 01:22:56 -06:00
|
|
|
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
2016-08-29 17:10:32 -06:00
|
|
|
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
|
|
|
|
return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2015-09-17 01:22:56 -06:00
|
|
|
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, nil, ErrHeader // Zero block and then non-zero block
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2015-09-17 01:22:56 -06:00
|
|
|
// Verify the header matches a known format.
|
|
|
|
format := tr.blk.GetFormat()
|
2017-08-23 16:56:24 -06:00
|
|
|
if format == FormatUnknown {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, nil, ErrHeader
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2015-09-28 14:49:35 -06:00
|
|
|
var p parser
|
2009-12-15 16:33:31 -07:00
|
|
|
hdr := new(Header)
|
2009-06-10 22:32:36 -06:00
|
|
|
|
2015-09-17 01:22:56 -06:00
|
|
|
// Unpack the V7 header.
|
|
|
|
v7 := tr.blk.V7()
|
2017-08-23 16:56:24 -06:00
|
|
|
hdr.Typeflag = v7.TypeFlag()[0]
|
2015-09-17 01:22:56 -06:00
|
|
|
hdr.Name = p.parseString(v7.Name())
|
2017-08-23 16:56:24 -06:00
|
|
|
hdr.Linkname = p.parseString(v7.LinkName())
|
|
|
|
hdr.Size = p.parseNumeric(v7.Size())
|
2015-09-17 01:22:56 -06:00
|
|
|
hdr.Mode = p.parseNumeric(v7.Mode())
|
|
|
|
hdr.Uid = int(p.parseNumeric(v7.UID()))
|
|
|
|
hdr.Gid = int(p.parseNumeric(v7.GID()))
|
|
|
|
hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
|
|
|
|
|
|
|
|
// Unpack format specific fields.
|
|
|
|
if format > formatV7 {
|
|
|
|
ustar := tr.blk.USTAR()
|
|
|
|
hdr.Uname = p.parseString(ustar.UserName())
|
|
|
|
hdr.Gname = p.parseString(ustar.GroupName())
|
2017-08-14 17:14:08 -06:00
|
|
|
hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
|
|
|
|
hdr.Devminor = p.parseNumeric(ustar.DevMinor())
|
2015-09-17 01:22:56 -06:00
|
|
|
|
2009-12-15 16:33:31 -07:00
|
|
|
var prefix string
|
2017-08-23 16:56:24 -06:00
|
|
|
switch {
|
|
|
|
case format.has(FormatUSTAR | FormatPAX):
|
|
|
|
hdr.Format = format
|
2015-09-17 01:22:56 -06:00
|
|
|
ustar := tr.blk.USTAR()
|
|
|
|
prefix = p.parseString(ustar.Prefix())
|
2017-08-23 16:56:24 -06:00
|
|
|
|
|
|
|
// For Format detection, check if block is properly formatted since
|
|
|
|
// the parser is more liberal than what USTAR actually permits.
|
|
|
|
notASCII := func(r rune) bool { return r >= 0x80 }
|
|
|
|
if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 {
|
|
|
|
hdr.Format = FormatUnknown // Non-ASCII characters in block.
|
|
|
|
}
|
|
|
|
nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 }
|
|
|
|
if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) &&
|
|
|
|
nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) {
|
|
|
|
hdr.Format = FormatUnknown // Numeric fields must end in NUL
|
|
|
|
}
|
|
|
|
case format.has(formatSTAR):
|
2015-09-17 01:22:56 -06:00
|
|
|
star := tr.blk.STAR()
|
|
|
|
prefix = p.parseString(star.Prefix())
|
|
|
|
hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
|
|
|
|
hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
|
2017-08-23 16:56:24 -06:00
|
|
|
case format.has(FormatGNU):
|
|
|
|
hdr.Format = format
|
2017-08-07 18:58:43 -06:00
|
|
|
var p2 parser
|
2016-10-18 18:51:04 -06:00
|
|
|
gnu := tr.blk.GNU()
|
2017-08-07 18:58:43 -06:00
|
|
|
if b := gnu.AccessTime(); b[0] != 0 {
|
|
|
|
hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0)
|
|
|
|
}
|
|
|
|
if b := gnu.ChangeTime(); b[0] != 0 {
|
|
|
|
hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Prior to Go1.8, the Writer had a bug where it would output
|
|
|
|
// an invalid tar file in certain rare situations because the logic
|
|
|
|
// incorrectly believed that the old GNU format had a prefix field.
|
|
|
|
// This is wrong and leads to an output file that mangles the
|
|
|
|
// atime and ctime fields, which are often left unused.
|
|
|
|
//
|
|
|
|
// In order to continue reading tar files created by former, buggy
|
|
|
|
// versions of Go, we skeptically parse the atime and ctime fields.
|
|
|
|
// If we are unable to parse them and the prefix field looks like
|
|
|
|
// an ASCII string, then we fallback on the pre-Go1.8 behavior
|
|
|
|
// of treating these fields as the USTAR prefix field.
|
|
|
|
//
|
|
|
|
// Note that this will not use the fallback logic for all possible
|
|
|
|
// files generated by a pre-Go1.8 toolchain. If the generated file
|
|
|
|
// happened to have a prefix field that parses as valid
|
|
|
|
// atime and ctime fields (e.g., when they are valid octal strings),
|
|
|
|
// then it is impossible to distinguish between an valid GNU file
|
|
|
|
// and an invalid pre-Go1.8 file.
|
|
|
|
//
|
|
|
|
// See https://golang.org/issues/12594
|
|
|
|
// See https://golang.org/issues/21005
|
|
|
|
if p2.err != nil {
|
|
|
|
hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{}
|
|
|
|
ustar := tr.blk.USTAR()
|
|
|
|
if s := p.parseString(ustar.Prefix()); isASCII(s) {
|
|
|
|
prefix = s
|
|
|
|
}
|
2017-08-23 16:56:24 -06:00
|
|
|
hdr.Format = FormatUnknown // Buggy file is not GNU
|
2017-08-07 18:58:43 -06:00
|
|
|
}
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
|
|
|
if len(prefix) > 0 {
|
2009-11-09 13:07:39 -07:00
|
|
|
hdr.Name = prefix + "/" + hdr.Name
|
2009-06-10 22:32:36 -06:00
|
|
|
}
|
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return hdr, &tr.blk, p.err
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
|
|
|
|
2016-09-02 17:17:37 -06:00
|
|
|
// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
|
|
|
|
// The sparse map is stored in the tar header if it's small enough.
|
|
|
|
// If it's larger than four entries, then one or more extension headers are used
|
|
|
|
// to store the rest of the sparse map.
|
|
|
|
//
|
|
|
|
// The Header.Size does not reflect the size of any extended headers used.
|
|
|
|
// Thus, this function will read from the raw io.Reader to fetch extra headers.
|
|
|
|
// This method mutates blk in the process.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
|
2016-09-02 17:17:37 -06:00
|
|
|
// Make sure that the input format is GNU.
|
|
|
|
// Unfortunately, the STAR format also has a sparse header format that uses
|
|
|
|
// the same type flag but has a completely different layout.
|
2017-08-23 16:56:24 -06:00
|
|
|
if blk.GetFormat() != FormatGNU {
|
2016-09-02 17:17:37 -06:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2017-08-23 16:56:24 -06:00
|
|
|
hdr.Format.mayOnlyBe(FormatGNU)
|
2016-09-02 17:17:37 -06:00
|
|
|
|
2015-09-28 14:49:35 -06:00
|
|
|
var p parser
|
2016-09-02 17:17:37 -06:00
|
|
|
hdr.Size = p.parseNumeric(blk.GNU().RealSize())
|
|
|
|
if p.err != nil {
|
|
|
|
return nil, p.err
|
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
s := blk.GNU().Sparse()
|
|
|
|
spd := make(sparseDatas, 0, s.MaxEntries())
|
2016-09-02 17:17:37 -06:00
|
|
|
for {
|
2015-09-17 01:22:56 -06:00
|
|
|
for i := 0; i < s.MaxEntries(); i++ {
|
2016-09-02 17:17:37 -06:00
|
|
|
// This termination condition is identical to GNU and BSD tar.
|
|
|
|
if s.Entry(i).Offset()[0] == 0x00 {
|
|
|
|
break // Don't return, need to process extended headers (even if empty)
|
|
|
|
}
|
|
|
|
offset := p.parseNumeric(s.Entry(i).Offset())
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
length := p.parseNumeric(s.Entry(i).Length())
|
2015-09-28 14:49:35 -06:00
|
|
|
if p.err != nil {
|
2016-08-29 17:10:32 -06:00
|
|
|
return nil, p.err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2017-11-15 12:27:10 -07:00
|
|
|
spd = append(spd, sparseEntry{Offset: offset, Length: length})
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2016-09-02 17:17:37 -06:00
|
|
|
|
|
|
|
if s.IsExtended()[0] > 0 {
|
|
|
|
// There are more entries. Read an extension header and parse its entries.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
if _, err := mustReadFull(tr.r, blk[:]); err != nil {
|
2016-09-02 17:17:37 -06:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
s = blk.Sparse()
|
|
|
|
continue
|
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return spd, nil // Done
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-01 02:35:15 -06:00
|
|
|
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
|
|
|
|
// version 1.0. The format of the sparse map consists of a series of
|
|
|
|
// newline-terminated numeric fields. The first field is the number of entries
|
|
|
|
// and is always present. Following this are the entries, consisting of two
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// fields (offset, length). This function must stop reading at the end
|
2015-10-01 02:35:15 -06:00
|
|
|
// boundary of the block containing the last newline.
|
|
|
|
//
|
|
|
|
// Note that the GNU manual says that numeric values should be encoded in octal
|
|
|
|
// format. However, the GNU tar utility itself outputs these values in decimal.
|
|
|
|
// As such, this library treats values as being encoded in decimal.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
|
|
|
|
var (
|
|
|
|
cntNewline int64
|
|
|
|
buf bytes.Buffer
|
|
|
|
blk block
|
|
|
|
)
|
|
|
|
|
|
|
|
// feedTokens copies data in blocks from r into buf until there are
|
2015-10-01 02:35:15 -06:00
|
|
|
// at least cnt newlines in buf. It will not read more blocks than needed.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
feedTokens := func(n int64) error {
|
|
|
|
for cntNewline < n {
|
|
|
|
if _, err := mustReadFull(r, blk[:]); err != nil {
|
2015-10-01 02:35:15 -06:00
|
|
|
return err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
buf.Write(blk[:])
|
2015-10-01 02:35:15 -06:00
|
|
|
for _, c := range blk {
|
|
|
|
if c == '\n' {
|
|
|
|
cntNewline++
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
}
|
2015-10-01 02:35:15 -06:00
|
|
|
return nil
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
2015-10-01 02:35:15 -06:00
|
|
|
// nextToken gets the next token delimited by a newline. This assumes that
|
|
|
|
// at least one newline exists in the buffer.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
nextToken := func() string {
|
2015-10-01 02:35:15 -06:00
|
|
|
cntNewline--
|
|
|
|
tok, _ := buf.ReadString('\n')
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return strings.TrimRight(tok, "\n")
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2015-10-01 02:35:15 -06:00
|
|
|
// Parse for the number of entries.
|
|
|
|
// Use integer overflow resistant math to check this.
|
|
|
|
if err := feedTokens(1); err != nil {
|
2014-04-03 14:01:04 -06:00
|
|
|
return nil, err
|
|
|
|
}
|
2015-10-01 02:35:15 -06:00
|
|
|
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
|
|
|
|
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
2015-10-01 02:35:15 -06:00
|
|
|
// Parse for all member entries.
|
|
|
|
// numEntries is trusted after this since a potential attacker must have
|
|
|
|
// committed resources proportional to what this library used.
|
|
|
|
if err := feedTokens(2 * numEntries); err != nil {
|
2014-04-03 14:01:04 -06:00
|
|
|
return nil, err
|
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
spd := make(sparseDatas, 0, numEntries)
|
2014-04-03 14:01:04 -06:00
|
|
|
for i := int64(0); i < numEntries; i++ {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
|
|
|
|
length, err2 := strconv.ParseInt(nextToken(), 10, 64)
|
|
|
|
if err1 != nil || err2 != nil {
|
2015-10-01 02:35:15 -06:00
|
|
|
return nil, ErrHeader
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2017-11-15 12:27:10 -07:00
|
|
|
spd = append(spd, sparseEntry{Offset: offset, Length: length})
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return spd, nil
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2015-10-01 02:04:24 -06:00
|
|
|
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
|
|
|
|
// version 0.1. The sparse map is stored in the PAX headers.
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
|
2015-10-01 02:04:24 -06:00
|
|
|
// Get number of entries.
|
|
|
|
// Use integer overflow resistant math to check this.
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
|
2015-10-01 02:04:24 -06:00
|
|
|
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
|
|
|
|
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
|
2014-04-03 14:01:04 -06:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
|
2015-10-01 02:04:24 -06:00
|
|
|
// There should be two numbers in sparseMap for each entry.
|
archive/tar: support arbitrary PAX records
This CL adds the following new publicly visible API:
type Header struct { ...; PAXRecords map[string]string }
The new Header.PAXRecords field is a map of all PAX extended header records.
We suggest (but do not enforce) that users use VENDOR-prefixed keys
according to the following in the PAX specification:
<<<
The standard developers have reserved keyword name space for vendor extensions.
It is suggested that the format to be used is:
VENDOR.keyword
where VENDOR is the name of the vendor or organization in all uppercase letters.
>>>
When reading, the Header.PAXRecords is populated with all PAX records
encountered so far, including basic ones (e.g., "path", "mtime", etc).
When writing, the fields of Header will be merged into PAXRecords,
overwriting any records that may conflict.
Since PAXRecords is a more expressive feature than Xattrs and
is entirely a superset of Xattrs, we mark Xattrs as deprecated,
and steer users towards the new PAXRecords API.
The issue has a discussion about adding a Header.SetPAXRecord method
to help validate records and keep the Header fields in sync.
However, we do not include that in this CL since that helper
method can always be added in the future.
There is no support for global records.
Fixes #14472
Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071
Reviewed-on: https://go-review.googlesource.com/58390
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
|
|
|
sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
if len(sparseMap) == 1 && sparseMap[0] == "" {
|
|
|
|
sparseMap = sparseMap[:0]
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
if int64(len(sparseMap)) != 2*numEntries {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
|
2015-10-01 02:04:24 -06:00
|
|
|
// Loop through the entries in the sparse map.
|
|
|
|
// numEntries is trusted now.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
spd := make(sparseDatas, 0, numEntries)
|
|
|
|
for len(sparseMap) >= 2 {
|
|
|
|
offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
|
|
|
|
length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
|
|
|
|
if err1 != nil || err2 != nil {
|
2014-04-03 14:01:04 -06:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2017-11-15 12:27:10 -07:00
|
|
|
spd = append(spd, sparseEntry{Offset: offset, Length: length})
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
sparseMap = sparseMap[2:]
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return spd, nil
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2017-08-26 03:12:56 -06:00
|
|
|
// Read reads from the current file in the tar archive.
|
|
|
|
// It returns (0, io.EOF) when it reaches the end of that file,
|
|
|
|
// until Next is called to advance to the next file.
|
2015-12-17 00:10:14 -07:00
|
|
|
//
|
2017-08-26 03:12:56 -06:00
|
|
|
// If the current file is sparse, then the regions marked as a hole
|
2017-08-25 16:34:35 -06:00
|
|
|
// are read back as NUL-bytes.
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
//
|
2017-10-03 03:53:01 -06:00
|
|
|
// Calling Read on special types like TypeLink, TypeSymlink, TypeChar,
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what
|
2015-12-17 00:10:14 -07:00
|
|
|
// the Header.Size claims.
|
2016-08-29 17:10:32 -06:00
|
|
|
func (tr *Reader) Read(b []byte) (int, error) {
|
2015-10-01 04:08:18 -06:00
|
|
|
if tr.err != nil {
|
|
|
|
return 0, tr.err
|
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
n, err := tr.curr.Read(b)
|
2014-04-03 14:01:04 -06:00
|
|
|
if err != nil && err != io.EOF {
|
|
|
|
tr.err = err
|
|
|
|
}
|
2016-08-29 17:10:32 -06:00
|
|
|
return n, err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2017-11-15 12:27:10 -07:00
|
|
|
// writeTo writes the content of the current file to w.
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
// The bytes written matches the number of remaining bytes in the current file.
|
|
|
|
//
|
|
|
|
// If the current file is sparse and w is an io.WriteSeeker,
|
2017-11-15 12:27:10 -07:00
|
|
|
// then writeTo uses Seek to skip past holes defined in Header.SparseHoles,
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
// assuming that skipped regions are filled with NULs.
|
|
|
|
// This always writes the last byte to ensure w is the right size.
|
2017-11-15 12:27:10 -07:00
|
|
|
//
|
|
|
|
// TODO(dsnet): Re-export this when adding sparse file support.
|
|
|
|
// See https://golang.org/issue/22735
|
|
|
|
func (tr *Reader) writeTo(w io.Writer) (int64, error) {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
if tr.err != nil {
|
|
|
|
return 0, tr.err
|
2009-12-14 12:35:02 -07:00
|
|
|
}
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
n, err := tr.curr.WriteTo(w)
|
|
|
|
if err != nil {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
tr.err = err
|
|
|
|
}
|
|
|
|
return n, err
|
2009-06-09 00:22:56 -06:00
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// regFileReader is a fileReader for reading data from a regular file entry.
|
|
|
|
type regFileReader struct {
|
|
|
|
r io.Reader // Underlying Reader
|
|
|
|
nb int64 // Number of remaining bytes to read
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
2017-09-25 16:03:49 -06:00
|
|
|
func (fr *regFileReader) Read(b []byte) (n int, err error) {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
if int64(len(b)) > fr.nb {
|
|
|
|
b = b[:fr.nb]
|
|
|
|
}
|
2017-09-25 16:03:49 -06:00
|
|
|
if len(b) > 0 {
|
|
|
|
n, err = fr.r.Read(b)
|
|
|
|
fr.nb -= int64(n)
|
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
switch {
|
|
|
|
case err == io.EOF && fr.nb > 0:
|
|
|
|
return n, io.ErrUnexpectedEOF
|
|
|
|
case err == nil && fr.nb == 0:
|
|
|
|
return n, io.EOF
|
|
|
|
default:
|
|
|
|
return n, err
|
2015-09-28 17:38:16 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
|
|
|
|
return io.Copy(w, struct{ io.Reader }{fr})
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
}
|
2015-09-28 17:38:16 -06:00
|
|
|
|
2017-11-07 12:43:44 -07:00
|
|
|
func (fr regFileReader) LogicalRemaining() int64 {
|
|
|
|
return fr.nb
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
}
|
2017-11-07 12:43:44 -07:00
|
|
|
|
|
|
|
func (fr regFileReader) PhysicalRemaining() int64 {
|
|
|
|
return fr.nb
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// sparseFileReader is a fileReader for reading data from a sparse file entry.
|
|
|
|
type sparseFileReader struct {
|
|
|
|
fr fileReader // Underlying fileReader
|
|
|
|
sp sparseHoles // Normalized list of sparse holes
|
|
|
|
pos int64 // Current position in sparse file
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
finished := int64(len(b)) >= sr.LogicalRemaining()
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
if finished {
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
b = b[:sr.LogicalRemaining()]
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
b0 := b
|
|
|
|
endPos := sr.pos + int64(len(b))
|
|
|
|
for endPos > sr.pos && err == nil {
|
|
|
|
var nf int // Bytes read in fragment
|
|
|
|
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
|
|
|
|
if sr.pos < holeStart { // In a data fragment
|
|
|
|
bf := b[:min(int64(len(b)), holeStart-sr.pos)]
|
|
|
|
nf, err = tryReadFull(sr.fr, bf)
|
|
|
|
} else { // In a hole fragment
|
|
|
|
bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
|
|
|
|
nf, err = tryReadFull(zeroReader{}, bf)
|
|
|
|
}
|
|
|
|
b = b[nf:]
|
|
|
|
sr.pos += int64(nf)
|
|
|
|
if sr.pos >= holeEnd && len(sr.sp) > 1 {
|
|
|
|
sr.sp = sr.sp[1:] // Ensure last fragment always remains
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
}
|
2015-09-28 17:38:16 -06:00
|
|
|
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
n = len(b0) - len(b)
|
|
|
|
switch {
|
|
|
|
case err == io.EOF:
|
|
|
|
return n, errMissData // Less data in dense file than sparse file
|
|
|
|
case err != nil:
|
|
|
|
return n, err
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return n, errUnrefData // More data in dense file than sparse file
|
|
|
|
case finished:
|
|
|
|
return n, io.EOF
|
|
|
|
default:
|
|
|
|
return n, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
|
|
|
|
ws, ok := w.(io.WriteSeeker)
|
|
|
|
if ok {
|
|
|
|
if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
|
|
|
|
ok = false // Not all io.Seeker can really seek
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !ok {
|
|
|
|
return io.Copy(w, struct{ io.Reader }{sr})
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
}
|
|
|
|
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
var writeLastByte bool
|
|
|
|
pos0 := sr.pos
|
|
|
|
for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
var nf int64 // Size of fragment
|
|
|
|
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
|
|
|
|
if sr.pos < holeStart { // In a data fragment
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
nf = holeStart - sr.pos
|
|
|
|
nf, err = io.CopyN(ws, sr.fr, nf)
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
} else { // In a hole fragment
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
nf = holeEnd - sr.pos
|
|
|
|
if sr.PhysicalRemaining() == 0 {
|
|
|
|
writeLastByte = true
|
|
|
|
nf--
|
|
|
|
}
|
|
|
|
_, err = ws.Seek(nf, io.SeekCurrent)
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
}
|
|
|
|
sr.pos += nf
|
|
|
|
if sr.pos >= holeEnd && len(sr.sp) > 1 {
|
|
|
|
sr.sp = sr.sp[1:] // Ensure last fragment always remains
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
// If the last fragment is a hole, then seek to 1-byte before EOF, and
|
|
|
|
// write a single byte to ensure the file is the right size.
|
|
|
|
if writeLastByte && err == nil {
|
|
|
|
_, err = ws.Write([]byte{0})
|
|
|
|
sr.pos++
|
|
|
|
}
|
|
|
|
|
|
|
|
n = sr.pos - pos0
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
switch {
|
|
|
|
case err == io.EOF:
|
|
|
|
return n, errMissData // Less data in dense file than sparse file
|
|
|
|
case err != nil:
|
|
|
|
return n, err
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return n, errUnrefData // More data in dense file than sparse file
|
|
|
|
default:
|
|
|
|
return n, nil
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
func (sr sparseFileReader) LogicalRemaining() int64 {
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
|
|
|
|
}
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
func (sr sparseFileReader) PhysicalRemaining() int64 {
|
|
|
|
return sr.fr.PhysicalRemaining()
|
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
|
|
|
|
type zeroReader struct{}
|
|
|
|
|
|
|
|
func (zeroReader) Read(b []byte) (int, error) {
|
|
|
|
for i := range b {
|
|
|
|
b[i] = 0
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
return len(b), nil
|
|
|
|
}
|
2014-04-03 14:01:04 -06:00
|
|
|
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// mustReadFull is like io.ReadFull except it returns
|
|
|
|
// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read.
|
|
|
|
func mustReadFull(r io.Reader, b []byte) (int, error) {
|
|
|
|
n, err := tryReadFull(r, b)
|
|
|
|
if err == io.EOF {
|
|
|
|
err = io.ErrUnexpectedEOF
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
2015-09-28 17:38:16 -06:00
|
|
|
return n, err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
|
|
|
|
archive/tar: refactor Reader support for sparse files
This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.
As a result of this CL, there are some new publicly visible API changes:
type SparseEntry struct { Offset, Length int64 }
type Header struct { ...; SparseHoles []SparseEntry }
A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represent sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.
It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.
Some unexported helper functions were added to common.go:
func validateSparseEntries(sp []SparseEntry, size int64) bool
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry
The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).
Some other high-level changes:
* skipUnread is deleted, where most of it's logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support was completely rewritten.
Updates #13548
Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
|
|
|
// tryReadFull is like io.ReadFull except it returns
|
|
|
|
// io.EOF when it is hit before len(b) bytes are read.
|
|
|
|
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
|
|
|
|
for len(b) > n && err == nil {
|
|
|
|
var nn int
|
|
|
|
nn, err = r.Read(b[n:])
|
|
|
|
n += nn
|
|
|
|
}
|
|
|
|
if len(b) == n && err == io.EOF {
|
|
|
|
err = nil
|
|
|
|
}
|
|
|
|
return n, err
|
2014-04-03 14:01:04 -06:00
|
|
|
}
|
archive/tar: add Reader.WriteTo and Writer.ReadFrom
To support the efficient packing and extracting of sparse files,
add two new methods:
func Reader.WriteTo(io.Writer) (int64, error)
func Writer.ReadFrom(io.Reader) (int64, error)
If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
* for Reader.WriteTo to write a single byte
to ensure that the resulting filesize is correct.
* for Writer.ReadFrom to read a single byte
to verify that the input filesize is correct.
The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.
File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.
For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.
Other reasons we choose this approach over special-casing *os.File because:
* The Reader already has special-case logic for io.Seeker
* As much as possible, we want to decouple OS-specific logic from
Reader and Writer.
* This allows other abstractions over *os.File to also benefit from
the "skip past holes" logic.
* It is easier to test, since it is harder to mock an *os.File.
Updates #13548
Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
|
|
|
|
|
|
|
// discard skips n bytes in r, reporting an error if unable to do so.
|
|
|
|
func discard(r io.Reader, n int64) error {
|
|
|
|
// If possible, Seek to the last byte before the end of the data section.
|
|
|
|
// Do this because Seek is often lazy about reporting errors; this will mask
|
|
|
|
// the fact that the stream may be truncated. We can rely on the
|
|
|
|
// io.CopyN done shortly afterwards to trigger any IO errors.
|
|
|
|
var seekSkipped int64 // Number of bytes skipped via Seek
|
|
|
|
if sr, ok := r.(io.Seeker); ok && n > 1 {
|
|
|
|
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
|
|
|
|
// io.Seeker, but calling Seek always returns an error and performs
|
|
|
|
// no action. Thus, we try an innocent seek to the current position
|
|
|
|
// to see if Seek is really supported.
|
|
|
|
pos1, err := sr.Seek(0, io.SeekCurrent)
|
|
|
|
if pos1 >= 0 && err == nil {
|
|
|
|
// Seek seems supported, so perform the real Seek.
|
|
|
|
pos2, err := sr.Seek(n-1, io.SeekCurrent)
|
|
|
|
if pos2 < 0 || err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
seekSkipped = pos2 - pos1
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped)
|
|
|
|
if err == io.EOF && seekSkipped+copySkipped < n {
|
|
|
|
err = io.ErrUnexpectedEOF
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|