1
0
mirror of https://github.com/golang/go synced 2024-11-08 05:36:13 -07:00
go/src/archive/tar/reader.go

859 lines
26 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar
import (
"bytes"
"io"
"io/ioutil"
"strconv"
"strings"
"time"
)
// Reader provides sequential access to the contents of a tar archive.
// Reader.Next advances to the next file in the archive (including the first),
// and then Reader can be treated as an io.Reader to access the file's data.
type Reader struct {
r io.Reader
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
pad int64 // Amount of padding (ignored) after current file entry
curr fileReader // Reader for current file entry
blk block // Buffer to use as temporary local storage
// err is a persistent error.
// Every exported method of Reader, and only those methods, is
// responsible for ensuring that this error is sticky.
err error
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
type fileReader interface {
io.Reader
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
fileState
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
WriteTo(io.Writer) (int64, error)
}
// NewReader creates a new Reader reading from r.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
func NewReader(r io.Reader) *Reader {
	tr := new(Reader)
	// Remember the underlying stream that every entry is read from.
	tr.r = r
	// Start with a regular-file reader over the same stream, constructed with
	// a second field of 0 (presumably "no bytes remaining" — confirm against
	// the regFileReader definition).
	tr.curr = &regFileReader{r, 0}
	return tr
}
// Next moves the Reader forward to the following entry in the tar archive
// and returns that entry's header.
// The number of bytes that can be read for the entry is given by Header.Size.
// Any data of the current file that has not been read is discarded automatically.
//
// At the end of the input, io.EOF is returned.
func (tr *Reader) Next() (*Header, error) {
	// A previously recorded error is sticky: report it again without
	// touching the underlying stream.
	if tr.err != nil {
		return nil, tr.err
	}
	// Store whatever next reports so that later calls observe the same error.
	var hdr *Header
	hdr, tr.err = tr.next()
	return hdr, tr.err
}
func (tr *Reader) next() (*Header, error) {
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
var paxHdrs map[string]string
var gnuLongName, gnuLongLink string
// Externally, Next iterates through the tar archive as if it is a series of
// files. Internally, the tar format often uses fake "files" to add meta
// data that describes the next file. These meta data "files" should not
// normally be visible to the outside. As such, this loop iterates through
// one or more "header files" until it finds a "normal file".
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
format := FormatUSTAR | FormatPAX | FormatGNU
for {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// Discard the remainder of the file and any padding.
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
return nil, err
}
if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
return nil, err
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
tr.pad = 0
hdr, rawHdr, err := tr.readHeader()
if err != nil {
return nil, err
}
if err := tr.handleRegularFile(hdr); err != nil {
return nil, err
}
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
format.mayOnlyBe(hdr.Format)
// Check for PAX/GNU special headers and files.
switch hdr.Typeflag {
archive/tar: add raw support for global PAX records The PAX specification says the following: <<< 'g' represents global extended header records for the following files in the archive. The format of these extended header records shall be as described in pax Extended Header. Each value shall affect all subsequent files that do not override that value in their own extended header record and until another global extended header record is reached that provides another value for the same field. >>> This CL adds support for parsing and composing global PAX records, but intentionally does not provide support for automatically persisting the global state across files. Changes made: * When Reader encounters a TypeXGlobalRecord header, it parses the PAX records and returns them to the user verbatim. Reader does not store them in its state, ensuring it has no effect on future Next calls. * When Writer receives a TypeXGlobalRecord header, it writes the PAX records to the archive verbatim. It does not store them in its state, ensuring it has no effect on future WriteHeader calls. * The restriction regarding empty record values is lifted since this value is used to represent deletion in global headers. Why provide raw support only: * Some archives in the wild have a global header section (often empty) and it is the user's responsibility to manually read and discard its body. The logic added here allows users to more easily skip over these sections. * For users that do care about global headers, having access to the raw records allows them to implement the functionality of global headers themselves and manually persist the global state across files. * We can still upgrade to a full implementation in the future. Why we don't provide full support: * Even though the PAX specification describes their operation in detail, both the GNU and BSD tar tools (which are the most common implementations) do not have a consistent interpretation of many details. 
* Global headers were a controversial feature in PAX, by admission of the specification itself: <<< The concept of a global extended header (typeflag g) was controversial. The typeflag g global headers should not be used with interchange media that could suffer partial data loss in transporting the archive. >>> * Having state persist from entry-to-entry complicates the implementation for a feature that is not widely used and not well supported. Change-Id: I1d904cacc2623ddcaa91525a5470b7dbe226c7e8 Reviewed-on: https://go-review.googlesource.com/59190 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
2017-08-25 16:15:41 -06:00
case TypeXHeader, TypeXGlobalHeader:
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
format.mayOnlyBe(FormatPAX)
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
paxHdrs, err = parsePAX(tr)
if err != nil {
return nil, err
archive/tar: properly handle header-only "files" in Reader Certain special type-flags, specifically 1, 2, 3, 4, 5, 6, do not have a data section. Thus, regardless of what the size field says, we should not attempt to read any data for these special types. The relevant PAX and USTAR specification says: <<< If the typeflag field is set to specify a file to be of type 1 (a link) or 2 (a symbolic link), the size field shall be specified as zero. If the typeflag field is set to specify a file of type 5 (directory), the size field shall be interpreted as described under the definition of that record type. No data logical records are stored for types 1, 2, or 5. If the typeflag field is set to 3 (character special file), 4 (block special file), or 6 (FIFO), the meaning of the size field is unspecified by this volume of POSIX.1-2008, and no data logical records shall be stored on the medium. Additionally, for type 6, the size field shall be ignored when reading. If the typeflag field is set to any other value, the number of logical records written following the header shall be (size+511)/512, ignoring any fraction in the result of the division. >>> Contrary to the specification, we do not assert that the size field is zero for type 1 and 2 since we liberally accept non-conforming formats. Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed Reviewed-on: https://go-review.googlesource.com/16614 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-03 19:12:31 -07:00
}
archive/tar: add raw support for global PAX records The PAX specification says the following: <<< 'g' represents global extended header records for the following files in the archive. The format of these extended header records shall be as described in pax Extended Header. Each value shall affect all subsequent files that do not override that value in their own extended header record and until another global extended header record is reached that provides another value for the same field. >>> This CL adds support for parsing and composing global PAX records, but intentionally does not provide support for automatically persisting the global state across files. Changes made: * When Reader encounters a TypeXGlobalRecord header, it parses the PAX records and returns them to the user verbatim. Reader does not store them in its state, ensuring it has no effect on future Next calls. * When Writer receives a TypeXGlobalRecord header, it writes the PAX records to the archive verbatim. It does not store them in its state, ensuring it has no effect on future WriteHeader calls. * The restriction regarding empty record values is lifted since this value is used to represent deletion in global headers. Why provide raw support only: * Some archives in the wild have a global header section (often empty) and it is the user's responsibility to manually read and discard its body. The logic added here allows users to more easily skip over these sections. * For users that do care about global headers, having access to the raw records allows them to implement the functionality of global headers themselves and manually persist the global state across files. * We can still upgrade to a full implementation in the future. Why we don't provide full support: * Even though the PAX specification describes their operation in detail, both the GNU and BSD tar tools (which are the most common implementations) do not have a consistent interpretation of many details. 
* Global headers were a controversial feature in PAX, by admission of the specification itself: <<< The concept of a global extended header (typeflag g) was controversial. The typeflag g global headers should not be used with interchange media that could suffer partial data loss in transporting the archive. >>> * Having state persist from entry-to-entry complicates the implementation for a feature that is not widely used and not well supported. Change-Id: I1d904cacc2623ddcaa91525a5470b7dbe226c7e8 Reviewed-on: https://go-review.googlesource.com/59190 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
2017-08-25 16:15:41 -06:00
if hdr.Typeflag == TypeXGlobalHeader {
mergePAX(hdr, paxHdrs)
return &Header{
Name: hdr.Name,
archive/tar: add raw support for global PAX records The PAX specification says the following: <<< 'g' represents global extended header records for the following files in the archive. The format of these extended header records shall be as described in pax Extended Header. Each value shall affect all subsequent files that do not override that value in their own extended header record and until another global extended header record is reached that provides another value for the same field. >>> This CL adds support for parsing and composing global PAX records, but intentionally does not provide support for automatically persisting the global state across files. Changes made: * When Reader encounters a TypeXGlobalRecord header, it parses the PAX records and returns them to the user verbatim. Reader does not store them in its state, ensuring it has no effect on future Next calls. * When Writer receives a TypeXGlobalRecord header, it writes the PAX records to the archive verbatim. It does not store them in its state, ensuring it has no effect on future WriteHeader calls. * The restriction regarding empty record values is lifted since this value is used to represent deletion in global headers. Why provide raw support only: * Some archives in the wild have a global header section (often empty) and it is the user's responsibility to manually read and discard its body. The logic added here allows users to more easily skip over these sections. * For users that do care about global headers, having access to the raw records allows them to implement the functionality of global headers themselves and manually persist the global state across files. * We can still upgrade to a full implementation in the future. Why we don't provide full support: * Even though the PAX specification describes their operation in detail, both the GNU and BSD tar tools (which are the most common implementations) do not have a consistent interpretation of many details. 
* Global headers were a controversial feature in PAX, by admission of the specification itself: <<< The concept of a global extended header (typeflag g) was controversial. The typeflag g global headers should not be used with interchange media that could suffer partial data loss in transporting the archive. >>> * Having state persist from entry-to-entry complicates the implementation for a feature that is not widely used and not well supported. Change-Id: I1d904cacc2623ddcaa91525a5470b7dbe226c7e8 Reviewed-on: https://go-review.googlesource.com/59190 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
2017-08-25 16:15:41 -06:00
Typeflag: hdr.Typeflag,
Xattrs: hdr.Xattrs,
PAXRecords: hdr.PAXRecords,
Format: format,
}, nil
}
continue // This is a meta header affecting the next header
case TypeGNULongName, TypeGNULongLink:
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
format.mayOnlyBe(FormatGNU)
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
var p parser
switch hdr.Typeflag {
case TypeGNULongName:
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
gnuLongName = p.parseString(realname)
case TypeGNULongLink:
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
gnuLongLink = p.parseString(realname)
}
continue // This is a meta header affecting the next header
default:
// The old GNU sparse format is handled here since it is technically
// just a regular file with additional attributes.
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
if err := mergePAX(hdr, paxHdrs); err != nil {
return nil, err
}
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
if gnuLongName != "" {
hdr.Name = gnuLongName
}
if gnuLongLink != "" {
hdr.Linkname = gnuLongLink
}
if hdr.Typeflag == TypeRegA {
if strings.HasSuffix(hdr.Name, "/") {
hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories
} else {
hdr.Typeflag = TypeReg
}
}
// The extended headers may have updated the size.
// Thus, setup the regFileReader again after merging PAX headers.
if err := tr.handleRegularFile(hdr); err != nil {
return nil, err
}
// Sparse formats rely on being able to read from the logical data
// section; there must be a preceding call to handleRegularFile.
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
if err := tr.handleSparseFile(hdr, rawHdr); err != nil {
return nil, err
}
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
// Set the final guess at the format.
if format.has(FormatUSTAR) && format.has(FormatPAX) {
format.mayOnlyBe(FormatUSTAR)
}
hdr.Format = format
return hdr, nil // This is a file, so stop
}
}
}
// handleRegularFile sets up the current file reader and padding such that it
// can only read the following logical data section. It will properly handle
// special headers that contain no data section.
func (tr *Reader) handleRegularFile(hdr *Header) error {
nb := hdr.Size
if isHeaderOnlyType(hdr.Typeflag) {
nb = 0
}
if nb < 0 {
return ErrHeader
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
tr.pad = blockPadding(nb)
tr.curr = &regFileReader{r: tr.r, nb: nb}
return nil
}
// handleSparseFile checks if the current file is a sparse format of any type
// and sets the curr reader appropriately.
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
var spd sparseDatas
var err error
if hdr.Typeflag == TypeGNUSparse {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
} else {
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
spd, err = tr.readGNUSparsePAXHeaders(hdr)
}
// If sp is non-nil, then this is a sparse file.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// Note that it is possible for len(sp) == 0.
if err == nil && spd != nil {
if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
return ErrHeader
}
sph := invertSparseEntries(spd, hdr.Size)
tr.curr = &sparseFileReader{tr.curr, sph, 0}
}
return err
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.
// If they are found, then this function reads the sparse map and returns it.
// This assumes that 0.0 headers have already been converted to 0.1 headers
// by the PAX header parsing logic.
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// Identify the version of GNU headers.
var is1x0 bool
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor]
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
switch {
case major == "0" && (minor == "0" || minor == "1"):
is1x0 = false
case major == "1" && minor == "0":
is1x0 = true
case major != "" || minor != "":
return nil, nil // Unknown GNU sparse PAX version
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
case hdr.PAXRecords[paxGNUSparseMap] != "":
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess
default:
return nil, nil // Not a PAX format GNU sparse file.
}
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
hdr.Format.mayOnlyBe(FormatPAX)
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// Update hdr from GNU sparse PAX headers.
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
if name := hdr.PAXRecords[paxGNUSparseName]; name != "" {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
hdr.Name = name
}
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
size := hdr.PAXRecords[paxGNUSparseSize]
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
if size == "" {
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
size = hdr.PAXRecords[paxGNUSparseRealSize]
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
}
if size != "" {
n, err := strconv.ParseInt(size, 10, 64)
if err != nil {
return nil, ErrHeader
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
hdr.Size = n
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// Read the sparse map according to the appropriate format.
if is1x0 {
return readGNUSparseMap1x0(tr.curr)
}
return readGNUSparseMap0x1(hdr.PAXRecords)
}
archive/tar: add raw support for global PAX records The PAX specification says the following: <<< 'g' represents global extended header records for the following files in the archive. The format of these extended header records shall be as described in pax Extended Header. Each value shall affect all subsequent files that do not override that value in their own extended header record and until another global extended header record is reached that provides another value for the same field. >>> This CL adds support for parsing and composing global PAX records, but intentionally does not provide support for automatically persisting the global state across files. Changes made: * When Reader encounters a TypeXGlobalRecord header, it parses the PAX records and returns them to the user ad-verbatim. Reader does not store them in its state, ensuring it has no effect on future Next calls. * When Writer receives a TypeXGlobalRecord header, it writes the PAX records to the archive ad-verbatim. It does not store them in its state, ensuring it has no effect on future WriteHeader calls. * The restriction regarding empty record values is lifted since this value is used to represent deletion in global headers. Why provide raw support only: * Some archives in the wild have a global header section (often empty) and it is the user's responsibility to manually read and discard it's body. The logic added here allows users to more easily skip over these sections. * For users that do care about global headers, having access to the raw records allows them to implement the functionality of global headers themselves and manually persist the global state across files. * We can still upgrade to a full implementation in the future. Why we don't provide full support: * Even though the PAX specification describes their operation in detail, both the GNU and BSD tar tools (which are the most common implementations) do not have a consistent interpretation of many details. 
* Global headers were a controversial feature in PAX, by admission of the specification itself: <<< The concept of a global extended header (typeflag g) was controversial. The typeflag g global headers should not be used with interchange media that could suffer partial data loss in transporting the archive. >>> * Having state persist from entry-to-entry complicates the implementation for a feature that is not widely used and not well supported. Change-Id: I1d904cacc2623ddcaa91525a5470b7dbe226c7e8 Reviewed-on: https://go-review.googlesource.com/59190 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
2017-08-25 16:15:41 -06:00
// mergePAX merges paxHdrs into hdr for all relevant fields of Header.
func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
for k, v := range paxHdrs {
if v == "" {
continue // Keep the original USTAR value
}
var id64 int64
switch k {
case paxPath:
hdr.Name = v
case paxLinkpath:
hdr.Linkname = v
case paxUname:
hdr.Uname = v
case paxGname:
hdr.Gname = v
case paxUid:
id64, err = strconv.ParseInt(v, 10, 64)
hdr.Uid = int(id64) // Integer overflow possible
case paxGid:
id64, err = strconv.ParseInt(v, 10, 64)
hdr.Gid = int(id64) // Integer overflow possible
case paxAtime:
hdr.AccessTime, err = parsePAXTime(v)
case paxMtime:
hdr.ModTime, err = parsePAXTime(v)
case paxCtime:
hdr.ChangeTime, err = parsePAXTime(v)
case paxSize:
hdr.Size, err = strconv.ParseInt(v, 10, 64)
default:
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
if strings.HasPrefix(k, paxSchilyXattr) {
if hdr.Xattrs == nil {
hdr.Xattrs = make(map[string]string)
}
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
hdr.Xattrs[k[len(paxSchilyXattr):]] = v
}
}
if err != nil {
return ErrHeader
}
}
archive/tar: add raw support for global PAX records The PAX specification says the following: <<< 'g' represents global extended header records for the following files in the archive. The format of these extended header records shall be as described in pax Extended Header. Each value shall affect all subsequent files that do not override that value in their own extended header record and until another global extended header record is reached that provides another value for the same field. >>> This CL adds support for parsing and composing global PAX records, but intentionally does not provide support for automatically persisting the global state across files. Changes made: * When Reader encounters a TypeXGlobalRecord header, it parses the PAX records and returns them to the user ad-verbatim. Reader does not store them in its state, ensuring it has no effect on future Next calls. * When Writer receives a TypeXGlobalRecord header, it writes the PAX records to the archive ad-verbatim. It does not store them in its state, ensuring it has no effect on future WriteHeader calls. * The restriction regarding empty record values is lifted since this value is used to represent deletion in global headers. Why provide raw support only: * Some archives in the wild have a global header section (often empty) and it is the user's responsibility to manually read and discard it's body. The logic added here allows users to more easily skip over these sections. * For users that do care about global headers, having access to the raw records allows them to implement the functionality of global headers themselves and manually persist the global state across files. * We can still upgrade to a full implementation in the future. Why we don't provide full support: * Even though the PAX specification describes their operation in detail, both the GNU and BSD tar tools (which are the most common implementations) do not have a consistent interpretation of many details. 
* Global headers were a controversial feature in PAX, by admission of the specification itself: <<< The concept of a global extended header (typeflag g) was controversial. The typeflag g global headers should not be used with interchange media that could suffer partial data loss in transporting the archive. >>> * Having state persist from entry-to-entry complicates the implementation for a feature that is not widely used and not well supported. Change-Id: I1d904cacc2623ddcaa91525a5470b7dbe226c7e8 Reviewed-on: https://go-review.googlesource.com/59190 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
2017-08-25 16:15:41 -06:00
hdr.PAXRecords = paxHdrs
return nil
}
// parsePAX parses PAX headers.
// If an extended header (type 'x') is invalid, ErrHeader is returned.
func parsePAX(r io.Reader) (map[string]string, error) {
buf, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
sbuf := string(buf)
// For GNU PAX sparse format 0.0 support.
// This function transforms the sparse format 0.0 headers into format 0.1
// headers since 0.0 headers were not PAX compliant.
var sparseMap []string
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
paxHdrs := make(map[string]string)
for len(sbuf) > 0 {
key, value, residual, err := parsePAXRecord(sbuf)
if err != nil {
return nil, ErrHeader
}
sbuf = residual
switch key {
case paxGNUSparseOffset, paxGNUSparseNumBytes:
// Validate sparse header order and value.
if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
(len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
strings.Contains(value, ",") {
return nil, ErrHeader
}
sparseMap = append(sparseMap, value)
default:
archive/tar: add raw support for global PAX records The PAX specification says the following: <<< 'g' represents global extended header records for the following files in the archive. The format of these extended header records shall be as described in pax Extended Header. Each value shall affect all subsequent files that do not override that value in their own extended header record and until another global extended header record is reached that provides another value for the same field. >>> This CL adds support for parsing and composing global PAX records, but intentionally does not provide support for automatically persisting the global state across files. Changes made: * When Reader encounters a TypeXGlobalRecord header, it parses the PAX records and returns them to the user ad-verbatim. Reader does not store them in its state, ensuring it has no effect on future Next calls. * When Writer receives a TypeXGlobalRecord header, it writes the PAX records to the archive ad-verbatim. It does not store them in its state, ensuring it has no effect on future WriteHeader calls. * The restriction regarding empty record values is lifted since this value is used to represent deletion in global headers. Why provide raw support only: * Some archives in the wild have a global header section (often empty) and it is the user's responsibility to manually read and discard it's body. The logic added here allows users to more easily skip over these sections. * For users that do care about global headers, having access to the raw records allows them to implement the functionality of global headers themselves and manually persist the global state across files. * We can still upgrade to a full implementation in the future. Why we don't provide full support: * Even though the PAX specification describes their operation in detail, both the GNU and BSD tar tools (which are the most common implementations) do not have a consistent interpretation of many details. 
* Global headers were a controversial feature in PAX, by admission of the specification itself: <<< The concept of a global extended header (typeflag g) was controversial. The typeflag g global headers should not be used with interchange media that could suffer partial data loss in transporting the archive. >>> * Having state persist from entry-to-entry complicates the implementation for a feature that is not widely used and not well supported. Change-Id: I1d904cacc2623ddcaa91525a5470b7dbe226c7e8 Reviewed-on: https://go-review.googlesource.com/59190 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
2017-08-25 16:15:41 -06:00
paxHdrs[key] = value
}
}
if len(sparseMap) > 0 {
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
}
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
return paxHdrs, nil
}
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary. It returns the raw block of the
// header in case further processing is required.
//
// The err will be set to io.EOF only when one of the following occurs:
// * Exactly 0 bytes are read and EOF is hit.
// * Exactly 1 block of zeros is read and EOF is hit.
// * At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() (*Header, *block, error) {
// Two blocks of zero bytes marks the end of the archive.
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
return nil, nil, err // EOF is okay here; exactly 0 bytes read
}
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
}
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read
}
return nil, nil, ErrHeader // Zero block and then non-zero block
}
// Verify the header matches a known format.
format := tr.blk.GetFormat()
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
if format == FormatUnknown {
return nil, nil, ErrHeader
}
var p parser
hdr := new(Header)
// Unpack the V7 header.
v7 := tr.blk.V7()
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
hdr.Typeflag = v7.TypeFlag()[0]
hdr.Name = p.parseString(v7.Name())
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
hdr.Linkname = p.parseString(v7.LinkName())
hdr.Size = p.parseNumeric(v7.Size())
hdr.Mode = p.parseNumeric(v7.Mode())
hdr.Uid = int(p.parseNumeric(v7.UID()))
hdr.Gid = int(p.parseNumeric(v7.GID()))
hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
// Unpack format specific fields.
if format > formatV7 {
ustar := tr.blk.USTAR()
hdr.Uname = p.parseString(ustar.UserName())
hdr.Gname = p.parseString(ustar.GroupName())
hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
hdr.Devminor = p.parseNumeric(ustar.DevMinor())
var prefix string
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
switch {
case format.has(FormatUSTAR | FormatPAX):
hdr.Format = format
ustar := tr.blk.USTAR()
prefix = p.parseString(ustar.Prefix())
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
// For Format detection, check if block is properly formatted since
// the parser is more liberal than what USTAR actually permits.
notASCII := func(r rune) bool { return r >= 0x80 }
if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 {
hdr.Format = FormatUnknown // Non-ASCII characters in block.
}
nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 }
if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) &&
nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) {
hdr.Format = FormatUnknown // Numeric fields must end in NUL
}
case format.has(formatSTAR):
star := tr.blk.STAR()
prefix = p.parseString(star.Prefix())
hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
case format.has(FormatGNU):
hdr.Format = format
var p2 parser
gnu := tr.blk.GNU()
if b := gnu.AccessTime(); b[0] != 0 {
hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0)
}
if b := gnu.ChangeTime(); b[0] != 0 {
hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0)
}
// Prior to Go1.8, the Writer had a bug where it would output
// an invalid tar file in certain rare situations because the logic
// incorrectly believed that the old GNU format had a prefix field.
// This is wrong and leads to an output file that mangles the
// atime and ctime fields, which are often left unused.
//
// In order to continue reading tar files created by former, buggy
// versions of Go, we skeptically parse the atime and ctime fields.
// If we are unable to parse them and the prefix field looks like
// an ASCII string, then we fallback on the pre-Go1.8 behavior
// of treating these fields as the USTAR prefix field.
//
// Note that this will not use the fallback logic for all possible
// files generated by a pre-Go1.8 toolchain. If the generated file
// happened to have a prefix field that parses as valid
// atime and ctime fields (e.g., when they are valid octal strings),
// then it is impossible to distinguish between an valid GNU file
// and an invalid pre-Go1.8 file.
//
// See https://golang.org/issues/12594
// See https://golang.org/issues/21005
if p2.err != nil {
hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{}
ustar := tr.blk.USTAR()
if s := p.parseString(ustar.Prefix()); isASCII(s) {
prefix = s
}
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
hdr.Format = FormatUnknown // Buggy file is not GNU
}
}
if len(prefix) > 0 {
hdr.Name = prefix + "/" + hdr.Name
}
}
return hdr, &tr.blk, p.err
}
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough.
// If it's larger than four entries, then one or more extension headers are used
// to store the rest of the sparse map.
//
// The Header.Size does not reflect the size of any extended headers used.
// Thus, this function will read from the raw io.Reader to fetch extra headers.
// This method mutates blk in the process.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
// Make sure that the input format is GNU.
// Unfortunately, the STAR format also has a sparse header format that uses
// the same type flag but has a completely different layout.
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
if blk.GetFormat() != FormatGNU {
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
return nil, ErrHeader
}
archive/tar: support reporting and selecting the format The Reader and Writer are now at feature parity, meaning that everything that can be parsed by the Reader, can also be composed by the Writer. This position enables us to support selection of the format in a backwards compatible way, since it ensures that everything that can be read can also be round-trip written. As such, we add the following new API: type Format int const FormatUnknown Format = 0 ... type Header struct { ...; Format Format } The new Header.Format field is populated by the Reader on the best guess on what the format is. Note that the Reader is very liberal in what it permits, so a hybrid TAR file using aspects of multiple formats can still be decoded, but will be reported as FormatUnknown. Even though Reader has full support for V7 and basic support for STAR, it will still report those formats as unknown (and the constants for those formats are not even exported). The reasons for this is because the Writer has no support for V7 or STAR. Leaving it as unknown allows the Writer to choose a format usually USTAR or GNU that can encode the equivalent Header. When writing, the Header.allowedFormats will take the Format field into consideration if it is a known format. Fixes #18710 Change-Id: I00980c475d067c6969d3414e1ff0224fdd89cd49 Reviewed-on: https://go-review.googlesource.com/58230 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-23 16:56:24 -06:00
hdr.Format.mayOnlyBe(FormatGNU)
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
var p parser
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
hdr.Size = p.parseNumeric(blk.GNU().RealSize())
if p.err != nil {
return nil, p.err
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represents sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of its logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten with simpler, easier-to-understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support were completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
s := blk.GNU().Sparse()
spd := make(sparseDatas, 0, s.MaxEntries())
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
for {
for i := 0; i < s.MaxEntries(); i++ {
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
// This termination condition is identical to GNU and BSD tar.
if s.Entry(i).Offset()[0] == 0x00 {
break // Don't return, need to process extended headers (even if empty)
}
offset := p.parseNumeric(s.Entry(i).Offset())
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
length := p.parseNumeric(s.Entry(i).Length())
if p.err != nil {
return nil, p.err
}
spd = append(spd, sparseEntry{Offset: offset, Length: length})
}
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
if s.IsExtended()[0] > 0 {
// There are more entries. Read an extension header and parse its entries.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
if _, err := mustReadFull(tr.r, blk[:]); err != nil {
archive/tar: fix and cleanup readOldGNUSparseMap * Assert that the format is GNU. Both GNU and STAR have some form of sparse file support with incompatible header structures. Worse yet, both formats use the 'S' type flag to indicate the presence of a sparse file. As such, we should check the format (based on magic numbers) and fail early. * Move realsize parsing logic into readOldGNUSparseMap. This is related to the sparse parsing logic and belongs here. * Fix the termination condition for parsing sparse fields. The termination condition for reading the sparse fields is to simply check if the first byte of the offset field is NULL. This does not seem to be documented in the GNU manual, but this is the check done by the both the GNU and BSD implementations: http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731 https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207 * Fix the parsing of sparse fields to use parseNumeric. This is what GNU and BSD do. The previous two links show that GNU and BSD both handle base-256 and base-8. * Detect truncated streams. The call to io.ReadFull does not check if the error is io.EOF. Getting io.EOF in this method is never okay and should always be converted to io.ErrUnexpectedEOF. * Simplify the function. The logic is essentially a do-while loop so we can remove some redundant code. Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e Reviewed-on: https://go-review.googlesource.com/28471 Reviewed-by: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org>
2016-09-02 17:17:37 -06:00
return nil, err
}
s = blk.Sparse()
continue
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
return spd, nil // Done
}
}
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
// version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// fields (offset, length). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
var (
cntNewline int64
buf bytes.Buffer
blk block
)
// feedTokens copies data in blocks from r into buf until there are
// at least n newlines in buf. It will not read more blocks than needed.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
feedTokens := func(n int64) error {
for cntNewline < n {
if _, err := mustReadFull(r, blk[:]); err != nil {
return err
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
buf.Write(blk[:])
for _, c := range blk {
if c == '\n' {
cntNewline++
}
}
}
return nil
}
// nextToken gets the next token delimited by a newline. This assumes that
// at least one newline exists in the buffer.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
nextToken := func() string {
cntNewline--
tok, _ := buf.ReadString('\n')
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
return strings.TrimRight(tok, "\n")
}
// Parse for the number of entries.
// Use integer overflow resistant math to check this.
if err := feedTokens(1); err != nil {
return nil, err
}
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
return nil, ErrHeader
}
// Parse for all member entries.
// numEntries is trusted after this since a potential attacker must have
// committed resources proportional to what this library used.
if err := feedTokens(2 * numEntries); err != nil {
return nil, err
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
spd := make(sparseDatas, 0, numEntries)
for i := int64(0); i < numEntries; i++ {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
length, err2 := strconv.ParseInt(nextToken(), 10, 64)
if err1 != nil || err2 != nil {
return nil, ErrHeader
}
spd = append(spd, sparseEntry{Offset: offset, Length: length})
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
return spd, nil
}
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
// version 0.1. The sparse map is stored in the PAX headers.
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
// Get number of entries.
// Use integer overflow resistant math to check this.
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
return nil, ErrHeader
}
// There should be two numbers in sparseMap for each entry.
archive/tar: support arbitrary PAX records This CL adds the following new publicly visible API: type Header struct { ...; PAXRecords map[string]string } The new Header.PAXRecords field is a map of all PAX extended header records. We suggest (but do not enforce) that users use VENDOR-prefixed keys according to the following in the PAX specification: <<< The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. >>> When reading, the Header.PAXRecords is populated with all PAX records encountered so far, including basic ones (e.g., "path", "mtime", etc). When writing, the fields of Header will be merged into PAXRecords, overwriting any records that may conflict. Since PAXRecords is a more expressive feature than Xattrs and is entirely a superset of Xattrs, we mark Xattrs as deprecated, and steer users towards the new PAXRecords API. The issue has a discussion about adding a Header.SetPAXRecord method to help validate records and keep the Header fields in sync. However, we do not include that in this CL since that helper method can always be added in the future. There is no support for global records. Fixes #14472 Change-Id: If285a52749acc733476cf75a2c7ad15bc1542071 Reviewed-on: https://go-review.googlesource.com/58390 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-24 00:44:33 -06:00
sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
if len(sparseMap) == 1 && sparseMap[0] == "" {
sparseMap = sparseMap[:0]
}
if int64(len(sparseMap)) != 2*numEntries {
return nil, ErrHeader
}
// Loop through the entries in the sparse map.
// numEntries is trusted now.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
spd := make(sparseDatas, 0, numEntries)
for len(sparseMap) >= 2 {
offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
if err1 != nil || err2 != nil {
return nil, ErrHeader
}
spd = append(spd, sparseEntry{Offset: offset, Length: length})
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
sparseMap = sparseMap[2:]
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
return spd, nil
}
// Read reads from the current file in the tar archive.
// It returns (0, io.EOF) when it reaches the end of that file,
// until Next is called to advance to the next file.
//
// If the current file is sparse, then the regions marked as a hole
// are read back as NUL-bytes.
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
//
// Calling Read on special types like TypeLink, TypeSymlink, TypeChar,
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what
// the Header.Size claims.
func (tr *Reader) Read(b []byte) (int, error) {
if tr.err != nil {
return 0, tr.err
}
n, err := tr.curr.Read(b)
if err != nil && err != io.EOF {
tr.err = err
}
return n, err
}
// writeTo writes the content of the current file to w.
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
// The bytes written matches the number of remaining bytes in the current file.
//
// If the current file is sparse and w is an io.WriteSeeker,
// then writeTo uses Seek to skip past holes defined in Header.SparseHoles,
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
// assuming that skipped regions are filled with NULs.
// This always writes the last byte to ensure w is the right size.
//
// TODO(dsnet): Re-export this when adding sparse file support.
// See https://golang.org/issue/22735
func (tr *Reader) writeTo(w io.Writer) (int64, error) {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
if tr.err != nil {
return 0, tr.err
}
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
n, err := tr.curr.WriteTo(w)
if err != nil {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
tr.err = err
}
return n, err
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// regFileReader is a fileReader for reading data from a regular file entry.
//
// Its Read method never requests more than nb bytes from r and decrements nb
// by the number of bytes actually read, so nb tracks how much of the entry is
// still unread.
type regFileReader struct {
r io.Reader // Underlying Reader that supplies the entry's bytes
nb int64 // Number of remaining bytes to read from r for this entry
}
func (fr *regFileReader) Read(b []byte) (n int, err error) {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
if int64(len(b)) > fr.nb {
b = b[:fr.nb]
}
if len(b) > 0 {
n, err = fr.r.Read(b)
fr.nb -= int64(n)
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
switch {
case err == io.EOF && fr.nb > 0:
return n, io.ErrUnexpectedEOF
case err == nil && fr.nb == 0:
return n, io.EOF
default:
return n, err
}
}
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
return io.Copy(w, struct{ io.Reader }{fr})
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
}
func (fr regFileReader) LogicalRemaining() int64 {
return fr.nb
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
}
func (fr regFileReader) PhysicalRemaining() int64 {
return fr.nb
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
}
// sparseFileReader is a fileReader for reading data from a sparse file entry.
//
// Read walks the logical file from pos: bytes that fall before the current
// hole are read from fr, bytes inside the hole are filled from zeroReader.
type sparseFileReader struct {
// fr supplies the bytes of the data fragments; Read pulls from it with
// tryReadFull whenever pos lies before the start of the current hole.
fr fileReader // Underlying fileReader
// sp[0] is the hole Read is currently working towards or inside. Read drops
// it once pos reaches its end, but never drops the final element.
sp sparseHoles // Normalized list of sparse holes
// pos is advanced by the number of bytes Read hands back, whether they came
// from a data fragment or from a hole.
pos int64 // Current position in sparse file
}
func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
finished := int64(len(b)) >= sr.LogicalRemaining()
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
if finished {
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
b = b[:sr.LogicalRemaining()]
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
}
b0 := b
endPos := sr.pos + int64(len(b))
for endPos > sr.pos && err == nil {
var nf int // Bytes read in fragment
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
if sr.pos < holeStart { // In a data fragment
bf := b[:min(int64(len(b)), holeStart-sr.pos)]
nf, err = tryReadFull(sr.fr, bf)
} else { // In a hole fragment
bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
nf, err = tryReadFull(zeroReader{}, bf)
}
b = b[nf:]
sr.pos += int64(nf)
if sr.pos >= holeEnd && len(sr.sp) > 1 {
sr.sp = sr.sp[1:] // Ensure last fragment always remains
}
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
n = len(b0) - len(b)
switch {
case err == io.EOF:
return n, errMissData // Less data in dense file than sparse file
case err != nil:
return n, err
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
return n, errUnrefData // More data in dense file than sparse file
case finished:
return n, io.EOF
default:
return n, nil
}
}
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
ws, ok := w.(io.WriteSeeker)
if ok {
if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
ok = false // Not all io.Seeker can really seek
}
}
if !ok {
return io.Copy(w, struct{ io.Reader }{sr})
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
}
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
var writeLastByte bool
pos0 := sr.pos
for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
var nf int64 // Size of fragment
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
if sr.pos < holeStart { // In a data fragment
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we chose this approach over special-casing *os.File: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
nf = holeStart - sr.pos
nf, err = io.CopyN(ws, sr.fr, nf)
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
} else { // In a hole fragment
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
nf = holeEnd - sr.pos
if sr.PhysicalRemaining() == 0 {
writeLastByte = true
nf--
}
_, err = ws.Seek(nf, io.SeekCurrent)
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
}
sr.pos += nf
if sr.pos >= holeEnd && len(sr.sp) > 1 {
sr.sp = sr.sp[1:] // Ensure last fragment always remains
}
}
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
// If the last fragment is a hole, then seek to 1-byte before EOF, and
// write a single byte to ensure the file is the right size.
if writeLastByte && err == nil {
_, err = ws.Write([]byte{0})
sr.pos++
}
n = sr.pos - pos0
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
switch {
case err == io.EOF:
return n, errMissData // Less data in dense file than sparse file
case err != nil:
return n, err
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
return n, errUnrefData // More data in dense file than sparse file
default:
return n, nil
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
}
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
// LogicalRemaining reports the distance from the current position sr.pos to
// the end offset of the last entry in sr.sp.
// It indexes the final element of sr.sp directly, so sr.sp must hold at least
// one entry when this is called.
// NOTE(review): the final entry's end offset presumably marks the logical end
// of the sparse file — confirm against the code that builds sr.sp.
func (sr sparseFileReader) LogicalRemaining() int64 {
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
}
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
// PhysicalRemaining forwards to the wrapped reader sr.fr and returns whatever
// its own PhysicalRemaining method reports, without modification.
func (sr sparseFileReader) PhysicalRemaining() int64 {
	inner := sr.fr
	return inner.PhysicalRemaining()
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// zeroReader is an io.Reader that produces an endless stream of zero bytes.
type zeroReader struct{}

// Read overwrites every byte of b with zero and reports that len(b) bytes
// were read. It never returns an error, so callers must bound how much they
// read themselves.
func (zeroReader) Read(b []byte) (int, error) {
	for i := range b {
		b[i] = 0
	}
	return len(b), nil
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// mustReadFull reads into b via tryReadFull and treats a premature end of
// input as a hard failure: an io.EOF result from tryReadFull is reported as
// io.ErrUnexpectedEOF. Any other result is passed through untouched, along
// with the number of bytes read.
func mustReadFull(r io.Reader, b []byte) (int, error) {
	switch n, err := tryReadFull(r, b); err {
	case io.EOF:
		// The stream ended before b was filled.
		return n, io.ErrUnexpectedEOF
	default:
		return n, err
	}
}
archive/tar: refactor Reader support for sparse files This CL is the first step (of two) for adding sparse file support to the Writer. This CL only refactors the logic of sparse-file handling in the Reader so that common logic can be easily shared by the Writer. As a result of this CL, there are some new publicly visible API changes: type SparseEntry struct { Offset, Length int64 } type Header struct { ...; SparseHoles []SparseEntry } A new type is defined to represent a sparse fragment and a new field Header.SparseHoles is added to represent the sparse holes in a file. The API intentionally represent sparse files using hole fragments, rather than data fragments so that the zero value of SparseHoles naturally represents a normal file (i.e., a file without any holes). The Reader now populates SparseHoles for sparse files. It is necessary to export the sparse hole information, otherwise it would be impossible for the Writer to specify that it is trying to encode a sparse file, and what it looks like. Some unexported helper functions were added to common.go: func validateSparseEntries(sp []SparseEntry, size int64) bool func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry The validation logic that used to be in newSparseFileReader is now moved to validateSparseEntries so that the Writer can use it in the future. alignSparseEntries is currently unused by the Reader, but will be used by the Writer in the future. Since TAR represents sparse files by only recording the data fragments, we add the invertSparseEntries function to convert a list of data fragments to a normalized list of hole fragments (and vice-versa). Some other high-level changes: * skipUnread is deleted, where most of it's logic is moved to the Discard methods on regFileReader and sparseFileReader. * readGNUSparsePAXHeaders was rewritten to be simpler. 
* regFileReader and sparseFileReader were completely rewritten in simpler and easier to understand logic. * A bug was fixed in sparseFileReader.Read where it failed to report an error if the logical size of the file ends before consuming all of the underlying data. * The tests for sparse-file support was completely rewritten. Updates #13548 Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2 Reviewed-on: https://go-review.googlesource.com/56771 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-14 23:03:25 -06:00
// tryReadFull keeps calling r.Read until b is full or a read reports an
// error. Unlike io.ReadFull, a short read that ends in io.EOF is reported as
// io.EOF rather than io.ErrUnexpectedEOF. It returns the number of bytes
// placed in b and the error that stopped the loop, if any.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for n < len(b) {
		var got int
		got, err = r.Read(b[n:])
		n += got
		if err != nil {
			break
		}
	}
	// A reader may return the final bytes together with io.EOF; if b was
	// filled completely, that EOF is not an error for this call.
	if err == io.EOF && n == len(b) {
		err = nil
	}
	return n, err
}
archive/tar: add Reader.WriteTo and Writer.ReadFrom To support the efficient packing and extracting of sparse files, add two new methods: func Reader.WriteTo(io.Writer) (int64, error) func Writer.ReadFrom(io.Reader) (int64, error) If the current archive entry is sparse and the provided io.{Reader,Writer} is also an io.Seeker, then use Seek to skip past the holes. If the last region in a file entry is a hole, then we seek to 1 byte before the EOF: * for Reader.WriteTo to write a single byte to ensure that the resulting filesize is correct. * for Writer.ReadFrom to read a single byte to verify that the input filesize is correct. The downside of this approach is when the last region in the sparse file is a hole. In the case of Reader.WriteTo, the 1-byte write will cause the last fragment to have a single chunk allocated. However, the goal of ReadFrom/WriteTo is *not* the ability to exactly reproduce sparse files (in terms of the location of sparse holes), but rather to provide an efficient way to create them. File systems already impose their own restrictions on how the sparse file will be created. Some filesystems (e.g., HFS+) don't support sparseness and seeking forward simply causes the FS to write zeros. Other filesystems have different chunk sizes, which will cause chunk allocations at boundaries different from what was in the original sparse file. In either case, it should not be a normal expectation of users that the location of holes in sparse files exactly matches the source. For users that really desire to have exact reproduction of sparse holes, they can wrap os.File with their own io.WriteSeeker that discards the final 1-byte write and uses File.Truncate to resize the file to the correct size. Other reasons we choose this approach over special-casing *os.File because: * The Reader already has special-case logic for io.Seeker * As much as possible, we want to decouple OS-specific logic from Reader and Writer. 
* This allows other abstractions over *os.File to also benefit from the "skip past holes" logic. * It is easier to test, since it is harder to mock an *os.File. Updates #13548 Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a Reviewed-on: https://go-review.googlesource.com/60872 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-08-31 15:52:20 -06:00
// discard advances r by n bytes and returns an error if it cannot.
// When the input ends before n bytes have been skipped, the error is
// io.ErrUnexpectedEOF.
func discard(r io.Reader, n int64) error {
	// Seek is frequently lazy about reporting errors, so jumping the full
	// distance could hide a truncated stream. Instead, when r can seek, jump
	// to one byte short of the target and let the io.CopyN below read the
	// final byte, which surfaces any IO error.
	var viaSeek int64 // bytes skipped by seeking
	if seeker, canSeek := r.(io.Seeker); canSeek && n > 1 {
		// Implementing io.Seeker does not guarantee Seek works: os.Stdin, for
		// instance, has the method but it always fails without doing
		// anything. Probe with a harmless seek to the current offset first.
		if start, err := seeker.Seek(0, io.SeekCurrent); err == nil && start >= 0 {
			// The probe succeeded, so do the real jump.
			end, err := seeker.Seek(n-1, io.SeekCurrent)
			if err != nil || end < 0 {
				return err
			}
			viaSeek = end - start
		}
	}

	// Consume whatever was not covered by the seek by reading it.
	viaCopy, err := io.CopyN(ioutil.Discard, r, n-viaSeek)
	if err != io.EOF || viaSeek+viaCopy >= n {
		return err
	}
	return io.ErrUnexpectedEOF
}